summaryrefslogtreecommitdiff
path: root/chromium/components/visitedlink/browser
diff options
context:
space:
mode:
Diffstat (limited to 'chromium/components/visitedlink/browser')
-rw-r--r--chromium/components/visitedlink/browser/DEPS3
-rw-r--r--chromium/components/visitedlink/browser/visitedlink_delegate.h51
-rw-r--r--chromium/components/visitedlink/browser/visitedlink_event_listener.cc219
-rw-r--r--chromium/components/visitedlink/browser/visitedlink_event_listener.h70
-rw-r--r--chromium/components/visitedlink/browser/visitedlink_master.cc989
-rw-r--r--chromium/components/visitedlink/browser/visitedlink_master.h445
6 files changed, 1777 insertions, 0 deletions
diff --git a/chromium/components/visitedlink/browser/DEPS b/chromium/components/visitedlink/browser/DEPS
new file mode 100644
index 00000000000..1c35d9ca694
--- /dev/null
+++ b/chromium/components/visitedlink/browser/DEPS
@@ -0,0 +1,3 @@
+include_rules = [
+ "+content/public/browser",
+]
diff --git a/chromium/components/visitedlink/browser/visitedlink_delegate.h b/chromium/components/visitedlink/browser/visitedlink_delegate.h
new file mode 100644
index 00000000000..6adf7de5718
--- /dev/null
+++ b/chromium/components/visitedlink/browser/visitedlink_delegate.h
@@ -0,0 +1,51 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_DELEGATE_H_
+#define COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_DELEGATE_H_
+
+#include "base/memory/ref_counted.h"
+
+class GURL;
+
+namespace content {
+class BrowserContext;
+}
+
+namespace visitedlink {
+
+// Delegate class that clients of VisitedLinkMaster must implement.
+class VisitedLinkDelegate {
+ public:
+ // See RebuildTable.
+ class URLEnumerator : public base::RefCountedThreadSafe<URLEnumerator> {
+ public:
+ // Call this with each URL to rebuild the table.
+ virtual void OnURL(const GURL& url) = 0;
+
+ // This must be called by Delegate after RebuildTable is called. |success|
+ // indicates all URLs have been returned successfully. The URLEnumerator
+ // object cannot be used by the delegate after this call.
+ virtual void OnComplete(bool success) = 0;
+
+ protected:
+ virtual ~URLEnumerator() {}
+
+ private:
+ friend class base::RefCountedThreadSafe<URLEnumerator>;
+ };
+
+ // Delegate class is responsible for persisting the list of visited URLs
+ // across browser runs. This is called by VisitedLinkMaster to repopulate
+ // its internal table. Note that methods on enumerator can be called on any
+ // thread but the delegate is responsible for synchronizating the calls.
+ virtual void RebuildTable(const scoped_refptr<URLEnumerator>& enumerator) = 0;
+
+ protected:
+ virtual ~VisitedLinkDelegate() {}
+};
+
+} // namespace visitedlink
+
+#endif // COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_DELEGATE_H_
diff --git a/chromium/components/visitedlink/browser/visitedlink_event_listener.cc b/chromium/components/visitedlink/browser/visitedlink_event_listener.cc
new file mode 100644
index 00000000000..a96a0e3aa55
--- /dev/null
+++ b/chromium/components/visitedlink/browser/visitedlink_event_listener.cc
@@ -0,0 +1,219 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/visitedlink/browser/visitedlink_event_listener.h"
+
+#include "base/memory/shared_memory.h"
+#include "components/visitedlink/browser/visitedlink_delegate.h"
+#include "components/visitedlink/common/visitedlink_messages.h"
+#include "content/public/browser/notification_service.h"
+#include "content/public/browser/notification_types.h"
+#include "content/public/browser/render_process_host.h"
+#include "content/public/browser/render_widget_host.h"
+
+using base::Time;
+using base::TimeDelta;
+using content::RenderWidgetHost;
+
+namespace {
+
+// The amount of time we wait to accumulate visited link additions.
+const int kCommitIntervalMs = 100;
+
+// Size of the buffer after which individual link updates deemed not warranted
+// and the overall update should be used instead.
+const unsigned kVisitedLinkBufferThreshold = 50;
+
+} // namespace
+
+namespace visitedlink {
+
+// This class manages buffering and sending visited link hashes (fingerprints)
+// to renderer based on widget visibility.
+// As opposed to the VisitedLinkEventListener, which coalesces to
+// reduce the rate of messages being sent to render processes, this class
+// ensures that the updates occur only when explicitly requested. This is
+// used for RenderProcessHostImpl to only send Add/Reset link events to the
+// renderers when their tabs are visible and the corresponding RenderViews are
+// created.
+class VisitedLinkUpdater {
+ public:
+ explicit VisitedLinkUpdater(int render_process_id)
+ : reset_needed_(false), render_process_id_(render_process_id) {
+ }
+
+ // Informs the renderer about a new visited link table.
+ void SendVisitedLinkTable(base::SharedMemory* table_memory) {
+ content::RenderProcessHost* process =
+ content::RenderProcessHost::FromID(render_process_id_);
+ if (!process)
+ return; // Happens in tests
+ base::SharedMemoryHandle handle_for_process;
+ table_memory->ShareToProcess(process->GetHandle(), &handle_for_process);
+ if (base::SharedMemory::IsHandleValid(handle_for_process))
+ process->Send(new ChromeViewMsg_VisitedLink_NewTable(
+ handle_for_process));
+ }
+
+ // Buffers |links| to update, but doesn't actually relay them.
+ void AddLinks(const VisitedLinkCommon::Fingerprints& links) {
+ if (reset_needed_)
+ return;
+
+ if (pending_.size() + links.size() > kVisitedLinkBufferThreshold) {
+ // Once the threshold is reached, there's no need to store pending visited
+ // link updates -- we opt for resetting the state for all links.
+ AddReset();
+ return;
+ }
+
+ pending_.insert(pending_.end(), links.begin(), links.end());
+ }
+
+ // Tells the updater that sending individual link updates is no longer
+ // necessary and the visited state for all links should be reset.
+ void AddReset() {
+ reset_needed_ = true;
+ pending_.clear();
+ }
+
+ // Sends visited link update messages: a list of links whose visited state
+ // changed or reset of visited state for all links.
+ void Update() {
+ content::RenderProcessHost* process =
+ content::RenderProcessHost::FromID(render_process_id_);
+ if (!process)
+ return; // Happens in tests
+
+ if (!process->VisibleWidgetCount())
+ return;
+
+ if (reset_needed_) {
+ process->Send(new ChromeViewMsg_VisitedLink_Reset());
+ reset_needed_ = false;
+ return;
+ }
+
+ if (pending_.empty())
+ return;
+
+ process->Send(new ChromeViewMsg_VisitedLink_Add(pending_));
+
+ pending_.clear();
+ }
+
+ private:
+ bool reset_needed_;
+ int render_process_id_;
+ VisitedLinkCommon::Fingerprints pending_;
+};
+
+VisitedLinkEventListener::VisitedLinkEventListener(
+ VisitedLinkMaster* master,
+ content::BrowserContext* browser_context)
+ : master_(master),
+ browser_context_(browser_context) {
+ registrar_.Add(this, content::NOTIFICATION_RENDERER_PROCESS_CREATED,
+ content::NotificationService::AllBrowserContextsAndSources());
+ registrar_.Add(this, content::NOTIFICATION_RENDERER_PROCESS_TERMINATED,
+ content::NotificationService::AllBrowserContextsAndSources());
+ registrar_.Add(this, content::NOTIFICATION_RENDER_WIDGET_VISIBILITY_CHANGED,
+ content::NotificationService::AllBrowserContextsAndSources());
+}
+
+VisitedLinkEventListener::~VisitedLinkEventListener() {
+ if (!pending_visited_links_.empty())
+ pending_visited_links_.clear();
+}
+
+void VisitedLinkEventListener::NewTable(base::SharedMemory* table_memory) {
+ if (!table_memory)
+ return;
+
+ // Send to all RenderProcessHosts.
+ for (Updaters::iterator i = updaters_.begin(); i != updaters_.end(); ++i) {
+ // Make sure to not send to incognito renderers.
+ content::RenderProcessHost* process =
+ content::RenderProcessHost::FromID(i->first);
+ if (!process)
+ continue;
+
+ i->second->SendVisitedLinkTable(table_memory);
+ }
+}
+
+void VisitedLinkEventListener::Add(VisitedLinkMaster::Fingerprint fingerprint) {
+ pending_visited_links_.push_back(fingerprint);
+
+ if (!coalesce_timer_.IsRunning()) {
+ coalesce_timer_.Start(FROM_HERE,
+ TimeDelta::FromMilliseconds(kCommitIntervalMs), this,
+ &VisitedLinkEventListener::CommitVisitedLinks);
+ }
+}
+
+void VisitedLinkEventListener::Reset() {
+ pending_visited_links_.clear();
+ coalesce_timer_.Stop();
+
+ for (Updaters::iterator i = updaters_.begin(); i != updaters_.end(); ++i) {
+ i->second->AddReset();
+ i->second->Update();
+ }
+}
+
+void VisitedLinkEventListener::CommitVisitedLinks() {
+ // Send to all RenderProcessHosts.
+ for (Updaters::iterator i = updaters_.begin(); i != updaters_.end(); ++i) {
+ i->second->AddLinks(pending_visited_links_);
+ i->second->Update();
+ }
+
+ pending_visited_links_.clear();
+}
+
+void VisitedLinkEventListener::Observe(
+ int type,
+ const content::NotificationSource& source,
+ const content::NotificationDetails& details) {
+ switch (type) {
+ case content::NOTIFICATION_RENDERER_PROCESS_CREATED: {
+ content::RenderProcessHost* process =
+ content::Source<content::RenderProcessHost>(source).ptr();
+ if (browser_context_ != process->GetBrowserContext())
+ return;
+
+ // Happens on browser start up.
+ if (!master_->shared_memory())
+ return;
+
+ updaters_[process->GetID()] =
+ make_linked_ptr(new VisitedLinkUpdater(process->GetID()));
+ updaters_[process->GetID()]->SendVisitedLinkTable(
+ master_->shared_memory());
+ break;
+ }
+ case content::NOTIFICATION_RENDERER_PROCESS_TERMINATED: {
+ content::RenderProcessHost* process =
+ content::Source<content::RenderProcessHost>(source).ptr();
+ if (updaters_.count(process->GetID())) {
+ updaters_.erase(process->GetID());
+ }
+ break;
+ }
+ case content::NOTIFICATION_RENDER_WIDGET_VISIBILITY_CHANGED: {
+ RenderWidgetHost* widget =
+ content::Source<RenderWidgetHost>(source).ptr();
+ int child_id = widget->GetProcess()->GetID();
+ if (updaters_.count(child_id))
+ updaters_[child_id]->Update();
+ break;
+ }
+ default:
+ NOTREACHED();
+ break;
+ }
+}
+
+} // namespace visitedlink
diff --git a/chromium/components/visitedlink/browser/visitedlink_event_listener.h b/chromium/components/visitedlink/browser/visitedlink_event_listener.h
new file mode 100644
index 00000000000..c2de5d205d0
--- /dev/null
+++ b/chromium/components/visitedlink/browser/visitedlink_event_listener.h
@@ -0,0 +1,70 @@
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_EVENT_LISTENER_H_
+#define COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_EVENT_LISTENER_H_
+
+#include <map>
+
+#include "base/memory/linked_ptr.h"
+#include "base/timer/timer.h"
+#include "components/visitedlink/browser/visitedlink_master.h"
+#include "content/public/browser/notification_observer.h"
+#include "content/public/browser/notification_registrar.h"
+
+namespace base {
+class SharedMemory;
+}
+
+namespace content {
+class BrowserContext;
+}
+
+namespace visitedlink {
+
+class VisitedLinkUpdater;
+
+// VisitedLinkEventListener broadcasts link coloring database updates to all
+// processes. It also coalesces the updates to avoid excessive broadcasting of
+// messages to the renderers.
+class VisitedLinkEventListener : public VisitedLinkMaster::Listener,
+ public content::NotificationObserver {
+ public:
+ VisitedLinkEventListener(VisitedLinkMaster* master,
+ content::BrowserContext* browser_context);
+ virtual ~VisitedLinkEventListener();
+
+ virtual void NewTable(base::SharedMemory* table_memory) OVERRIDE;
+ virtual void Add(VisitedLinkMaster::Fingerprint fingerprint) OVERRIDE;
+ virtual void Reset() OVERRIDE;
+
+ private:
+ void CommitVisitedLinks();
+
+ // content::NotificationObserver implementation.
+ virtual void Observe(int type,
+ const content::NotificationSource& source,
+ const content::NotificationDetails& details) OVERRIDE;
+
+ base::OneShotTimer<VisitedLinkEventListener> coalesce_timer_;
+ VisitedLinkCommon::Fingerprints pending_visited_links_;
+
+ content::NotificationRegistrar registrar_;
+
+ // Map between renderer child ids and their VisitedLinkUpdater.
+ typedef std::map<int, linked_ptr<VisitedLinkUpdater> > Updaters;
+ Updaters updaters_;
+
+ VisitedLinkMaster* master_;
+
+ // Used to filter RENDERER_PROCESS_CREATED notifications to renderers that
+ // belong to this BrowserContext.
+ content::BrowserContext* browser_context_;
+
+ DISALLOW_COPY_AND_ASSIGN(VisitedLinkEventListener);
+};
+
+} // namespace visitedlink
+
+#endif // COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_EVENT_LISTENER_H_
diff --git a/chromium/components/visitedlink/browser/visitedlink_master.cc b/chromium/components/visitedlink/browser/visitedlink_master.cc
new file mode 100644
index 00000000000..7cf68911af7
--- /dev/null
+++ b/chromium/components/visitedlink/browser/visitedlink_master.cc
@@ -0,0 +1,989 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/visitedlink/browser/visitedlink_master.h"
+
+#if defined(OS_WIN)
+#include <windows.h>
+#include <io.h>
+#include <shlobj.h>
+#endif // defined(OS_WIN)
+#include <stdio.h>
+
+#include <algorithm>
+
+#include "base/bind.h"
+#include "base/bind_helpers.h"
+#include "base/containers/stack_container.h"
+#include "base/file_util.h"
+#include "base/logging.h"
+#include "base/message_loop/message_loop.h"
+#include "base/path_service.h"
+#include "base/rand_util.h"
+#include "base/strings/string_util.h"
+#include "base/threading/thread_restrictions.h"
+#include "components/visitedlink/browser/visitedlink_delegate.h"
+#include "components/visitedlink/browser/visitedlink_event_listener.h"
+#include "content/public/browser/browser_context.h"
+#include "content/public/browser/browser_thread.h"
+#include "url/gurl.h"
+
+using content::BrowserThread;
+using file_util::ScopedFILE;
+using file_util::OpenFile;
+using file_util::TruncateFile;
+
+namespace visitedlink {
+
+const int32 VisitedLinkMaster::kFileHeaderSignatureOffset = 0;
+const int32 VisitedLinkMaster::kFileHeaderVersionOffset = 4;
+const int32 VisitedLinkMaster::kFileHeaderLengthOffset = 8;
+const int32 VisitedLinkMaster::kFileHeaderUsedOffset = 12;
+const int32 VisitedLinkMaster::kFileHeaderSaltOffset = 16;
+
+const int32 VisitedLinkMaster::kFileCurrentVersion = 3;
+
+// the signature at the beginning of the URL table = "VLnk" (visited links)
+const int32 VisitedLinkMaster::kFileSignature = 0x6b6e4c56;
+const size_t VisitedLinkMaster::kFileHeaderSize =
+ kFileHeaderSaltOffset + LINK_SALT_LENGTH;
+
+// This value should also be the same as the smallest size in the lookup
+// table in NewTableSizeForCount (prime number).
+const unsigned VisitedLinkMaster::kDefaultTableSize = 16381;
+
+const size_t VisitedLinkMaster::kBigDeleteThreshold = 64;
+
+namespace {
+
+// Fills the given salt structure with some quasi-random values
+// It is not necessary to generate a cryptographically strong random string,
+// only that it be reasonably different for different users.
+void GenerateSalt(uint8 salt[LINK_SALT_LENGTH]) {
+ DCHECK_EQ(LINK_SALT_LENGTH, 8) << "This code assumes the length of the salt";
+ uint64 randval = base::RandUint64();
+ memcpy(salt, &randval, 8);
+}
+
+// Opens file on a background thread to not block UI thread.
+void AsyncOpen(FILE** file, const base::FilePath& filename) {
+ *file = OpenFile(filename, "wb+");
+ DLOG_IF(ERROR, !(*file)) << "Failed to open file " << filename.value();
+}
+
+// Returns true if the write was complete.
+static bool WriteToFile(FILE* file,
+ off_t offset,
+ const void* data,
+ size_t data_len) {
+ if (fseek(file, offset, SEEK_SET) != 0)
+ return false; // Don't write to an invalid part of the file.
+
+ size_t num_written = fwrite(data, 1, data_len, file);
+
+ // The write may not make it to the kernel (stdlib may buffer the write)
+ // until the next fseek/fclose call. If we crash, it's easy for our used
+ // item count to be out of sync with the number of hashes we write.
+ // Protect against this by calling fflush.
+ int ret = fflush(file);
+ DCHECK_EQ(0, ret);
+ return num_written == data_len;
+}
+
+// This task executes on a background thread and executes a write. This
+// prevents us from blocking the UI thread doing I/O. Double pointer to FILE
+// is used because file may still not be opened by the time of scheduling
+// the task for execution.
+void AsyncWrite(FILE** file, int32 offset, const std::string& data) {
+ if (*file)
+ WriteToFile(*file, offset, data.data(), data.size());
+}
+
+// Truncates the file to the current position asynchronously on a background
+// thread. Double pointer to FILE is used because file may still not be opened
+// by the time of scheduling the task for execution.
+void AsyncTruncate(FILE** file) {
+ if (*file)
+ base::IgnoreResult(TruncateFile(*file));
+}
+
+// Closes the file on a background thread and releases memory used for storage
+// of FILE* value. Double pointer to FILE is used because file may still not
+// be opened by the time of scheduling the task for execution.
+void AsyncClose(FILE** file) {
+ if (*file)
+ base::IgnoreResult(fclose(*file));
+ free(file);
+}
+
+} // namespace
+
+// TableBuilder ---------------------------------------------------------------
+
+// How rebuilding from history works
+// ---------------------------------
+//
+// We mark that we're rebuilding from history by setting the table_builder_
+// member in VisitedLinkMaster to the TableBuilder we create. This builder
+// will be called on the history thread by the history system for every URL
+// in the database.
+//
+// The builder will store the fingerprints for those URLs, and then marshalls
+// back to the main thread where the VisitedLinkMaster will be notified. The
+// master then replaces its table with a new table containing the computed
+// fingerprints.
+//
+// The builder must remain active while the history system is using it.
+// Sometimes, the master will be deleted before the rebuild is complete, in
+// which case it notifies the builder via DisownMaster(). The builder will
+// delete itself once rebuilding is complete, and not execute any callback.
+class VisitedLinkMaster::TableBuilder
+ : public VisitedLinkDelegate::URLEnumerator {
+ public:
+ TableBuilder(VisitedLinkMaster* master,
+ const uint8 salt[LINK_SALT_LENGTH]);
+
+ // Called on the main thread when the master is being destroyed. This will
+ // prevent a crash when the query completes and the master is no longer
+ // around. We can not actually do anything but mark this fact, since the
+ // table will be being rebuilt simultaneously on the other thread.
+ void DisownMaster();
+
+ // VisitedLinkDelegate::URLEnumerator
+ virtual void OnURL(const GURL& url) OVERRIDE;
+ virtual void OnComplete(bool succeed) OVERRIDE;
+
+ private:
+ virtual ~TableBuilder() {}
+
+ // OnComplete mashals to this function on the main thread to do the
+ // notification.
+ void OnCompleteMainThread();
+
+ // Owner of this object. MAY ONLY BE ACCESSED ON THE MAIN THREAD!
+ VisitedLinkMaster* master_;
+
+ // Indicates whether the operation has failed or not.
+ bool success_;
+
+ // Salt for this new table.
+ uint8 salt_[LINK_SALT_LENGTH];
+
+ // Stores the fingerprints we computed on the background thread.
+ VisitedLinkCommon::Fingerprints fingerprints_;
+
+ DISALLOW_COPY_AND_ASSIGN(TableBuilder);
+};
+
+// VisitedLinkMaster ----------------------------------------------------------
+
+VisitedLinkMaster::VisitedLinkMaster(content::BrowserContext* browser_context,
+ VisitedLinkDelegate* delegate,
+ bool persist_to_disk)
+ : browser_context_(browser_context),
+ delegate_(delegate),
+ listener_(new VisitedLinkEventListener(this, browser_context)),
+ persist_to_disk_(persist_to_disk) {
+ InitMembers();
+}
+
+VisitedLinkMaster::VisitedLinkMaster(Listener* listener,
+ VisitedLinkDelegate* delegate,
+ bool persist_to_disk,
+ bool suppress_rebuild,
+ const base::FilePath& filename,
+ int32 default_table_size)
+ : browser_context_(NULL),
+ delegate_(delegate),
+ persist_to_disk_(persist_to_disk) {
+ listener_.reset(listener);
+ DCHECK(listener_.get());
+ InitMembers();
+
+ database_name_override_ = filename;
+ table_size_override_ = default_table_size;
+ suppress_rebuild_ = suppress_rebuild;
+}
+
+VisitedLinkMaster::~VisitedLinkMaster() {
+ if (table_builder_.get()) {
+ // Prevent the table builder from calling us back now that we're being
+ // destroyed. Note that we DON'T delete the object, since the history
+ // system is still writing into it. When that is complete, the table
+ // builder will destroy itself when it finds we are gone.
+ table_builder_->DisownMaster();
+ }
+ FreeURLTable();
+ // FreeURLTable() will schedule closing of the file and deletion of |file_|.
+ // So nothing should be done here.
+}
+
+void VisitedLinkMaster::InitMembers() {
+ file_ = NULL;
+ shared_memory_ = NULL;
+ shared_memory_serial_ = 0;
+ used_items_ = 0;
+ table_size_override_ = 0;
+ suppress_rebuild_ = false;
+ sequence_token_ = BrowserThread::GetBlockingPool()->GetSequenceToken();
+
+#ifndef NDEBUG
+ posted_asynchronous_operation_ = false;
+#endif
+}
+
+bool VisitedLinkMaster::Init() {
+ // We probably shouldn't be loading this from the UI thread,
+ // but it does need to happen early on in startup.
+ // http://code.google.com/p/chromium/issues/detail?id=24163
+ base::ThreadRestrictions::ScopedAllowIO allow_io;
+
+ if (persist_to_disk_) {
+ if (InitFromFile())
+ return true;
+ }
+ return InitFromScratch(suppress_rebuild_);
+}
+
+VisitedLinkMaster::Hash VisitedLinkMaster::TryToAddURL(const GURL& url) {
+ // Extra check that we are not incognito. This should not happen.
+ // TODO(boliu): Move this check to HistoryService when IsOffTheRecord is
+ // removed from BrowserContext.
+ if (browser_context_ && browser_context_->IsOffTheRecord()) {
+ NOTREACHED();
+ return null_hash_;
+ }
+
+ if (!url.is_valid())
+ return null_hash_; // Don't add invalid URLs.
+
+ Fingerprint fingerprint = ComputeURLFingerprint(url.spec().data(),
+ url.spec().size(),
+ salt_);
+ if (table_builder_.get()) {
+ // If we have a pending delete for this fingerprint, cancel it.
+ std::set<Fingerprint>::iterator found =
+ deleted_since_rebuild_.find(fingerprint);
+ if (found != deleted_since_rebuild_.end())
+ deleted_since_rebuild_.erase(found);
+
+ // A rebuild is in progress, save this addition in the temporary list so
+ // it can be added once rebuild is complete.
+ added_since_rebuild_.insert(fingerprint);
+ }
+
+ // If the table is "full", we don't add URLs and just drop them on the floor.
+ // This can happen if we get thousands of new URLs and something causes
+ // the table resizing to fail. This check prevents a hang in that case. Note
+ // that this is *not* the resize limit, this is just a sanity check.
+ if (used_items_ / 8 > table_length_ / 10)
+ return null_hash_; // Table is more than 80% full.
+
+ return AddFingerprint(fingerprint, true);
+}
+
+void VisitedLinkMaster::PostIOTask(const tracked_objects::Location& from_here,
+ const base::Closure& task) {
+ DCHECK(persist_to_disk_);
+ BrowserThread::GetBlockingPool()->PostSequencedWorkerTask(sequence_token_,
+ from_here, task);
+}
+
+void VisitedLinkMaster::AddURL(const GURL& url) {
+ Hash index = TryToAddURL(url);
+ if (!table_builder_.get() && index != null_hash_) {
+ // Not rebuilding, so we want to keep the file on disk up-to-date.
+ if (persist_to_disk_) {
+ WriteUsedItemCountToFile();
+ WriteHashRangeToFile(index, index);
+ }
+ ResizeTableIfNecessary();
+ }
+}
+
+void VisitedLinkMaster::AddURLs(const std::vector<GURL>& url) {
+ for (std::vector<GURL>::const_iterator i = url.begin();
+ i != url.end(); ++i) {
+ Hash index = TryToAddURL(*i);
+ if (!table_builder_.get() && index != null_hash_)
+ ResizeTableIfNecessary();
+ }
+
+ // Keeps the file on disk up-to-date.
+ if (!table_builder_.get() && persist_to_disk_)
+ WriteFullTable();
+}
+
+void VisitedLinkMaster::DeleteAllURLs() {
+ // Any pending modifications are invalid.
+ added_since_rebuild_.clear();
+ deleted_since_rebuild_.clear();
+
+ // Clear the hash table.
+ used_items_ = 0;
+ memset(hash_table_, 0, this->table_length_ * sizeof(Fingerprint));
+
+ // Resize it if it is now too empty. Resize may write the new table out for
+ // us, otherwise, schedule writing the new table to disk ourselves.
+ if (!ResizeTableIfNecessary() && persist_to_disk_)
+ WriteFullTable();
+
+ listener_->Reset();
+}
+
+VisitedLinkDelegate* VisitedLinkMaster::GetDelegate() {
+ return delegate_;
+}
+
+void VisitedLinkMaster::DeleteURLs(URLIterator* urls) {
+ if (!urls->HasNextURL())
+ return;
+
+ listener_->Reset();
+
+ if (table_builder_.get()) {
+ // A rebuild is in progress, save this deletion in the temporary list so
+ // it can be added once rebuild is complete.
+ while (urls->HasNextURL()) {
+ const GURL& url(urls->NextURL());
+ if (!url.is_valid())
+ continue;
+
+ Fingerprint fingerprint =
+ ComputeURLFingerprint(url.spec().data(), url.spec().size(), salt_);
+ deleted_since_rebuild_.insert(fingerprint);
+
+ // If the URL was just added and now we're deleting it, it may be in the
+ // list of things added since the last rebuild. Delete it from that list.
+ std::set<Fingerprint>::iterator found =
+ added_since_rebuild_.find(fingerprint);
+ if (found != added_since_rebuild_.end())
+ added_since_rebuild_.erase(found);
+
+ // Delete the URLs from the in-memory table, but don't bother writing
+ // to disk since it will be replaced soon.
+ DeleteFingerprint(fingerprint, false);
+ }
+ return;
+ }
+
+ // Compute the deleted URLs' fingerprints and delete them
+ std::set<Fingerprint> deleted_fingerprints;
+ while (urls->HasNextURL()) {
+ const GURL& url(urls->NextURL());
+ if (!url.is_valid())
+ continue;
+ deleted_fingerprints.insert(
+ ComputeURLFingerprint(url.spec().data(), url.spec().size(), salt_));
+ }
+ DeleteFingerprintsFromCurrentTable(deleted_fingerprints);
+}
+
+// See VisitedLinkCommon::IsVisited which should be in sync with this algorithm
+VisitedLinkMaster::Hash VisitedLinkMaster::AddFingerprint(
+ Fingerprint fingerprint,
+ bool send_notifications) {
+ if (!hash_table_ || table_length_ == 0) {
+ NOTREACHED(); // Not initialized.
+ return null_hash_;
+ }
+
+ Hash cur_hash = HashFingerprint(fingerprint);
+ Hash first_hash = cur_hash;
+ while (true) {
+ Fingerprint cur_fingerprint = FingerprintAt(cur_hash);
+ if (cur_fingerprint == fingerprint)
+ return null_hash_; // This fingerprint is already in there, do nothing.
+
+ if (cur_fingerprint == null_fingerprint_) {
+ // End of probe sequence found, insert here.
+ hash_table_[cur_hash] = fingerprint;
+ used_items_++;
+ // If allowed, notify listener that a new visited link was added.
+ if (send_notifications)
+ listener_->Add(fingerprint);
+ return cur_hash;
+ }
+
+ // Advance in the probe sequence.
+ cur_hash = IncrementHash(cur_hash);
+ if (cur_hash == first_hash) {
+ // This means that we've wrapped around and are about to go into an
+ // infinite loop. Something was wrong with the hashtable resizing
+ // logic, so stop here.
+ NOTREACHED();
+ return null_hash_;
+ }
+ }
+}
+
+void VisitedLinkMaster::DeleteFingerprintsFromCurrentTable(
+ const std::set<Fingerprint>& fingerprints) {
+ bool bulk_write = (fingerprints.size() > kBigDeleteThreshold);
+
+ // Delete the URLs from the table.
+ for (std::set<Fingerprint>::const_iterator i = fingerprints.begin();
+ i != fingerprints.end(); ++i)
+ DeleteFingerprint(*i, !bulk_write);
+
+ // These deleted fingerprints may make us shrink the table.
+ if (ResizeTableIfNecessary())
+ return; // The resize function wrote the new table to disk for us.
+
+ // Nobody wrote this out for us, write the full file to disk.
+ if (bulk_write && persist_to_disk_)
+ WriteFullTable();
+}
+
+bool VisitedLinkMaster::DeleteFingerprint(Fingerprint fingerprint,
+ bool update_file) {
+ if (!hash_table_ || table_length_ == 0) {
+ NOTREACHED(); // Not initialized.
+ return false;
+ }
+ if (!IsVisited(fingerprint))
+ return false; // Not in the database to delete.
+
+ // First update the header used count.
+ used_items_--;
+ if (update_file && persist_to_disk_)
+ WriteUsedItemCountToFile();
+
+ Hash deleted_hash = HashFingerprint(fingerprint);
+
+ // Find the range of "stuff" in the hash table that is adjacent to this
+ // fingerprint. These are things that could be affected by the change in
+ // the hash table. Since we use linear probing, anything after the deleted
+ // item up until an empty item could be affected.
+ Hash end_range = deleted_hash;
+ while (true) {
+ Hash next_hash = IncrementHash(end_range);
+ if (next_hash == deleted_hash)
+ break; // We wrapped around and the whole table is full.
+ if (!hash_table_[next_hash])
+ break; // Found the last spot.
+ end_range = next_hash;
+ }
+
+ // We could get all fancy and move the affected fingerprints around, but
+ // instead we just remove them all and re-add them (minus our deleted one).
+ // This will mean there's a small window of time where the affected links
+ // won't be marked visited.
+ base::StackVector<Fingerprint, 32> shuffled_fingerprints;
+ Hash stop_loop = IncrementHash(end_range); // The end range is inclusive.
+ for (Hash i = deleted_hash; i != stop_loop; i = IncrementHash(i)) {
+ if (hash_table_[i] != fingerprint) {
+ // Don't save the one we're deleting!
+ shuffled_fingerprints->push_back(hash_table_[i]);
+
+ // This will balance the increment of this value in AddFingerprint below
+ // so there is no net change.
+ used_items_--;
+ }
+ hash_table_[i] = null_fingerprint_;
+ }
+
+ if (!shuffled_fingerprints->empty()) {
+ // Need to add the new items back.
+ for (size_t i = 0; i < shuffled_fingerprints->size(); i++)
+ AddFingerprint(shuffled_fingerprints[i], false);
+ }
+
+ // Write the affected range to disk [deleted_hash, end_range].
+ if (update_file && persist_to_disk_)
+ WriteHashRangeToFile(deleted_hash, end_range);
+
+ return true;
+}
+
+void VisitedLinkMaster::WriteFullTable() {
+ // This function can get called when the file is open, for example, when we
+ // resize the table. We must handle this case and not try to reopen the file,
+ // since there may be write operations pending on the file I/O thread.
+ //
+ // Note that once we start writing, we do not delete on error. This means
+ // there can be a partial file, but the short file will be detected next time
+ // we start, and will be replaced.
+ //
+ // This might possibly get corrupted if we crash in the middle of writing.
+ // We should pick up the most common types of these failures when we notice
+ // that the file size is different when we load it back in, and then we will
+ // regenerate the table.
+ DCHECK(persist_to_disk_);
+
+ if (!file_) {
+ file_ = static_cast<FILE**>(calloc(1, sizeof(*file_)));
+ base::FilePath filename;
+ GetDatabaseFileName(&filename);
+ PostIOTask(FROM_HERE, base::Bind(&AsyncOpen, file_, filename));
+ }
+
+ // Write the new header.
+ int32 header[4];
+ header[0] = kFileSignature;
+ header[1] = kFileCurrentVersion;
+ header[2] = table_length_;
+ header[3] = used_items_;
+ WriteToFile(file_, 0, header, sizeof(header));
+ WriteToFile(file_, sizeof(header), salt_, LINK_SALT_LENGTH);
+
+ // Write the hash data.
+ WriteToFile(file_, kFileHeaderSize,
+ hash_table_, table_length_ * sizeof(Fingerprint));
+
+ // The hash table may have shrunk, so make sure this is the end.
+ PostIOTask(FROM_HERE, base::Bind(&AsyncTruncate, file_));
+}
+
+bool VisitedLinkMaster::InitFromFile() {
+ DCHECK(file_ == NULL);
+ DCHECK(persist_to_disk_);
+
+ base::FilePath filename;
+ GetDatabaseFileName(&filename);
+ ScopedFILE file_closer(OpenFile(filename, "rb+"));
+ if (!file_closer.get())
+ return false;
+
+ int32 num_entries, used_count;
+ if (!ReadFileHeader(file_closer.get(), &num_entries, &used_count, salt_))
+ return false; // Header isn't valid.
+
+ // Allocate and read the table.
+ if (!CreateURLTable(num_entries, false))
+ return false;
+ if (!ReadFromFile(file_closer.get(), kFileHeaderSize,
+ hash_table_, num_entries * sizeof(Fingerprint))) {
+ FreeURLTable();
+ return false;
+ }
+ used_items_ = used_count;
+
+#ifndef NDEBUG
+ DebugValidate();
+#endif
+
+ file_ = static_cast<FILE**>(malloc(sizeof(*file_)));
+ *file_ = file_closer.release();
+ return true;
+}
+
+bool VisitedLinkMaster::InitFromScratch(bool suppress_rebuild) {
+ int32 table_size = kDefaultTableSize;
+ if (table_size_override_)
+ table_size = table_size_override_;
+
+ // The salt must be generated before the table so that it can be copied to
+ // the shared memory.
+ GenerateSalt(salt_);
+ if (!CreateURLTable(table_size, true))
+ return false;
+
+#ifndef NDEBUG
+ DebugValidate();
+#endif
+
+ if (suppress_rebuild && persist_to_disk_) {
+ // When we disallow rebuilds (normally just unit tests), just use the
+ // current empty table.
+ WriteFullTable();
+ return true;
+ }
+
+ // This will build the table from history. On the first run, history will
+ // be empty, so this will be correct. This will also write the new table
+ // to disk. We don't want to save explicitly here, since the rebuild may
+ // not complete, leaving us with an empty but valid visited link database.
+ // In the future, we won't know we need to try rebuilding again.
+ return RebuildTableFromDelegate();
+}
+
+bool VisitedLinkMaster::ReadFileHeader(FILE* file,
+ int32* num_entries,
+ int32* used_count,
+ uint8 salt[LINK_SALT_LENGTH]) {
+ DCHECK(persist_to_disk_);
+
+ // Get file size.
+ // Note that there is no need to seek back to the original location in the
+ // file since ReadFromFile() [which is the next call accessing the file]
+ // seeks before reading.
+ if (fseek(file, 0, SEEK_END) == -1)
+ return false;
+ size_t file_size = ftell(file);
+
+ if (file_size <= kFileHeaderSize)
+ return false;
+
+ uint8 header[kFileHeaderSize];
+ if (!ReadFromFile(file, 0, &header, kFileHeaderSize))
+ return false;
+
+ // Verify the signature.
+ int32 signature;
+ memcpy(&signature, &header[kFileHeaderSignatureOffset], sizeof(signature));
+ if (signature != kFileSignature)
+ return false;
+
+ // Verify the version is up-to-date. As with other read errors, a version
+ // mistmatch will trigger a rebuild of the database from history, which will
+ // have the effect of migrating the database.
+ int32 version;
+ memcpy(&version, &header[kFileHeaderVersionOffset], sizeof(version));
+ if (version != kFileCurrentVersion)
+ return false; // Bad version.
+
+ // Read the table size and make sure it matches the file size.
+ memcpy(num_entries, &header[kFileHeaderLengthOffset], sizeof(*num_entries));
+ if (*num_entries * sizeof(Fingerprint) + kFileHeaderSize != file_size)
+ return false; // Bad size.
+
+ // Read the used item count.
+ memcpy(used_count, &header[kFileHeaderUsedOffset], sizeof(*used_count));
+ if (*used_count > *num_entries)
+ return false; // Bad used item count;
+
+ // Read the salt.
+ memcpy(salt, &header[kFileHeaderSaltOffset], LINK_SALT_LENGTH);
+
+ // This file looks OK from the header's perspective.
+ return true;
+}
+
+bool VisitedLinkMaster::GetDatabaseFileName(base::FilePath* filename) {
+ if (!database_name_override_.empty()) {
+ // use this filename, the directory must exist
+ *filename = database_name_override_;
+ return true;
+ }
+
+ if (!browser_context_ || browser_context_->GetPath().empty())
+ return false;
+
+ base::FilePath profile_dir = browser_context_->GetPath();
+ *filename = profile_dir.Append(FILE_PATH_LITERAL("Visited Links"));
+ return true;
+}
+
+// Initializes the shared memory structure. The salt should already be filled
+// in so that it can be written to the shared memory
+bool VisitedLinkMaster::CreateURLTable(int32 num_entries, bool init_to_empty) {
+ // The table is the size of the table followed by the entries.
+ uint32 alloc_size = num_entries * sizeof(Fingerprint) + sizeof(SharedHeader);
+
+ // Create the shared memory object.
+ shared_memory_ = new base::SharedMemory();
+ if (!shared_memory_)
+ return false;
+
+ if (!shared_memory_->CreateAndMapAnonymous(alloc_size)) {
+ delete shared_memory_;
+ shared_memory_ = NULL;
+ return false;
+ }
+
+ if (init_to_empty) {
+ memset(shared_memory_->memory(), 0, alloc_size);
+ used_items_ = 0;
+ }
+ table_length_ = num_entries;
+
+ // Save the header for other processes to read.
+ SharedHeader* header = static_cast<SharedHeader*>(shared_memory_->memory());
+ header->length = table_length_;
+ memcpy(header->salt, salt_, LINK_SALT_LENGTH);
+
+ // Our table pointer is just the data immediately following the size.
+ hash_table_ = reinterpret_cast<Fingerprint*>(
+ static_cast<char*>(shared_memory_->memory()) + sizeof(SharedHeader));
+
+ return true;
+}
+
+bool VisitedLinkMaster::BeginReplaceURLTable(int32 num_entries) {
+ base::SharedMemory *old_shared_memory = shared_memory_;
+ Fingerprint* old_hash_table = hash_table_;
+ int32 old_table_length = table_length_;
+ if (!CreateURLTable(num_entries, true)) {
+ // Try to put back the old state.
+ shared_memory_ = old_shared_memory;
+ hash_table_ = old_hash_table;
+ table_length_ = old_table_length;
+ return false;
+ }
+
+#ifndef NDEBUG
+ DebugValidate();
+#endif
+
+ return true;
+}
+
+void VisitedLinkMaster::FreeURLTable() {
+ if (shared_memory_) {
+ delete shared_memory_;
+ shared_memory_ = NULL;
+ }
+ if (!persist_to_disk_ || !file_)
+ return;
+ PostIOTask(FROM_HERE, base::Bind(&AsyncClose, file_));
+ // AsyncClose() will close the file and free the memory pointed by |file_|.
+ file_ = NULL;
+}
+
+bool VisitedLinkMaster::ResizeTableIfNecessary() {
+ DCHECK(table_length_ > 0) << "Must have a table";
+
+ // Load limits for good performance/space. We are pretty conservative about
+ // keeping the table not very full. This is because we use linear probing
+ // which increases the likelihood of clumps of entries which will reduce
+ // performance.
+ const float max_table_load = 0.5f; // Grow when we're > this full.
+ const float min_table_load = 0.2f; // Shrink when we're < this full.
+
+ float load = ComputeTableLoad();
+ if (load < max_table_load &&
+ (table_length_ <= static_cast<float>(kDefaultTableSize) ||
+ load > min_table_load))
+ return false;
+
+ // Table needs to grow or shrink.
+ int new_size = NewTableSizeForCount(used_items_);
+ DCHECK(new_size > used_items_);
+ DCHECK(load <= min_table_load || new_size > table_length_);
+ ResizeTable(new_size);
+ return true;
+}
+
+void VisitedLinkMaster::ResizeTable(int32 new_size) {
+ DCHECK(shared_memory_ && shared_memory_->memory() && hash_table_);
+ shared_memory_serial_++;
+
+#ifndef NDEBUG
+ DebugValidate();
+#endif
+
+ base::SharedMemory* old_shared_memory = shared_memory_;
+ Fingerprint* old_hash_table = hash_table_;
+ int32 old_table_length = table_length_;
+ if (!BeginReplaceURLTable(new_size))
+ return;
+
+ // Now we have two tables, our local copy which is the old one, and the new
+ // one loaded into this object where we need to copy the data.
+ for (int32 i = 0; i < old_table_length; i++) {
+ Fingerprint cur = old_hash_table[i];
+ if (cur)
+ AddFingerprint(cur, false);
+ }
+
+ // On error unmapping, just forget about it since we can't do anything
+ // else to release it.
+ delete old_shared_memory;
+
+ // Send an update notification to all child processes so they read the new
+ // table.
+ listener_->NewTable(shared_memory_);
+
+#ifndef NDEBUG
+ DebugValidate();
+#endif
+
+ // The new table needs to be written to disk.
+ if (persist_to_disk_)
+ WriteFullTable();
+}
+
+uint32 VisitedLinkMaster::NewTableSizeForCount(int32 item_count) const {
+ // These table sizes are selected to be the maximum prime number less than
+ // a "convenient" multiple of 1K.
+ static const int table_sizes[] = {
+ 16381, // 16K = 16384 <- don't shrink below this table size
+ // (should be == default_table_size)
+ 32767, // 32K = 32768
+ 65521, // 64K = 65536
+ 130051, // 128K = 131072
+ 262127, // 256K = 262144
+ 524269, // 512K = 524288
+ 1048549, // 1M = 1048576
+ 2097143, // 2M = 2097152
+ 4194301, // 4M = 4194304
+ 8388571, // 8M = 8388608
+ 16777199, // 16M = 16777216
+ 33554347}; // 32M = 33554432
+
+ // Try to leave the table 33% full.
+ int desired = item_count * 3;
+
+ // Find the closest prime.
+ for (size_t i = 0; i < arraysize(table_sizes); i ++) {
+ if (table_sizes[i] > desired)
+ return table_sizes[i];
+ }
+
+ // Growing very big, just approximate a "good" number, not growing as much
+ // as normal.
+ return item_count * 2 - 1;
+}
+
+// See the TableBuilder definition in the header file for how this works.
+bool VisitedLinkMaster::RebuildTableFromDelegate() {
+ DCHECK(!table_builder_.get());
+
+ // TODO(brettw) make sure we have reasonable salt!
+ table_builder_ = new TableBuilder(this, salt_);
+ delegate_->RebuildTable(table_builder_);
+ return true;
+}
+
+// See the TableBuilder declaration above for how this works.
+void VisitedLinkMaster::OnTableRebuildComplete(
+ bool success,
+ const std::vector<Fingerprint>& fingerprints) {
+ if (success) {
+ // Replace the old table with a new blank one.
+ shared_memory_serial_++;
+
+ // We are responsible for freeing it AFTER it has been replaced if
+ // replacement succeeds.
+ base::SharedMemory* old_shared_memory = shared_memory_;
+
+ int new_table_size = NewTableSizeForCount(
+ static_cast<int>(fingerprints.size() + added_since_rebuild_.size()));
+ if (BeginReplaceURLTable(new_table_size)) {
+ // Free the old table.
+ delete old_shared_memory;
+
+ // Add the stored fingerprints to the hash table.
+ for (size_t i = 0; i < fingerprints.size(); i++)
+ AddFingerprint(fingerprints[i], false);
+
+ // Also add anything that was added while we were asynchronously
+ // generating the new table.
+ for (std::set<Fingerprint>::iterator i = added_since_rebuild_.begin();
+ i != added_since_rebuild_.end(); ++i)
+ AddFingerprint(*i, false);
+ added_since_rebuild_.clear();
+
+ // Now handle deletions.
+ DeleteFingerprintsFromCurrentTable(deleted_since_rebuild_);
+ deleted_since_rebuild_.clear();
+
+ // Send an update notification to all child processes.
+ listener_->NewTable(shared_memory_);
+
+ if (persist_to_disk_)
+ WriteFullTable();
+ }
+ }
+ table_builder_ = NULL; // Will release our reference to the builder.
+
+ // Notify the unit test that the rebuild is complete (will be NULL in prod.)
+ if (!rebuild_complete_task_.is_null()) {
+ rebuild_complete_task_.Run();
+ rebuild_complete_task_.Reset();
+ }
+}
+
+void VisitedLinkMaster::WriteToFile(FILE** file,
+ off_t offset,
+ void* data,
+ int32 data_size) {
+ DCHECK(persist_to_disk_);
+#ifndef NDEBUG
+ posted_asynchronous_operation_ = true;
+#endif
+ PostIOTask(FROM_HERE,
+ base::Bind(&AsyncWrite, file, offset,
+ std::string(static_cast<const char*>(data), data_size)));
+}
+
+void VisitedLinkMaster::WriteUsedItemCountToFile() {
+ DCHECK(persist_to_disk_);
+ if (!file_)
+ return; // See comment on the file_ variable for why this might happen.
+ WriteToFile(file_, kFileHeaderUsedOffset, &used_items_, sizeof(used_items_));
+}
+
+void VisitedLinkMaster::WriteHashRangeToFile(Hash first_hash, Hash last_hash) {
+ DCHECK(persist_to_disk_);
+
+ if (!file_)
+ return; // See comment on the file_ variable for why this might happen.
+ if (last_hash < first_hash) {
+ // Handle wraparound at 0. This first write is first_hash->EOF
+ WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize,
+ &hash_table_[first_hash],
+ (table_length_ - first_hash + 1) * sizeof(Fingerprint));
+
+ // Now do 0->last_lash.
+ WriteToFile(file_, kFileHeaderSize, hash_table_,
+ (last_hash + 1) * sizeof(Fingerprint));
+ } else {
+ // Normal case, just write the range.
+ WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize,
+ &hash_table_[first_hash],
+ (last_hash - first_hash + 1) * sizeof(Fingerprint));
+ }
+}
+
+bool VisitedLinkMaster::ReadFromFile(FILE* file,
+ off_t offset,
+ void* data,
+ size_t data_size) {
+ DCHECK(persist_to_disk_);
+#ifndef NDEBUG
+ // Since this function is synchronous, we require that no asynchronous
+ // operations could possibly be pending.
+ DCHECK(!posted_asynchronous_operation_);
+#endif
+
+ if (fseek(file, offset, SEEK_SET) != 0)
+ return false;
+
+ size_t num_read = fread(data, 1, data_size, file);
+ return num_read == data_size;
+}
+
+// VisitedLinkTableBuilder ----------------------------------------------------
+
+VisitedLinkMaster::TableBuilder::TableBuilder(
+ VisitedLinkMaster* master,
+ const uint8 salt[LINK_SALT_LENGTH])
+ : master_(master),
+ success_(true) {
+ fingerprints_.reserve(4096);
+ memcpy(salt_, salt, LINK_SALT_LENGTH * sizeof(uint8));
+}
+
+// TODO(brettw): Do we want to try to cancel the request if this happens? It
+// could delay shutdown if there are a lot of URLs.
+void VisitedLinkMaster::TableBuilder::DisownMaster() {
+ master_ = NULL;
+}
+
+void VisitedLinkMaster::TableBuilder::OnURL(const GURL& url) {
+ if (!url.is_empty()) {
+ fingerprints_.push_back(VisitedLinkMaster::ComputeURLFingerprint(
+ url.spec().data(), url.spec().length(), salt_));
+ }
+}
+
+void VisitedLinkMaster::TableBuilder::OnComplete(bool success) {
+ success_ = success;
+ DLOG_IF(WARNING, !success) << "Unable to rebuild visited links";
+
+ // Marshal to the main thread to notify the VisitedLinkMaster that the
+ // rebuild is complete.
+ BrowserThread::PostTask(
+ BrowserThread::UI, FROM_HERE,
+ base::Bind(&TableBuilder::OnCompleteMainThread, this));
+}
+
+void VisitedLinkMaster::TableBuilder::OnCompleteMainThread() {
+ if (master_)
+ master_->OnTableRebuildComplete(success_, fingerprints_);
+}
+
+} // namespace visitedlink
diff --git a/chromium/components/visitedlink/browser/visitedlink_master.h b/chromium/components/visitedlink/browser/visitedlink_master.h
new file mode 100644
index 00000000000..aff4e498a7c
--- /dev/null
+++ b/chromium/components/visitedlink/browser/visitedlink_master.h
@@ -0,0 +1,445 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_MASTER_H_
+#define COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_MASTER_H_
+
+#if defined(OS_WIN)
+#include <windows.h>
+#endif
+#include <set>
+#include <vector>
+
+#include "base/callback.h"
+#include "base/callback_forward.h"
+#include "base/files/file_path.h"
+#include "base/gtest_prod_util.h"
+#include "base/memory/shared_memory.h"
+#include "base/threading/sequenced_worker_pool.h"
+#include "components/visitedlink/common/visitedlink_common.h"
+
+#if defined(UNIT_TEST) || defined(PERF_TEST) || !defined(NDEBUG)
+#include "base/logging.h"
+#endif
+
+class GURL;
+
+namespace content {
+class BrowserContext;
+}
+
+namespace visitedlink {
+
+class VisitedLinkDelegate;
+
+// Controls the link coloring database. The master controls all writing to the
+// database as well as disk I/O. There should be only one master.
+//
+// This class will defer writing operations to the file thread. This means that
+// class destruction, the file may still be open since operations are pending on
+// another thread.
+class VisitedLinkMaster : public VisitedLinkCommon {
+ public:
+ // Listens to the link coloring database events. The master is given this
+ // event as a constructor argument and dispatches events using it.
+ class Listener {
+ public:
+ virtual ~Listener() {}
+
+ // Called when link coloring database has been created or replaced. The
+ // argument is the new table handle.
+ virtual void NewTable(base::SharedMemory*) = 0;
+
+ // Called when new link has been added. The argument is the fingerprint
+ // (hash) of the link.
+ virtual void Add(Fingerprint fingerprint) = 0;
+
+ // Called when link coloring state has been reset. This may occur when
+ // entire or parts of history were deleted.
+ virtual void Reset() = 0;
+ };
+
+ VisitedLinkMaster(content::BrowserContext* browser_context,
+ VisitedLinkDelegate* delegate,
+ bool persist_to_disk);
+
+ // In unit test mode, we allow the caller to optionally specify the database
+ // filename so that it can be run from a unit test. The directory where this
+ // file resides must exist in this mode. You can also specify the default
+ // table size to test table resizing. If this parameter is 0, we will use the
+ // defaults.
+ //
+ // In the unit test mode, we also allow the caller to provide a history
+ // service pointer (the history service can't be fetched from the browser
+ // process when we're in unit test mode). This can be NULL to try to access
+ // the main version, which will probably fail (which can be good for testing
+ // this failure mode).
+ //
+ // When |suppress_rebuild| is set, we'll not attempt to load data from
+ // history if the file can't be loaded. This should generally be set for
+ // testing except when you want to test the rebuild process explicitly.
+ VisitedLinkMaster(Listener* listener,
+ VisitedLinkDelegate* delegate,
+ bool persist_to_disk,
+ bool suppress_rebuild,
+ const base::FilePath& filename,
+ int32 default_table_size);
+ virtual ~VisitedLinkMaster();
+
+ // Must be called immediately after object creation. Nothing else will work
+ // until this is called. Returns true on success, false means that this
+ // object won't work.
+ bool Init();
+
+ base::SharedMemory* shared_memory() { return shared_memory_; }
+
+ // Adds a URL to the table.
+ void AddURL(const GURL& url);
+
+ // Adds a set of URLs to the table.
+ void AddURLs(const std::vector<GURL>& url);
+
+ // See DeleteURLs.
+ class URLIterator {
+ public:
+ // HasNextURL must return true when this is called. Returns the next URL
+ // then advances the iterator. Note that the returned reference is only
+ // valid until the next call of NextURL.
+ virtual const GURL& NextURL() = 0;
+
+ // Returns true if still has URLs to be iterated.
+ virtual bool HasNextURL() const = 0;
+
+ protected:
+ virtual ~URLIterator() {}
+ };
+
+ // Deletes the specified URLs from |rows| from the table.
+ void DeleteURLs(URLIterator* iterator);
+
+ // Clears the visited links table by deleting the file from disk. Used as
+ // part of history clearing.
+ void DeleteAllURLs();
+
+ // Returns the Delegate of this Master.
+ VisitedLinkDelegate* GetDelegate();
+
+#if defined(UNIT_TEST) || !defined(NDEBUG) || defined(PERF_TEST)
+ // This is a debugging function that can be called to double-check internal
+ // data structures. It will assert if the check fails.
+ void DebugValidate();
+
+ // Sets a task to execute when the next rebuild from history is complete.
+ // This is used by unit tests to wait for the rebuild to complete before
+ // they continue. The pointer will be owned by this object after the call.
+ void set_rebuild_complete_task(const base::Closure& task) {
+ DCHECK(rebuild_complete_task_.is_null());
+ rebuild_complete_task_ = task;
+ }
+
+ // returns the number of items in the table for testing verification
+ int32 GetUsedCount() const {
+ return used_items_;
+ }
+
+ // Returns the listener.
+ VisitedLinkMaster::Listener* GetListener() const {
+ return listener_.get();
+ }
+
+ // Call to cause the entire database file to be re-written from scratch
+ // to disk. Used by the performance tester.
+ void RewriteFile() {
+ WriteFullTable();
+ }
+#endif
+
+ private:
+ FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, Delete);
+ FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, BigDelete);
+ FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, BigImport);
+
+ // Object to rebuild the table on the history thread (see the .cc file).
+ class TableBuilder;
+
+ // Byte offsets of values in the header.
+ static const int32 kFileHeaderSignatureOffset;
+ static const int32 kFileHeaderVersionOffset;
+ static const int32 kFileHeaderLengthOffset;
+ static const int32 kFileHeaderUsedOffset;
+ static const int32 kFileHeaderSaltOffset;
+
+ // The signature at the beginning of a file.
+ static const int32 kFileSignature;
+
+ // version of the file format this module currently uses
+ static const int32 kFileCurrentVersion;
+
+ // Bytes in the file header, including the salt.
+ static const size_t kFileHeaderSize;
+
+ // When creating a fresh new table, we use this many entries.
+ static const unsigned kDefaultTableSize;
+
+ // When the user is deleting a boatload of URLs, we don't really want to do
+ // individual writes for each of them. When the count exceeds this threshold,
+ // we will write the whole table to disk at once instead of individual items.
+ static const size_t kBigDeleteThreshold;
+
+ // Backend for the constructors initializing the members.
+ void InitMembers();
+
+ // If a rebuild is in progress, we save the URL in the temporary list.
+ // Otherwise, we add this to the table. Returns the index of the
+ // inserted fingerprint or null_hash_ on failure.
+ Hash TryToAddURL(const GURL& url);
+
+ // File I/O functions
+ // ------------------
+ // These functions are only called if |persist_to_disk_| is true.
+
+ // Posts the given task to the blocking worker pool with our options.
+ void PostIOTask(const tracked_objects::Location& from_here,
+ const base::Closure& task);
+
+ // Writes the entire table to disk. It will leave the table file open and
+ // the handle to it will be stored in file_.
+ void WriteFullTable();
+
+ // Try to load the table from the database file. If the file doesn't exist or
+ // is corrupt, this will return failure.
+ bool InitFromFile();
+
+ // Reads the header of the link coloring database from disk. Assumes the
+ // file pointer is at the beginning of the file and that there are no pending
+ // asynchronous I/O operations.
+ //
+ // Returns true on success and places the size of the table in num_entries
+ // and the number of nonzero fingerprints in used_count. This will fail if
+ // the version of the file is not the current version of the database.
+ bool ReadFileHeader(FILE* hfile, int32* num_entries, int32* used_count,
+ uint8 salt[LINK_SALT_LENGTH]);
+
+ // Fills *filename with the name of the link database filename
+ bool GetDatabaseFileName(base::FilePath* filename);
+
+ // Wrapper around Window's WriteFile using asynchronous I/O. This will proxy
+ // the write to a background thread.
+ void WriteToFile(FILE** hfile, off_t offset, void* data, int32 data_size);
+
+ // Helper function to schedule and asynchronous write of the used count to
+ // disk (this is a common operation).
+ void WriteUsedItemCountToFile();
+
+ // Helper function to schedule an asynchronous write of the given range of
+ // hash functions to disk. The range is inclusive on both ends. The range can
+ // wrap around at 0 and this function will handle it.
+ void WriteHashRangeToFile(Hash first_hash, Hash last_hash);
+
+ // Synchronous read from the file. Assumes there are no pending asynchronous
+ // I/O functions. Returns true if the entire buffer was successfully filled.
+ bool ReadFromFile(FILE* hfile, off_t offset, void* data, size_t data_size);
+
+ // General table handling
+ // ----------------------
+
+ // Called to add a fingerprint to the table. If |send_notifications| is true
+ // and the item is added successfully, Listener::Add will be invoked.
+ // Returns the index of the inserted fingerprint or null_hash_ if there was a
+ // duplicate and this item was skippped.
+ Hash AddFingerprint(Fingerprint fingerprint, bool send_notifications);
+
+ // Deletes all fingerprints from the given vector from the current hash table
+ // and syncs it to disk if there are changes. This does not update the
+ // deleted_since_rebuild_ list, the caller must update this itself if there
+ // is an update pending.
+ void DeleteFingerprintsFromCurrentTable(
+ const std::set<Fingerprint>& fingerprints);
+
+ // Removes the indicated fingerprint from the table. If the update_file flag
+ // is set, the changes will also be written to disk. Returns true if the
+ // fingerprint was deleted, false if it was not in the table to delete.
+ bool DeleteFingerprint(Fingerprint fingerprint, bool update_file);
+
+ // Creates a new empty table, call if InitFromFile() fails. Normally, when
+ // |suppress_rebuild| is false, the table will be rebuilt from history,
+ // keeping us in sync. When |suppress_rebuild| is true, the new table will be
+ // empty and we will not consult history. This is used when clearing the
+ // database and for unit tests.
+ bool InitFromScratch(bool suppress_rebuild);
+
+ // Allocates the Fingerprint structure and length. When init_to_empty is set,
+ // the table will be filled with 0s and used_items_ will be set to 0 as well.
+ // If the flag is not set, these things are untouched and it is the
+ // responsibility of the caller to fill them (like when we are reading from
+ // a file).
+ bool CreateURLTable(int32 num_entries, bool init_to_empty);
+
+ // A wrapper for CreateURLTable, this will allocate a new table, initialized
+ // to empty. The caller is responsible for saving the shared memory pointer
+ // and handles before this call (they will be replaced with new ones) and
+ // releasing them later. This is designed for callers that make a new table
+ // and then copy values from the old table to the new one, then release the
+ // old table.
+ //
+ // Returns true on success. On failure, the old table will be restored. The
+ // caller should not attemp to release the pointer/handle in this case.
+ bool BeginReplaceURLTable(int32 num_entries);
+
+ // unallocates the Fingerprint table
+ void FreeURLTable();
+
+ // For growing the table. ResizeTableIfNecessary will check to see if the
+ // table should be resized and calls ResizeTable if needed. Returns true if
+ // we decided to resize the table.
+ bool ResizeTableIfNecessary();
+
+ // Resizes the table (growing or shrinking) as necessary to accomodate the
+ // current count.
+ void ResizeTable(int32 new_size);
+
+ // Returns the desired table size for |item_count| URLs.
+ uint32 NewTableSizeForCount(int32 item_count) const;
+
+ // Computes the table load as fraction. For example, if 1/4 of the entries are
+ // full, this value will be 0.25
+ float ComputeTableLoad() const {
+ return static_cast<float>(used_items_) / static_cast<float>(table_length_);
+ }
+
+ // Initializes a rebuild of the visited link database based on the browser
+ // history. This will set table_builder_ while working, and there should not
+ // already be a rebuild in place when called. See the definition for more
+ // details on how this works.
+ //
+ // Returns true on success. Failure means we're not attempting to rebuild
+ // the database because something failed.
+ bool RebuildTableFromDelegate();
+
+ // Callback that the table rebuilder uses when the rebuild is complete.
+ // |success| is true if the fingerprint generation succeeded, in which case
+ // |fingerprints| will contain the computed fingerprints. On failure, there
+ // will be no fingerprints.
+ void OnTableRebuildComplete(bool success,
+ const std::vector<Fingerprint>& fingerprints);
+
+ // Increases or decreases the given hash value by one, wrapping around as
+ // necessary. Used for probing.
+ inline Hash IncrementHash(Hash hash) {
+ if (hash >= table_length_ - 1)
+ return 0; // Wrap around.
+ return hash + 1;
+ }
+ inline Hash DecrementHash(Hash hash) {
+ if (hash <= 0)
+ return table_length_ - 1; // Wrap around.
+ return hash - 1;
+ }
+
+#ifndef NDEBUG
+ // Indicates whether any asynchronous operation has ever been completed.
+ // We do some synchronous reads that require that no asynchronous operations
+ // are pending, yet we don't track whether they have been completed. This
+ // flag is a sanity check that these reads only happen before any
+ // asynchronous writes have been fired.
+ bool posted_asynchronous_operation_;
+#endif
+
+ // Reference to the browser context that this object belongs to
+ // (it knows the path to where the data is stored)
+ content::BrowserContext* browser_context_;
+
+ // Client owns the delegate and is responsible for it being valid through
+ // the life time this VisitedLinkMaster.
+ VisitedLinkDelegate* delegate_;
+
+ // VisitedLinkEventListener to handle incoming events.
+ scoped_ptr<Listener> listener_;
+
+ // Lazily initialized sequence token for posting file tasks.
+ base::SequencedWorkerPool::SequenceToken sequence_token_;
+
+ // When non-NULL, indicates we are in database rebuild mode and points to
+ // the class collecting fingerprint information from the history system.
+ // The pointer is owned by this class, but it must remain valid while the
+ // history query is running. We must only delete it when the query is done.
+ scoped_refptr<TableBuilder> table_builder_;
+
+ // Indicates URLs added and deleted since we started rebuilding the table.
+ std::set<Fingerprint> added_since_rebuild_;
+ std::set<Fingerprint> deleted_since_rebuild_;
+
+ // TODO(brettw) Support deletion, we need to track whether anything was
+ // deleted during the rebuild here. Then we should delete any of these
+ // entries from the complete table later.
+ // std::vector<Fingerprint> removed_since_rebuild_;
+
+ // The currently open file with the table in it. This may be NULL if we're
+ // rebuilding and haven't written a new version yet or if |persist_to_disk_|
+ // is false. Writing to the file may be safely ignored in this case. Also
+ // |file_| may be non-NULL but point to a NULL pointer. That would mean that
+ // opening of the file is already scheduled in a background thread and any
+ // writing to the file can also be scheduled to the background thread as it's
+ // guaranteed to be executed after the opening.
+ // The class owns both the |file_| pointer and the pointer pointed
+ // by |*file_|.
+ FILE** file_;
+
+ // If true, will try to persist the hash table to disk. Will rebuild from
+ // VisitedLinkDelegate::RebuildTable if there are disk corruptions.
+ bool persist_to_disk_;
+
+ // Shared memory consists of a SharedHeader followed by the table.
+ base::SharedMemory *shared_memory_;
+
+ // When we generate new tables, we increment the serial number of the
+ // shared memory object.
+ int32 shared_memory_serial_;
+
+ // Number of non-empty items in the table, used to compute fullness.
+ int32 used_items_;
+
+ // Testing values -----------------------------------------------------------
+ //
+ // The following fields exist for testing purposes. They are not used in
+ // release builds. It'd be nice to eliminate them in release builds, but we
+ // don't want to change the signature of the object between the unit test and
+ // regular builds. Instead, we just have "default" values that these take
+ // in release builds that give "regular" behavior.
+
+ // Overridden database file name for testing
+ base::FilePath database_name_override_;
+
+ // When nonzero, overrides the table size for new databases for testing
+ int32 table_size_override_;
+
+ // When set, indicates the task that should be run after the next rebuild from
+ // history is complete.
+ base::Closure rebuild_complete_task_;
+
+ // Set to prevent us from attempting to rebuild the database from global
+ // history if we have an error opening the file. This is used for testing,
+ // will be false in production.
+ bool suppress_rebuild_;
+
+ DISALLOW_COPY_AND_ASSIGN(VisitedLinkMaster);
+};
+
+// NOTE: These methods are defined inline here, so we can share the compilation
+// of visitedlink_master.cc between the browser and the unit/perf tests.
+
+#if defined(UNIT_TEST) || defined(PERF_TEST) || !defined(NDEBUG)
+inline void VisitedLinkMaster::DebugValidate() {
+ int32 used_count = 0;
+ for (int32 i = 0; i < table_length_; i++) {
+ if (hash_table_[i])
+ used_count++;
+ }
+ DCHECK_EQ(used_count, used_items_);
+}
+#endif
+
+} // namespace visitedlink
+
+#endif // COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_MASTER_H_