diff options
Diffstat (limited to 'chromium/components/visitedlink/browser')
6 files changed, 1777 insertions, 0 deletions
diff --git a/chromium/components/visitedlink/browser/DEPS b/chromium/components/visitedlink/browser/DEPS new file mode 100644 index 00000000000..1c35d9ca694 --- /dev/null +++ b/chromium/components/visitedlink/browser/DEPS @@ -0,0 +1,3 @@ +include_rules = [ + "+content/public/browser", +] diff --git a/chromium/components/visitedlink/browser/visitedlink_delegate.h b/chromium/components/visitedlink/browser/visitedlink_delegate.h new file mode 100644 index 00000000000..6adf7de5718 --- /dev/null +++ b/chromium/components/visitedlink/browser/visitedlink_delegate.h @@ -0,0 +1,51 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_DELEGATE_H_ +#define COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_DELEGATE_H_ + +#include "base/memory/ref_counted.h" + +class GURL; + +namespace content { +class BrowserContext; +} + +namespace visitedlink { + +// Delegate class that clients of VisitedLinkMaster must implement. +class VisitedLinkDelegate { + public: + // See RebuildTable. + class URLEnumerator : public base::RefCountedThreadSafe<URLEnumerator> { + public: + // Call this with each URL to rebuild the table. + virtual void OnURL(const GURL& url) = 0; + + // This must be called by Delegate after RebuildTable is called. |success| + // indicates all URLs have been returned successfully. The URLEnumerator + // object cannot be used by the delegate after this call. + virtual void OnComplete(bool success) = 0; + + protected: + virtual ~URLEnumerator() {} + + private: + friend class base::RefCountedThreadSafe<URLEnumerator>; + }; + + // Delegate class is responsible for persisting the list of visited URLs + // across browser runs. This is called by VisitedLinkMaster to repopulate + // its internal table. Note that methods on enumerator can be called on any + // thread but the delegate is responsible for synchronizating the calls. + virtual void RebuildTable(const scoped_refptr<URLEnumerator>& enumerator) = 0; + + protected: + virtual ~VisitedLinkDelegate() {} +}; + +} // namespace visitedlink + +#endif // COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_DELEGATE_H_ diff --git a/chromium/components/visitedlink/browser/visitedlink_event_listener.cc b/chromium/components/visitedlink/browser/visitedlink_event_listener.cc new file mode 100644 index 00000000000..a96a0e3aa55 --- /dev/null +++ b/chromium/components/visitedlink/browser/visitedlink_event_listener.cc @@ -0,0 +1,219 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/visitedlink/browser/visitedlink_event_listener.h" + +#include "base/memory/shared_memory.h" +#include "components/visitedlink/browser/visitedlink_delegate.h" +#include "components/visitedlink/common/visitedlink_messages.h" +#include "content/public/browser/notification_service.h" +#include "content/public/browser/notification_types.h" +#include "content/public/browser/render_process_host.h" +#include "content/public/browser/render_widget_host.h" + +using base::Time; +using base::TimeDelta; +using content::RenderWidgetHost; + +namespace { + +// The amount of time we wait to accumulate visited link additions. +const int kCommitIntervalMs = 100; + +// Size of the buffer after which individual link updates deemed not warranted +// and the overall update should be used instead. +const unsigned kVisitedLinkBufferThreshold = 50; + +} // namespace + +namespace visitedlink { + +// This class manages buffering and sending visited link hashes (fingerprints) +// to renderer based on widget visibility. +// As opposed to the VisitedLinkEventListener, which coalesces to +// reduce the rate of messages being sent to render processes, this class +// ensures that the updates occur only when explicitly requested. This is +// used for RenderProcessHostImpl to only send Add/Reset link events to the +// renderers when their tabs are visible and the corresponding RenderViews are +// created. +class VisitedLinkUpdater { + public: + explicit VisitedLinkUpdater(int render_process_id) + : reset_needed_(false), render_process_id_(render_process_id) { + } + + // Informs the renderer about a new visited link table. + void SendVisitedLinkTable(base::SharedMemory* table_memory) { + content::RenderProcessHost* process = + content::RenderProcessHost::FromID(render_process_id_); + if (!process) + return; // Happens in tests + base::SharedMemoryHandle handle_for_process; + table_memory->ShareToProcess(process->GetHandle(), &handle_for_process); + if (base::SharedMemory::IsHandleValid(handle_for_process)) + process->Send(new ChromeViewMsg_VisitedLink_NewTable( + handle_for_process)); + } + + // Buffers |links| to update, but doesn't actually relay them. + void AddLinks(const VisitedLinkCommon::Fingerprints& links) { + if (reset_needed_) + return; + + if (pending_.size() + links.size() > kVisitedLinkBufferThreshold) { + // Once the threshold is reached, there's no need to store pending visited + // link updates -- we opt for resetting the state for all links. + AddReset(); + return; + } + + pending_.insert(pending_.end(), links.begin(), links.end()); + } + + // Tells the updater that sending individual link updates is no longer + // necessary and the visited state for all links should be reset. + void AddReset() { + reset_needed_ = true; + pending_.clear(); + } + + // Sends visited link update messages: a list of links whose visited state + // changed or reset of visited state for all links. + void Update() { + content::RenderProcessHost* process = + content::RenderProcessHost::FromID(render_process_id_); + if (!process) + return; // Happens in tests + + if (!process->VisibleWidgetCount()) + return; + + if (reset_needed_) { + process->Send(new ChromeViewMsg_VisitedLink_Reset()); + reset_needed_ = false; + return; + } + + if (pending_.empty()) + return; + + process->Send(new ChromeViewMsg_VisitedLink_Add(pending_)); + + pending_.clear(); + } + + private: + bool reset_needed_; + int render_process_id_; + VisitedLinkCommon::Fingerprints pending_; +}; + +VisitedLinkEventListener::VisitedLinkEventListener( + VisitedLinkMaster* master, + content::BrowserContext* browser_context) + : master_(master), + browser_context_(browser_context) { + registrar_.Add(this, content::NOTIFICATION_RENDERER_PROCESS_CREATED, + content::NotificationService::AllBrowserContextsAndSources()); + registrar_.Add(this, content::NOTIFICATION_RENDERER_PROCESS_TERMINATED, + content::NotificationService::AllBrowserContextsAndSources()); + registrar_.Add(this, content::NOTIFICATION_RENDER_WIDGET_VISIBILITY_CHANGED, + content::NotificationService::AllBrowserContextsAndSources()); +} + +VisitedLinkEventListener::~VisitedLinkEventListener() { + if (!pending_visited_links_.empty()) + pending_visited_links_.clear(); +} + +void VisitedLinkEventListener::NewTable(base::SharedMemory* table_memory) { + if (!table_memory) + return; + + // Send to all RenderProcessHosts. + for (Updaters::iterator i = updaters_.begin(); i != updaters_.end(); ++i) { + // Make sure to not send to incognito renderers. + content::RenderProcessHost* process = + content::RenderProcessHost::FromID(i->first); + if (!process) + continue; + + i->second->SendVisitedLinkTable(table_memory); + } +} + +void VisitedLinkEventListener::Add(VisitedLinkMaster::Fingerprint fingerprint) { + pending_visited_links_.push_back(fingerprint); + + if (!coalesce_timer_.IsRunning()) { + coalesce_timer_.Start(FROM_HERE, + TimeDelta::FromMilliseconds(kCommitIntervalMs), this, + &VisitedLinkEventListener::CommitVisitedLinks); + } +} + +void VisitedLinkEventListener::Reset() { + pending_visited_links_.clear(); + coalesce_timer_.Stop(); + + for (Updaters::iterator i = updaters_.begin(); i != updaters_.end(); ++i) { + i->second->AddReset(); + i->second->Update(); + } +} + +void VisitedLinkEventListener::CommitVisitedLinks() { + // Send to all RenderProcessHosts. + for (Updaters::iterator i = updaters_.begin(); i != updaters_.end(); ++i) { + i->second->AddLinks(pending_visited_links_); + i->second->Update(); + } + + pending_visited_links_.clear(); +} + +void VisitedLinkEventListener::Observe( + int type, + const content::NotificationSource& source, + const content::NotificationDetails& details) { + switch (type) { + case content::NOTIFICATION_RENDERER_PROCESS_CREATED: { + content::RenderProcessHost* process = + content::Source<content::RenderProcessHost>(source).ptr(); + if (browser_context_ != process->GetBrowserContext()) + return; + + // Happens on browser start up. + if (!master_->shared_memory()) + return; + + updaters_[process->GetID()] = + make_linked_ptr(new VisitedLinkUpdater(process->GetID())); + updaters_[process->GetID()]->SendVisitedLinkTable( + master_->shared_memory()); + break; + } + case content::NOTIFICATION_RENDERER_PROCESS_TERMINATED: { + content::RenderProcessHost* process = + content::Source<content::RenderProcessHost>(source).ptr(); + if (updaters_.count(process->GetID())) { + updaters_.erase(process->GetID()); + } + break; + } + case content::NOTIFICATION_RENDER_WIDGET_VISIBILITY_CHANGED: { + RenderWidgetHost* widget = + content::Source<RenderWidgetHost>(source).ptr(); + int child_id = widget->GetProcess()->GetID(); + if (updaters_.count(child_id)) + updaters_[child_id]->Update(); + break; + } + default: + NOTREACHED(); + break; + } +} + +} // namespace visitedlink diff --git a/chromium/components/visitedlink/browser/visitedlink_event_listener.h b/chromium/components/visitedlink/browser/visitedlink_event_listener.h new file mode 100644 index 00000000000..c2de5d205d0 --- /dev/null +++ b/chromium/components/visitedlink/browser/visitedlink_event_listener.h @@ -0,0 +1,70 @@ +// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_EVENT_LISTENER_H_ +#define COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_EVENT_LISTENER_H_ + +#include <map> + +#include "base/memory/linked_ptr.h" +#include "base/timer/timer.h" +#include "components/visitedlink/browser/visitedlink_master.h" +#include "content/public/browser/notification_observer.h" +#include "content/public/browser/notification_registrar.h" + +namespace base { +class SharedMemory; +} + +namespace content { +class BrowserContext; +} + +namespace visitedlink { + +class VisitedLinkUpdater; + +// VisitedLinkEventListener broadcasts link coloring database updates to all +// processes. It also coalesces the updates to avoid excessive broadcasting of +// messages to the renderers. +class VisitedLinkEventListener : public VisitedLinkMaster::Listener, + public content::NotificationObserver { + public: + VisitedLinkEventListener(VisitedLinkMaster* master, + content::BrowserContext* browser_context); + virtual ~VisitedLinkEventListener(); + + virtual void NewTable(base::SharedMemory* table_memory) OVERRIDE; + virtual void Add(VisitedLinkMaster::Fingerprint fingerprint) OVERRIDE; + virtual void Reset() OVERRIDE; + + private: + void CommitVisitedLinks(); + + // content::NotificationObserver implementation. + virtual void Observe(int type, + const content::NotificationSource& source, + const content::NotificationDetails& details) OVERRIDE; + + base::OneShotTimer<VisitedLinkEventListener> coalesce_timer_; + VisitedLinkCommon::Fingerprints pending_visited_links_; + + content::NotificationRegistrar registrar_; + + // Map between renderer child ids and their VisitedLinkUpdater. + typedef std::map<int, linked_ptr<VisitedLinkUpdater> > Updaters; + Updaters updaters_; + + VisitedLinkMaster* master_; + + // Used to filter RENDERER_PROCESS_CREATED notifications to renderers that + // belong to this BrowserContext. + content::BrowserContext* browser_context_; + + DISALLOW_COPY_AND_ASSIGN(VisitedLinkEventListener); +}; + +} // namespace visitedlink + +#endif // COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_EVENT_LISTENER_H_ diff --git a/chromium/components/visitedlink/browser/visitedlink_master.cc b/chromium/components/visitedlink/browser/visitedlink_master.cc new file mode 100644 index 00000000000..7cf68911af7 --- /dev/null +++ b/chromium/components/visitedlink/browser/visitedlink_master.cc @@ -0,0 +1,989 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/visitedlink/browser/visitedlink_master.h" + +#if defined(OS_WIN) +#include <windows.h> +#include <io.h> +#include <shlobj.h> +#endif // defined(OS_WIN) +#include <stdio.h> + +#include <algorithm> + +#include "base/bind.h" +#include "base/bind_helpers.h" +#include "base/containers/stack_container.h" +#include "base/file_util.h" +#include "base/logging.h" +#include "base/message_loop/message_loop.h" +#include "base/path_service.h" +#include "base/rand_util.h" +#include "base/strings/string_util.h" +#include "base/threading/thread_restrictions.h" +#include "components/visitedlink/browser/visitedlink_delegate.h" +#include "components/visitedlink/browser/visitedlink_event_listener.h" +#include "content/public/browser/browser_context.h" +#include "content/public/browser/browser_thread.h" +#include "url/gurl.h" + +using content::BrowserThread; +using file_util::ScopedFILE; +using file_util::OpenFile; +using file_util::TruncateFile; + +namespace visitedlink { + +const int32 VisitedLinkMaster::kFileHeaderSignatureOffset = 0; +const int32 VisitedLinkMaster::kFileHeaderVersionOffset = 4; +const int32 VisitedLinkMaster::kFileHeaderLengthOffset = 8; +const int32 VisitedLinkMaster::kFileHeaderUsedOffset = 12; +const int32 VisitedLinkMaster::kFileHeaderSaltOffset = 16; + +const int32 VisitedLinkMaster::kFileCurrentVersion = 3; + +// the signature at the beginning of the URL table = "VLnk" (visited links) +const int32 VisitedLinkMaster::kFileSignature = 0x6b6e4c56; +const size_t VisitedLinkMaster::kFileHeaderSize = + kFileHeaderSaltOffset + LINK_SALT_LENGTH; + +// This value should also be the same as the smallest size in the lookup +// table in NewTableSizeForCount (prime number). +const unsigned VisitedLinkMaster::kDefaultTableSize = 16381; + +const size_t VisitedLinkMaster::kBigDeleteThreshold = 64; + +namespace { + +// Fills the given salt structure with some quasi-random values +// It is not necessary to generate a cryptographically strong random string, +// only that it be reasonably different for different users. +void GenerateSalt(uint8 salt[LINK_SALT_LENGTH]) { + DCHECK_EQ(LINK_SALT_LENGTH, 8) << "This code assumes the length of the salt"; + uint64 randval = base::RandUint64(); + memcpy(salt, &randval, 8); +} + +// Opens file on a background thread to not block UI thread. +void AsyncOpen(FILE** file, const base::FilePath& filename) { + *file = OpenFile(filename, "wb+"); + DLOG_IF(ERROR, !(*file)) << "Failed to open file " << filename.value(); +} + +// Returns true if the write was complete. +static bool WriteToFile(FILE* file, + off_t offset, + const void* data, + size_t data_len) { + if (fseek(file, offset, SEEK_SET) != 0) + return false; // Don't write to an invalid part of the file. + + size_t num_written = fwrite(data, 1, data_len, file); + + // The write may not make it to the kernel (stdlib may buffer the write) + // until the next fseek/fclose call. If we crash, it's easy for our used + // item count to be out of sync with the number of hashes we write. + // Protect against this by calling fflush. + int ret = fflush(file); + DCHECK_EQ(0, ret); + return num_written == data_len; +} + +// This task executes on a background thread and executes a write. This +// prevents us from blocking the UI thread doing I/O. Double pointer to FILE +// is used because file may still not be opened by the time of scheduling +// the task for execution. +void AsyncWrite(FILE** file, int32 offset, const std::string& data) { + if (*file) + WriteToFile(*file, offset, data.data(), data.size()); +} + +// Truncates the file to the current position asynchronously on a background +// thread. Double pointer to FILE is used because file may still not be opened +// by the time of scheduling the task for execution. +void AsyncTruncate(FILE** file) { + if (*file) + base::IgnoreResult(TruncateFile(*file)); +} + +// Closes the file on a background thread and releases memory used for storage +// of FILE* value. Double pointer to FILE is used because file may still not +// be opened by the time of scheduling the task for execution. +void AsyncClose(FILE** file) { + if (*file) + base::IgnoreResult(fclose(*file)); + free(file); +} + +} // namespace + +// TableBuilder --------------------------------------------------------------- + +// How rebuilding from history works +// --------------------------------- +// +// We mark that we're rebuilding from history by setting the table_builder_ +// member in VisitedLinkMaster to the TableBuilder we create. This builder +// will be called on the history thread by the history system for every URL +// in the database. +// +// The builder will store the fingerprints for those URLs, and then marshalls +// back to the main thread where the VisitedLinkMaster will be notified. The +// master then replaces its table with a new table containing the computed +// fingerprints. +// +// The builder must remain active while the history system is using it. +// Sometimes, the master will be deleted before the rebuild is complete, in +// which case it notifies the builder via DisownMaster(). The builder will +// delete itself once rebuilding is complete, and not execute any callback. +class VisitedLinkMaster::TableBuilder + : public VisitedLinkDelegate::URLEnumerator { + public: + TableBuilder(VisitedLinkMaster* master, + const uint8 salt[LINK_SALT_LENGTH]); + + // Called on the main thread when the master is being destroyed. This will + // prevent a crash when the query completes and the master is no longer + // around. We can not actually do anything but mark this fact, since the + // table will be being rebuilt simultaneously on the other thread. + void DisownMaster(); + + // VisitedLinkDelegate::URLEnumerator + virtual void OnURL(const GURL& url) OVERRIDE; + virtual void OnComplete(bool succeed) OVERRIDE; + + private: + virtual ~TableBuilder() {} + + // OnComplete mashals to this function on the main thread to do the + // notification. + void OnCompleteMainThread(); + + // Owner of this object. MAY ONLY BE ACCESSED ON THE MAIN THREAD! + VisitedLinkMaster* master_; + + // Indicates whether the operation has failed or not. + bool success_; + + // Salt for this new table. + uint8 salt_[LINK_SALT_LENGTH]; + + // Stores the fingerprints we computed on the background thread. + VisitedLinkCommon::Fingerprints fingerprints_; + + DISALLOW_COPY_AND_ASSIGN(TableBuilder); +}; + +// VisitedLinkMaster ---------------------------------------------------------- + +VisitedLinkMaster::VisitedLinkMaster(content::BrowserContext* browser_context, + VisitedLinkDelegate* delegate, + bool persist_to_disk) + : browser_context_(browser_context), + delegate_(delegate), + listener_(new VisitedLinkEventListener(this, browser_context)), + persist_to_disk_(persist_to_disk) { + InitMembers(); +} + +VisitedLinkMaster::VisitedLinkMaster(Listener* listener, + VisitedLinkDelegate* delegate, + bool persist_to_disk, + bool suppress_rebuild, + const base::FilePath& filename, + int32 default_table_size) + : browser_context_(NULL), + delegate_(delegate), + persist_to_disk_(persist_to_disk) { + listener_.reset(listener); + DCHECK(listener_.get()); + InitMembers(); + + database_name_override_ = filename; + table_size_override_ = default_table_size; + suppress_rebuild_ = suppress_rebuild; +} + +VisitedLinkMaster::~VisitedLinkMaster() { + if (table_builder_.get()) { + // Prevent the table builder from calling us back now that we're being + // destroyed. Note that we DON'T delete the object, since the history + // system is still writing into it. When that is complete, the table + // builder will destroy itself when it finds we are gone. + table_builder_->DisownMaster(); + } + FreeURLTable(); + // FreeURLTable() will schedule closing of the file and deletion of |file_|. + // So nothing should be done here. +} + +void VisitedLinkMaster::InitMembers() { + file_ = NULL; + shared_memory_ = NULL; + shared_memory_serial_ = 0; + used_items_ = 0; + table_size_override_ = 0; + suppress_rebuild_ = false; + sequence_token_ = BrowserThread::GetBlockingPool()->GetSequenceToken(); + +#ifndef NDEBUG + posted_asynchronous_operation_ = false; +#endif +} + +bool VisitedLinkMaster::Init() { + // We probably shouldn't be loading this from the UI thread, + // but it does need to happen early on in startup. + // http://code.google.com/p/chromium/issues/detail?id=24163 + base::ThreadRestrictions::ScopedAllowIO allow_io; + + if (persist_to_disk_) { + if (InitFromFile()) + return true; + } + return InitFromScratch(suppress_rebuild_); +} + +VisitedLinkMaster::Hash VisitedLinkMaster::TryToAddURL(const GURL& url) { + // Extra check that we are not incognito. This should not happen. + // TODO(boliu): Move this check to HistoryService when IsOffTheRecord is + // removed from BrowserContext. + if (browser_context_ && browser_context_->IsOffTheRecord()) { + NOTREACHED(); + return null_hash_; + } + + if (!url.is_valid()) + return null_hash_; // Don't add invalid URLs. + + Fingerprint fingerprint = ComputeURLFingerprint(url.spec().data(), + url.spec().size(), + salt_); + if (table_builder_.get()) { + // If we have a pending delete for this fingerprint, cancel it. + std::set<Fingerprint>::iterator found = + deleted_since_rebuild_.find(fingerprint); + if (found != deleted_since_rebuild_.end()) + deleted_since_rebuild_.erase(found); + + // A rebuild is in progress, save this addition in the temporary list so + // it can be added once rebuild is complete. + added_since_rebuild_.insert(fingerprint); + } + + // If the table is "full", we don't add URLs and just drop them on the floor. + // This can happen if we get thousands of new URLs and something causes + // the table resizing to fail. This check prevents a hang in that case. Note + // that this is *not* the resize limit, this is just a sanity check. + if (used_items_ / 8 > table_length_ / 10) + return null_hash_; // Table is more than 80% full. + + return AddFingerprint(fingerprint, true); +} + +void VisitedLinkMaster::PostIOTask(const tracked_objects::Location& from_here, + const base::Closure& task) { + DCHECK(persist_to_disk_); + BrowserThread::GetBlockingPool()->PostSequencedWorkerTask(sequence_token_, + from_here, task); +} + +void VisitedLinkMaster::AddURL(const GURL& url) { + Hash index = TryToAddURL(url); + if (!table_builder_.get() && index != null_hash_) { + // Not rebuilding, so we want to keep the file on disk up-to-date. + if (persist_to_disk_) { + WriteUsedItemCountToFile(); + WriteHashRangeToFile(index, index); + } + ResizeTableIfNecessary(); + } +} + +void VisitedLinkMaster::AddURLs(const std::vector<GURL>& url) { + for (std::vector<GURL>::const_iterator i = url.begin(); + i != url.end(); ++i) { + Hash index = TryToAddURL(*i); + if (!table_builder_.get() && index != null_hash_) + ResizeTableIfNecessary(); + } + + // Keeps the file on disk up-to-date. + if (!table_builder_.get() && persist_to_disk_) + WriteFullTable(); +} + +void VisitedLinkMaster::DeleteAllURLs() { + // Any pending modifications are invalid. + added_since_rebuild_.clear(); + deleted_since_rebuild_.clear(); + + // Clear the hash table. + used_items_ = 0; + memset(hash_table_, 0, this->table_length_ * sizeof(Fingerprint)); + + // Resize it if it is now too empty. Resize may write the new table out for + // us, otherwise, schedule writing the new table to disk ourselves. + if (!ResizeTableIfNecessary() && persist_to_disk_) + WriteFullTable(); + + listener_->Reset(); +} + +VisitedLinkDelegate* VisitedLinkMaster::GetDelegate() { + return delegate_; +} + +void VisitedLinkMaster::DeleteURLs(URLIterator* urls) { + if (!urls->HasNextURL()) + return; + + listener_->Reset(); + + if (table_builder_.get()) { + // A rebuild is in progress, save this deletion in the temporary list so + // it can be added once rebuild is complete. + while (urls->HasNextURL()) { + const GURL& url(urls->NextURL()); + if (!url.is_valid()) + continue; + + Fingerprint fingerprint = + ComputeURLFingerprint(url.spec().data(), url.spec().size(), salt_); + deleted_since_rebuild_.insert(fingerprint); + + // If the URL was just added and now we're deleting it, it may be in the + // list of things added since the last rebuild. Delete it from that list. + std::set<Fingerprint>::iterator found = + added_since_rebuild_.find(fingerprint); + if (found != added_since_rebuild_.end()) + added_since_rebuild_.erase(found); + + // Delete the URLs from the in-memory table, but don't bother writing + // to disk since it will be replaced soon. + DeleteFingerprint(fingerprint, false); + } + return; + } + + // Compute the deleted URLs' fingerprints and delete them + std::set<Fingerprint> deleted_fingerprints; + while (urls->HasNextURL()) { + const GURL& url(urls->NextURL()); + if (!url.is_valid()) + continue; + deleted_fingerprints.insert( + ComputeURLFingerprint(url.spec().data(), url.spec().size(), salt_)); + } + DeleteFingerprintsFromCurrentTable(deleted_fingerprints); +} + +// See VisitedLinkCommon::IsVisited which should be in sync with this algorithm +VisitedLinkMaster::Hash VisitedLinkMaster::AddFingerprint( + Fingerprint fingerprint, + bool send_notifications) { + if (!hash_table_ || table_length_ == 0) { + NOTREACHED(); // Not initialized. + return null_hash_; + } + + Hash cur_hash = HashFingerprint(fingerprint); + Hash first_hash = cur_hash; + while (true) { + Fingerprint cur_fingerprint = FingerprintAt(cur_hash); + if (cur_fingerprint == fingerprint) + return null_hash_; // This fingerprint is already in there, do nothing. + + if (cur_fingerprint == null_fingerprint_) { + // End of probe sequence found, insert here. + hash_table_[cur_hash] = fingerprint; + used_items_++; + // If allowed, notify listener that a new visited link was added. + if (send_notifications) + listener_->Add(fingerprint); + return cur_hash; + } + + // Advance in the probe sequence. + cur_hash = IncrementHash(cur_hash); + if (cur_hash == first_hash) { + // This means that we've wrapped around and are about to go into an + // infinite loop. Something was wrong with the hashtable resizing + // logic, so stop here. + NOTREACHED(); + return null_hash_; + } + } +} + +void VisitedLinkMaster::DeleteFingerprintsFromCurrentTable( + const std::set<Fingerprint>& fingerprints) { + bool bulk_write = (fingerprints.size() > kBigDeleteThreshold); + + // Delete the URLs from the table. + for (std::set<Fingerprint>::const_iterator i = fingerprints.begin(); + i != fingerprints.end(); ++i) + DeleteFingerprint(*i, !bulk_write); + + // These deleted fingerprints may make us shrink the table. + if (ResizeTableIfNecessary()) + return; // The resize function wrote the new table to disk for us. + + // Nobody wrote this out for us, write the full file to disk. + if (bulk_write && persist_to_disk_) + WriteFullTable(); +} + +bool VisitedLinkMaster::DeleteFingerprint(Fingerprint fingerprint, + bool update_file) { + if (!hash_table_ || table_length_ == 0) { + NOTREACHED(); // Not initialized. + return false; + } + if (!IsVisited(fingerprint)) + return false; // Not in the database to delete. + + // First update the header used count. + used_items_--; + if (update_file && persist_to_disk_) + WriteUsedItemCountToFile(); + + Hash deleted_hash = HashFingerprint(fingerprint); + + // Find the range of "stuff" in the hash table that is adjacent to this + // fingerprint. These are things that could be affected by the change in + // the hash table. Since we use linear probing, anything after the deleted + // item up until an empty item could be affected. + Hash end_range = deleted_hash; + while (true) { + Hash next_hash = IncrementHash(end_range); + if (next_hash == deleted_hash) + break; // We wrapped around and the whole table is full. + if (!hash_table_[next_hash]) + break; // Found the last spot. + end_range = next_hash; + } + + // We could get all fancy and move the affected fingerprints around, but + // instead we just remove them all and re-add them (minus our deleted one). + // This will mean there's a small window of time where the affected links + // won't be marked visited. + base::StackVector<Fingerprint, 32> shuffled_fingerprints; + Hash stop_loop = IncrementHash(end_range); // The end range is inclusive. + for (Hash i = deleted_hash; i != stop_loop; i = IncrementHash(i)) { + if (hash_table_[i] != fingerprint) { + // Don't save the one we're deleting! + shuffled_fingerprints->push_back(hash_table_[i]); + + // This will balance the increment of this value in AddFingerprint below + // so there is no net change. + used_items_--; + } + hash_table_[i] = null_fingerprint_; + } + + if (!shuffled_fingerprints->empty()) { + // Need to add the new items back. + for (size_t i = 0; i < shuffled_fingerprints->size(); i++) + AddFingerprint(shuffled_fingerprints[i], false); + } + + // Write the affected range to disk [deleted_hash, end_range]. + if (update_file && persist_to_disk_) + WriteHashRangeToFile(deleted_hash, end_range); + + return true; +} + +void VisitedLinkMaster::WriteFullTable() { + // This function can get called when the file is open, for example, when we + // resize the table. We must handle this case and not try to reopen the file, + // since there may be write operations pending on the file I/O thread. + // + // Note that once we start writing, we do not delete on error. This means + // there can be a partial file, but the short file will be detected next time + // we start, and will be replaced. + // + // This might possibly get corrupted if we crash in the middle of writing. + // We should pick up the most common types of these failures when we notice + // that the file size is different when we load it back in, and then we will + // regenerate the table. + DCHECK(persist_to_disk_); + + if (!file_) { + file_ = static_cast<FILE**>(calloc(1, sizeof(*file_))); + base::FilePath filename; + GetDatabaseFileName(&filename); + PostIOTask(FROM_HERE, base::Bind(&AsyncOpen, file_, filename)); + } + + // Write the new header. + int32 header[4]; + header[0] = kFileSignature; + header[1] = kFileCurrentVersion; + header[2] = table_length_; + header[3] = used_items_; + WriteToFile(file_, 0, header, sizeof(header)); + WriteToFile(file_, sizeof(header), salt_, LINK_SALT_LENGTH); + + // Write the hash data. + WriteToFile(file_, kFileHeaderSize, + hash_table_, table_length_ * sizeof(Fingerprint)); + + // The hash table may have shrunk, so make sure this is the end. + PostIOTask(FROM_HERE, base::Bind(&AsyncTruncate, file_)); +} + +bool VisitedLinkMaster::InitFromFile() { + DCHECK(file_ == NULL); + DCHECK(persist_to_disk_); + + base::FilePath filename; + GetDatabaseFileName(&filename); + ScopedFILE file_closer(OpenFile(filename, "rb+")); + if (!file_closer.get()) + return false; + + int32 num_entries, used_count; + if (!ReadFileHeader(file_closer.get(), &num_entries, &used_count, salt_)) + return false; // Header isn't valid. + + // Allocate and read the table. + if (!CreateURLTable(num_entries, false)) + return false; + if (!ReadFromFile(file_closer.get(), kFileHeaderSize, + hash_table_, num_entries * sizeof(Fingerprint))) { + FreeURLTable(); + return false; + } + used_items_ = used_count; + +#ifndef NDEBUG + DebugValidate(); +#endif + + file_ = static_cast<FILE**>(malloc(sizeof(*file_))); + *file_ = file_closer.release(); + return true; +} + +bool VisitedLinkMaster::InitFromScratch(bool suppress_rebuild) { + int32 table_size = kDefaultTableSize; + if (table_size_override_) + table_size = table_size_override_; + + // The salt must be generated before the table so that it can be copied to + // the shared memory. + GenerateSalt(salt_); + if (!CreateURLTable(table_size, true)) + return false; + +#ifndef NDEBUG + DebugValidate(); +#endif + + if (suppress_rebuild && persist_to_disk_) { + // When we disallow rebuilds (normally just unit tests), just use the + // current empty table. + WriteFullTable(); + return true; + } + + // This will build the table from history. On the first run, history will + // be empty, so this will be correct. This will also write the new table + // to disk. We don't want to save explicitly here, since the rebuild may + // not complete, leaving us with an empty but valid visited link database. + // In the future, we won't know we need to try rebuilding again. + return RebuildTableFromDelegate(); +} + +bool VisitedLinkMaster::ReadFileHeader(FILE* file, + int32* num_entries, + int32* used_count, + uint8 salt[LINK_SALT_LENGTH]) { + DCHECK(persist_to_disk_); + + // Get file size. + // Note that there is no need to seek back to the original location in the + // file since ReadFromFile() [which is the next call accessing the file] + // seeks before reading. + if (fseek(file, 0, SEEK_END) == -1) + return false; + size_t file_size = ftell(file); + + if (file_size <= kFileHeaderSize) + return false; + + uint8 header[kFileHeaderSize]; + if (!ReadFromFile(file, 0, &header, kFileHeaderSize)) + return false; + + // Verify the signature. + int32 signature; + memcpy(&signature, &header[kFileHeaderSignatureOffset], sizeof(signature)); + if (signature != kFileSignature) + return false; + + // Verify the version is up-to-date. As with other read errors, a version + // mistmatch will trigger a rebuild of the database from history, which will + // have the effect of migrating the database. + int32 version; + memcpy(&version, &header[kFileHeaderVersionOffset], sizeof(version)); + if (version != kFileCurrentVersion) + return false; // Bad version. + + // Read the table size and make sure it matches the file size. + memcpy(num_entries, &header[kFileHeaderLengthOffset], sizeof(*num_entries)); + if (*num_entries * sizeof(Fingerprint) + kFileHeaderSize != file_size) + return false; // Bad size. + + // Read the used item count. + memcpy(used_count, &header[kFileHeaderUsedOffset], sizeof(*used_count)); + if (*used_count > *num_entries) + return false; // Bad used item count; + + // Read the salt. + memcpy(salt, &header[kFileHeaderSaltOffset], LINK_SALT_LENGTH); + + // This file looks OK from the header's perspective. + return true; +} + +bool VisitedLinkMaster::GetDatabaseFileName(base::FilePath* filename) { + if (!database_name_override_.empty()) { + // use this filename, the directory must exist + *filename = database_name_override_; + return true; + } + + if (!browser_context_ || browser_context_->GetPath().empty()) + return false; + + base::FilePath profile_dir = browser_context_->GetPath(); + *filename = profile_dir.Append(FILE_PATH_LITERAL("Visited Links")); + return true; +} + +// Initializes the shared memory structure. The salt should already be filled +// in so that it can be written to the shared memory +bool VisitedLinkMaster::CreateURLTable(int32 num_entries, bool init_to_empty) { + // The table is the size of the table followed by the entries. + uint32 alloc_size = num_entries * sizeof(Fingerprint) + sizeof(SharedHeader); + + // Create the shared memory object. + shared_memory_ = new base::SharedMemory(); + if (!shared_memory_) + return false; + + if (!shared_memory_->CreateAndMapAnonymous(alloc_size)) { + delete shared_memory_; + shared_memory_ = NULL; + return false; + } + + if (init_to_empty) { + memset(shared_memory_->memory(), 0, alloc_size); + used_items_ = 0; + } + table_length_ = num_entries; + + // Save the header for other processes to read. + SharedHeader* header = static_cast<SharedHeader*>(shared_memory_->memory()); + header->length = table_length_; + memcpy(header->salt, salt_, LINK_SALT_LENGTH); + + // Our table pointer is just the data immediately following the size. + hash_table_ = reinterpret_cast<Fingerprint*>( + static_cast<char*>(shared_memory_->memory()) + sizeof(SharedHeader)); + + return true; +} + +bool VisitedLinkMaster::BeginReplaceURLTable(int32 num_entries) { + base::SharedMemory *old_shared_memory = shared_memory_; + Fingerprint* old_hash_table = hash_table_; + int32 old_table_length = table_length_; + if (!CreateURLTable(num_entries, true)) { + // Try to put back the old state. + shared_memory_ = old_shared_memory; + hash_table_ = old_hash_table; + table_length_ = old_table_length; + return false; + } + +#ifndef NDEBUG + DebugValidate(); +#endif + + return true; +} + +void VisitedLinkMaster::FreeURLTable() { + if (shared_memory_) { + delete shared_memory_; + shared_memory_ = NULL; + } + if (!persist_to_disk_ || !file_) + return; + PostIOTask(FROM_HERE, base::Bind(&AsyncClose, file_)); + // AsyncClose() will close the file and free the memory pointed by |file_|. + file_ = NULL; +} + +bool VisitedLinkMaster::ResizeTableIfNecessary() { + DCHECK(table_length_ > 0) << "Must have a table"; + + // Load limits for good performance/space. We are pretty conservative about + // keeping the table not very full. This is because we use linear probing + // which increases the likelihood of clumps of entries which will reduce + // performance. + const float max_table_load = 0.5f; // Grow when we're > this full. + const float min_table_load = 0.2f; // Shrink when we're < this full. + + float load = ComputeTableLoad(); + if (load < max_table_load && + (table_length_ <= static_cast<float>(kDefaultTableSize) || + load > min_table_load)) + return false; + + // Table needs to grow or shrink. + int new_size = NewTableSizeForCount(used_items_); + DCHECK(new_size > used_items_); + DCHECK(load <= min_table_load || new_size > table_length_); + ResizeTable(new_size); + return true; +} + +void VisitedLinkMaster::ResizeTable(int32 new_size) { + DCHECK(shared_memory_ && shared_memory_->memory() && hash_table_); + shared_memory_serial_++; + +#ifndef NDEBUG + DebugValidate(); +#endif + + base::SharedMemory* old_shared_memory = shared_memory_; + Fingerprint* old_hash_table = hash_table_; + int32 old_table_length = table_length_; + if (!BeginReplaceURLTable(new_size)) + return; + + // Now we have two tables, our local copy which is the old one, and the new + // one loaded into this object where we need to copy the data. + for (int32 i = 0; i < old_table_length; i++) { + Fingerprint cur = old_hash_table[i]; + if (cur) + AddFingerprint(cur, false); + } + + // On error unmapping, just forget about it since we can't do anything + // else to release it. + delete old_shared_memory; + + // Send an update notification to all child processes so they read the new + // table. + listener_->NewTable(shared_memory_); + +#ifndef NDEBUG + DebugValidate(); +#endif + + // The new table needs to be written to disk. + if (persist_to_disk_) + WriteFullTable(); +} + +uint32 VisitedLinkMaster::NewTableSizeForCount(int32 item_count) const { + // These table sizes are selected to be the maximum prime number less than + // a "convenient" multiple of 1K. + static const int table_sizes[] = { + 16381, // 16K = 16384 <- don't shrink below this table size + // (should be == default_table_size) + 32767, // 32K = 32768 + 65521, // 64K = 65536 + 130051, // 128K = 131072 + 262127, // 256K = 262144 + 524269, // 512K = 524288 + 1048549, // 1M = 1048576 + 2097143, // 2M = 2097152 + 4194301, // 4M = 4194304 + 8388571, // 8M = 8388608 + 16777199, // 16M = 16777216 + 33554347}; // 32M = 33554432 + + // Try to leave the table 33% full. + int desired = item_count * 3; + + // Find the closest prime. + for (size_t i = 0; i < arraysize(table_sizes); i ++) { + if (table_sizes[i] > desired) + return table_sizes[i]; + } + + // Growing very big, just approximate a "good" number, not growing as much + // as normal. + return item_count * 2 - 1; +} + +// See the TableBuilder definition in the header file for how this works. +bool VisitedLinkMaster::RebuildTableFromDelegate() { + DCHECK(!table_builder_.get()); + + // TODO(brettw) make sure we have reasonable salt! + table_builder_ = new TableBuilder(this, salt_); + delegate_->RebuildTable(table_builder_); + return true; +} + +// See the TableBuilder declaration above for how this works. +void VisitedLinkMaster::OnTableRebuildComplete( + bool success, + const std::vector<Fingerprint>& fingerprints) { + if (success) { + // Replace the old table with a new blank one. + shared_memory_serial_++; + + // We are responsible for freeing it AFTER it has been replaced if + // replacement succeeds. + base::SharedMemory* old_shared_memory = shared_memory_; + + int new_table_size = NewTableSizeForCount( + static_cast<int>(fingerprints.size() + added_since_rebuild_.size())); + if (BeginReplaceURLTable(new_table_size)) { + // Free the old table. + delete old_shared_memory; + + // Add the stored fingerprints to the hash table. + for (size_t i = 0; i < fingerprints.size(); i++) + AddFingerprint(fingerprints[i], false); + + // Also add anything that was added while we were asynchronously + // generating the new table. + for (std::set<Fingerprint>::iterator i = added_since_rebuild_.begin(); + i != added_since_rebuild_.end(); ++i) + AddFingerprint(*i, false); + added_since_rebuild_.clear(); + + // Now handle deletions. + DeleteFingerprintsFromCurrentTable(deleted_since_rebuild_); + deleted_since_rebuild_.clear(); + + // Send an update notification to all child processes. + listener_->NewTable(shared_memory_); + + if (persist_to_disk_) + WriteFullTable(); + } + } + table_builder_ = NULL; // Will release our reference to the builder. + + // Notify the unit test that the rebuild is complete (will be NULL in prod.) + if (!rebuild_complete_task_.is_null()) { + rebuild_complete_task_.Run(); + rebuild_complete_task_.Reset(); + } +} + +void VisitedLinkMaster::WriteToFile(FILE** file, + off_t offset, + void* data, + int32 data_size) { + DCHECK(persist_to_disk_); +#ifndef NDEBUG + posted_asynchronous_operation_ = true; +#endif + PostIOTask(FROM_HERE, + base::Bind(&AsyncWrite, file, offset, + std::string(static_cast<const char*>(data), data_size))); +} + +void VisitedLinkMaster::WriteUsedItemCountToFile() { + DCHECK(persist_to_disk_); + if (!file_) + return; // See comment on the file_ variable for why this might happen. + WriteToFile(file_, kFileHeaderUsedOffset, &used_items_, sizeof(used_items_)); +} + +void VisitedLinkMaster::WriteHashRangeToFile(Hash first_hash, Hash last_hash) { + DCHECK(persist_to_disk_); + + if (!file_) + return; // See comment on the file_ variable for why this might happen. + if (last_hash < first_hash) { + // Handle wraparound at 0. This first write is first_hash->EOF + WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize, + &hash_table_[first_hash], + (table_length_ - first_hash + 1) * sizeof(Fingerprint)); + + // Now do 0->last_lash. + WriteToFile(file_, kFileHeaderSize, hash_table_, + (last_hash + 1) * sizeof(Fingerprint)); + } else { + // Normal case, just write the range. + WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize, + &hash_table_[first_hash], + (last_hash - first_hash + 1) * sizeof(Fingerprint)); + } +} + +bool VisitedLinkMaster::ReadFromFile(FILE* file, + off_t offset, + void* data, + size_t data_size) { + DCHECK(persist_to_disk_); +#ifndef NDEBUG + // Since this function is synchronous, we require that no asynchronous + // operations could possibly be pending. + DCHECK(!posted_asynchronous_operation_); +#endif + + if (fseek(file, offset, SEEK_SET) != 0) + return false; + + size_t num_read = fread(data, 1, data_size, file); + return num_read == data_size; +} + +// VisitedLinkTableBuilder ---------------------------------------------------- + +VisitedLinkMaster::TableBuilder::TableBuilder( + VisitedLinkMaster* master, + const uint8 salt[LINK_SALT_LENGTH]) + : master_(master), + success_(true) { + fingerprints_.reserve(4096); + memcpy(salt_, salt, LINK_SALT_LENGTH * sizeof(uint8)); +} + +// TODO(brettw): Do we want to try to cancel the request if this happens? It +// could delay shutdown if there are a lot of URLs. +void VisitedLinkMaster::TableBuilder::DisownMaster() { + master_ = NULL; +} + +void VisitedLinkMaster::TableBuilder::OnURL(const GURL& url) { + if (!url.is_empty()) { + fingerprints_.push_back(VisitedLinkMaster::ComputeURLFingerprint( + url.spec().data(), url.spec().length(), salt_)); + } +} + +void VisitedLinkMaster::TableBuilder::OnComplete(bool success) { + success_ = success; + DLOG_IF(WARNING, !success) << "Unable to rebuild visited links"; + + // Marshal to the main thread to notify the VisitedLinkMaster that the + // rebuild is complete. + BrowserThread::PostTask( + BrowserThread::UI, FROM_HERE, + base::Bind(&TableBuilder::OnCompleteMainThread, this)); +} + +void VisitedLinkMaster::TableBuilder::OnCompleteMainThread() { + if (master_) + master_->OnTableRebuildComplete(success_, fingerprints_); +} + +} // namespace visitedlink diff --git a/chromium/components/visitedlink/browser/visitedlink_master.h b/chromium/components/visitedlink/browser/visitedlink_master.h new file mode 100644 index 00000000000..aff4e498a7c --- /dev/null +++ b/chromium/components/visitedlink/browser/visitedlink_master.h @@ -0,0 +1,445 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_MASTER_H_ +#define COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_MASTER_H_ + +#if defined(OS_WIN) +#include <windows.h> +#endif +#include <set> +#include <vector> + +#include "base/callback.h" +#include "base/callback_forward.h" +#include "base/files/file_path.h" +#include "base/gtest_prod_util.h" +#include "base/memory/shared_memory.h" +#include "base/threading/sequenced_worker_pool.h" +#include "components/visitedlink/common/visitedlink_common.h" + +#if defined(UNIT_TEST) || defined(PERF_TEST) || !defined(NDEBUG) +#include "base/logging.h" +#endif + +class GURL; + +namespace content { +class BrowserContext; +} + +namespace visitedlink { + +class VisitedLinkDelegate; + +// Controls the link coloring database. The master controls all writing to the +// database as well as disk I/O. There should be only one master. +// +// This class will defer writing operations to the file thread. This means that +// class destruction, the file may still be open since operations are pending on +// another thread. +class VisitedLinkMaster : public VisitedLinkCommon { + public: + // Listens to the link coloring database events. The master is given this + // event as a constructor argument and dispatches events using it. + class Listener { + public: + virtual ~Listener() {} + + // Called when link coloring database has been created or replaced. The + // argument is the new table handle. + virtual void NewTable(base::SharedMemory*) = 0; + + // Called when new link has been added. The argument is the fingerprint + // (hash) of the link. + virtual void Add(Fingerprint fingerprint) = 0; + + // Called when link coloring state has been reset. This may occur when + // entire or parts of history were deleted. + virtual void Reset() = 0; + }; + + VisitedLinkMaster(content::BrowserContext* browser_context, + VisitedLinkDelegate* delegate, + bool persist_to_disk); + + // In unit test mode, we allow the caller to optionally specify the database + // filename so that it can be run from a unit test. The directory where this + // file resides must exist in this mode. You can also specify the default + // table size to test table resizing. If this parameter is 0, we will use the + // defaults. + // + // In the unit test mode, we also allow the caller to provide a history + // service pointer (the history service can't be fetched from the browser + // process when we're in unit test mode). This can be NULL to try to access + // the main version, which will probably fail (which can be good for testing + // this failure mode). + // + // When |suppress_rebuild| is set, we'll not attempt to load data from + // history if the file can't be loaded. This should generally be set for + // testing except when you want to test the rebuild process explicitly. + VisitedLinkMaster(Listener* listener, + VisitedLinkDelegate* delegate, + bool persist_to_disk, + bool suppress_rebuild, + const base::FilePath& filename, + int32 default_table_size); + virtual ~VisitedLinkMaster(); + + // Must be called immediately after object creation. Nothing else will work + // until this is called. Returns true on success, false means that this + // object won't work. + bool Init(); + + base::SharedMemory* shared_memory() { return shared_memory_; } + + // Adds a URL to the table. + void AddURL(const GURL& url); + + // Adds a set of URLs to the table. + void AddURLs(const std::vector<GURL>& url); + + // See DeleteURLs. + class URLIterator { + public: + // HasNextURL must return true when this is called. Returns the next URL + // then advances the iterator. Note that the returned reference is only + // valid until the next call of NextURL. + virtual const GURL& NextURL() = 0; + + // Returns true if still has URLs to be iterated. + virtual bool HasNextURL() const = 0; + + protected: + virtual ~URLIterator() {} + }; + + // Deletes the specified URLs from |rows| from the table. + void DeleteURLs(URLIterator* iterator); + + // Clears the visited links table by deleting the file from disk. Used as + // part of history clearing. + void DeleteAllURLs(); + + // Returns the Delegate of this Master. + VisitedLinkDelegate* GetDelegate(); + +#if defined(UNIT_TEST) || !defined(NDEBUG) || defined(PERF_TEST) + // This is a debugging function that can be called to double-check internal + // data structures. It will assert if the check fails. + void DebugValidate(); + + // Sets a task to execute when the next rebuild from history is complete. + // This is used by unit tests to wait for the rebuild to complete before + // they continue. The pointer will be owned by this object after the call. + void set_rebuild_complete_task(const base::Closure& task) { + DCHECK(rebuild_complete_task_.is_null()); + rebuild_complete_task_ = task; + } + + // returns the number of items in the table for testing verification + int32 GetUsedCount() const { + return used_items_; + } + + // Returns the listener. + VisitedLinkMaster::Listener* GetListener() const { + return listener_.get(); + } + + // Call to cause the entire database file to be re-written from scratch + // to disk. Used by the performance tester. + void RewriteFile() { + WriteFullTable(); + } +#endif + + private: + FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, Delete); + FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, BigDelete); + FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, BigImport); + + // Object to rebuild the table on the history thread (see the .cc file). + class TableBuilder; + + // Byte offsets of values in the header. + static const int32 kFileHeaderSignatureOffset; + static const int32 kFileHeaderVersionOffset; + static const int32 kFileHeaderLengthOffset; + static const int32 kFileHeaderUsedOffset; + static const int32 kFileHeaderSaltOffset; + + // The signature at the beginning of a file. + static const int32 kFileSignature; + + // version of the file format this module currently uses + static const int32 kFileCurrentVersion; + + // Bytes in the file header, including the salt. + static const size_t kFileHeaderSize; + + // When creating a fresh new table, we use this many entries. + static const unsigned kDefaultTableSize; + + // When the user is deleting a boatload of URLs, we don't really want to do + // individual writes for each of them. When the count exceeds this threshold, + // we will write the whole table to disk at once instead of individual items. + static const size_t kBigDeleteThreshold; + + // Backend for the constructors initializing the members. + void InitMembers(); + + // If a rebuild is in progress, we save the URL in the temporary list. + // Otherwise, we add this to the table. Returns the index of the + // inserted fingerprint or null_hash_ on failure. + Hash TryToAddURL(const GURL& url); + + // File I/O functions + // ------------------ + // These functions are only called if |persist_to_disk_| is true. + + // Posts the given task to the blocking worker pool with our options. + void PostIOTask(const tracked_objects::Location& from_here, + const base::Closure& task); + + // Writes the entire table to disk. It will leave the table file open and + // the handle to it will be stored in file_. + void WriteFullTable(); + + // Try to load the table from the database file. If the file doesn't exist or + // is corrupt, this will return failure. + bool InitFromFile(); + + // Reads the header of the link coloring database from disk. Assumes the + // file pointer is at the beginning of the file and that there are no pending + // asynchronous I/O operations. + // + // Returns true on success and places the size of the table in num_entries + // and the number of nonzero fingerprints in used_count. This will fail if + // the version of the file is not the current version of the database. + bool ReadFileHeader(FILE* hfile, int32* num_entries, int32* used_count, + uint8 salt[LINK_SALT_LENGTH]); + + // Fills *filename with the name of the link database filename + bool GetDatabaseFileName(base::FilePath* filename); + + // Wrapper around Window's WriteFile using asynchronous I/O. This will proxy + // the write to a background thread. + void WriteToFile(FILE** hfile, off_t offset, void* data, int32 data_size); + + // Helper function to schedule and asynchronous write of the used count to + // disk (this is a common operation). + void WriteUsedItemCountToFile(); + + // Helper function to schedule an asynchronous write of the given range of + // hash functions to disk. The range is inclusive on both ends. The range can + // wrap around at 0 and this function will handle it. + void WriteHashRangeToFile(Hash first_hash, Hash last_hash); + + // Synchronous read from the file. Assumes there are no pending asynchronous + // I/O functions. Returns true if the entire buffer was successfully filled. + bool ReadFromFile(FILE* hfile, off_t offset, void* data, size_t data_size); + + // General table handling + // ---------------------- + + // Called to add a fingerprint to the table. If |send_notifications| is true + // and the item is added successfully, Listener::Add will be invoked. + // Returns the index of the inserted fingerprint or null_hash_ if there was a + // duplicate and this item was skippped. + Hash AddFingerprint(Fingerprint fingerprint, bool send_notifications); + + // Deletes all fingerprints from the given vector from the current hash table + // and syncs it to disk if there are changes. This does not update the + // deleted_since_rebuild_ list, the caller must update this itself if there + // is an update pending. + void DeleteFingerprintsFromCurrentTable( + const std::set<Fingerprint>& fingerprints); + + // Removes the indicated fingerprint from the table. If the update_file flag + // is set, the changes will also be written to disk. Returns true if the + // fingerprint was deleted, false if it was not in the table to delete. + bool DeleteFingerprint(Fingerprint fingerprint, bool update_file); + + // Creates a new empty table, call if InitFromFile() fails. Normally, when + // |suppress_rebuild| is false, the table will be rebuilt from history, + // keeping us in sync. When |suppress_rebuild| is true, the new table will be + // empty and we will not consult history. This is used when clearing the + // database and for unit tests. + bool InitFromScratch(bool suppress_rebuild); + + // Allocates the Fingerprint structure and length. When init_to_empty is set, + // the table will be filled with 0s and used_items_ will be set to 0 as well. + // If the flag is not set, these things are untouched and it is the + // responsibility of the caller to fill them (like when we are reading from + // a file). + bool CreateURLTable(int32 num_entries, bool init_to_empty); + + // A wrapper for CreateURLTable, this will allocate a new table, initialized + // to empty. The caller is responsible for saving the shared memory pointer + // and handles before this call (they will be replaced with new ones) and + // releasing them later. This is designed for callers that make a new table + // and then copy values from the old table to the new one, then release the + // old table. + // + // Returns true on success. On failure, the old table will be restored. The + // caller should not attemp to release the pointer/handle in this case. + bool BeginReplaceURLTable(int32 num_entries); + + // unallocates the Fingerprint table + void FreeURLTable(); + + // For growing the table. ResizeTableIfNecessary will check to see if the + // table should be resized and calls ResizeTable if needed. Returns true if + // we decided to resize the table. + bool ResizeTableIfNecessary(); + + // Resizes the table (growing or shrinking) as necessary to accomodate the + // current count. + void ResizeTable(int32 new_size); + + // Returns the desired table size for |item_count| URLs. + uint32 NewTableSizeForCount(int32 item_count) const; + + // Computes the table load as fraction. For example, if 1/4 of the entries are + // full, this value will be 0.25 + float ComputeTableLoad() const { + return static_cast<float>(used_items_) / static_cast<float>(table_length_); + } + + // Initializes a rebuild of the visited link database based on the browser + // history. This will set table_builder_ while working, and there should not + // already be a rebuild in place when called. See the definition for more + // details on how this works. + // + // Returns true on success. Failure means we're not attempting to rebuild + // the database because something failed. + bool RebuildTableFromDelegate(); + + // Callback that the table rebuilder uses when the rebuild is complete. + // |success| is true if the fingerprint generation succeeded, in which case + // |fingerprints| will contain the computed fingerprints. On failure, there + // will be no fingerprints. + void OnTableRebuildComplete(bool success, + const std::vector<Fingerprint>& fingerprints); + + // Increases or decreases the given hash value by one, wrapping around as + // necessary. Used for probing. + inline Hash IncrementHash(Hash hash) { + if (hash >= table_length_ - 1) + return 0; // Wrap around. + return hash + 1; + } + inline Hash DecrementHash(Hash hash) { + if (hash <= 0) + return table_length_ - 1; // Wrap around. + return hash - 1; + } + +#ifndef NDEBUG + // Indicates whether any asynchronous operation has ever been completed. + // We do some synchronous reads that require that no asynchronous operations + // are pending, yet we don't track whether they have been completed. This + // flag is a sanity check that these reads only happen before any + // asynchronous writes have been fired. + bool posted_asynchronous_operation_; +#endif + + // Reference to the browser context that this object belongs to + // (it knows the path to where the data is stored) + content::BrowserContext* browser_context_; + + // Client owns the delegate and is responsible for it being valid through + // the life time this VisitedLinkMaster. + VisitedLinkDelegate* delegate_; + + // VisitedLinkEventListener to handle incoming events. + scoped_ptr<Listener> listener_; + + // Lazily initialized sequence token for posting file tasks. + base::SequencedWorkerPool::SequenceToken sequence_token_; + + // When non-NULL, indicates we are in database rebuild mode and points to + // the class collecting fingerprint information from the history system. + // The pointer is owned by this class, but it must remain valid while the + // history query is running. We must only delete it when the query is done. + scoped_refptr<TableBuilder> table_builder_; + + // Indicates URLs added and deleted since we started rebuilding the table. + std::set<Fingerprint> added_since_rebuild_; + std::set<Fingerprint> deleted_since_rebuild_; + + // TODO(brettw) Support deletion, we need to track whether anything was + // deleted during the rebuild here. Then we should delete any of these + // entries from the complete table later. + // std::vector<Fingerprint> removed_since_rebuild_; + + // The currently open file with the table in it. This may be NULL if we're + // rebuilding and haven't written a new version yet or if |persist_to_disk_| + // is false. Writing to the file may be safely ignored in this case. Also + // |file_| may be non-NULL but point to a NULL pointer. That would mean that + // opening of the file is already scheduled in a background thread and any + // writing to the file can also be scheduled to the background thread as it's + // guaranteed to be executed after the opening. + // The class owns both the |file_| pointer and the pointer pointed + // by |*file_|. + FILE** file_; + + // If true, will try to persist the hash table to disk. Will rebuild from + // VisitedLinkDelegate::RebuildTable if there are disk corruptions. + bool persist_to_disk_; + + // Shared memory consists of a SharedHeader followed by the table. + base::SharedMemory *shared_memory_; + + // When we generate new tables, we increment the serial number of the + // shared memory object. + int32 shared_memory_serial_; + + // Number of non-empty items in the table, used to compute fullness. + int32 used_items_; + + // Testing values ----------------------------------------------------------- + // + // The following fields exist for testing purposes. They are not used in + // release builds. It'd be nice to eliminate them in release builds, but we + // don't want to change the signature of the object between the unit test and + // regular builds. Instead, we just have "default" values that these take + // in release builds that give "regular" behavior. + + // Overridden database file name for testing + base::FilePath database_name_override_; + + // When nonzero, overrides the table size for new databases for testing + int32 table_size_override_; + + // When set, indicates the task that should be run after the next rebuild from + // history is complete. + base::Closure rebuild_complete_task_; + + // Set to prevent us from attempting to rebuild the database from global + // history if we have an error opening the file. This is used for testing, + // will be false in production. + bool suppress_rebuild_; + + DISALLOW_COPY_AND_ASSIGN(VisitedLinkMaster); +}; + +// NOTE: These methods are defined inline here, so we can share the compilation +// of visitedlink_master.cc between the browser and the unit/perf tests. + +#if defined(UNIT_TEST) || defined(PERF_TEST) || !defined(NDEBUG) +inline void VisitedLinkMaster::DebugValidate() { + int32 used_count = 0; + for (int32 i = 0; i < table_length_; i++) { + if (hash_table_[i]) + used_count++; + } + DCHECK_EQ(used_count, used_items_); +} +#endif + +} // namespace visitedlink + +#endif // COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_MASTER_H_ |