summaryrefslogtreecommitdiff
path: root/chromium/chrome/browser/net/referrer.cc
diff options
context:
space:
mode:
Diffstat (limited to 'chromium/chrome/browser/net/referrer.cc')
-rw-r--r--chromium/chrome/browser/net/referrer.cc167
1 files changed, 167 insertions, 0 deletions
diff --git a/chromium/chrome/browser/net/referrer.cc b/chromium/chrome/browser/net/referrer.cc
new file mode 100644
index 00000000000..ea3a6550718
--- /dev/null
+++ b/chromium/chrome/browser/net/referrer.cc
@@ -0,0 +1,167 @@
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/net/referrer.h"
+
+#include <limits.h>
+#include <stddef.h>
+
+#include <memory>
+#include <utility>
+
+#include "base/compiler_specific.h"
+#include "base/logging.h"
+#include "base/values.h"
+
+namespace chrome_browser_net {
+
+//------------------------------------------------------------------------------
+// Smoothing parameter for updating subresource_use_rate_.
+
+// We always combine our old expected value, weighted by some factor W (we use
+// kWeightingForOldConnectsExpectedValue), with the new expected value Enew.
+// The new "expected value" is the number of actual connections made due to the
+// current navigations.
+// That means that IF we end up needing to connect, we should apply the formula:
+// Eupdated = Eold * W + Enew * (1 - W)
+// If we visit the containing url, but don't end up needing a connection, then
+// Enew == 0, so we use the formula:
+// Eupdated = Eold * W
+// To achieve the above updating algorithm, we end up doing the multiplication
+// by W every time we contemplate doing a preconnection (i.e., when we navigate
+// to the containing URL, and consider doing a preconnection), and then IFF we
+// learn that we really needed a connection to the subresource, we complete the
+// above algorithm by adding the (1 - W) for each connection we make.
+
+// This is W in the formulas above: the weight given to the OLD expected value.
+// It must be in the range of 0.0 to 1.0; the new value is weighted by (1 - W).
+static const double kWeightingForOldConnectsExpectedValue = 0.66;
+
+// To estimate the expected value of the number of connections that we'll need
+// when a referrer is navigated to, we start with the following low initial
+// value.
+// Each time we do indeed (again) need the subresource, this value will get
+// increased.
+// Each time we navigate to the referrer but never end up needing this
+// subresource, the value will decrease.
+// Very conservative is 0.0, which will mean that we have to wait for a while
+// before doing much speculative activity. We do persist results, so we'll
+// save the asymptotic (correct?) learned answer in the long run.
+// Some browsers blindly make 2 connections all the time, so we'll use that as
+// a starting point.
+static const double kInitialConnectsExpectedValue = 2.0;
+
+// Starts the use count at 1.
+Referrer::Referrer()
+    : use_count_(1) {
+}
+
+// Records that |url| was needed as a subresource of this referrer. A known
+// url simply has its entry bumped; an unknown url gets a new entry, evicting
+// the least useful existing entry first when the list is already full.
+void Referrer::SuggestHost(const GURL& url) {
+  // Cap on the number of tracked subresources. This bounds the damage when we
+  // are wrong about which hostnames are sub-resources (example: some
+  // advertisements link to the ad agency, which then issues a "surprising"
+  // redirect to the advertised entity; that entity then (mistakenly) looks
+  // like a subresource of the page hosting the ad).
+  // TODO(jar): Do experiments to optimize the max count of suggestions.
+  static const size_t kMaxSuggestions = 10;
+
+  // TODO(jar): Is this really needed????
+  if (!url.has_host())
+    return;
+  DCHECK(url == url.GetWithEmptyPath());
+
+  auto existing = find(url);
+  if (existing == end()) {
+    // New subresource: make room if we are at the cap, then add it.
+    if (size() >= kMaxSuggestions) {
+      DeleteLeastUseful();
+      DCHECK(kMaxSuggestions > size());
+    }
+    (*this)[url].SubresourceIsNeeded();
+    return;
+  }
+
+  // Already tracked: just note that it was needed again.
+  existing->second.SubresourceIsNeeded();
+}
+
+// Erases the single entry judged least useful. Entries are ranked primarily by
+// subresource_use_rate() (a lower rate is less useful). Lifetime (age) is only
+// a tie-breaker: among entries with equal rate, the one with the longest
+// lifetime is removed.
+void Referrer::DeleteLeastUseful() {
+  // Best (i.e., least useful) candidate found so far. |least_useful_url| also
+  // serves as the "a candidate has been chosen" flag via has_host().
+  GURL least_useful_url;
+  double lowest_rate_seen = 0.0;
+  int64_t least_useful_lifetime = 0;  // Duration in milliseconds.
+
+  const base::Time kNow(base::Time::Now());  // Avoid multiple calls.
+  for (auto it = begin(); it != end(); ++it) {
+    const int64_t lifetime = (kNow - it->second.birth_time()).InMilliseconds();
+    const double rate = it->second.subresource_use_rate();
+    if (least_useful_url.has_host()) {
+      // A higher rate means more useful than the current candidate: keep it.
+      if (rate > lowest_rate_seen)
+        continue;
+      // Lifetime matters only on a rate tie. A strictly lower rate must always
+      // win, regardless of age; otherwise a less useful but younger entry
+      // could never be selected for deletion.
+      if (rate == lowest_rate_seen && lifetime <= least_useful_lifetime)
+        continue;
+    }
+    least_useful_url = it->first;
+    lowest_rate_seen = rate;
+    least_useful_lifetime = lifetime;
+  }
+  if (least_useful_url.has_host())
+    erase(least_useful_url);
+}
+
+// Rebuilds subresource entries from |value|, which is expected to be a list of
+// alternating (url spec string, use rate double) pairs, as written by
+// Serialize(). A |value| that is not a list is ignored. Reading stops at the
+// first position where a string or a double cannot be read (including the end
+// of the list).
+void Referrer::Deserialize(const base::Value& value) {
+  if (value.type() != base::Value::Type::LIST)
+    return;
+  const base::ListValue* subresource_list(
+      static_cast<const base::ListValue*>(&value));
+  size_t index = 0;  // Bounds checking is done by subresource_list->Get*().
+  while (true) {
+    std::string url_spec;
+    if (!subresource_list->GetString(index++, &url_spec))
+      return;
+    double rate = 0.0;
+    if (!subresource_list->GetDouble(index++, &rate))
+      return;
+
+    GURL url(url_spec);
+    // SuggestHost() returns without adding anything when the url has no host.
+    // Skip such urls here as well, so that the operator[] call below cannot
+    // create an entry that SuggestHost() declined to add (an entry that would
+    // also escape SuggestHost()'s size limit).
+    if (!url.has_host())
+      continue;
+
+    // TODO(jar): We could be more direct, and change birth date or similar to
+    // show that this is a resurrected value we're adding in.  I'm not yet sure
+    // of how best to optimize the learning and pruning (Trim) algorithm at this
+    // level, so for now, we just suggest subresources, which leaves them all
+    // with the same birth date (typically start of process).
+    SuggestHost(url);
+    (*this)[url].SetSubresourceUseRate(rate);
+  }
+}
+
+// Flattens every entry into a list of alternating values: the url's spec
+// string followed by that entry's subresource use rate.
+std::unique_ptr<base::ListValue> Referrer::Serialize() const {
+  std::unique_ptr<base::ListValue> serialized =
+      std::make_unique<base::ListValue>();
+  for (const auto& entry : *this) {
+    serialized->AppendString(entry.first.spec());
+    serialized->AppendDouble(entry.second.subresource_use_rate());
+  }
+  return serialized;
+}
+
+//------------------------------------------------------------------------------
+
+// A new value is stamped with the current time as its birth time, has all of
+// its counters zeroed, and starts with the initial expected use rate
+// (kInitialConnectsExpectedValue).
+ReferrerValue::ReferrerValue()
+    : birth_time_(base::Time::Now()),
+      navigation_count_(0),
+      preconnection_count_(0),
+      preresolution_count_(0),
+      subresource_use_rate_(kInitialConnectsExpectedValue) {}
+
+// Notes that the subresource was actually needed: counts the navigation and
+// adds the (1 - W) share of the smoothing formula to the use rate, where W is
+// kWeightingForOldConnectsExpectedValue.
+void ReferrerValue::SubresourceIsNeeded() {
+  // W must be a valid weight for the formula to make sense.
+  DCHECK_GE(kWeightingForOldConnectsExpectedValue, 0);
+  DCHECK_LE(kWeightingForOldConnectsExpectedValue, 1.0);
+
+  const double new_value_weight = 1 - kWeightingForOldConnectsExpectedValue;
+  subresource_use_rate_ += new_value_weight;
+  ++navigation_count_;
+}
+
+// Decays the use rate by the old-value weight
+// (kWeightingForOldConnectsExpectedValue).
+void ReferrerValue::ReferrerWasObserved() {
+  // Callers should sample subresource_use_rate_ before making this call.
+  // Note: after this call the use rate is temporarily possibly incorrect, as
+  // we still need to find out whether we really end up connecting. That will
+  // happen in a few hundred milliseconds (when content arrives, etc.).
+  subresource_use_rate_ =
+      subresource_use_rate_ * kWeightingForOldConnectsExpectedValue;
+}
+
+} // namespace chrome_browser_net