1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
|
// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/navigation_metrics/navigation_metrics.h"
#include <iterator>
#include <string>
#include "base/feature_list.h"
#include "base/i18n/rtl.h"
#include "base/metrics/histogram_macros.h"
#include "base/strings/string_split.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "components/dom_distiller/core/url_constants.h"
#include "components/profile_metrics/browser_profile_type.h"
#include "components/url_formatter/url_formatter.h"
#include "net/base/registry_controlled_domains/registry_controlled_domain.h"
#include "net/base/url_util.h"
#include "url/gurl.h"
#include "url/url_canon.h"
namespace navigation_metrics {
// Names of the UMA histograms emitted by RecordPrimaryMainFrameNavigation().
//
// Scheme of the navigated URL; recorded on every call.
const char kMainFrameScheme[] = "Navigation.MainFrameScheme2";
// Scheme of the navigated URL; recorded only when the navigation is not
// same-document.
const char kMainFrameSchemeDifferentPage[] =
"Navigation.MainFrameSchemeDifferentPage2";
// Scheme of the navigated URL; recorded only for off-the-record navigations.
const char kMainFrameSchemeOTR[] = "Navigation.MainFrameSchemeOTR2";
// Scheme of the navigated URL; recorded only for off-the-record navigations
// that are not same-document.
const char kMainFrameSchemeDifferentPageOTR[] =
"Navigation.MainFrameSchemeDifferentPageOTR2";
// Whether the Unicode form of the URL's host contains strong RTL characters;
// recorded on every call.
const char kMainFrameHasRTLDomain[] = "Navigation.MainFrameHasRTLDomain2";
// Same as above, but recorded only when the navigation is not same-document.
const char kMainFrameHasRTLDomainDifferentPage[] =
"Navigation.MainFrameHasRTLDomainDifferentPage2";
// Browser profile type passed in by the caller; recorded on every call.
const char kMainFrameProfileType[] = "Navigation.MainFrameProfileType2";
namespace {
// Kill switch for crbug.com/1362507. Disabled by default; when enabled,
// RecordIDNA2008Metrics() returns kNone immediately, before inspecting the
// hostname or recording any histogram.
BASE_FEATURE(kStopRecordingIDNA2008Metrics,
"StopRecordingIDNA2008Metrics",
base::FEATURE_DISABLED_BY_DEFAULT);
// Scheme strings that GetScheme() compares a URL's scheme against. GetScheme()
// casts the index of the matching entry to a Scheme value, so the position of
// each entry is significant and must stay in sync with the Scheme enum.
// NOTE(review): the enum is declared outside this file; presumably index 0
// ("unknown") corresponds to Scheme::UNKNOWN and is never matched because
// GetScheme() starts scanning at Scheme::HTTP — confirm before reordering.
const char* const kSchemeNames[] = {
"unknown",
url::kHttpScheme,
url::kHttpsScheme,
url::kFileScheme,
url::kFtpScheme,
url::kDataScheme,
url::kJavaScriptScheme,
url::kAboutScheme,
"chrome",
url::kBlobScheme,
url::kFileSystemScheme,
"chrome-native",
"chrome-search",
dom_distiller::kDomDistillerScheme,
"devtools",
"chrome-extension",
"view-source",
"externalfile",
"isolated-app",
};
// Compile-time guard: the table must have exactly Scheme::COUNT entries, so
// the build breaks if the table and the enum's COUNT get out of step.
static_assert(std::size(kSchemeNames) == static_cast<int>(Scheme::COUNT),
"kSchemeNames should have Scheme::COUNT elements");
// Extracts the eTLD+1 portion of `hostname16` in its original,
// non-canonicalized form. Private registries are not treated as suffixes, so
// test.blogspot.com yields blogspot.com. Returns `hostname16` unchanged when it
// already is its own eTLD+1 (or no eTLD+1 can be determined), and an empty
// string when the eTLD+1 cannot be reliably located inside `hostname16`.
std::u16string GetEtldPlusOne16(const std::u16string& hostname16) {
  const std::string hostname_utf8 = base::UTF16ToUTF8(hostname16);
  DCHECK(!hostname_utf8.empty());

  const std::string canon_etld_plus_one =
      net::registry_controlled_domains::GetDomainAndRegistry(
          hostname_utf8,
          net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);
  // Either no registrable domain was found (fall back to the whole host) or
  // the host is exactly its eTLD+1; in both cases the input is the answer.
  if (canon_etld_plus_one.empty() || canon_etld_plus_one == hostname_utf8) {
    return hostname16;
  }

  // The canonical eTLD+1 has been normalized and no longer carries deviation
  // characters, so it can only tell us *how many* trailing labels to keep.
  // The labels themselves are taken from `hostname16`.
  const size_t wanted_labels =
      base::ranges::count(canon_etld_plus_one.begin(),
                          canon_etld_plus_one.end(), '.') +
      1;

  // Users may type the alternative label separators recognized by Unicode
  // IDNA (https://www.unicode.org/reports/tr46/#TableDerivationStep1). Map
  // them to "." (U+002E) so that the split below sees every label boundary.
  std::u16string dotted_hostname;
  base::ReplaceChars(hostname16, u"\uff0e\u3002\uff61", u".",
                     &dotted_hostname);

  // Empty labels are kept on purpose: with a trailing dot the canonical form
  // also has a trailing empty label, and the counts must line up. See
  // crbug.com/1362507.
  std::vector<std::u16string> labels =
      base::SplitString(dotted_hostname, u".", base::KEEP_WHITESPACE,
                        base::SPLIT_WANT_ALL);

  // Fewer raw labels than canonical labels means the raw hostname hides
  // separators in some unexpected form (e.g. %-encoded). Not worth handling;
  // give up.
  if (labels.size() < wanted_labels) {
    return std::u16string();
  }

  // Drop the leading labels so that only the last `wanted_labels` remain.
  const size_t leading_labels = labels.size() - wanted_labels;
  labels.erase(labels.begin(), labels.begin() + leading_labels);
  std::u16string candidate = base::JoinString(labels, u".");

  // Sanity check: canonicalizing the extracted text must reproduce the
  // canonical eTLD+1. A mismatch (e.g. mixed "." and "%2e" separators) means
  // the extraction is unreliable, so bail out rather than emit wrong metrics.
  url::CanonHostInfo host_info;
  const std::string recanonicalized =
      net::CanonicalizeHost(base::UTF16ToUTF8(candidate), &host_info);
  if (recanonicalized != canon_etld_plus_one) {
    return std::u16string();
  }
  return candidate;
}
} // namespace
// Maps the scheme of `url` to a Scheme value by scanning kSchemeNames from
// Scheme::HTTP up to (but excluding) Scheme::COUNT. The index of the first
// entry for which url.SchemeIs() is true is returned as a Scheme; if none
// matches, Scheme::UNKNOWN is returned.
Scheme GetScheme(const GURL& url) {
  constexpr int kFirstIndex = static_cast<int>(Scheme::HTTP);
  constexpr int kEndIndex = static_cast<int>(Scheme::COUNT);

  int index = kFirstIndex;
  while (index < kEndIndex && !url.SchemeIs(kSchemeNames[index])) {
    ++index;
  }
  return index < kEndIndex ? static_cast<Scheme>(index) : Scheme::UNKNOWN;
}
void RecordPrimaryMainFrameNavigation(
const GURL& url,
bool is_same_document,
bool is_off_the_record,
profile_metrics::BrowserProfileType profile_type) {
Scheme scheme = GetScheme(url);
UMA_HISTOGRAM_ENUMERATION(kMainFrameScheme, scheme, Scheme::COUNT);
if (!is_same_document) {
UMA_HISTOGRAM_ENUMERATION("Navigation.MainFrameSchemeDifferentPage2",
scheme, Scheme::COUNT);
UMA_HISTOGRAM_BOOLEAN("Navigation.MainFrameHasRTLDomainDifferentPage2",
base::i18n::StringContainsStrongRTLChars(
url_formatter::IDNToUnicode(url.host())));
}
UMA_HISTOGRAM_BOOLEAN("Navigation.MainFrameHasRTLDomain2",
base::i18n::StringContainsStrongRTLChars(
url_formatter::IDNToUnicode(url.host())));
if (is_off_the_record) {
UMA_HISTOGRAM_ENUMERATION("Navigation.MainFrameSchemeOTR2", scheme,
Scheme::COUNT);
if (!is_same_document) {
UMA_HISTOGRAM_ENUMERATION("Navigation.MainFrameSchemeDifferentPageOTR2",
scheme, Scheme::COUNT);
}
}
UMA_HISTOGRAM_ENUMERATION("Navigation.MainFrameProfileType2", profile_type);
}
void RecordOmniboxURLNavigation(const GURL& url) {
UMA_HISTOGRAM_ENUMERATION("Omnibox.URLNavigationScheme", GetScheme(url),
Scheme::COUNT);
}
// Determines which IDNA 2008 deviation character, if any, appears in the
// eTLD+1 of `hostname16`, records whether one was found to
// "Navigation.HostnameHasDeviationCharacters", and returns it.
//
// Returns kNone without recording anything when the
// kStopRecordingIDNA2008Metrics feature is enabled, when `hostname16` is
// empty, when net::IsHostnameNonUnique() is true for the hostname, or when no
// eTLD+1 could be extracted.
IDNA2008DeviationCharacter RecordIDNA2008Metrics(
    const std::u16string& hostname16) {
  // Evaluated left to right with short-circuiting: the kill switch is
  // consulted first, and the hostname is only converted if it is non-empty.
  const bool skip_recording =
      base::FeatureList::IsEnabled(kStopRecordingIDNA2008Metrics) ||
      hostname16.empty() ||
      net::IsHostnameNonUnique(base::UTF16ToUTF8(hostname16));
  if (skip_recording) {
    return IDNA2008DeviationCharacter::kNone;
  }

  const std::u16string registrable_domain = GetEtldPlusOne16(hostname16);
  if (registrable_domain.empty()) {
    return IDNA2008DeviationCharacter::kNone;
  }

  const IDNA2008DeviationCharacter deviation =
      url_formatter::GetDeviationCharacter(registrable_domain);
  const bool has_deviation = deviation != IDNA2008DeviationCharacter::kNone;
  UMA_HISTOGRAM_BOOLEAN("Navigation.HostnameHasDeviationCharacters",
                        has_deviation);
  return deviation;
}
} // namespace navigation_metrics
|