diff options
author | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2020-01-20 13:40:20 +0100 |
---|---|---|
committer | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2020-01-22 12:41:23 +0000 |
commit | 7961cea6d1041e3e454dae6a1da660b453efd238 (patch) | |
tree | c0eeb4a9ff9ba32986289c1653d9608e53ccb444 /chromium/components/feedback | |
parent | b7034d0803538058e5c9d904ef03cf5eab34f6ef (diff) | |
download | qtwebengine-chromium-7961cea6d1041e3e454dae6a1da660b453efd238.tar.gz |
BASELINE: Update Chromium to 78.0.3904.130
Change-Id: If185e0c0061b3437531c97c9c8c78f239352a68b
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
Diffstat (limited to 'chromium/components/feedback')
13 files changed, 613 insertions, 303 deletions
diff --git a/chromium/components/feedback/anonymizer_tool.cc b/chromium/components/feedback/anonymizer_tool.cc index 4f77281552d..ed0974bb782 100644 --- a/chromium/components/feedback/anonymizer_tool.cc +++ b/chromium/components/feedback/anonymizer_tool.cc @@ -7,6 +7,7 @@ #include <memory> #include <utility> +#include "base/files/file_path.h" #include "base/strings/strcat.h" #include "base/strings/string_number_conversions.h" #include "base/strings/string_util.h" @@ -43,24 +44,31 @@ namespace { // (?i) turns on case insensitivity for the remainder of the regex. // (?-s) turns off "dot matches newline" for the remainder of the regex. // (?:regex) denotes non-capturing parentheses group. -constexpr const char* kCustomPatternsWithContext[] = { +CustomPatternWithAlias kCustomPatternsWithContext[] = { // ModemManager - "(\\bCell ID: ')([0-9a-fA-F]+)(')", - "(\\bLocation area code: ')([0-9a-fA-F]+)(')", + {"CellID", "(\\bCell ID: ')([0-9a-fA-F]+)(')"}, + {"LocAC", "(\\bLocation area code: ')([0-9a-fA-F]+)(')"}, // wpa_supplicant - "(?i-s)(\\bssid[= ]')(.+)(')", - "(?-s)(\\bSSID - hexdump\\(len=[0-9]+\\): )(.+)()", + {"SSID", "(?i-s)(\\bssid[= ]')(.+)(')"}, + {"SSIDHex", "(?-s)(\\bSSID - hexdump\\(len=[0-9]+\\): )(.+)()"}, // shill - "(?-s)(\\[SSID=)(.+?)(\\])", - - // Serial numbers - "(?i-s)(serial\\s*(?:number)?\\s*[:=]\\s*)([0-9a-zA-Z\\-\"]+)()", + {"SSID", "(?-s)(\\[SSID=)(.+?)(\\])"}, + + // Serial numbers. The actual serial number itself can include any alphanum + // char as well as dashes, periods, colons, slashes and unprintable ASCII + // chars (except newline). The second one is for a special case in + // edid-decode, where if we genericized it further then we would catch too + // many other cases that we don't want to anonymize. + {"Serial", + "(?i-s)(\\bserial\\s*_?(?:number)?['\"]?\\s*[:=]\\s*['\"]?)" + "([0-9a-zA-Z\\-.:\\/\\\\\\x00-\\x09\\x0B-\\x1F]+)(\\b)"}, + {"Serial", "( Serial Number )(\\d+)(\\b)"}, // GAIA IDs - R"xxx((\"?\bgaia_id\"?[=:]['\"])(\d+)(\b['\"]))xxx", - R"xxx((\{id: )(\d+)(, email:))xxx", + {"GAIA", R"xxx((\"?\bgaia_id\"?[=:]['\"])(\d+)(\b['\"]))xxx"}, + {"GAIA", R"xxx((\{id: )(\d+)(, email:))xxx"}, }; bool MaybeUnmapAddress(net::IPAddress* addr) { @@ -291,7 +299,7 @@ std::string MaybeScrubIPAddress(const std::string& addr) { // The |kCustomPatternWithoutContext| array defines further patterns to match // and anonymize. Each pattern consists of a single capturing group. -CustomPatternWithoutContext kCustomPatternsWithoutContext[] = { +CustomPatternWithAlias kCustomPatternsWithoutContext[] = { {"URL", "(?i)(" IRI ")"}, // Email Addresses need to come after URLs because they can be part // of a query parameter. @@ -351,14 +359,22 @@ bool FindAndConsumeAndGetSkipped(re2::StringPiece* input, base::size(args)); } +// The following MAC addresses will not be anonymized as they are not specific +// to a device but have general meanings. +const char* const kNonAnonymizedMacAddresses[] = { + "00:00:00:00:00:00", // ARP failure result MAC. + "ff:ff:ff:ff:ff:ff", // Broadcast MAC. +}; +constexpr size_t kNumNonAnonymizedMacs = base::size(kNonAnonymizedMacAddresses); + } // namespace AnonymizerTool::AnonymizerTool(const char* const* first_party_extension_ids) - : first_party_extension_ids_(first_party_extension_ids), - custom_patterns_with_context_(base::size(kCustomPatternsWithContext)), - custom_patterns_without_context_( - base::size(kCustomPatternsWithoutContext)) { + : first_party_extension_ids_(first_party_extension_ids) { DETACH_FROM_SEQUENCE(sequence_checker_); + // Identity-map these, so we don't mangle them. + for (const char* mac : kNonAnonymizedMacAddresses) + mac_addresses_[mac] = mac; } AnonymizerTool::~AnonymizerTool() { @@ -371,7 +387,11 @@ std::string AnonymizerTool::Anonymize(const std::string& input) { << "This is an expensive operation. Do not execute this on the UI " "thread."; std::string anonymized = AnonymizeMACAddresses(input); + anonymized = AnonymizeAndroidAppStoragePaths(std::move(anonymized)); anonymized = AnonymizeCustomPatterns(std::move(anonymized)); + // Do hashes last since they may appear in URLs and they also prevent us from + // properly recognizing the Android storage paths. + anonymized = AnonymizeHashes(std::move(anonymized)); return anonymized; } @@ -391,14 +411,15 @@ RE2* AnonymizerTool::GetRegExp(const std::string& pattern) { std::string AnonymizerTool::AnonymizeMACAddresses(const std::string& input) { // This regular expression finds the next MAC address. It splits the data into // an OUI (Organizationally Unique Identifier) part and a NIC (Network - // Interface Controller) specific part. + // Interface Controller) specific part. We also match on dash and underscore + // because we have seen instances of both of those occurring. RE2* mac_re = GetRegExp( - "([0-9a-fA-F][0-9a-fA-F]:" - "[0-9a-fA-F][0-9a-fA-F]:" - "[0-9a-fA-F][0-9a-fA-F]):(" - "[0-9a-fA-F][0-9a-fA-F]:" - "[0-9a-fA-F][0-9a-fA-F]:" + "([0-9a-fA-F][0-9a-fA-F][:\\-_]" + "[0-9a-fA-F][0-9a-fA-F][:\\-_]" + "[0-9a-fA-F][0-9a-fA-F])[:\\-_](" + "[0-9a-fA-F][0-9a-fA-F][:\\-_]" + "[0-9a-fA-F][0-9a-fA-F][:\\-_]" "[0-9a-fA-F][0-9a-fA-F])"); std::string result; @@ -406,21 +427,23 @@ std::string AnonymizerTool::AnonymizeMACAddresses(const std::string& input) { // Keep consuming, building up a result string as we go. re2::StringPiece text(input); - re2::StringPiece skipped; - re2::StringPiece pre_mac, oui, nic; + re2::StringPiece skipped, oui, nic; + static const char kMacSeparatorChars[] = "-_"; while (FindAndConsumeAndGetSkipped(&text, *mac_re, &skipped, &oui, &nic)) { - // Look up the MAC address in the hash. + // Look up the MAC address in the hash. Force the separator to be a colon + // so that the same MAC with a different format will match in all cases. std::string oui_string = base::ToLowerASCII(oui.as_string()); + base::ReplaceChars(oui_string, kMacSeparatorChars, ":", &oui_string); std::string nic_string = base::ToLowerASCII(nic.as_string()); + base::ReplaceChars(nic_string, kMacSeparatorChars, ":", &nic_string); std::string mac = oui_string + ":" + nic_string; std::string replacement_mac = mac_addresses_[mac]; if (replacement_mac.empty()) { // If not found, build up a replacement MAC address by generating a new // NIC part. - int mac_id = mac_addresses_.size(); - replacement_mac = base::StringPrintf( - "%s:%02x:%02x:%02x", oui_string.c_str(), (mac_id & 0x00ff0000) >> 16, - (mac_id & 0x0000ff00) >> 8, (mac_id & 0x000000ff)); + int mac_id = mac_addresses_.size() - kNumNonAnonymizedMacs; + replacement_mac = base::StringPrintf("[MAC OUI=%s IFACE=%d]", + oui_string.c_str(), mac_id); mac_addresses_[mac] = replacement_mac; } @@ -432,26 +455,133 @@ std::string AnonymizerTool::AnonymizeMACAddresses(const std::string& input) { return result; } +std::string AnonymizerTool::AnonymizeHashes(const std::string& input) { + // This will match hexadecimal strings from length 32 to 64 that have a word + // boundary at each end. We then check to make sure they are one of our valid + // hash lengths before replacing. + // NOTE: There are some occurrences in the dump data (specifically modetest) + // where relevant data is formatted with 32 hex chars on a line. In this case, + // it is preceded by at least 3 whitespace chars, so check for that and in + // that case do not redact. + RE2* hash_re = GetRegExp(R"((\s*)\b([0-9a-fA-F]{4})([0-9a-fA-F]{28,60})\b)"); + + std::string result; + result.reserve(input.size()); + + // Keep consuming, building up a result string as we go. + re2::StringPiece text(input); + re2::StringPiece skipped, pre_whitespace, hash_prefix, hash_suffix; + while (FindAndConsumeAndGetSkipped(&text, *hash_re, &skipped, &pre_whitespace, + &hash_prefix, &hash_suffix)) { + skipped.AppendToString(&result); + pre_whitespace.AppendToString(&result); + + // Check if it's a valid length for our hashes or if we need to skip due to + // the whitespace check. + size_t hash_length = 4 + hash_suffix.length(); + if ((hash_length != 32 && hash_length != 40 && hash_length != 64) || + (hash_length == 32 && pre_whitespace.length() >= 3)) { + // This is not a hash string, skip it. + hash_prefix.AppendToString(&result); + hash_suffix.AppendToString(&result); + continue; + } + + // Look up the hash value address in the map of replacements. + std::string hash_prefix_string = + base::ToLowerASCII(hash_prefix.as_string()); + std::string hash = + hash_prefix_string + base::ToLowerASCII(hash_suffix.as_string()); + std::string replacement_hash = hashes_[hash]; + if (replacement_hash.empty()) { + // If not found, build up a replacement value. + replacement_hash = base::StringPrintf( + "<HASH:%s %zd>", hash_prefix_string.c_str(), hashes_.size()); + hashes_[hash] = replacement_hash; + } + + result += replacement_hash; + } + + text.AppendToString(&result); + return result; +} + +std::string AnonymizerTool::AnonymizeAndroidAppStoragePaths( + const std::string& input) { + // We only use this on Chrome OS and there's differences in the API for + // FilePath on Windows which prevents this from compiling, so only enable this + // code for Chrome OS. +#if defined(OS_CHROMEOS) + std::string result; + result.reserve(input.size()); + + // This is for anonymizing 'android_app_storage' output. When the path starts + // either /home/root/<hash>/data/data/<package_name>/ or + // /home/root/<hash>/data/user_de/<number>/<package_name>/, this function will + // anonymize path components following <package_name>/. + RE2* path_re = GetRegExp( + "(?m)(\\t/home/root/[\\da-f]+/android-data/data/" + "(data|user_de/\\d+)/[^/\\n]+)(" + "/[^\\n]+)"); + + // Keep consuming, building up a result string as we go. + re2::StringPiece text(input); + re2::StringPiece skipped, path_prefix, ignored, app_specific; + while (FindAndConsumeAndGetSkipped(&text, *path_re, &skipped, &path_prefix, + &ignored, &app_specific)) { + // We can record these parts as-is. + skipped.AppendToString(&result); + path_prefix.AppendToString(&result); + + // |app_specific| has to be anonymized. First, convert it into components, + // and then anonymize each component as follows: + // - If the component has a non-ASCII character, change it to '*'. + // - Otherwise, remove all the characters in the component but the first + // one. + // - If the original component has 2 or more bytes, add '_'. + const base::FilePath path(app_specific.as_string()); + std::vector<std::string> components; + path.GetComponents(&components); + DCHECK(!components.empty()); + + auto it = components.begin() + 1; // ignore the leading slash + for (; it != components.end(); ++it) { + const auto& component = *it; + DCHECK(!component.empty()); + result += '/'; + result += (base::IsStringASCII(component) ? component[0] : '*'); + if (component.length() > 1) + result += '_'; + } + } + + text.AppendToString(&result); + return result; +#else + return input; +#endif // defined(OS_CHROMEOS) +} + std::string AnonymizerTool::AnonymizeCustomPatterns(std::string input) { for (size_t i = 0; i < base::size(kCustomPatternsWithContext); i++) { input = - AnonymizeCustomPatternWithContext(input, kCustomPatternsWithContext[i], - &custom_patterns_with_context_[i]); + AnonymizeCustomPatternWithContext(input, kCustomPatternsWithContext[i]); } for (size_t i = 0; i < base::size(kCustomPatternsWithoutContext); i++) { input = AnonymizeCustomPatternWithoutContext( - input, kCustomPatternsWithoutContext[i], - &custom_patterns_without_context_[i]); + input, kCustomPatternsWithoutContext[i]); } return input; } std::string AnonymizerTool::AnonymizeCustomPatternWithContext( const std::string& input, - const std::string& pattern, - std::map<std::string, std::string>* identifier_space) { - RE2* re = GetRegExp(pattern); + const CustomPatternWithAlias& pattern) { + RE2* re = GetRegExp(pattern.pattern); DCHECK_EQ(3, re->NumberOfCapturingGroups()); + std::map<std::string, std::string>* identifier_space = + &custom_patterns_with_context_[pattern.alias]; std::string result; result.reserve(input.size()); @@ -465,7 +595,11 @@ std::string AnonymizerTool::AnonymizeCustomPatternWithContext( std::string matched_id_as_string = matched_id.as_string(); std::string replacement_id = (*identifier_space)[matched_id_as_string]; if (replacement_id.empty()) { - replacement_id = base::NumberToString(identifier_space->size()); + // The weird NumberToString trick is because Windows does not like + // to deal with %zu and a size_t in printf, nor does it support %llu. + replacement_id = base::StringPrintf( + "<%s: %s>", pattern.alias, + base::NumberToString(identifier_space->size()).c_str()); (*identifier_space)[matched_id_as_string] = replacement_id; } @@ -525,11 +659,13 @@ bool IsUrlWhitelisted(re2::StringPiece url, std::string AnonymizerTool::AnonymizeCustomPatternWithoutContext( const std::string& input, - const CustomPatternWithoutContext& pattern, - std::map<std::string, std::string>* identifier_space) { + const CustomPatternWithAlias& pattern) { RE2* re = GetRegExp(pattern.pattern); DCHECK_EQ(1, re->NumberOfCapturingGroups()); + std::map<std::string, std::string>* identifier_space = + &custom_patterns_without_context_[pattern.alias]; + std::string result; result.reserve(input.size()); @@ -548,7 +684,7 @@ std::string AnonymizerTool::AnonymizeCustomPatternWithoutContext( if (replacement_id.empty()) { replacement_id = MaybeScrubIPAddress(matched_id_as_string); if (replacement_id != matched_id_as_string) { - // The weird Uint64toString trick is because Windows does not like + // The weird NumberToString trick is because Windows does not like // to deal with %zu and a size_t in printf, nor does it support %llu. replacement_id = base::StringPrintf( "<%s: %s>", diff --git a/chromium/components/feedback/anonymizer_tool.h b/chromium/components/feedback/anonymizer_tool.h index 9aea4bab5e8..44bfcf46df0 100644 --- a/chromium/components/feedback/anonymizer_tool.h +++ b/chromium/components/feedback/anonymizer_tool.h @@ -21,12 +21,12 @@ class RE2; namespace feedback { -struct CustomPatternWithoutContext { +struct CustomPatternWithAlias { // A string literal used in anonymized tests. Matches to the |pattern| are // replaced with <|alias|: 1>, <|alias|: 2>, ... const char* alias; - // A RE2 regexp with exactly one capture group. Matches will be replaced by - // the alias reference described above. + // A RE2 regexp used in the replacing logic. Matches will be replaced by the + // alias reference described above. const char* pattern; }; @@ -50,32 +50,42 @@ class AnonymizerTool { re2::RE2* GetRegExp(const std::string& pattern); std::string AnonymizeMACAddresses(const std::string& input); + std::string AnonymizeAndroidAppStoragePaths(const std::string& input); + std::string AnonymizeHashes(const std::string& input); std::string AnonymizeCustomPatterns(std::string input); std::string AnonymizeCustomPatternWithContext( const std::string& input, - const std::string& pattern, - std::map<std::string, std::string>* identifier_space); + const CustomPatternWithAlias& pattern); std::string AnonymizeCustomPatternWithoutContext( const std::string& input, - const CustomPatternWithoutContext& pattern, - std::map<std::string, std::string>* identifier_space); + const CustomPatternWithAlias& pattern); // Null-terminated list of first party extension IDs. We need to have this // passed into us because we can't refer to the code where these are defined. const char* const* first_party_extension_ids_; // Not owned. // Map of MAC addresses discovered in anonymized strings to anonymized - // representations. 11:22:33:44:55:66 gets anonymized to 11:22:33:00:00:01, - // where the first three bytes represent the manufacturer. The last three - // bytes are used to distinguish different MAC addresses and are incremented - // for each newly discovered MAC address. + // representations. 11:22:33:44:55:66 gets anonymized to + // [MAC OUI=11:22:33 IFACE=1], where the first three bytes (OUI) represent the + // manufacturer. The IFACE value is incremented for each newly discovered MAC + // address. std::map<std::string, std::string> mac_addresses_; + // Map of hashes discovered in anonymized strings to anonymized + // representations. Hexadecimal strings of length 32, 40 and 64 are considered + // to be hashes. 11223344556677889900aabbccddeeff gets anonymized to + // <HASH:1122 1> where the first 2 bytes of the hash are retained as-is and + // the value after that is incremented for each newly discovered hash. + std::map<std::string, std::string> hashes_; + // Like mac addresses, identifiers in custom patterns are anonymized. - // custom_patterns_with_context_[i] contains a map of original identifier to - // anonymized identifier for custom pattern number i. - std::vector<std::map<std::string, std::string>> custom_patterns_with_context_; - std::vector<std::map<std::string, std::string>> + // custom_patterns_with_context_["alias"] contains a map of original + // identifier to anonymized identifier for custom pattern with the given + // "alias". We key on alias to allow different patterns to use the same + // replacement maps. + std::map<std::string, std::map<std::string, std::string>> + custom_patterns_with_context_; + std::map<std::string, std::map<std::string, std::string>> custom_patterns_without_context_; // Cache to prevent the repeated compilation of the same regular expression diff --git a/chromium/components/feedback/anonymizer_tool_unittest.cc b/chromium/components/feedback/anonymizer_tool_unittest.cc index 7a8faecb918..853338ddfe3 100644 --- a/chromium/components/feedback/anonymizer_tool_unittest.cc +++ b/chromium/components/feedback/anonymizer_tool_unittest.cc @@ -19,23 +19,28 @@ class AnonymizerToolTest : public testing::Test { return anonymizer_.AnonymizeMACAddresses(input); } + std::string AnonymizeHashes(const std::string& input) { + return anonymizer_.AnonymizeHashes(input); + } + + std::string AnonymizeAndroidAppStoragePaths(const std::string& input) { + return anonymizer_.AnonymizeAndroidAppStoragePaths(input); + } + std::string AnonymizeCustomPatterns(const std::string& input) { return anonymizer_.AnonymizeCustomPatterns(input); } std::string AnonymizeCustomPatternWithContext( const std::string& input, - const std::string& pattern, - std::map<std::string, std::string>* space) { - return anonymizer_.AnonymizeCustomPatternWithContext(input, pattern, space); + const CustomPatternWithAlias& pattern) { + return anonymizer_.AnonymizeCustomPatternWithContext(input, pattern); } std::string AnonymizeCustomPatternWithoutContext( const std::string& input, - const CustomPatternWithoutContext& pattern, - std::map<std::string, std::string>* space) { - return anonymizer_.AnonymizeCustomPatternWithoutContext(input, pattern, - space); + const CustomPatternWithAlias& pattern) { + return anonymizer_.AnonymizeCustomPatternWithoutContext(input, pattern); } AnonymizerTool anonymizer_{kFakeFirstPartyExtensionIDs}; @@ -46,10 +51,16 @@ TEST_F(AnonymizerToolTest, Anonymize) { EXPECT_EQ("foo\nbar\n", anonymizer_.Anonymize("foo\nbar\n")); // Make sure MAC address anonymization is invoked. - EXPECT_EQ("02:46:8a:00:00:01", anonymizer_.Anonymize("02:46:8a:ce:13:57")); + EXPECT_EQ("[MAC OUI=02:46:8a IFACE=1]", + anonymizer_.Anonymize("02:46:8a:ce:13:57")); + + // Make sure hash anonymization is invoked. + EXPECT_EQ("<HASH:1122 1>", + anonymizer_.Anonymize("11223344556677889900AABBCCDDEEFF")); // Make sure custom pattern anonymization is invoked. - EXPECT_EQ("Cell ID: '1'", AnonymizeCustomPatterns("Cell ID: 'A1B2'")); + EXPECT_EQ("Cell ID: '<CellID: 1>'", + AnonymizeCustomPatterns("Cell ID: 'A1B2'")); // Make sure UUIDs are anonymized. EXPECT_EQ( @@ -72,61 +83,129 @@ TEST_F(AnonymizerToolTest, AnonymizeMACAddresses) { EXPECT_EQ("", AnonymizeMACAddresses("")); EXPECT_EQ("foo\nbar\n", AnonymizeMACAddresses("foo\nbar\n")); EXPECT_EQ("11:22:33:44:55", AnonymizeMACAddresses("11:22:33:44:55")); - EXPECT_EQ("aa:bb:cc:00:00:01", AnonymizeMACAddresses("aa:bb:cc:dd:ee:ff")); + EXPECT_EQ("[MAC OUI=aa:bb:cc IFACE=1]", + AnonymizeMACAddresses("aa:bb:cc:dd:ee:ff")); + EXPECT_EQ("[MAC OUI=aa:bb:cc IFACE=1]", + AnonymizeMACAddresses("aa_bb_cc_dd_ee_ff")); + EXPECT_EQ("[MAC OUI=aa:bb:cc IFACE=1]", + AnonymizeMACAddresses("aa-bb-cc-dd-ee-ff")); + EXPECT_EQ("00:00:00:00:00:00", AnonymizeMACAddresses("00:00:00:00:00:00")); + EXPECT_EQ("ff:ff:ff:ff:ff:ff", AnonymizeMACAddresses("ff:ff:ff:ff:ff:ff")); EXPECT_EQ( - "BSSID: aa:bb:cc:00:00:01 in the middle\n" - "bb:cc:dd:00:00:02 start of line\n" - "end of line aa:bb:cc:00:00:01\n" + "BSSID: [MAC OUI=aa:bb:cc IFACE=1] in the middle\n" + "[MAC OUI=bb:cc:dd IFACE=2] start of line\n" + "end of line [MAC OUI=aa:bb:cc IFACE=1]\n" "no match across lines aa:bb:cc:\n" "dd:ee:ff two on the same line:\n" - "x bb:cc:dd:00:00:02 cc:dd:ee:00:00:03 x\n", + "x [MAC OUI=bb:cc:dd IFACE=2] [MAC OUI=cc:dd:ee IFACE=3] x\n", AnonymizeMACAddresses("BSSID: aa:bb:cc:dd:ee:ff in the middle\n" "bb:cc:dd:ee:ff:00 start of line\n" "end of line aa:bb:cc:dd:ee:ff\n" "no match across lines aa:bb:cc:\n" "dd:ee:ff two on the same line:\n" "x bb:cc:dd:ee:ff:00 cc:dd:ee:ff:00:11 x\n")); - EXPECT_EQ("Remember bb:cc:dd:00:00:02?", + EXPECT_EQ("Remember [MAC OUI=bb:cc:dd IFACE=2]?", AnonymizeMACAddresses("Remember bB:Cc:DD:ee:ff:00?")); } +TEST_F(AnonymizerToolTest, AnonymizeHashes) { + EXPECT_EQ("", AnonymizeHashes("")); + EXPECT_EQ("foo\nbar\n", AnonymizeHashes("foo\nbar\n")); + // Too short. + EXPECT_EQ("11223344556677889900aabbccddee", + AnonymizeHashes("11223344556677889900aabbccddee")); + // Not the right length. + EXPECT_EQ("11223344556677889900aabbccddeeff1122", + AnonymizeHashes("11223344556677889900aabbccddeeff1122")); + // Too long. + EXPECT_EQ( + "11223344556677889900aabbccddeeff11223344556677889900aabbccddeeff11", + AnonymizeHashes("11223344556677889900aabbccddeeff11223344556677889900aabb" + "ccddeeff11")); + // Test all 3 valid lengths. + EXPECT_EQ("<HASH:aabb 1>", + AnonymizeHashes("aabbccddeeff00112233445566778899")); + EXPECT_EQ("<HASH:aabb 2>", + AnonymizeHashes("aabbccddeeff00112233445566778899aabbccdd")); + EXPECT_EQ( + "<HASH:9988 3>", + AnonymizeHashes( + "99887766554433221100ffeeddccbbaaaabbccddeeff00112233445566778899")); + // Skip 32 byte hashes that have a at least 3 whitespace chars before it. + EXPECT_EQ(" <HASH:aabb 1>", + AnonymizeHashes(" aabbccddeeff00112233445566778899")); + EXPECT_EQ(" aabbccddeeff00112233445566778899", + AnonymizeHashes(" aabbccddeeff00112233445566778899")); + // Multiline test. + EXPECT_EQ( + "Hash value=<HASH:aabb 1>, should be replaced as\n" + "well as /<HASH:aabb 1>/ and mixed case of\n" + "<HASH:aabb 1> but we don't go across lines\n" + "aabbccddeeff\n00112233445566778899 but allow multiple on a line " + "<HASH:aabb 4>-" + "<HASH:0011 5>\n", + AnonymizeHashes( + "Hash value=aabbccddeeff00112233445566778899, should be replaced as\n" + "well as /aabbccddeeff00112233445566778899/ and mixed case of\n" + "AaBbCCddEeFf00112233445566778899 but we don't go across lines\n" + "aabbccddeeff\n00112233445566778899 but allow multiple on a line " + "aabbccddeeffaabbccddeeffaabbccddeeffaabb-" + "00112233445566778899aabbccddeeff\n")); +} + TEST_F(AnonymizerToolTest, AnonymizeCustomPatterns) { EXPECT_EQ("", AnonymizeCustomPatterns("")); - EXPECT_EQ("Cell ID: '1'", AnonymizeCustomPatterns("Cell ID: 'A1B2'")); - EXPECT_EQ("Cell ID: '2'", AnonymizeCustomPatterns("Cell ID: 'C1D2'")); - EXPECT_EQ("foo Cell ID: '1' bar", + EXPECT_EQ("Cell ID: '<CellID: 1>'", + AnonymizeCustomPatterns("Cell ID: 'A1B2'")); + EXPECT_EQ("Cell ID: '<CellID: 2>'", + AnonymizeCustomPatterns("Cell ID: 'C1D2'")); + EXPECT_EQ("foo Cell ID: '<CellID: 1>' bar", AnonymizeCustomPatterns("foo Cell ID: 'A1B2' bar")); - EXPECT_EQ("foo Location area code: '1' bar", + EXPECT_EQ("foo Location area code: '<LocAC: 1>' bar", AnonymizeCustomPatterns("foo Location area code: 'A1B2' bar")); - EXPECT_EQ("foo\na SSID='1' b\n'", + EXPECT_EQ("foo\na SSID='<SSID: 1>' b\n'", AnonymizeCustomPatterns("foo\na SSID='Joe's' b\n'")); - EXPECT_EQ("ssid '2'", AnonymizeCustomPatterns("ssid 'My AP'")); + EXPECT_EQ("ssid '<SSID: 2>'", AnonymizeCustomPatterns("ssid 'My AP'")); EXPECT_EQ("bssid 'aa:bb'", AnonymizeCustomPatterns("bssid 'aa:bb'")); - EXPECT_EQ("Scan SSID - hexdump(len=6): 1\nfoo", + EXPECT_EQ("Scan SSID - hexdump(len=6): <SSIDHex: 1>\nfoo", AnonymizeCustomPatterns( "Scan SSID - hexdump(len=6): 47 6f 6f 67 6c 65\nfoo")); - EXPECT_EQ( - "a\nb [SSID=1] [SSID=2] [SSID=foo\nbar] b", - AnonymizeCustomPatterns("a\nb [SSID=foo] [SSID=bar] [SSID=foo\nbar] b")); + EXPECT_EQ("a\nb [SSID=<SSID: 3>] [SSID=<SSID: 1>] [SSID=foo\nbar] b", + AnonymizeCustomPatterns( + "a\nb [SSID=foo] [SSID=Joe's] [SSID=foo\nbar] b")); - EXPECT_EQ("SerialNumber: 1", + EXPECT_EQ("SerialNumber: <Serial: 1>", AnonymizeCustomPatterns("SerialNumber: 1217D7EF")); - EXPECT_EQ("serial number: 2", + EXPECT_EQ("serial number: <Serial: 2>", AnonymizeCustomPatterns("serial number: 50C971FEE7F3x010900")); - EXPECT_EQ("SerialNumber: 3", + EXPECT_EQ("SerialNumber: <Serial: 3>", AnonymizeCustomPatterns("SerialNumber: EVT23-17BA01-004")); - EXPECT_EQ("serial=4", AnonymizeCustomPatterns("serial=\"1234AA5678\"")); + EXPECT_EQ("serial=\"<Serial: 4>\"", + AnonymizeCustomPatterns("serial=\"1234AA5678\"")); + EXPECT_EQ("\"serial_number\"=\"<Serial: 1>\"", + AnonymizeCustomPatterns("\"serial_number\"=\"1217D7EF\"")); + EXPECT_EQ("SerialNumber: <Serial: 5>", + AnonymizeCustomPatterns("SerialNumber: 5:00:14.0")); + EXPECT_EQ("Serial: <Serial: 6>", + AnonymizeCustomPatterns("Serial: ABCEFG\x01kjmn-as:342/234\\432")); + // Don't overly anonymize serial numbers, we only do this for a specific + // formatting case for edid-decode. + EXPECT_EQ("Foo serial number 123", + AnonymizeCustomPatterns("Foo serial number 123")); + EXPECT_EQ("Foo Serial Number <Serial: 7>", + AnonymizeCustomPatterns("Foo Serial Number 123")); - EXPECT_EQ("\"gaia_id\":\"1\"", + EXPECT_EQ("\"gaia_id\":\"<GAIA: 1>\"", AnonymizeCustomPatterns("\"gaia_id\":\"1234567890\"")); - EXPECT_EQ("gaia_id='2'", AnonymizeCustomPatterns("gaia_id='987654321'")); - EXPECT_EQ("{id: 1, email:", - AnonymizeCustomPatterns("{id: 123454321, email:")); + EXPECT_EQ("gaia_id='<GAIA: 2>'", + AnonymizeCustomPatterns("gaia_id='987654321'")); + EXPECT_EQ("{id: <GAIA: 1>, email:", + AnonymizeCustomPatterns("{id: 1234567890, email:")); EXPECT_EQ("<email: 1>", AnonymizeCustomPatterns("foo@bar.com")); @@ -145,8 +224,9 @@ TEST_F(AnonymizerToolTest, AnonymizeCustomPatterns) { EXPECT_EQ("<URL: 1>", AnonymizeCustomPatterns("http://example.com/foo?test=1")); - EXPECT_EQ("Foo <URL: 2> Bar", - AnonymizeCustomPatterns("Foo http://192.168.0.1/foo?test=1#123 Bar")); + EXPECT_EQ( + "Foo <URL: 2> Bar", + AnonymizeCustomPatterns("Foo http://192.168.0.1/foo?test=1#123 Bar")); const char* kURLs[] = { "http://example.com/foo?test=1", "http://userid:password@example.com:8080", @@ -177,38 +257,38 @@ TEST_F(AnonymizerToolTest, AnonymizeCustomPatterns) { } TEST_F(AnonymizerToolTest, AnonymizeCustomPatternWithContext) { - const char kPattern[] = "(\\b(?i)id:? ')(\\d+)(')"; - std::map<std::string, std::string> space; - EXPECT_EQ("", AnonymizeCustomPatternWithContext("", kPattern, &space)); + const CustomPatternWithAlias kPattern1 = {"ID", "(\\b(?i)id:? ')(\\d+)(')"}; + const CustomPatternWithAlias kPattern2 = {"ID", "(\\b(?i)id=')(\\d+)(')"}; + const CustomPatternWithAlias kPattern3 = {"IDG", "(\\b(?i)idg=')(\\d+)(')"}; + EXPECT_EQ("", AnonymizeCustomPatternWithContext("", kPattern1)); EXPECT_EQ("foo\nbar\n", - AnonymizeCustomPatternWithContext("foo\nbar\n", kPattern, &space)); - EXPECT_EQ("id '1'", - AnonymizeCustomPatternWithContext("id '2345'", kPattern, &space)); - EXPECT_EQ("id '2'", - AnonymizeCustomPatternWithContext("id '1234'", kPattern, &space)); - EXPECT_EQ("id: '2'", - AnonymizeCustomPatternWithContext("id: '1234'", kPattern, &space)); - EXPECT_EQ("ID: '1'", - AnonymizeCustomPatternWithContext("ID: '2345'", kPattern, &space)); - EXPECT_EQ("x1 id '1' 1x id '2'\nid '1'\n", + AnonymizeCustomPatternWithContext("foo\nbar\n", kPattern1)); + EXPECT_EQ("id '<ID: 1>'", + AnonymizeCustomPatternWithContext("id '2345'", kPattern1)); + EXPECT_EQ("id '<ID: 2>'", + AnonymizeCustomPatternWithContext("id '1234'", kPattern1)); + EXPECT_EQ("id: '<ID: 2>'", + AnonymizeCustomPatternWithContext("id: '1234'", kPattern1)); + EXPECT_EQ("ID: '<ID: 1>'", + AnonymizeCustomPatternWithContext("ID: '2345'", kPattern1)); + EXPECT_EQ("x1 id '<ID: 1>' 1x id '<ID: 2>'\nid '<ID: 1>'\n", AnonymizeCustomPatternWithContext( - "x1 id '2345' 1x id '1234'\nid '2345'\n", kPattern, &space)); - space.clear(); - EXPECT_EQ("id '1'", - AnonymizeCustomPatternWithContext("id '1234'", kPattern, &space)); - - space.clear(); - EXPECT_EQ("x1z", - AnonymizeCustomPatternWithContext("xyz", "()(y+)()", &space)); + "x1 id '2345' 1x id '1234'\nid '2345'\n", kPattern1)); + // Different pattern with same alias should reuse the replacements. + EXPECT_EQ("id='<ID: 2>'", + AnonymizeCustomPatternWithContext("id='1234'", kPattern2)); + // Different alias should not reuse replacement from another pattern. + EXPECT_EQ("idg='<IDG: 1>'", + AnonymizeCustomPatternWithContext("idg='1234'", kPattern3)); + EXPECT_EQ("x<FOO: 1>z", + AnonymizeCustomPatternWithContext("xyz", {"FOO", "()(y+)()"})); } TEST_F(AnonymizerToolTest, AnonymizeCustomPatternWithoutContext) { - CustomPatternWithoutContext kPattern = {"pattern", "(o+)"}; - std::map<std::string, std::string> space; - EXPECT_EQ("", AnonymizeCustomPatternWithoutContext("", kPattern, &space)); + CustomPatternWithAlias kPattern = {"pattern", "(o+)"}; + EXPECT_EQ("", AnonymizeCustomPatternWithoutContext("", kPattern)); EXPECT_EQ("f<pattern: 1>\nf<pattern: 2>z\nf<pattern: 1>l\n", - AnonymizeCustomPatternWithoutContext("fo\nfooz\nfol\n", kPattern, - &space)); + AnonymizeCustomPatternWithoutContext("fo\nfooz\nfol\n", kPattern)); } TEST_F(AnonymizerToolTest, AnonymizeChunk) { @@ -216,149 +296,159 @@ TEST_F(AnonymizerToolTest, AnonymizeChunk) { // of pairs, and then convert that to two strings which become the input and // output of the anonymizer. std::pair<std::string, std::string> data[] = { - {"aaaaaaaa [SSID=123aaaaaa]aaaaa", // SSID. - "aaaaaaaa [SSID=1]aaaaa"}, - {"aaaaaaaahttp://tets.comaaaaaaa", // URL. - "aaaaaaaa<URL: 1>"}, - {"aaaaaemail@example.comaaa", // Email address. - "<email: 1>"}, - {"example@@1234", // No PII, it is not invalid email address. - "example@@1234"}, - {"255.255.155.2", // IP address. - "<IPv4: 1>"}, - {"255.255.155.255", // IP address. - "<IPv4: 2>"}, - {"127.0.0.1", // IPv4 loopback. - "<127.0.0.0/8: 3>"}, - {"127.255.0.1", // IPv4 loopback. - "<127.0.0.0/8: 4>"}, - {"0.0.0.0", // Any IPv4. - "<0.0.0.0/8: 5>"}, - {"0.255.255.255", // Any IPv4. - "<0.0.0.0/8: 6>"}, - {"10.10.10.100", // IPv4 private class A. - "<10.0.0.0/8: 7>"}, - {"10.10.10.100", // Intentional duplicate. - "<10.0.0.0/8: 7>"}, - {"10.10.10.101", // IPv4 private class A. - "<10.0.0.0/8: 8>"}, - {"10.255.255.255", // IPv4 private class A. - "<10.0.0.0/8: 9>"}, - {"172.16.0.0", // IPv4 private class B. - "<172.16.0.0/12: 10>"}, - {"172.31.255.255", // IPv4 private class B. - "<172.16.0.0/12: 11>"}, - {"172.11.5.5", // IP address. - "<IPv4: 12>"}, - {"172.111.5.5", // IP address. - "<IPv4: 13>"}, - {"192.168.0.0", // IPv4 private class C. - "<192.168.0.0/16: 14>"}, - {"192.168.255.255", // IPv4 private class C. - "<192.168.0.0/16: 15>"}, - {"192.169.2.120", // IP address. - "<IPv4: 16>"}, - {"169.254.0.1", // Link local. - "<169.254.0.0/16: 17>"}, - {"169.200.0.1", // IP address. - "<IPv4: 18>"}, - {"fe80::", // Link local. - "<fe80::/10: 1>"}, - {"fe80::ffff", // Link local. - "<fe80::/10: 2>"}, - {"febf:ffff::ffff", // Link local. - "<fe80::/10: 3>"}, - {"fecc::1111", // IP address. - "<IPv6: 4>"}, - {"224.0.0.24", // Multicast. - "<224.0.0.0/4: 19>"}, - {"240.0.0.0", // IP address. - "<IPv4: 20>"}, - {"255.255.255.255", // Broadcast. - "255.255.255.255"}, - {"100.115.92.92", // ChromeOS. - "100.115.92.92"}, - {"100.115.91.92", // IP address. - "<IPv4: 23>"}, - {"1.1.1.1", // DNS - "1.1.1.1"}, - {"8.8.8.8", // DNS - "8.8.8.8"}, - {"8.8.4.4", // DNS - "8.8.4.4"}, - {"8.8.8.4", // IP address. - "<IPv4: 27>"}, - {"255.255.259.255", // Not an IP address. - "255.255.259.255"}, - {"255.300.255.255", // Not an IP address. - "255.300.255.255"}, - {"aaaa123.123.45.4aaa", // IP address. - "aaaa<IPv4: 28>aaa"}, - {"11:11;11::11", // IP address. - "11:11;<IPv6: 5>"}, - {"11::11", // IP address. - "<IPv6: 5>"}, - {"11:11:abcdef:0:0:0:0:0", // No PII. - "11:11:abcdef:0:0:0:0:0"}, - {"::", // Unspecified. - "::"}, - {"::1", // Local host. - "::1"}, - {"Instance::Set", // Ignore match, no PII. - "Instance::Set"}, - {"Instant::ff", // Ignore match, no PII. - "Instant::ff"}, - {"net::ERR_CONN_TIMEOUT", // Ignore match, no PII. - "net::ERR_CONN_TIMEOUT"}, - {"ff01::1", // All nodes address (interface local). - "ff01::1"}, - {"ff01::2", // All routers (interface local). - "ff01::2"}, - {"ff01::3", // Multicast (interface local). - "<ff01::/16: 13>"}, - {"ff02::1", // All nodes address (link local). - "ff02::1"}, - {"ff02::2", // All routers (link local). - "ff02::2"}, - {"ff02::3", // Multicast (link local). - "<ff02::/16: 16>"}, - {"ff02::fb", // mDNSv6 (link local). - "<ff02::/16: 17>"}, - {"ff08::fb", // mDNSv6. - "<IPv6: 18>"}, - {"ff0f::101", // All NTP servers. - "<IPv6: 19>"}, - {"::ffff:cb0c:10ea", // IPv4-mapped IPV6 (IP address). - "<IPv6: 20>"}, - {"::ffff:a0a:a0a", // IPv4-mapped IPV6 (private class A). - "<M 10.0.0.0/8: 21>"}, - {"::ffff:a0a:a0a", // Intentional duplicate. - "<M 10.0.0.0/8: 21>"}, - {"::ffff:ac1e:1e1e", // IPv4-mapped IPV6 (private class B). - "<M 172.16.0.0/12: 22>"}, - {"::ffff:c0a8:640a", // IPv4-mapped IPV6 (private class C). - "<M 192.168.0.0/16: 23>"}, - {"::ffff:6473:5c01", // IPv4-mapped IPV6 (Chrome). - "<M 100.115.92.1: 24>"}, - {"64:ff9b::a0a:a0a", // IPv4-translated 6to4 IPV6 (private class A). - "<T 10.0.0.0/8: 25>"}, - {"64:ff9b::6473:5c01", // IPv4-translated 6to4 IPV6 (Chrome). - "<T 100.115.92.1: 26>"}, - {"::0101:ffff:c0a8:640a", // IP address. - "<IPv6: 27>"}, - {"aa:aa:aa:aa:aa:aa", // MAC address (BSSID). - "aa:aa:aa:00:00:01"}, - {"chrome://resources/foo", // Secure chrome resource, whitelisted. - "chrome://resources/foo"}, - {"chrome://settings/crisper.js", // Whitelisted settings URLs. - "chrome://settings/crisper.js"}, - // Whitelisted first party extension. - {"chrome-extension://nkoccljplnhpfnfiajclkommnmllphnl/foobar.js", - "chrome-extension://nkoccljplnhpfnfiajclkommnmllphnl/foobar.js"}, - {"chrome://resources/f?user=bar", // Potentially PII in parameter. - "<URL: 2>"}, - {"chrome-extension://nkoccljplnhpfnfiajclkommnmllphnl/foobar.js?bar=x", - "<URL: 3>"}, // Potentially PII in parameter. + {"aaaaaaaa [SSID=123aaaaaa]aaaaa", // SSID. + "aaaaaaaa [SSID=<SSID: 1>]aaaaa"}, + {"aaaaaaaahttp://tets.comaaaaaaa", // URL. + "aaaaaaaa<URL: 1>"}, + {"aaaaaemail@example.comaaa", // Email address. + "<email: 1>"}, + {"example@@1234", // No PII, it is not invalid email address. + "example@@1234"}, + {"255.255.155.2", // IP address. + "<IPv4: 1>"}, + {"255.255.155.255", // IP address. + "<IPv4: 2>"}, + {"127.0.0.1", // IPv4 loopback. + "<127.0.0.0/8: 3>"}, + {"127.255.0.1", // IPv4 loopback. + "<127.0.0.0/8: 4>"}, + {"0.0.0.0", // Any IPv4. + "<0.0.0.0/8: 5>"}, + {"0.255.255.255", // Any IPv4. + "<0.0.0.0/8: 6>"}, + {"10.10.10.100", // IPv4 private class A. + "<10.0.0.0/8: 7>"}, + {"10.10.10.100", // Intentional duplicate. + "<10.0.0.0/8: 7>"}, + {"10.10.10.101", // IPv4 private class A. + "<10.0.0.0/8: 8>"}, + {"10.255.255.255", // IPv4 private class A. + "<10.0.0.0/8: 9>"}, + {"172.16.0.0", // IPv4 private class B. + "<172.16.0.0/12: 10>"}, + {"172.31.255.255", // IPv4 private class B. + "<172.16.0.0/12: 11>"}, + {"172.11.5.5", // IP address. + "<IPv4: 12>"}, + {"172.111.5.5", // IP address. + "<IPv4: 13>"}, + {"192.168.0.0", // IPv4 private class C. + "<192.168.0.0/16: 14>"}, + {"192.168.255.255", // IPv4 private class C. + "<192.168.0.0/16: 15>"}, + {"192.169.2.120", // IP address. + "<IPv4: 16>"}, + {"169.254.0.1", // Link local. + "<169.254.0.0/16: 17>"}, + {"169.200.0.1", // IP address. + "<IPv4: 18>"}, + {"fe80::", // Link local. + "<fe80::/10: 1>"}, + {"fe80::ffff", // Link local. + "<fe80::/10: 2>"}, + {"febf:ffff::ffff", // Link local. + "<fe80::/10: 3>"}, + {"fecc::1111", // IP address. + "<IPv6: 4>"}, + {"224.0.0.24", // Multicast. + "<224.0.0.0/4: 19>"}, + {"240.0.0.0", // IP address. + "<IPv4: 20>"}, + {"255.255.255.255", // Broadcast. + "255.255.255.255"}, + {"100.115.92.92", // ChromeOS. + "100.115.92.92"}, + {"100.115.91.92", // IP address. + "<IPv4: 23>"}, + {"1.1.1.1", // DNS + "1.1.1.1"}, + {"8.8.8.8", // DNS + "8.8.8.8"}, + {"8.8.4.4", // DNS + "8.8.4.4"}, + {"8.8.8.4", // IP address. + "<IPv4: 27>"}, + {"255.255.259.255", // Not an IP address. + "255.255.259.255"}, + {"255.300.255.255", // Not an IP address. + "255.300.255.255"}, + {"aaaa123.123.45.4aaa", // IP address. + "aaaa<IPv4: 28>aaa"}, + {"11:11;11::11", // IP address. + "11:11;<IPv6: 5>"}, + {"11::11", // IP address. + "<IPv6: 5>"}, + {"11:11:abcdef:0:0:0:0:0", // No PII. + "11:11:abcdef:0:0:0:0:0"}, + {"::", // Unspecified. + "::"}, + {"::1", // Local host. + "::1"}, + {"Instance::Set", // Ignore match, no PII. + "Instance::Set"}, + {"Instant::ff", // Ignore match, no PII. + "Instant::ff"}, + {"net::ERR_CONN_TIMEOUT", // Ignore match, no PII. + "net::ERR_CONN_TIMEOUT"}, + {"ff01::1", // All nodes address (interface local). + "ff01::1"}, + {"ff01::2", // All routers (interface local). + "ff01::2"}, + {"ff01::3", // Multicast (interface local). + "<ff01::/16: 13>"}, + {"ff02::1", // All nodes address (link local). + "ff02::1"}, + {"ff02::2", // All routers (link local). + "ff02::2"}, + {"ff02::3", // Multicast (link local). + "<ff02::/16: 16>"}, + {"ff02::fb", // mDNSv6 (link local). + "<ff02::/16: 17>"}, + {"ff08::fb", // mDNSv6. + "<IPv6: 18>"}, + {"ff0f::101", // All NTP servers. + "<IPv6: 19>"}, + {"::ffff:cb0c:10ea", // IPv4-mapped IPV6 (IP address). + "<IPv6: 20>"}, + {"::ffff:a0a:a0a", // IPv4-mapped IPV6 (private class A). + "<M 10.0.0.0/8: 21>"}, + {"::ffff:a0a:a0a", // Intentional duplicate. + "<M 10.0.0.0/8: 21>"}, + {"::ffff:ac1e:1e1e", // IPv4-mapped IPV6 (private class B). + "<M 172.16.0.0/12: 22>"}, + {"::ffff:c0a8:640a", // IPv4-mapped IPV6 (private class C). + "<M 192.168.0.0/16: 23>"}, + {"::ffff:6473:5c01", // IPv4-mapped IPV6 (Chrome). + "<M 100.115.92.1: 24>"}, + {"64:ff9b::a0a:a0a", // IPv4-translated 6to4 IPV6 (private class A). + "<T 10.0.0.0/8: 25>"}, + {"64:ff9b::6473:5c01", // IPv4-translated 6to4 IPV6 (Chrome). + "<T 100.115.92.1: 26>"}, + {"::0101:ffff:c0a8:640a", // IP address. + "<IPv6: 27>"}, + {"aa:aa:aa:aa:aa:aa", // MAC address (BSSID). + "[MAC OUI=aa:aa:aa IFACE=1]"}, + {"chrome://resources/foo", // Secure chrome resource, whitelisted. + "chrome://resources/foo"}, + {"chrome://settings/crisper.js", // Whitelisted settings URLs. + "chrome://settings/crisper.js"}, + // Whitelisted first party extension. + {"chrome-extension://nkoccljplnhpfnfiajclkommnmllphnl/foobar.js", + "chrome-extension://nkoccljplnhpfnfiajclkommnmllphnl/foobar.js"}, + {"chrome://resources/f?user=bar", // Potentially PII in parameter. + "<URL: 2>"}, + {"chrome-extension://nkoccljplnhpfnfiajclkommnmllphnl/foobar.js?bar=x", + "<URL: 3>"}, // Potentially PII in parameter. + {"/root/27540283740a0897ab7c8de0f809add2bacde78f/foo", + "/root/<HASH:2754 1>/foo"}, // Hash string. +#if defined(OS_CHROMEOS) // We only anonymize Android paths on Chrome OS. + // Allowed android storage path. + {"112K\t/home/root/deadbeef1234/android-data/data/system_de", + "112K\t/home/root/deadbeef1234/android-data/data/system_de"}, + // Anonymized app-specific storage path. + {"8.0K\t/home/root/deadbeef1234/android-data/data/data/pa.ckage2/de", + "8.0K\t/home/root/deadbeef1234/android-data/data/data/pa.ckage2/d_"}, +#endif // defined(OS_CHROMEOS) }; std::string anon_input; std::string anon_output; @@ -369,4 +459,53 @@ TEST_F(AnonymizerToolTest, AnonymizeChunk) { EXPECT_EQ(anon_output, anonymizer_.Anonymize(anon_input)); } +#if defined(OS_CHROMEOS) // We only anonymize Android paths on Chrome OS. +TEST_F(AnonymizerToolTest, AnonymizeAndroidAppStoragePaths) { + EXPECT_EQ("", AnonymizeAndroidAppStoragePaths("")); + EXPECT_EQ("foo\nbar\n", AnonymizeAndroidAppStoragePaths("foo\nbar\n")); + + constexpr char kDuOutput[] = + "112K\t/home/root/deadbeef1234/android-data/data/system_de\n" + // /data/data will be modified by the anonymizer. + "8.0K\t/home/root/deadbeef1234/android-data/data/data/pack.age1/a\n" + "8.0K\t/home/root/deadbeef1234/android-data/data/data/pack.age1/bc\n" + "24K\t/home/root/deadbeef1234/android-data/data/data/pack.age1\n" + "8.0K\t/home/root/deadbeef1234/android-data/data/data/pa.ckage2/de\n" + "8.0K\t/home/root/deadbeef1234/android-data/data/data/pa.ckage2/de/" + "\xe3\x81\x82\n" + "8.1K\t/home/root/deadbeef1234/android-data/data/data/pa.ckage2/de/" + "\xe3\x81\x82\xe3\x81\x83\n" + "8.0K\t/home/root/deadbeef1234/android-data/data/data/pa.ckage2/ef\n" + "24K\t/home/root/deadbeef1234/android-data/data/data/pa.ckage2\n" + // /data/app won't. + "8.0K\t/home/root/deadbeef1234/android-data/data/app/pack.age1/a\n" + "8.0K\t/home/root/deadbeef1234/android-data/data/app/pack.age1/bc\n" + "24K\t/home/root/deadbeef1234/android-data/data/app/pack.age1\n" + // /data/user_de will. + "8.0K\t/home/root/deadbeef1234/android-data/data/user_de/0/pack.age1/a\n" + "8.0K\t/home/root/deadbeef1234/android-data/data/user_de/0/pack.age1/bc\n" + "24K\t/home/root/deadbeef1234/android-data/data/user_de/0/pack.age1\n" + "78M\t/home/root/deadbeef1234/android-data/data/data\n"; + constexpr char kDuOutputRedacted[] = + "112K\t/home/root/deadbeef1234/android-data/data/system_de\n" + "8.0K\t/home/root/deadbeef1234/android-data/data/data/pack.age1/a\n" + "8.0K\t/home/root/deadbeef1234/android-data/data/data/pack.age1/b_\n" + "24K\t/home/root/deadbeef1234/android-data/data/data/pack.age1\n" + "8.0K\t/home/root/deadbeef1234/android-data/data/data/pa.ckage2/d_\n" + // The non-ASCII directory names will become '*_'. + "8.0K\t/home/root/deadbeef1234/android-data/data/data/pa.ckage2/d_/*_\n" + "8.1K\t/home/root/deadbeef1234/android-data/data/data/pa.ckage2/d_/*_\n" + "8.0K\t/home/root/deadbeef1234/android-data/data/data/pa.ckage2/e_\n" + "24K\t/home/root/deadbeef1234/android-data/data/data/pa.ckage2\n" + "8.0K\t/home/root/deadbeef1234/android-data/data/app/pack.age1/a\n" + "8.0K\t/home/root/deadbeef1234/android-data/data/app/pack.age1/bc\n" + "24K\t/home/root/deadbeef1234/android-data/data/app/pack.age1\n" + "8.0K\t/home/root/deadbeef1234/android-data/data/user_de/0/pack.age1/a\n" + "8.0K\t/home/root/deadbeef1234/android-data/data/user_de/0/pack.age1/b_\n" + "24K\t/home/root/deadbeef1234/android-data/data/user_de/0/pack.age1\n" + "78M\t/home/root/deadbeef1234/android-data/data/data\n"; + EXPECT_EQ(kDuOutputRedacted, AnonymizeAndroidAppStoragePaths(kDuOutput)); +} +#endif // defined(OS_CHROMEOS) + } // namespace feedback diff --git a/chromium/components/feedback/feedback_data.cc b/chromium/components/feedback/feedback_data.cc index 91cc10ebc6d..0264e5a69fe 100644 --- a/chromium/components/feedback/feedback_data.cc +++ b/chromium/components/feedback/feedback_data.cc @@ -69,8 +69,9 @@ void FeedbackData::CompressSystemInfo() { } ++pending_op_count_; - base::PostTaskWithTraitsAndReply( - FROM_HERE, {base::MayBlock(), base::TaskPriority::BEST_EFFORT}, + base::PostTaskAndReply( + FROM_HERE, + {base::ThreadPool(), base::MayBlock(), base::TaskPriority::BEST_EFFORT}, base::BindOnce(&FeedbackData::CompressLogs, this), base::BindOnce(&FeedbackData::OnCompressComplete, this)); } @@ -79,8 +80,9 @@ void FeedbackData::SetAndCompressHistograms(std::string histograms) { DCHECK_CURRENTLY_ON(BrowserThread::UI); ++pending_op_count_; - base::PostTaskWithTraitsAndReply( - FROM_HERE, {base::MayBlock(), base::TaskPriority::BEST_EFFORT}, + base::PostTaskAndReply( + FROM_HERE, + {base::ThreadPool(), base::MayBlock(), base::TaskPriority::BEST_EFFORT}, base::BindOnce(&FeedbackData::CompressFile, this, base::FilePath(kHistogramsFilename), kHistogramsAttachmentName, std::move(histograms)), @@ -95,8 +97,9 @@ void FeedbackData::AttachAndCompressFileData(std::string attached_filedata) { ++pending_op_count_; base::FilePath attached_file = base::FilePath::FromUTF8Unsafe(attached_filename_); - base::PostTaskWithTraitsAndReply( - FROM_HERE, {base::MayBlock(), base::TaskPriority::BEST_EFFORT}, + base::PostTaskAndReply( + FROM_HERE, + {base::ThreadPool(), base::MayBlock(), base::TaskPriority::BEST_EFFORT}, base::BindOnce(&FeedbackData::CompressFile, this, attached_file, std::string(), std::move(attached_filedata)), base::BindOnce(&FeedbackData::OnCompressComplete, this)); diff --git a/chromium/components/feedback/feedback_data_unittest.cc b/chromium/components/feedback/feedback_data_unittest.cc index b93bc26bc61..e588bfbb6da 100644 --- a/chromium/components/feedback/feedback_data_unittest.cc +++ b/chromium/components/feedback/feedback_data_unittest.cc @@ -13,8 +13,8 @@ #include "components/feedback/feedback_uploader.h" #include "components/feedback/feedback_uploader_factory.h" #include "components/prefs/testing_pref_service.h" +#include "content/public/test/browser_task_environment.h" #include "content/public/test/test_browser_context.h" -#include "content/public/test/test_browser_thread_bundle.h" #include "services/network/public/cpp/shared_url_loader_factory.h" #include "services/network/public/cpp/weak_wrapper_shared_url_loader_factory.h" #include "services/network/test/test_url_loader_factory.h" @@ -85,7 +85,7 @@ class FeedbackDataTest : public testing::Test { base::Closure quit_closure_; std::unique_ptr<base::RunLoop> run_loop_; - content::TestBrowserThreadBundle test_browser_thread_bundle_; + content::BrowserTaskEnvironment task_environment_; network::TestURLLoaderFactory test_url_loader_factory_; scoped_refptr<network::SharedURLLoaderFactory> test_shared_loader_factory_; content::TestBrowserContext context_; diff --git a/chromium/components/feedback/feedback_report.cc b/chromium/components/feedback/feedback_report.cc index c1f362fe7f7..bcac13be667 100644 --- a/chromium/components/feedback/feedback_report.cc +++ b/chromium/components/feedback/feedback_report.cc @@ -12,6 +12,7 @@ #include "base/guid.h" #include "base/sequenced_task_runner.h" #include "base/strings/string_number_conversions.h" +#include "base/threading/thread_task_runner_handle.h" namespace feedback { @@ -57,6 +58,12 @@ FeedbackReport::FeedbackReport( base::WrapRefCounted<FeedbackReport>(this))); } +FeedbackReport::FeedbackReport( + base::FilePath path, + std::unique_ptr<std::string> data, + scoped_refptr<base::SequencedTaskRunner> task_runner) + : file_(path), data_(std::move(data)), reports_task_runner_(task_runner) {} + // static const char FeedbackReport::kCrashReportIdsKey[] = "crash_report_ids"; @@ -77,9 +84,11 @@ void FeedbackReport::LoadReportsAndQueue(const base::FilePath& user_dir, !name.empty(); name = enumerator.Next()) { auto data = std::make_unique<std::string>(); - if (ReadFileToString(name, data.get())) - callback.Run(std::move(data)); - base::DeleteFile(name, false); + if (ReadFileToString(name, data.get())) { + callback.Run(base::MakeRefCounted<FeedbackReport>( + std::move(name), std::move(data), + base::ThreadTaskRunnerHandle::Get())); + } } } diff --git a/chromium/components/feedback/feedback_report.h b/chromium/components/feedback/feedback_report.h index 413e03ba65e..a71e32d91d7 100644 --- a/chromium/components/feedback/feedback_report.h +++ b/chromium/components/feedback/feedback_report.h @@ -11,29 +11,35 @@ #include "base/files/file_path.h" #include "base/macros.h" #include "base/memory/ref_counted.h" +#include "base/sequenced_task_runner.h" #include "base/time/time.h" -namespace base { -class SequencedTaskRunner; -} - namespace feedback { +class FeedbackReport; + // Repeating since for every feedback report file on disk, the callback to // queue it in the uploader needs to be invoked. using QueueCallback = - base::RepeatingCallback<void(std::unique_ptr<std::string>)>; + base::RepeatingCallback<void(scoped_refptr<FeedbackReport>)>; // This class holds a feedback report. Once a report is created, a disk backup // for it is created automatically. This backup needs to explicitly be // deleted by calling DeleteReportOnDisk. class FeedbackReport : public base::RefCountedThreadSafe<FeedbackReport> { public: + // Creates a new feedback report with the contents of |data|. FeedbackReport(const base::FilePath& path, const base::Time& upload_at, std::unique_ptr<std::string> data, scoped_refptr<base::SequencedTaskRunner> task_runner); + // Creates a feedback report from an existing one on-disk at |path|, the + // |upload_at| time should be set after construction. + FeedbackReport(base::FilePath path, + std::unique_ptr<std::string> data, + scoped_refptr<base::SequencedTaskRunner> task_runner); + // The ID of the product specific data for the crash report IDs as stored by // the feedback server. static const char kCrashReportIdsKey[]; @@ -54,6 +60,9 @@ class FeedbackReport : public base::RefCountedThreadSafe<FeedbackReport> { const base::Time& upload_at() const { return upload_at_; } void set_upload_at(const base::Time& time) { upload_at_ = time; } const std::string& data() const { return *data_; } + scoped_refptr<base::SequencedTaskRunner> reports_task_runner() const { + return reports_task_runner_; + } private: friend class base::RefCountedThreadSafe<FeedbackReport>; diff --git a/chromium/components/feedback/feedback_uploader.cc b/chromium/components/feedback/feedback_uploader.cc index 43ac0af9cb2..80e737ead7f 100644 --- a/chromium/components/feedback/feedback_uploader.cc +++ b/chromium/components/feedback/feedback_uploader.cc @@ -82,7 +82,17 @@ void FeedbackUploader::SetMinimumRetryDelayForTesting(base::TimeDelta delay) { } void FeedbackUploader::QueueReport(std::unique_ptr<std::string> data) { - QueueReportWithDelay(std::move(data), base::TimeDelta()); + reports_queue_.emplace(base::MakeRefCounted<FeedbackReport>( + feedback_reports_path_, base::Time::Now(), std::move(data), + task_runner_)); + UpdateUploadTimer(); +} + +void FeedbackUploader::RequeueReport(scoped_refptr<FeedbackReport> report) { + DCHECK_EQ(task_runner_, report->reports_task_runner()); + report->set_upload_at(base::Time::Now()); + reports_queue_.emplace(std::move(report)); + UpdateUploadTimer(); } void FeedbackUploader::StartDispatchingReport() { @@ -159,7 +169,7 @@ void FeedbackUploader::DispatchReport() { })"); auto resource_request = std::make_unique<network::ResourceRequest>(); resource_request->url = feedback_post_url_; - resource_request->allow_credentials = false; + resource_request->credentials_mode = network::mojom::CredentialsMode::kOmit; resource_request->method = "POST"; // Tell feedback server about the variation state of this install. @@ -249,12 +259,4 @@ void FeedbackUploader::UpdateUploadTimer() { } } -void FeedbackUploader::QueueReportWithDelay(std::unique_ptr<std::string> data, - base::TimeDelta delay) { - reports_queue_.emplace(base::MakeRefCounted<FeedbackReport>( - feedback_reports_path_, base::Time::Now() + delay, std::move(data), - task_runner_)); - UpdateUploadTimer(); -} - } // namespace feedback diff --git a/chromium/components/feedback/feedback_uploader.h b/chromium/components/feedback/feedback_uploader.h index 816fa256d9e..9dec3f6fb56 100644 --- a/chromium/components/feedback/feedback_uploader.h +++ b/chromium/components/feedback/feedback_uploader.h @@ -50,6 +50,9 @@ class FeedbackUploader : public KeyedService, // Queues a report for uploading. void QueueReport(std::unique_ptr<std::string> data); + // Re-queues an existing report from disk for uploading. + void RequeueReport(scoped_refptr<FeedbackReport> report); + bool QueueEmpty() const { return reports_queue_.empty(); } content::BrowserContext* context() { return context_; } @@ -113,9 +116,6 @@ class FeedbackUploader : public KeyedService, // Update our timer for uploading the next report. void UpdateUploadTimer(); - void QueueReportWithDelay(std::unique_ptr<std::string> data, - base::TimeDelta delay); - // URLLoaderFactory used for network requests. scoped_refptr<network::SharedURLLoaderFactory> url_loader_factory_; diff --git a/chromium/components/feedback/feedback_uploader_dispatch_unittest.cc b/chromium/components/feedback/feedback_uploader_dispatch_unittest.cc index ab1a8b0ed33..8830c7c44e8 100644 --- a/chromium/components/feedback/feedback_uploader_dispatch_unittest.cc +++ b/chromium/components/feedback/feedback_uploader_dispatch_unittest.cc @@ -16,8 +16,8 @@ #include "components/variations/net/variations_http_headers.h" #include "components/variations/variations_associated_data.h" #include "components/variations/variations_http_header_provider.h" +#include "content/public/test/browser_task_environment.h" #include "content/public/test/test_browser_context.h" -#include "content/public/test/test_browser_thread_bundle.h" #include "net/http/http_util.h" #include "services/network/public/cpp/shared_url_loader_factory.h" #include "services/network/public/cpp/weak_wrapper_shared_url_loader_factory.h" @@ -43,7 +43,7 @@ void QueueReport(FeedbackUploader* uploader, const std::string& report_data) { class FeedbackUploaderDispatchTest : public ::testing::Test { protected: FeedbackUploaderDispatchTest() - : browser_thread_bundle_(content::TestBrowserThreadBundle::IO_MAINLOOP), + : task_environment_(content::BrowserTaskEnvironment::IO_MAINLOOP), shared_url_loader_factory_( base::MakeRefCounted<network::WeakWrapperSharedURLLoaderFactory>( &test_url_loader_factory_)) {} @@ -75,7 +75,7 @@ class FeedbackUploaderDispatchTest : public ::testing::Test { content::BrowserContext* context() { return &context_; } private: - content::TestBrowserThreadBundle browser_thread_bundle_; + content::BrowserTaskEnvironment task_environment_; content::TestBrowserContext context_; network::TestURLLoaderFactory test_url_loader_factory_; scoped_refptr<network::SharedURLLoaderFactory> shared_url_loader_factory_; diff --git a/chromium/components/feedback/feedback_uploader_factory.cc b/chromium/components/feedback/feedback_uploader_factory.cc index 187880744b1..b8f034b00e4 100644 --- a/chromium/components/feedback/feedback_uploader_factory.cc +++ b/chromium/components/feedback/feedback_uploader_factory.cc @@ -32,8 +32,8 @@ scoped_refptr<base::SingleThreadTaskRunner> FeedbackUploaderFactory::CreateUploaderTaskRunner() { // Uses a BLOCK_SHUTDOWN file task runner because we really don't want to // lose reports or corrupt their files. - return base::CreateSingleThreadTaskRunnerWithTraits( - {base::MayBlock(), base::TaskPriority::BEST_EFFORT, + return base::CreateSingleThreadTaskRunner( + {base::ThreadPool(), base::MayBlock(), base::TaskPriority::BEST_EFFORT, base::TaskShutdownBehavior::BLOCK_SHUTDOWN}); } diff --git a/chromium/components/feedback/feedback_uploader_unittest.cc b/chromium/components/feedback/feedback_uploader_unittest.cc index 8fc865f0841..659f674edbc 100644 --- a/chromium/components/feedback/feedback_uploader_unittest.cc +++ b/chromium/components/feedback/feedback_uploader_unittest.cc @@ -16,8 +16,8 @@ #include "base/threading/sequenced_task_runner_handle.h" #include "components/feedback/feedback_report.h" #include "components/feedback/feedback_uploader_factory.h" +#include "content/public/test/browser_task_environment.h" #include "content/public/test/test_browser_context.h" -#include "content/public/test/test_browser_thread_bundle.h" #include "services/network/public/cpp/shared_url_loader_factory.h" #include "services/network/public/cpp/weak_wrapper_shared_url_loader_factory.h" #include "services/network/test/test_url_loader_factory.h" @@ -59,7 +59,8 @@ class MockFeedbackUploader : public FeedbackUploader { base::BindOnce( &FeedbackReport::LoadReportsAndQueue, feedback_reports_path(), base::BindRepeating(&MockFeedbackUploader::QueueSingleReport, - base::SequencedTaskRunnerHandle::Get(), this))); + base::SequencedTaskRunnerHandle::Get(), + AsWeakPtr()))); } const std::map<std::string, unsigned int>& dispatched_reports() const { @@ -71,11 +72,11 @@ class MockFeedbackUploader : public FeedbackUploader { private: static void QueueSingleReport( scoped_refptr<base::SequencedTaskRunner> main_task_runner, - MockFeedbackUploader* uploader, - std::unique_ptr<std::string> data) { + base::WeakPtr<FeedbackUploader> uploader, + scoped_refptr<FeedbackReport> report) { main_task_runner->PostTask( - FROM_HERE, base::BindOnce(&MockFeedbackUploader::QueueReport, - uploader->AsWeakPtr(), std::move(data))); + FROM_HERE, base::BindOnce(&MockFeedbackUploader::RequeueReport, + std::move(uploader), std::move(report))); } // FeedbackUploaderChrome: @@ -139,7 +140,7 @@ class FeedbackUploaderTest : public testing::Test { private: network::TestURLLoaderFactory test_url_loader_factory_; scoped_refptr<network::SharedURLLoaderFactory> test_shared_loader_factory_; - content::TestBrowserThreadBundle test_browser_thread_bundle_; + content::BrowserTaskEnvironment task_environment_; content::TestBrowserContext context_; std::unique_ptr<MockFeedbackUploader> uploader_; diff --git a/chromium/components/feedback/system_logs/system_logs_fetcher.cc b/chromium/components/feedback/system_logs/system_logs_fetcher.cc index 62ed817c0dc..6b061a58842 100644 --- a/chromium/components/feedback/system_logs/system_logs_fetcher.cc +++ b/chromium/components/feedback/system_logs/system_logs_fetcher.cc @@ -51,9 +51,10 @@ SystemLogsFetcher::SystemLogsFetcher( const char* const first_party_extension_ids[]) : response_(std::make_unique<SystemLogsResponse>()), num_pending_requests_(0), - task_runner_for_anonymizer_(base::CreateSequencedTaskRunnerWithTraits( - {// User visible because this is called when the user is looking at - // the send feedback dialog, watching a spinner. + task_runner_for_anonymizer_(base::CreateSequencedTaskRunner( + {base::ThreadPool(), // User visible because this is called when the + // user is looking at the send feedback dialog, + // watching a spinner. base::TaskPriority::USER_VISIBLE, base::TaskShutdownBehavior::CONTINUE_ON_SHUTDOWN})) { if (scrub_data) |