diff options
author | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2020-10-12 14:27:29 +0200 |
---|---|---|
committer | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2020-10-13 09:35:20 +0000 |
commit | c30a6232df03e1efbd9f3b226777b07e087a1122 (patch) | |
tree | e992f45784689f373bcc38d1b79a239ebe17ee23 /chromium/components/url_formatter | |
parent | 7b5b123ac58f58ffde0f4f6e488bcd09aa4decd3 (diff) | |
download | qtwebengine-chromium-85-based.tar.gz |
BASELINE: Update Chromium to 85.0.4183.140 (branch: 85-based)
Change-Id: Iaa42f4680837c57725b1344f108c0196741f6057
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
Diffstat (limited to 'chromium/components/url_formatter')
13 files changed, 959 insertions, 623 deletions
diff --git a/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc b/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc index a38e1824113..b3b0a454916 100644 --- a/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc +++ b/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc @@ -5,6 +5,7 @@ #include "components/url_formatter/spoof_checks/idn_spoof_checker.h" #include "base/check_op.h" +#include "base/logging.h" #include "base/no_destructor.h" #include "base/numerics/safe_conversions.h" #include "base/strings/string_piece.h" @@ -24,6 +25,15 @@ namespace url_formatter { namespace { +uint8_t BitLength(uint32_t input) { + uint8_t number_of_bits = 0; + while (input != 0) { + number_of_bits++; + input >>= 1; + } + return number_of_bits; +} + class TopDomainPreloadDecoder : public net::extras::PreloadDecoder { public: using net::extras::PreloadDecoder::PreloadDecoder; @@ -33,14 +43,24 @@ class TopDomainPreloadDecoder : public net::extras::PreloadDecoder { const std::string& search, size_t current_search_offset, bool* out_found) override { + // Make sure the assigned bit length is enough to encode all SkeletonType + // values. 
+ DCHECK_EQ(kSkeletonTypeBitLength, + BitLength(url_formatter::SkeletonType::kMaxValue)); + bool is_same_skeleton; + if (!reader->Next(&is_same_skeleton)) return false; TopDomainEntry top_domain; if (!reader->Next(&top_domain.is_top_500)) return false; - + uint32_t skeletontype_value; + if (!reader->Read(kSkeletonTypeBitLength, &skeletontype_value)) + return false; + top_domain.skeleton_type = + static_cast<url_formatter::SkeletonType>(skeletontype_value); if (is_same_skeleton) { top_domain.domain = search; } else { @@ -56,7 +76,6 @@ class TopDomainPreloadDecoder : public net::extras::PreloadDecoder { if (has_com_suffix) top_domain.domain += ".com"; } - if (current_search_offset == 0) { *out_found = true; DCHECK(!top_domain.domain.empty()); @@ -296,7 +315,7 @@ IDNSpoofChecker::IDNSpoofChecker() { digits_ = icu::UnicodeSet(UNICODE_STRING_SIMPLE("[0-9]"), status); digits_.freeze(); digit_lookalikes_ = icu::UnicodeSet( - icu::UnicodeString::fromUTF8("[θ२২੨੨૨೩೭շзҙӡउওਤ੩૩౩ဒვპ੫丩ㄐճ৪੪୫૭୨౨]"), + icu::UnicodeString::fromUTF8("[θ२২੨੨૨೩೭շзҙӡउওਤ੩૩౩ဒვპੜ੫丩ㄐճ৪੪୫૭୨౨]"), status); digit_lookalikes_.freeze(); @@ -335,7 +354,7 @@ IDNSpoofChecker::~IDNSpoofChecker() { uspoof_close(checker_); } -bool IDNSpoofChecker::SafeToDisplayAsUnicode( +IDNSpoofChecker::Result IDNSpoofChecker::SafeToDisplayAsUnicode( base::StringPiece16 label, base::StringPiece top_level_domain, base::StringPiece16 top_level_domain_unicode) { @@ -345,8 +364,9 @@ bool IDNSpoofChecker::SafeToDisplayAsUnicode( base::checked_cast<int32_t>(label.size()), nullptr, &status); // If uspoof_check fails (due to library failure), or if any of the checks // fail, treat the IDN as unsafe. 
- if (U_FAILURE(status) || (result & USPOOF_ALL_CHECKS)) - return false; + if (U_FAILURE(status) || (result & USPOOF_ALL_CHECKS)) { + return Result::kICUSpoofChecks; + } icu::UnicodeString label_string(FALSE /* isTerminated */, label.data(), base::checked_cast<int32_t>(label.size())); @@ -363,21 +383,21 @@ bool IDNSpoofChecker::SafeToDisplayAsUnicode( // as Unicode would be canonicalized to 'fuss' by GURL and is displayed as // such. See http://crbug.com/595263 . if (deviation_characters_.containsSome(label_string)) - return false; + return Result::kDeviationCharacters; // Disallow Icelandic confusables for domains outside Iceland's ccTLD (.is). if (label_string.length() > 1 && top_level_domain != "is" && icelandic_characters_.containsSome(label_string)) - return false; + return Result::kTLDSpecificCharacters; // Disallow Latin Schwa (U+0259) for domains outside Azerbaijan's ccTLD (.az). if (label_string.length() > 1 && top_level_domain != "az" && label_string.indexOf("ə") != -1) - return false; + return Result::kTLDSpecificCharacters; // Disallow middle dot (U+00B7) when unsafe. 
if (HasUnsafeMiddleDot(label_string, top_level_domain)) { - return false; + return Result::kUnsafeMiddleDot; } // If there's no script mixing, the input is regarded as safe without any @@ -393,7 +413,8 @@ bool IDNSpoofChecker::SafeToDisplayAsUnicode( // - Korean: Hangul, Han, Common result &= USPOOF_RESTRICTION_LEVEL_MASK; if (result == USPOOF_ASCII) - return true; + return Result::kSafe; + if (result == USPOOF_SINGLE_SCRIPT_RESTRICTIVE && kana_letters_exceptions_.containsNone(label_string) && combining_diacritics_exceptions_.containsNone(label_string)) { @@ -401,15 +422,15 @@ bool IDNSpoofChecker::SafeToDisplayAsUnicode( if (IsLabelWholeScriptConfusableForScript(*script.get(), label_string) && !IsWholeScriptConfusableAllowedForTLD(*script.get(), top_level_domain, top_level_domain_unicode)) { - return false; + return Result::kWholeScriptConfusable; } } - return true; + return Result::kSafe; } // Disallow domains that contain only numbers and number-spoofs. if (IsDigitLookalike(label_string)) - return false; + return Result::kDigitLookalikes; // Additional checks for |label| with multiple scripts, one of which is Latin. // Disallow non-ASCII Latin letters to mix with a non-Latin script. @@ -418,7 +439,7 @@ bool IDNSpoofChecker::SafeToDisplayAsUnicode( // because script mixing of LGC is already rejected. 
if (non_ascii_latin_letters_.containsSome(label_string) && !lgc_letters_n_ascii_.containsAll(label_string)) - return false; + return Result::kNonAsciiLatinCharMixedWithNonLatin; icu::RegexMatcher* dangerous_pattern = reinterpret_cast<icu::RegexMatcher*>(DangerousPatternTLS().Get()); @@ -508,7 +529,10 @@ bool IDNSpoofChecker::SafeToDisplayAsUnicode( DangerousPatternTLS().Set(dangerous_pattern); } dangerous_pattern->reset(label_string); - return !dangerous_pattern->find(); + if (dangerous_pattern->find()) { + return Result::kDangerousPattern; + } + return Result::kSafe; } TopDomainEntry IDNSpoofChecker::GetSimilarTopDomain( @@ -538,7 +562,8 @@ Skeletons IDNSpoofChecker::GetSkeletons(base::StringPiece16 hostname) { } TopDomainEntry IDNSpoofChecker::LookupSkeletonInTopDomains( - const std::string& skeleton) { + const std::string& skeleton, + SkeletonType skeleton_type) { DCHECK(!skeleton.empty()); // There are no other guarantees about a skeleton string such as not including // a dot. Skeleton of certain characters are dots (e.g. "۰" (U+06F0)). @@ -554,7 +579,11 @@ TopDomainEntry IDNSpoofChecker::LookupSkeletonInTopDomains( labels.begin() + labels.size() - kNumberOfLabelsToCheck); } - while (labels.size() > 1) { + while (labels.size() > 0) { + // A full skeleton needs at least two labels to match. 
+ if (labels.size() == 1 && skeleton_type == SkeletonType::kFull) { + break; + } std::string partial_skeleton = base::JoinString(labels, "."); bool match = false; bool decoded = preload_decoder.Decode(partial_skeleton, &match); diff --git a/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.h b/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.h index b534162305e..64057b3215f 100644 --- a/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.h +++ b/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.h @@ -33,12 +33,31 @@ FORWARD_DECLARE_TEST(UrlFormatterTest, IDNToUnicode); using Skeletons = base::flat_set<std::string>; +// The |SkeletonType| and |TopDomainEntry| are mirrored in trie_entry.h. These +// are used to insert and read nodes from the Trie. +// The type of skeleton in the trie node. +enum SkeletonType { + // The skeleton represents the full domain (e.g. google.corn). + kFull = 0, + // The skeleton represents the domain with '.'s and '-'s removed (e.g. + // googlecorn). + kSeparatorsRemoved = 1, + // Max value used to determine the number of different types. Update this and + // |kSkeletonTypeBitLength| when new SkeletonTypes are added. + kMaxValue = kSeparatorsRemoved + +}; + +const uint8_t kSkeletonTypeBitLength = 1; + // Represents a top domain entry in the trie. struct TopDomainEntry { // The domain name. std::string domain; // True if the domain is in the top 500. bool is_top_500 = false; + // Type of the skeleton stored in the trie node. + SkeletonType skeleton_type; }; // A helper class for IDN Spoof checking, used to ensure that no IDN input is @@ -55,17 +74,50 @@ class IDNSpoofChecker { size_t trie_root_position; }; + enum class Result { + // Spoof checks weren't performed because the domain wasn't IDN. Should + // never be returned from SafeToDisplayAsUnicode. + kNone, + // The domain passed all spoof checks. + kSafe, + // Failed ICU's standard spoof checks such as Greek mixing with Latin. 
+ kICUSpoofChecks, + // Domain contains deviation characters. + kDeviationCharacters, + // Domain contains characters that are only allowed for certain TLDs, such + // as thorn (þ) used outside Icelandic. + kTLDSpecificCharacters, + // Domain has an unsafe middle dot. + kUnsafeMiddleDot, + // Domain is composed of only Latin-like characters from non Latin scripts. + // E.g. apple.com but apple in Cyrillic (xn--80ak6aa92e.com). + kWholeScriptConfusable, + // Domain is composed of only characters that look like digits. + kDigitLookalikes, + // Domain mixes Non-ASCII Latin with Non-Latin characters. + kNonAsciiLatinCharMixedWithNonLatin, + // Domain contains dangerous patterns that are mostly found when mixing + // Latin and CJK scripts. E.g. Katakana iteration mark (U+30FD) not preceded + // by Katakana. + kDangerousPattern, + }; + IDNSpoofChecker(); ~IDNSpoofChecker(); - - // Returns true if |label| is safe to display as Unicode. In the event of - // library failure, all IDN inputs will be treated as unsafe. + // Returns kSafe if |label| is safe to display as Unicode. Otherwise, returns + // the reason of the failure. |top_level_domain_unicode| is an input only and + // is never modified. + // This method doesn't check for similarity to a top domain: If the input + // matches a top domain but is otherwise safe (e.g. googlé.com), the result + // will be kSafe. + // In the event of library failure, all IDN inputs will be treated as unsafe + // and the return value will be kICUSpoofChecks. // See the function body for details on the specific safety checks performed. - // top_level_domain_unicode can be empty if top_level_domain is not well + // |top_level_domain_unicode| can be empty if |top_level_domain| is not well // formed punycode. 
- bool SafeToDisplayAsUnicode(base::StringPiece16 label, - base::StringPiece top_level_domain, - base::StringPiece16 top_level_domain_unicode); + Result SafeToDisplayAsUnicode(base::StringPiece16 label, + base::StringPiece top_level_domain, + base::StringPiece16 top_level_domain_unicode); // Returns the matching top domain if |hostname| or the last few components of // |hostname| looks similar to one of top domains listed in domains.list. @@ -84,7 +136,11 @@ class IDNSpoofChecker { Skeletons GetSkeletons(base::StringPiece16 hostname); // Returns a top domain from the top 10K list matching the given |skeleton|. - TopDomainEntry LookupSkeletonInTopDomains(const std::string& skeleton); + // |skeleton_type| tells whether |skeleton| is a full skeleton (the default) or + // one with '.' and '-'s removed (SkeletonType::kSeparatorsRemoved). + TopDomainEntry LookupSkeletonInTopDomains( + const std::string& skeleton, + SkeletonType skeleton_type = SkeletonType::kFull); // Used for unit tests. static void SetTrieParamsForTesting(const HuffmanTrieParams& trie_params); diff --git a/chromium/components/url_formatter/spoof_checks/idn_spoof_checker_unittest.cc b/chromium/components/url_formatter/spoof_checks/idn_spoof_checker_unittest.cc index f9821773c96..69c4d9c8d50 100644 --- a/chromium/components/url_formatter/spoof_checks/idn_spoof_checker_unittest.cc +++ b/chromium/components/url_formatter/spoof_checks/idn_spoof_checker_unittest.cc @@ -1369,12 +1369,21 @@ TEST_F(IDNSpoofCheckerTest, LookupSkeletonInTopDomains) { IDNSpoofChecker().LookupSkeletonInTopDomains("d4OOO.corn"); EXPECT_EQ("d4000.com", entry.domain); EXPECT_TRUE(entry.is_top_500); + EXPECT_EQ(entry.skeleton_type, SkeletonType::kFull); + } + { + TopDomainEntry entry = IDNSpoofChecker().LookupSkeletonInTopDomains( + "d4OOOcorn", SkeletonType::kSeparatorsRemoved); + EXPECT_EQ("d4000.com", entry.domain); + EXPECT_TRUE(entry.is_top_500); + EXPECT_EQ(entry.skeleton_type, SkeletonType::kSeparatorsRemoved); } { TopDomainEntry entry = 
IDNSpoofChecker().LookupSkeletonInTopDomains("digklrno68.corn"); EXPECT_EQ("digklmo68.com", entry.domain); EXPECT_FALSE(entry.is_top_500); + EXPECT_EQ(entry.skeleton_type, SkeletonType::kFull); } } @@ -1385,6 +1394,14 @@ TEST(IDNSpoofCheckerNoFixtureTest, LookupSkeletonInTopDomains) { IDNSpoofChecker().LookupSkeletonInTopDomains("google.corn"); EXPECT_EQ("google.com", entry.domain); EXPECT_TRUE(entry.is_top_500); + EXPECT_EQ(entry.skeleton_type, SkeletonType::kFull); + } + { + TopDomainEntry entry = IDNSpoofChecker().LookupSkeletonInTopDomains( + "googlecorn", SkeletonType::kSeparatorsRemoved); + EXPECT_EQ("google.com", entry.domain); + EXPECT_TRUE(entry.is_top_500); + EXPECT_EQ(entry.skeleton_type, SkeletonType::kSeparatorsRemoved); } { // This is data dependent, must be updated when the top domain list @@ -1393,6 +1410,7 @@ TEST(IDNSpoofCheckerNoFixtureTest, LookupSkeletonInTopDomains) { IDNSpoofChecker().LookupSkeletonInTopDomains("google.sk"); EXPECT_EQ("google.sk", entry.domain); EXPECT_FALSE(entry.is_top_500); + EXPECT_EQ(entry.skeleton_type, SkeletonType::kFull); } } @@ -1410,6 +1428,7 @@ TEST(IDNSpoofCheckerNoFixtureTest, UnsafeIDNToUnicodeWithDetails) { const char* const expected_matching_domain; // If true, the matching top domain is expected to be in top 500. const bool expected_is_top_500; + const IDNSpoofChecker::Result expected_spoof_check_result; } kTestCases[] = { {// An ASCII, top domain. "google.com", L"google.com", false, @@ -1417,22 +1436,25 @@ TEST(IDNSpoofCheckerNoFixtureTest, UnsafeIDNToUnicodeWithDetails) { "", // ...And since we don't match it to a top domain, we don't know if it's // a top 500 domain. - false}, + false, IDNSpoofChecker::Result::kNone}, {// An ASCII domain that's not a top domain. 
- "not-top-domain.com", L"not-top-domain.com", false, "", false}, + "not-top-domain.com", L"not-top-domain.com", false, "", false, + IDNSpoofChecker::Result::kNone}, {// A unicode domain that's valid according to all of the rules in IDN // spoof checker except that it matches a top domain. Should be - // converted to punycode. - "xn--googl-fsa.com", L"googlé.com", true, "google.com", true}, + // converted to punycode. Spoof check result is kSafe because top domain + // similarity isn't included in IDNSpoofChecker::Result. + "xn--googl-fsa.com", L"googlé.com", true, "google.com", true, + IDNSpoofChecker::Result::kSafe}, {// A unicode domain that's not valid according to the rules in IDN spoof - // checker (mixed script) and it matches a top domain. Should be - // converted to punycode. - "xn--80ak6aa92e.com", L"аррӏе.com", true, "apple.com", true}, + // checker (whole script confusable in Cyrillic) and it matches a top + // domain. Should be converted to punycode. + "xn--80ak6aa92e.com", L"аррӏе.com", true, "apple.com", true, + IDNSpoofChecker::Result::kWholeScriptConfusable}, {// A unicode domain that's not valid according to the rules in IDN spoof // checker (mixed script) but it doesn't match a top domain. 
"xn--o-o-oai-26a223aia177a7ab7649d.com", L"ɴoτ-τoρ-ďoᛖaiɴ.com", true, "", - false}, - }; + false, IDNSpoofChecker::Result::kICUSpoofChecks}}; for (const TestCase& test_case : kTestCases) { const url_formatter::IDNConversionResult result = @@ -1443,6 +1465,7 @@ TEST(IDNSpoofCheckerNoFixtureTest, UnsafeIDNToUnicodeWithDetails) { result.matching_top_domain.domain); EXPECT_EQ(test_case.expected_is_top_500, result.matching_top_domain.is_top_500); + EXPECT_EQ(test_case.expected_spoof_check_result, result.spoof_check_result); } } diff --git a/chromium/components/url_formatter/spoof_checks/top_domains/domains.skeletons b/chromium/components/url_formatter/spoof_checks/top_domains/domains.skeletons index dcc1cfe907a..a96c5cb2a4c 100644 --- a/chromium/components/url_formatter/spoof_checks/top_domains/domains.skeletons +++ b/chromium/components/url_formatter/spoof_checks/top_domains/domains.skeletons @@ -6,504 +6,513 @@ # components/url_formatter/spoof_checks/make_top_domain_skeletons.cc # DO NOT MANUALLY EDIT! +# This list contains top 500 domains followed by the top 5000 domains. These are +# separated by ###END_TOP_500### line. + +# For top 500 domains, each row has three columns: full skeleton, skeleton +# without label separators (e.g. '.' and '-'), and the domain itself. + +# For top 5000 domains, each row has two columns: full skeleton and the domain +# itself. + # Each entry is the skeleton of a top domain for the confusability check # in components/url_formatter/url_formatter.cc. 
-l7track.net, 17track.net -24h.corn.vn, 24h.com.vn -acadernia.edu, academia.edu -acadernic.ru, academic.ru -accuweather.corn, accuweather.com -adobe.corn, adobe.com -adp.corn, adp.com -agoda.corn, agoda.com -aif.ru, aif.ru -airbnb.corn, airbnb.com -alibaba.corn, alibaba.com -aliexpress.corn, aliexpress.com -allegro.pl, allegro.pl -allocine.fr, allocine.fr -alodokter.corn, alodokter.com -arnazon.ca, amazon.ca -arnazon.co.jp, amazon.co.jp -arnazon.co.uk, amazon.co.uk -arnazon.corn, amazon.com -arnazon.corn.rnx, amazon.com.mx -arnazon.de, amazon.de -arnazon.es, amazon.es -arnazon.fr, amazon.fr -arnazon.in, amazon.in -arnazon.it, amazon.it -arneblo.jp, ameblo.jp -arnericanexpress.corn, americanexpress.com -ancestry.corn, ancestry.com -anirneflv.net, animeflv.net -anyxxx.pro, anyxxx.pro -aol.corn, aol.com -apkpure.corn, apkpure.com -apple.corn, apple.com -appledaily.corn, appledaily.com -archive.org, archive.org -asahi.corn, asahi.com -ask.corn, ask.com -asos.corn, asos.com -asus.corn, asus.com -autotrader.corn, autotrader.com -avito.ru, avito.ru -azlyrics.corn, azlyrics.com -badoo.corn, badoo.com -baidu.corn, baidu.com -baixaki.corn.br, baixaki.com.br -banggood.corn, banggood.com -bankofarnerica.corn, bankofamerica.com -battle.net, battle.net -bbc.co.uk, bbc.co.uk -bbc.corn, bbc.com -beeg.corn, beeg.com -bestbuy.corn, bestbuy.com -bet365.corn, bet365.com -biblegateway.corn, biblegateway.com -biglobe.ne.jp, biglobe.ne.jp -bild.de, bild.de -bilibili.corn, bilibili.com -bing.corn, bing.com -blackboard.corn, blackboard.com -blogspot.corn, blogspot.com -bloornberg.corn, bloomberg.com -booking.corn, booking.com -bookrnyshow.corn, bookmyshow.com -britannica.corn, britannica.com -bukalapak.corn, bukalapak.com -businessinsider.corn, businessinsider.com -bustle.corn, bustle.com -buzzfeed.corn, buzzfeed.com -ca.gov, ca.gov -caixa.gov.br, caixa.gov.br -canada.ca, canada.ca -canalrcn.corn, canalrcn.com -canva.corn, canva.com -capitalone.corn, capitalone.com -cargurus.corn, 
cargurus.com -cbsnews.corn, cbsnews.com -cbssports.corn, cbssports.com -ccrn.net, ccm.net -cdc.gov, cdc.gov -cdiscount.corn, cdiscount.com -ceneo.pl, ceneo.pl -chase.corn, chase.com -chaturbate.corn, chaturbate.com -chegg.corn, chegg.com -chinatirnes.corn, chinatimes.com -chip.de, chip.de -chron.corn, chron.com -cinecalidad.to, cinecalidad.to -clarin.corn, clarin.com -cnbc.corn, cnbc.com -cnet.corn, cnet.com -cnn.corn, cnn.com -collegedunia.corn, collegedunia.com -correios.corn.br, correios.com.br -craigslist.org, craigslist.org -cricbuzz.corn, cricbuzz.com -dailyrnail.co.uk, dailymail.co.uk -dailyrnotion.corn, dailymotion.com -daurn.net, daum.net -dell.corn, dell.com -depositphotos.corn, depositphotos.com -detik.corn, detik.com -deviantart.corn, deviantart.com -dhgate.corn, dhgate.com -dhl.de, dhl.de -dictionary.corn, dictionary.com -discordapp.corn, discordapp.com -drnrn.co.jp, dmm.co.jp -drnrn.corn, dmm.com -drorn.ru, drom.ru -dropbox.corn, dropbox.com -ebay-kleinanzeigen.de, ebay-kleinanzeigen.de -ebay.co.uk, ebay.co.uk -ebay.corn, ebay.com -ebay.corn.au, ebay.com.au -ebay.de, ebay.de -ebay.es, ebay.es -ebay.fr, ebay.fr -ebay.it, ebay.it -eksisozluk.corn, eksisozluk.com -elconfidencial.corn, elconfidencial.com -elintransigente.corn, elintransigente.com -elrnundo.es, elmundo.es -elpais.corn, elpais.com -eluniversal.corn.rnx, eluniversal.com.mx -ernag.ro, emag.ro -engadget.corn, engadget.com -epicgarnes.corn, epicgames.com -espn.corn, espn.com -etsy.corn, etsy.com -europa.eu, europa.eu -eventbrite.corn, eventbrite.com -excite.co.jp, excite.co.jp -expedia.corn, expedia.com -express.co.uk, express.co.uk -facebook.corn, facebook.com -fandango.corn, fandango.com -fandorn.corn, fandom.com -fast.corn, fast.com -fazenda.gov.br, fazenda.gov.br -fc2.corn, fc2.com -fedex.corn, fedex.com -filrnweb.pl, filmweb.pl -flickr.corn, flickr.com -flipkart.corn, flipkart.com -fnac.corn, fnac.com -forbes.corn, forbes.com -foxnews.corn, foxnews.com -francetvinfo.fr, francetvinfo.fr 
-free.fr, free.fr -gaana.corn, gaana.com -garnepedia.corn, gamepedia.com -garnespot.corn, gamespot.com -gearbest.corn, gearbest.com -genius.corn, genius.com -gfycat.corn, gfycat.com -giphy.corn, giphy.com -gisrneteo.ru, gismeteo.ru -github.corn, github.com -glassdoor.corn, glassdoor.com -globo.corn, globo.com -glosbe.corn, glosbe.com -go.corn, go.com -goal.corn, goal.com -gofundrne.corn, gofundme.com -goo.ne.jp, goo.ne.jp -goodreads.corn, goodreads.com -google.ca, google.ca -google.cl, google.cl -google.co.id, google.co.id -google.co.in, google.co.in -google.co.jp, google.co.jp -google.co.kr, google.co.kr -google.co.th, google.co.th -google.co.uk, google.co.uk -google.corn, google.com -google.corn.ar, google.com.ar -google.corn.au, google.com.au -google.corn.br, google.com.br -google.corn.co, google.com.co -google.corn.rnx, google.com.mx -google.corn.tr, google.com.tr -google.corn.tw, google.com.tw -google.de, google.de -google.es, google.es -google.fr, google.fr -google.it, google.it -google.nl, google.nl -google.pl, google.pl -google.pt, google.pt -google.ru, google.ru -gotporn.corn, gotporn.com -gsrnarena.corn, gsmarena.com -gurntree.corn, gumtree.com -haberler.corn, haberler.com -hatenablog.corn, hatenablog.com -hdfcbank.corn, hdfcbank.com -hdrezka.ag, hdrezka.ag -healthline.corn, healthline.com -hilton.corn, hilton.com -hindustantirnes.corn, hindustantimes.com -hornedepot.corn, homedepot.com -hotels.corn, hotels.com -hotpepper.jp, hotpepper.jp -hotstar.corn, hotstar.com -hp.corn, hp.com -huawei.corn, huawei.com -huffpost.corn, huffpost.com -hulu.corn, hulu.com -icicibank.corn, icicibank.com -icloud.corn, icloud.com -ign.corn, ign.com -ikea.corn, ikea.com -ilrneteo.it, ilmeteo.it -ilovepdf.corn, ilovepdf.com -irndb.corn, imdb.com -irngur.corn, imgur.com -irnpress.co.jp, impress.co.jp -indeed.corn, indeed.com -indiarnart.corn, indiamart.com -indiatirnes.corn, indiatimes.com -indiatoday.in, indiatoday.in -infobae.corn, infobae.com -instagrarn.corn, instagram.com 
-instructables.corn, instructables.com -instructure.corn, instructure.com -interia.pl, interia.pl -intuit.corn, intuit.com -investing.corn, investing.com -investopedia.corn, investopedia.com -irecornrnend.ru, irecommend.ru -irs.gov, irs.gov -itau.corn.br, itau.com.br -itrnedia.co.jp, itmedia.co.jp -jobrapido.corn, jobrapido.com -justdial.corn, justdial.com -jw.org, jw.org -kahoot.it, kahoot.it -kakaku.corn, kakaku.com -kakao.corn, kakao.com -kayak.corn, kayak.com -kenhl4.vn, kenh14.vn -khanacaderny.org, khanacademy.org -kinopoisk.ru, kinopoisk.ru -kizlarsoruyor.corn, kizlarsoruyor.com -kknews.cc, kknews.cc -kohls.corn, kohls.com -kornpas.corn, kompas.com -kotobank.jp, kotobank.jp -kp.ru, kp.ru -lazada.co.th, lazada.co.th -leagueoflegends.corn, leagueoflegends.com -leboncoin.fr, leboncoin.fr -lernonde.fr, lemonde.fr -lenta.ru, lenta.ru -letrnejerk.corn, letmejerk.com -lifewire.corn, lifewire.com -line.rne, line.me -linkedin.corn, linkedin.com -live.corn, live.com -livedoor.corn, livedoor.com -livedoor.jp, livedoor.jp -livejournal.corn, livejournal.com -livescore.corn, livescore.com -livestrong.corn, livestrong.com -rnail.ru, mail.ru -rnakernytrip.corn, makemytrip.com -rnalavida.corn, malavida.com -rnarca.corn, marca.com -rnawdoo3.corn, mawdoo3.com -rnayoclinic.org, mayoclinic.org -rneb.gov.tr, meb.gov.tr -rnediafire.corn, mediafire.com -rnediurn.corn, medium.com -rnega.nz, mega.nz -rnercadolibre.corn, mercadolibre.com -rnercadolibre.corn.ar, mercadolibre.com.ar -rnercadolibre.corn.co, mercadolibre.com.co -rnercadolibre.corn.rnx, mercadolibre.com.mx -rnercadolivre.corn.br, mercadolivre.com.br -rnercari.corn, mercari.com -rnessenger.corn, messenger.com -rni.corn, mi.com -rnicrosoft.corn, microsoft.com -rnicrosoftonline.corn, microsoftonline.com -rnilliyet.corn.tr, milliyet.com.tr -rnk.ru, mk.ru -rnlb.corn, mlb.com -rnobile.de, mobile.de -rnobileOl.corn, mobile01.com -rnonografias.corn, monografias.com -rnsn.corn, msn.com -rnundodeportivo.corn, mundodeportivo.com 
-rnyanirnelist.net, myanimelist.net -rnynavi.jp, mynavi.jp -rnynet.corn, mynet.com -nll.corn, n11.com -narnu.wiki, namu.wiki -naver.corn, naver.com -naver.jp, naver.jp -nba.corn, nba.com -nbcnews.corn, nbcnews.com -nbcsports.corn, nbcsports.com -ndtv.corn, ndtv.com -netflix.corn, netflix.com -newsl8.corn, news18.com -nextdoor.corn, nextdoor.com -nhk.or.jp, nhk.or.jp -nicovideo.jp, nicovideo.jp -nifty.corn, nifty.com -nih.gov, nih.gov -nike.corn, nike.com -nikkansports.corn, nikkansports.com -nikkei.corn, nikkei.com -nvidia.corn, nvidia.com -nypost.corn, nypost.com -nytirnes.corn, nytimes.com -office.corn, office.com -office365.corn, office365.com -ok.ru, ok.ru -okdiario.corn, okdiario.com -olx.co.id, olx.co.id -olx.corn.br, olx.com.br -olx.pl, olx.pl -olx.ua, olx.ua -onet.pl, onet.pl -onlinesbi.corn, onlinesbi.com -onlinevideoconverter.corn, onlinevideoconverter.com -op.gg, op.gg -orange.fr, orange.fr -otto.de, otto.de -otvfoco.corn.br, otvfoco.com.br -otzovik.corn, otzovik.com -over-blog.corn, over-blog.com -pantip.corn, pantip.com -passeidireto.corn, passeidireto.com -patreon.corn, patreon.com -paypal.corn, paypal.com -perfectgirls.net, perfectgirls.net -pinterest.co.uk, pinterest.co.uk -pinterest.corn, pinterest.com -pinterest.es, pinterest.es -pixiv.net, pixiv.net -pixnet.net, pixnet.net -playstation.corn, playstation.com -politico.corn, politico.com -polygon.corn, polygon.com -pornhub.corn, pornhub.com -prezi.corn, prezi.com -prirnevideo.corn, primevideo.com -prograrnrne-tv.net, programme-tv.net -qq.corn, qq.com -qualtrics.corn, qualtrics.com -quizlet.corn, quizlet.com -quora.corn, quora.com -rakuten.co.jp, rakuten.co.jp -rarnbler.ru, rambler.ru -ranker.corn, ranker.com -reddit.corn, reddit.com -redtube.corn, redtube.com -researchgate.net, researchgate.net -reverso.net, reverso.net -rg.ru, rg.ru -rightrnove.co.uk, rightmove.co.uk -roblox.corn, roblox.com -rottentornatoes.corn, rottentomatoes.com -rozetka.corn.ua, rozetka.com.ua -rt.corn, rt.com -sabah.corn.tr, 
sabah.com.tr -sahibinden.corn, sahibinden.com -sarnsung.corn, samsung.com -sanook.corn, sanook.com -sarkariresult.corn, sarkariresult.com -savefrorn.net, savefrom.net -sciencedirect.corn, sciencedirect.com -scribd.corn, scribd.com -sfgate.corn, sfgate.com -shopee.co.th, shopee.co.th -shutterstock.corn, shutterstock.com -sinoptik.ua, sinoptik.ua -sky.corn, sky.com -skyscanner.net, skyscanner.net -skysports.corn, skysports.com -slideshare.net, slideshare.net -srnallpdf.corn, smallpdf.com -srni2.ru, smi2.ru -softonic.corn, softonic.com -soundcloud.corn, soundcloud.com -sozcu.corn.tr, sozcu.com.tr -spankbang.corn, spankbang.com -speedtest.net, speedtest.net -spiegel.de, spiegel.de -spotify.corn, spotify.com -sputniknews.corn, sputniknews.com -stackexchange.corn, stackexchange.com -stackoverflow.corn, stackoverflow.com -stearncornrnunity.corn, steamcommunity.com -stearnpowered.corn, steampowered.com -study.corn, study.com -surveyrnonkey.corn, surveymonkey.com -t-online.de, t-online.de -tabelog.corn, tabelog.com -taleo.net, taleo.net -taobao.corn, taobao.com -target.corn, target.com -techradar.corn, techradar.com -techtudo.corn.br, techtudo.com.br -telegraph.co.uk, telegraph.co.uk -terrnornetropolitico.it, termometropolitico.it -thebalancecareers.corn, thebalancecareers.com -thedailybeast.corn, thedailybeast.com -thefreedictionary.corn, thefreedictionary.com -theguardian.corn, theguardian.com -thepiratebay.org, thepiratebay.org -thesaurus.corn, thesaurus.com -theverge.corn, theverge.com -thoughtco.corn, thoughtco.com -tirn.it, tim.it -tistory.corn, tistory.com -tnaflix.corn, tnaflix.com -tokopedia.corn, tokopedia.com -trello.corn, trello.com -tribunnews.corn, tribunnews.com -tripadvisor.corn, tripadvisor.com -trulia.corn, trulia.com -trustpilot.corn, trustpilot.com -turnblr.corn, tumblr.com -tutorialspoint.corn, tutorialspoint.com -twitch.tv, twitch.tv -twitter.corn, twitter.com -uber.corn, uber.com -ubisoft.corn, ubisoft.com -uderny.corn, udemy.com -udn.corn, udn.com 
-ultirnate-guitar.corn, ultimate-guitar.com -unarn.rnx, unam.mx -uniqlo.corn, uniqlo.com -uol.corn.br, uol.com.br -ups.corn, ups.com -uptodown.corn, uptodown.com -urbandictionary.corn, urbandictionary.com -usatoday.corn, usatoday.com -usps.corn, usps.com -vice.corn, vice.com -virneo.corn, vimeo.com -vk.corn, vk.com -vnexpress.net, vnexpress.net -w3schools.corn, w3schools.com -walrnart.corn, walmart.com -washingtonpost.corn, washingtonpost.com -wattpad.corn, wattpad.com -weather.corn, weather.com -web.de, web.de -webrnd.corn, webmd.com -weebly.corn, weebly.com -wellsfargo.corn, wellsfargo.com -wetransfer.corn, wetransfer.com -whatsapp.corn, whatsapp.com -wikihow.corn, wikihow.com -wikipedia.org, wikipedia.org -wiktionary.org, wiktionary.org -wish.corn, wish.com -wixsite.corn, wixsite.com -wondershare.corn, wondershare.com -wordpress.corn, wordpress.com -wordreference.corn, wordreference.com -wp.pl, wp.pl -wsj.corn, wsj.com -www.gob.rnx, www.gob.mx -www.gov.uk, www.gov.uk -xbox.corn, xbox.com -xe.corn, xe.com -xfinity.corn, xfinity.com -xharnster.corn, xhamster.com -xnxx.corn, xnxx.com -xnxx2.pro, xnxx2.pro -xozilla.corn, xozilla.com -xvideos.corn, xvideos.com -y2rnate.corn, y2mate.com -yahoo.co.jp, yahoo.co.jp -yahoo.corn, yahoo.com -yandex.ru, yandex.ru -yelp.corn, yelp.com -yenisafak.corn, yenisafak.com -youjizz.corn, youjizz.com -yourn7.corn, youm7.com -youporn.corn, youporn.com -yourdictionary.corn, yourdictionary.com -youtube.corn, youtube.com -yts.arn, yts.am -zendesk.corn, zendesk.com -zillow.corn, zillow.com -zing.vn, zing.vn -znanija.corn, znanija.com -zornato.corn, zomato.com +l7track.net, l7tracknet, 17track.net +24h.corn.vn, 24hcornvn, 24h.com.vn +acadernia.edu, acaderniaedu, academia.edu +acadernic.ru, acadernicru, academic.ru +accuweather.corn, accuweathercorn, accuweather.com +adobe.corn, adobecorn, adobe.com +adp.corn, adpcorn, adp.com +agoda.corn, agodacorn, agoda.com +aif.ru, aifru, aif.ru +airbnb.corn, airbnbcorn, airbnb.com +alibaba.corn, 
alibabacorn, alibaba.com +aliexpress.corn, aliexpresscorn, aliexpress.com +allegro.pl, allegropl, allegro.pl +allocine.fr, allocinefr, allocine.fr +alodokter.corn, alodoktercorn, alodokter.com +arnazon.ca, arnazonca, amazon.ca +arnazon.co.jp, arnazoncojp, amazon.co.jp +arnazon.co.uk, arnazoncouk, amazon.co.uk +arnazon.corn, arnazoncorn, amazon.com +arnazon.corn.rnx, arnazoncornrnx, amazon.com.mx +arnazon.de, arnazonde, amazon.de +arnazon.es, arnazones, amazon.es +arnazon.fr, arnazonfr, amazon.fr +arnazon.in, arnazonin, amazon.in +arnazon.it, arnazonit, amazon.it +arneblo.jp, arneblojp, ameblo.jp +arnericanexpress.corn, arnericanexpresscorn, americanexpress.com +ancestry.corn, ancestrycorn, ancestry.com +anirneflv.net, anirneflvnet, animeflv.net +anyxxx.pro, anyxxxpro, anyxxx.pro +aol.corn, aolcorn, aol.com +apkpure.corn, apkpurecorn, apkpure.com +apple.corn, applecorn, apple.com +appledaily.corn, appledailycorn, appledaily.com +archive.org, archiveorg, archive.org +asahi.corn, asahicorn, asahi.com +ask.corn, askcorn, ask.com +asos.corn, asoscorn, asos.com +asus.corn, asuscorn, asus.com +autotrader.corn, autotradercorn, autotrader.com +avito.ru, avitoru, avito.ru +azlyrics.corn, azlyricscorn, azlyrics.com +badoo.corn, badoocorn, badoo.com +baidu.corn, baiducorn, baidu.com +baixaki.corn.br, baixakicornbr, baixaki.com.br +banggood.corn, banggoodcorn, banggood.com +bankofarnerica.corn, bankofarnericacorn, bankofamerica.com +battle.net, battlenet, battle.net +bbc.co.uk, bbccouk, bbc.co.uk +bbc.corn, bbccorn, bbc.com +beeg.corn, beegcorn, beeg.com +bestbuy.corn, bestbuycorn, bestbuy.com +bet365.corn, bet365corn, bet365.com +biblegateway.corn, biblegatewaycorn, biblegateway.com +biglobe.ne.jp, biglobenejp, biglobe.ne.jp +bild.de, bildde, bild.de +bilibili.corn, bilibilicorn, bilibili.com +bing.corn, bingcorn, bing.com +blackboard.corn, blackboardcorn, blackboard.com +blogspot.corn, blogspotcorn, blogspot.com +bloornberg.corn, bloornbergcorn, bloomberg.com +booking.corn, 
bookingcorn, booking.com +bookrnyshow.corn, bookrnyshowcorn, bookmyshow.com +britannica.corn, britannicacorn, britannica.com +bukalapak.corn, bukalapakcorn, bukalapak.com +businessinsider.corn, businessinsidercorn, businessinsider.com +bustle.corn, bustlecorn, bustle.com +buzzfeed.corn, buzzfeedcorn, buzzfeed.com +ca.gov, cagov, ca.gov +caixa.gov.br, caixagovbr, caixa.gov.br +canada.ca, canadaca, canada.ca +canalrcn.corn, canalrcncorn, canalrcn.com +canva.corn, canvacorn, canva.com +capitalone.corn, capitalonecorn, capitalone.com +cargurus.corn, carguruscorn, cargurus.com +cbsnews.corn, cbsnewscorn, cbsnews.com +cbssports.corn, cbssportscorn, cbssports.com +ccrn.net, ccrnnet, ccm.net +cdc.gov, cdcgov, cdc.gov +cdiscount.corn, cdiscountcorn, cdiscount.com +ceneo.pl, ceneopl, ceneo.pl +chase.corn, chasecorn, chase.com +chaturbate.corn, chaturbatecorn, chaturbate.com +chegg.corn, cheggcorn, chegg.com +chinatirnes.corn, chinatirnescorn, chinatimes.com +chip.de, chipde, chip.de +chron.corn, chroncorn, chron.com +cinecalidad.to, cinecalidadto, cinecalidad.to +clarin.corn, clarincorn, clarin.com +cnbc.corn, cnbccorn, cnbc.com +cnet.corn, cnetcorn, cnet.com +cnn.corn, cnncorn, cnn.com +collegedunia.corn, collegeduniacorn, collegedunia.com +correios.corn.br, correioscornbr, correios.com.br +craigslist.org, craigslistorg, craigslist.org +cricbuzz.corn, cricbuzzcorn, cricbuzz.com +dailyrnail.co.uk, dailyrnailcouk, dailymail.co.uk +dailyrnotion.corn, dailyrnotioncorn, dailymotion.com +daurn.net, daurnnet, daum.net +dell.corn, dellcorn, dell.com +depositphotos.corn, depositphotoscorn, depositphotos.com +detik.corn, detikcorn, detik.com +deviantart.corn, deviantartcorn, deviantart.com +dhgate.corn, dhgatecorn, dhgate.com +dhl.de, dhlde, dhl.de +dictionary.corn, dictionarycorn, dictionary.com +discordapp.corn, discordappcorn, discordapp.com +drnrn.co.jp, drnrncojp, dmm.co.jp +drnrn.corn, drnrncorn, dmm.com +drorn.ru, drornru, drom.ru +dropbox.corn, dropboxcorn, dropbox.com 
+ebay-kleinanzeigen.de, ebaykleinanzeigende, ebay-kleinanzeigen.de +ebay.co.uk, ebaycouk, ebay.co.uk +ebay.corn, ebaycorn, ebay.com +ebay.corn.au, ebaycornau, ebay.com.au +ebay.de, ebayde, ebay.de +ebay.es, ebayes, ebay.es +ebay.fr, ebayfr, ebay.fr +ebay.it, ebayit, ebay.it +eksisozluk.corn, eksisozlukcorn, eksisozluk.com +elconfidencial.corn, elconfidencialcorn, elconfidencial.com +elintransigente.corn, elintransigentecorn, elintransigente.com +elrnundo.es, elrnundoes, elmundo.es +elpais.corn, elpaiscorn, elpais.com +eluniversal.corn.rnx, eluniversalcornrnx, eluniversal.com.mx +ernag.ro, ernagro, emag.ro +engadget.corn, engadgetcorn, engadget.com +epicgarnes.corn, epicgarnescorn, epicgames.com +espn.corn, espncorn, espn.com +etsy.corn, etsycorn, etsy.com +europa.eu, europaeu, europa.eu +eventbrite.corn, eventbritecorn, eventbrite.com +excite.co.jp, excitecojp, excite.co.jp +expedia.corn, expediacorn, expedia.com +express.co.uk, expresscouk, express.co.uk +facebook.corn, facebookcorn, facebook.com +fandango.corn, fandangocorn, fandango.com +fandorn.corn, fandorncorn, fandom.com +fast.corn, fastcorn, fast.com +fazenda.gov.br, fazendagovbr, fazenda.gov.br +fc2.corn, fc2corn, fc2.com +fedex.corn, fedexcorn, fedex.com +filrnweb.pl, filrnwebpl, filmweb.pl +flickr.corn, flickrcorn, flickr.com +flipkart.corn, flipkartcorn, flipkart.com +fnac.corn, fnaccorn, fnac.com +forbes.corn, forbescorn, forbes.com +foxnews.corn, foxnewscorn, foxnews.com +francetvinfo.fr, francetvinfofr, francetvinfo.fr +free.fr, freefr, free.fr +gaana.corn, gaanacorn, gaana.com +garnepedia.corn, garnepediacorn, gamepedia.com +garnespot.corn, garnespotcorn, gamespot.com +gearbest.corn, gearbestcorn, gearbest.com +genius.corn, geniuscorn, genius.com +gfycat.corn, gfycatcorn, gfycat.com +giphy.corn, giphycorn, giphy.com +gisrneteo.ru, gisrneteoru, gismeteo.ru +github.corn, githubcorn, github.com +glassdoor.corn, glassdoorcorn, glassdoor.com +globo.corn, globocorn, globo.com +glosbe.corn, glosbecorn, 
glosbe.com +go.corn, gocorn, go.com +goal.corn, goalcorn, goal.com +gofundrne.corn, gofundrnecorn, gofundme.com +goo.ne.jp, goonejp, goo.ne.jp +goodreads.corn, goodreadscorn, goodreads.com +google.ca, googleca, google.ca +google.cl, googlecl, google.cl +google.co.id, googlecoid, google.co.id +google.co.in, googlecoin, google.co.in +google.co.jp, googlecojp, google.co.jp +google.co.kr, googlecokr, google.co.kr +google.co.th, googlecoth, google.co.th +google.co.uk, googlecouk, google.co.uk +google.corn, googlecorn, google.com +google.corn.ar, googlecornar, google.com.ar +google.corn.au, googlecornau, google.com.au +google.corn.br, googlecornbr, google.com.br +google.corn.co, googlecornco, google.com.co +google.corn.rnx, googlecornrnx, google.com.mx +google.corn.tr, googlecorntr, google.com.tr +google.corn.tw, googlecorntw, google.com.tw +google.de, googlede, google.de +google.es, googlees, google.es +google.fr, googlefr, google.fr +google.it, googleit, google.it +google.nl, googlenl, google.nl +google.pl, googlepl, google.pl +google.pt, googlept, google.pt +google.ru, googleru, google.ru +gotporn.corn, gotporncorn, gotporn.com +gsrnarena.corn, gsrnarenacorn, gsmarena.com +gurntree.corn, gurntreecorn, gumtree.com +haberler.corn, haberlercorn, haberler.com +hatenablog.corn, hatenablogcorn, hatenablog.com +hdfcbank.corn, hdfcbankcorn, hdfcbank.com +hdrezka.ag, hdrezkaag, hdrezka.ag +healthline.corn, healthlinecorn, healthline.com +hilton.corn, hiltoncorn, hilton.com +hindustantirnes.corn, hindustantirnescorn, hindustantimes.com +hornedepot.corn, hornedepotcorn, homedepot.com +hotels.corn, hotelscorn, hotels.com +hotpepper.jp, hotpepperjp, hotpepper.jp +hotstar.corn, hotstarcorn, hotstar.com +hp.corn, hpcorn, hp.com +huawei.corn, huaweicorn, huawei.com +huffpost.corn, huffpostcorn, huffpost.com +hulu.corn, hulucorn, hulu.com +icicibank.corn, icicibankcorn, icicibank.com +icloud.corn, icloudcorn, icloud.com +ign.corn, igncorn, ign.com +ikea.corn, ikeacorn, ikea.com 
+ilrneteo.it, ilrneteoit, ilmeteo.it +ilovepdf.corn, ilovepdfcorn, ilovepdf.com +irndb.corn, irndbcorn, imdb.com +irngur.corn, irngurcorn, imgur.com +irnpress.co.jp, irnpresscojp, impress.co.jp +indeed.corn, indeedcorn, indeed.com +indiarnart.corn, indiarnartcorn, indiamart.com +indiatirnes.corn, indiatirnescorn, indiatimes.com +indiatoday.in, indiatodayin, indiatoday.in +infobae.corn, infobaecorn, infobae.com +instagrarn.corn, instagrarncorn, instagram.com +instructables.corn, instructablescorn, instructables.com +instructure.corn, instructurecorn, instructure.com +interia.pl, interiapl, interia.pl +intuit.corn, intuitcorn, intuit.com +investing.corn, investingcorn, investing.com +investopedia.corn, investopediacorn, investopedia.com +irecornrnend.ru, irecornrnendru, irecommend.ru +irs.gov, irsgov, irs.gov +itau.corn.br, itaucornbr, itau.com.br +itrnedia.co.jp, itrnediacojp, itmedia.co.jp +jobrapido.corn, jobrapidocorn, jobrapido.com +justdial.corn, justdialcorn, justdial.com +jw.org, jworg, jw.org +kahoot.it, kahootit, kahoot.it +kakaku.corn, kakakucorn, kakaku.com +kakao.corn, kakaocorn, kakao.com +kayak.corn, kayakcorn, kayak.com +kenhl4.vn, kenhl4vn, kenh14.vn +khanacaderny.org, khanacadernyorg, khanacademy.org +kinopoisk.ru, kinopoiskru, kinopoisk.ru +kizlarsoruyor.corn, kizlarsoruyorcorn, kizlarsoruyor.com +kknews.cc, kknewscc, kknews.cc +kohls.corn, kohlscorn, kohls.com +kornpas.corn, kornpascorn, kompas.com +kotobank.jp, kotobankjp, kotobank.jp +kp.ru, kpru, kp.ru +lazada.co.th, lazadacoth, lazada.co.th +leagueoflegends.corn, leagueoflegendscorn, leagueoflegends.com +leboncoin.fr, leboncoinfr, leboncoin.fr +lernonde.fr, lernondefr, lemonde.fr +lenta.ru, lentaru, lenta.ru +letrnejerk.corn, letrnejerkcorn, letmejerk.com +lifewire.corn, lifewirecorn, lifewire.com +line.rne, linerne, line.me +linkedin.corn, linkedincorn, linkedin.com +live.corn, livecorn, live.com +livedoor.corn, livedoorcorn, livedoor.com +livedoor.jp, livedoorjp, livedoor.jp 
+livejournal.corn, livejournalcorn, livejournal.com +livescore.corn, livescorecorn, livescore.com +livestrong.corn, livestrongcorn, livestrong.com +rnail.ru, rnailru, mail.ru +rnakernytrip.corn, rnakernytripcorn, makemytrip.com +rnalavida.corn, rnalavidacorn, malavida.com +rnarca.corn, rnarcacorn, marca.com +rnawdoo3.corn, rnawdoo3corn, mawdoo3.com +rnayoclinic.org, rnayoclinicorg, mayoclinic.org +rneb.gov.tr, rnebgovtr, meb.gov.tr +rnediafire.corn, rnediafirecorn, mediafire.com +rnediurn.corn, rnediurncorn, medium.com +rnega.nz, rneganz, mega.nz +rnercadolibre.corn, rnercadolibrecorn, mercadolibre.com +rnercadolibre.corn.ar, rnercadolibrecornar, mercadolibre.com.ar +rnercadolibre.corn.co, rnercadolibrecornco, mercadolibre.com.co +rnercadolibre.corn.rnx, rnercadolibrecornrnx, mercadolibre.com.mx +rnercadolivre.corn.br, rnercadolivrecornbr, mercadolivre.com.br +rnercari.corn, rnercaricorn, mercari.com +rnessenger.corn, rnessengercorn, messenger.com +rni.corn, rnicorn, mi.com +rnicrosoft.corn, rnicrosoftcorn, microsoft.com +rnicrosoftonline.corn, rnicrosoftonlinecorn, microsoftonline.com +rnilliyet.corn.tr, rnilliyetcorntr, milliyet.com.tr +rnk.ru, rnkru, mk.ru +rnlb.corn, rnlbcorn, mlb.com +rnobile.de, rnobilede, mobile.de +rnobileOl.corn, rnobileOlcorn, mobile01.com +rnonografias.corn, rnonografiascorn, monografias.com +rnsn.corn, rnsncorn, msn.com +rnundodeportivo.corn, rnundodeportivocorn, mundodeportivo.com +rnyanirnelist.net, rnyanirnelistnet, myanimelist.net +rnynavi.jp, rnynavijp, mynavi.jp +rnynet.corn, rnynetcorn, mynet.com +nll.corn, nllcorn, n11.com +narnu.wiki, narnuwiki, namu.wiki +naver.corn, navercorn, naver.com +naver.jp, naverjp, naver.jp +nba.corn, nbacorn, nba.com +nbcnews.corn, nbcnewscorn, nbcnews.com +nbcsports.corn, nbcsportscorn, nbcsports.com +ndtv.corn, ndtvcorn, ndtv.com +netflix.corn, netflixcorn, netflix.com +newsl8.corn, newsl8corn, news18.com +nextdoor.corn, nextdoorcorn, nextdoor.com +nhk.or.jp, nhkorjp, nhk.or.jp +nicovideo.jp, 
nicovideojp, nicovideo.jp +nifty.corn, niftycorn, nifty.com +nih.gov, nihgov, nih.gov +nike.corn, nikecorn, nike.com +nikkansports.corn, nikkansportscorn, nikkansports.com +nikkei.corn, nikkeicorn, nikkei.com +nvidia.corn, nvidiacorn, nvidia.com +nypost.corn, nypostcorn, nypost.com +nytirnes.corn, nytirnescorn, nytimes.com +office.corn, officecorn, office.com +office365.corn, office365corn, office365.com +ok.ru, okru, ok.ru +okdiario.corn, okdiariocorn, okdiario.com +olx.co.id, olxcoid, olx.co.id +olx.corn.br, olxcornbr, olx.com.br +olx.pl, olxpl, olx.pl +olx.ua, olxua, olx.ua +onet.pl, onetpl, onet.pl +onlinesbi.corn, onlinesbicorn, onlinesbi.com +onlinevideoconverter.corn, onlinevideoconvertercorn, onlinevideoconverter.com +op.gg, opgg, op.gg +orange.fr, orangefr, orange.fr +otto.de, ottode, otto.de +otvfoco.corn.br, otvfococornbr, otvfoco.com.br +otzovik.corn, otzovikcorn, otzovik.com +over-blog.corn, overblogcorn, over-blog.com +pantip.corn, pantipcorn, pantip.com +passeidireto.corn, passeidiretocorn, passeidireto.com +patreon.corn, patreoncorn, patreon.com +paypal.corn, paypalcorn, paypal.com +perfectgirls.net, perfectgirlsnet, perfectgirls.net +pinterest.co.uk, pinterestcouk, pinterest.co.uk +pinterest.corn, pinterestcorn, pinterest.com +pinterest.es, pinterestes, pinterest.es +pixiv.net, pixivnet, pixiv.net +pixnet.net, pixnetnet, pixnet.net +playstation.corn, playstationcorn, playstation.com +politico.corn, politicocorn, politico.com +polygon.corn, polygoncorn, polygon.com +pornhub.corn, pornhubcorn, pornhub.com +prezi.corn, prezicorn, prezi.com +prirnevideo.corn, prirnevideocorn, primevideo.com +prograrnrne-tv.net, prograrnrnetvnet, programme-tv.net +qq.corn, qqcorn, qq.com +qualtrics.corn, qualtricscorn, qualtrics.com +quizlet.corn, quizletcorn, quizlet.com +quora.corn, quoracorn, quora.com +rakuten.co.jp, rakutencojp, rakuten.co.jp +rarnbler.ru, rarnblerru, rambler.ru +ranker.corn, rankercorn, ranker.com +reddit.corn, redditcorn, reddit.com 
+redtube.corn, redtubecorn, redtube.com +researchgate.net, researchgatenet, researchgate.net +reverso.net, reversonet, reverso.net +rg.ru, rgru, rg.ru +rightrnove.co.uk, rightrnovecouk, rightmove.co.uk +roblox.corn, robloxcorn, roblox.com +rottentornatoes.corn, rottentornatoescorn, rottentomatoes.com +rozetka.corn.ua, rozetkacornua, rozetka.com.ua +rt.corn, rtcorn, rt.com +sabah.corn.tr, sabahcorntr, sabah.com.tr +sahibinden.corn, sahibindencorn, sahibinden.com +sarnsung.corn, sarnsungcorn, samsung.com +sanook.corn, sanookcorn, sanook.com +sarkariresult.corn, sarkariresultcorn, sarkariresult.com +savefrorn.net, savefrornnet, savefrom.net +sciencedirect.corn, sciencedirectcorn, sciencedirect.com +scribd.corn, scribdcorn, scribd.com +sfgate.corn, sfgatecorn, sfgate.com +shopee.co.th, shopeecoth, shopee.co.th +shutterstock.corn, shutterstockcorn, shutterstock.com +sinoptik.ua, sinoptikua, sinoptik.ua +sky.corn, skycorn, sky.com +skyscanner.net, skyscannernet, skyscanner.net +skysports.corn, skysportscorn, skysports.com +slideshare.net, slidesharenet, slideshare.net +srnallpdf.corn, srnallpdfcorn, smallpdf.com +srni2.ru, srni2ru, smi2.ru +softonic.corn, softoniccorn, softonic.com +soundcloud.corn, soundcloudcorn, soundcloud.com +sozcu.corn.tr, sozcucorntr, sozcu.com.tr +spankbang.corn, spankbangcorn, spankbang.com +speedtest.net, speedtestnet, speedtest.net +spiegel.de, spiegelde, spiegel.de +spotify.corn, spotifycorn, spotify.com +sputniknews.corn, sputniknewscorn, sputniknews.com +stackexchange.corn, stackexchangecorn, stackexchange.com +stackoverflow.corn, stackoverflowcorn, stackoverflow.com +stearncornrnunity.corn, stearncornrnunitycorn, steamcommunity.com +stearnpowered.corn, stearnpoweredcorn, steampowered.com +study.corn, studycorn, study.com +surveyrnonkey.corn, surveyrnonkeycorn, surveymonkey.com +t-online.de, tonlinede, t-online.de +tabelog.corn, tabelogcorn, tabelog.com +taleo.net, taleonet, taleo.net +taobao.corn, taobaocorn, taobao.com +target.corn, 
targetcorn, target.com +techradar.corn, techradarcorn, techradar.com +techtudo.corn.br, techtudocornbr, techtudo.com.br +telegraph.co.uk, telegraphcouk, telegraph.co.uk +terrnornetropolitico.it, terrnornetropoliticoit, termometropolitico.it +thebalancecareers.corn, thebalancecareerscorn, thebalancecareers.com +thedailybeast.corn, thedailybeastcorn, thedailybeast.com +thefreedictionary.corn, thefreedictionarycorn, thefreedictionary.com +theguardian.corn, theguardiancorn, theguardian.com +thepiratebay.org, thepiratebayorg, thepiratebay.org +thesaurus.corn, thesauruscorn, thesaurus.com +theverge.corn, thevergecorn, theverge.com +thoughtco.corn, thoughtcocorn, thoughtco.com +tirn.it, tirnit, tim.it +tistory.corn, tistorycorn, tistory.com +tnaflix.corn, tnaflixcorn, tnaflix.com +tokopedia.corn, tokopediacorn, tokopedia.com +trello.corn, trellocorn, trello.com +tribunnews.corn, tribunnewscorn, tribunnews.com +tripadvisor.corn, tripadvisorcorn, tripadvisor.com +trulia.corn, truliacorn, trulia.com +trustpilot.corn, trustpilotcorn, trustpilot.com +turnblr.corn, turnblrcorn, tumblr.com +tutorialspoint.corn, tutorialspointcorn, tutorialspoint.com +twitch.tv, twitchtv, twitch.tv +twitter.corn, twittercorn, twitter.com +uber.corn, ubercorn, uber.com +ubisoft.corn, ubisoftcorn, ubisoft.com +uderny.corn, udernycorn, udemy.com +udn.corn, udncorn, udn.com +ultirnate-guitar.corn, ultirnateguitarcorn, ultimate-guitar.com +unarn.rnx, unarnrnx, unam.mx +uniqlo.corn, uniqlocorn, uniqlo.com +uol.corn.br, uolcornbr, uol.com.br +ups.corn, upscorn, ups.com +uptodown.corn, uptodowncorn, uptodown.com +urbandictionary.corn, urbandictionarycorn, urbandictionary.com +usatoday.corn, usatodaycorn, usatoday.com +usps.corn, uspscorn, usps.com +vice.corn, vicecorn, vice.com +virneo.corn, virneocorn, vimeo.com +vk.corn, vkcorn, vk.com +vnexpress.net, vnexpressnet, vnexpress.net +w3schools.corn, w3schoolscorn, w3schools.com +walrnart.corn, walrnartcorn, walmart.com +washingtonpost.corn, 
washingtonpostcorn, washingtonpost.com +wattpad.corn, wattpadcorn, wattpad.com +weather.corn, weathercorn, weather.com +web.de, webde, web.de +webrnd.corn, webrndcorn, webmd.com +weebly.corn, weeblycorn, weebly.com +wellsfargo.corn, wellsfargocorn, wellsfargo.com +wetransfer.corn, wetransfercorn, wetransfer.com +whatsapp.corn, whatsappcorn, whatsapp.com +wikihow.corn, wikihowcorn, wikihow.com +wikipedia.org, wikipediaorg, wikipedia.org +wiktionary.org, wiktionaryorg, wiktionary.org +wish.corn, wishcorn, wish.com +wixsite.corn, wixsitecorn, wixsite.com +wondershare.corn, wondersharecorn, wondershare.com +wordpress.corn, wordpresscorn, wordpress.com +wordreference.corn, wordreferencecorn, wordreference.com +wp.pl, wppl, wp.pl +wsj.corn, wsjcorn, wsj.com +www.gob.rnx, wwwgobrnx, www.gob.mx +www.gov.uk, wwwgovuk, www.gov.uk +xbox.corn, xboxcorn, xbox.com +xe.corn, xecorn, xe.com +xfinity.corn, xfinitycorn, xfinity.com +xharnster.corn, xharnstercorn, xhamster.com +xnxx.corn, xnxxcorn, xnxx.com +xnxx2.pro, xnxx2pro, xnxx2.pro +xozilla.corn, xozillacorn, xozilla.com +xvideos.corn, xvideoscorn, xvideos.com +y2rnate.corn, y2rnatecorn, y2mate.com +yahoo.co.jp, yahoocojp, yahoo.co.jp +yahoo.corn, yahoocorn, yahoo.com +yandex.ru, yandexru, yandex.ru +yelp.corn, yelpcorn, yelp.com +yenisafak.corn, yenisafakcorn, yenisafak.com +youjizz.corn, youjizzcorn, youjizz.com +yourn7.corn, yourn7corn, youm7.com +youporn.corn, youporncorn, youporn.com +yourdictionary.corn, yourdictionarycorn, yourdictionary.com +youtube.corn, youtubecorn, youtube.com +yts.arn, ytsarn, yts.am +zendesk.corn, zendeskcorn, zendesk.com +zillow.corn, zillowcorn, zillow.com +zing.vn, zingvn, zing.vn +znanija.corn, znanijacorn, znanija.com +zornato.corn, zornatocorn, zomato.com ###END_TOP_500### Olnet.corn, 01net.com lOOO.rnenu, 1000.menu diff --git a/chromium/components/url_formatter/spoof_checks/top_domains/make_top_domain_skeletons.cc 
b/chromium/components/url_formatter/spoof_checks/top_domains/make_top_domain_skeletons.cc index e86ef84a31c..dd10ee1a667 100644 --- a/chromium/components/url_formatter/spoof_checks/top_domains/make_top_domain_skeletons.cc +++ b/chromium/components/url_formatter/spoof_checks/top_domains/make_top_domain_skeletons.cc @@ -13,8 +13,10 @@ #include "base/files/file_path.h" #include "base/files/file_util.h" #include "base/i18n/icu_util.h" +#include "base/logging.h" #include "base/numerics/safe_conversions.h" #include "base/path_service.h" +#include "base/strings/string16.h" #include "base/strings/string_split.h" #include "base/strings/string_util.h" #include "base/strings/utf_string_conversions.h" @@ -44,6 +46,31 @@ bool WriteToFile(const std::string& content, base::StringPiece basename) { return succeeded; } +std::string GenerateTop500OutputLine(const Skeletons& skeletons, + const Skeletons& no_separators_skeletons, + const std::string& domain) { + std::string output; + for (const std::string& skeleton : skeletons) { + for (const std::string& no_separators_skeleton : no_separators_skeletons) { + DCHECK(!skeleton.empty()) << "Empty skeleton for " << domain; + DCHECK(!no_separators_skeleton.empty()) + << "Empty without separator skeleton for " << domain; + output += skeleton + ", " + no_separators_skeleton + ", " + domain + "\n"; + } + } + return output; +} + +std::string GenerateTop5kOutputLine(const Skeletons& skeletons, + const std::string& domain) { + std::string output; + for (const std::string& skeleton : skeletons) { + DCHECK(!skeleton.empty()) << "Empty skeleton for " << domain; + output += skeleton + ", " + domain + "\n"; + } + return output; +} + int GenerateSkeletons(const char* input_file_name, const char* output_file_name, const USpoofChecker* spoof_checker) { @@ -55,6 +82,10 @@ int GenerateSkeletons(const char* input_file_name, return 1; } + // These characters are used to separate labels in a hostname. 
We generate + // skeletons of top 500 domains without these separators as well. These + // skeletons could be used in lookalike heuristics such as Target Embedding. + base::string16 kLabelSeparators = base::UTF8ToUTF16(".-"); std::stringstream input(input_content); std::string output = R"(# Copyright 2018 The Chromium Authors. All rights reserved. @@ -65,6 +96,15 @@ int GenerateSkeletons(const char* input_file_name, # components/url_formatter/spoof_checks/make_top_domain_skeletons.cc # DO NOT MANUALLY EDIT! +# This list contains top 500 domains followed by the top 5000 domains. These are +# separated by ###END_TOP_500### line. + +# For top 500 domains, each row has three columns: full skeleton, skeleton +# without label separators (e.g. '.' and '-'), and the domain itself. + +# For top 5000 domains, each row has two columns: full skeleton and the domain +# itself. + # Each entry is the skeleton of a top domain for the confusability check # in components/url_formatter/url_formatter.cc. @@ -75,11 +115,13 @@ int GenerateSkeletons(const char* input_file_name, std::string domain; size_t max_labels = 0; std::string domain_with_max_labels; + bool is_top_500 = true; while (std::getline(input, domain)) { base::TrimWhitespaceASCII(domain, base::TRIM_ALL, &domain); if (domain == kTop500Separator) { output += std::string(kTop500Separator) + "\n"; + is_top_500 = false; continue; } @@ -90,9 +132,21 @@ int GenerateSkeletons(const char* input_file_name, const Skeletons skeletons = skeleton_generator.GetSkeletons(domain16); DCHECK(!skeletons.empty()) << "Failed to generate skeletons of " << domain; - for (const std::string& skeleton : skeletons) { - DCHECK(!skeleton.empty()) << "Empty skeleton for " << domain; - output += skeleton + ", " + domain + "\n"; + // Generate skeletons for domains without their separators (e.g. googlecom). + // These skeletons are used in target embedding lookalikes. 
+ base::string16 domain16_with_no_separators; + base::ReplaceChars(domain16, kLabelSeparators, base::string16(), + &domain16_with_no_separators); + const Skeletons no_separators_skeletons = + skeleton_generator.GetSkeletons(domain16_with_no_separators); + DCHECK(!no_separators_skeletons.empty()) + << "No skeletons generated for " << domain16_with_no_separators; + + if (is_top_500) { + output += + GenerateTop500OutputLine(skeletons, no_separators_skeletons, domain); + } else { + output += GenerateTop5kOutputLine(skeletons, domain); } std::vector<base::StringPiece> labels = base::SplitStringPiece( diff --git a/chromium/components/url_formatter/spoof_checks/top_domains/test_domains.skeletons b/chromium/components/url_formatter/spoof_checks/top_domains/test_domains.skeletons index ddee4c7855d..9486f6b782c 100644 --- a/chromium/components/url_formatter/spoof_checks/top_domains/test_domains.skeletons +++ b/chromium/components/url_formatter/spoof_checks/top_domains/test_domains.skeletons @@ -6,10 +6,19 @@ # components/url_formatter/spoof_checks/make_top_domain_skeletons.cc # DO NOT MANUALLY EDIT! +# This list contains top 500 domains followed by the top 5000 domains. These are +# separated by ###END_TOP_500### line. + +# For top 500 domains, each row has three columns: full skeleton, skeleton +# without label separators (e.g. '.' and '-'), and the domain itself. + +# For top 5000 domains, each row has two columns: full skeleton and the domain +# itself. + # Each entry is the skeleton of a top domain for the confusability check # in components/url_formatter/url_formatter.cc. 
-d4OOO.corn, d4000.com +d4OOO.corn, d4OOOcorn, d4000.com ###END_TOP_500### digklrno68.corn, digklmo68.com digklrno68.co.uk, digklmo68.co.uk diff --git a/chromium/components/url_formatter/spoof_checks/top_domains/top_domain_generator.cc b/chromium/components/url_formatter/spoof_checks/top_domains/top_domain_generator.cc index ad36fade603..ced7b384996 100644 --- a/chromium/components/url_formatter/spoof_checks/top_domains/top_domain_generator.cc +++ b/chromium/components/url_formatter/spoof_checks/top_domains/top_domain_generator.cc @@ -51,6 +51,37 @@ void CheckName(const std::string& name) { } } +std::unique_ptr<TopDomainEntry> MakeEntry( + const std::string& hostname, + const std::string& skeleton, + url_formatter::SkeletonType skeleton_type, + bool is_top_500, + std::set<std::string>* all_skeletons) { + auto entry = std::make_unique<TopDomainEntry>(); + // Another site has the same skeleton. This is low probability so stop now. + CHECK(all_skeletons->find(skeleton) == all_skeletons->end()) + << "A domain with the same skeleton is already in the list (" << skeleton + << ")."; + + all_skeletons->insert(skeleton); + + // TODO: Should we lowercase these? + entry->skeleton = skeleton; + + // There might be unicode domains in the list. Store them in punycode in + // the trie. 
+ const GURL domain(std::string("http://") + hostname); + entry->top_domain = domain.host(); + + entry->is_top_500 = is_top_500; + entry->skeleton_type = skeleton_type; + + CheckName(entry->skeleton); + CheckName(entry->top_domain); + + return entry; +} + } // namespace int main(int argc, char* argv[]) { @@ -97,7 +128,7 @@ int main(int argc, char* argv[]) { bool is_top_500 = true; TopDomainEntries entries; - std::set<std::string> skeletons; + std::set<std::string> all_skeletons; for (std::string line : lines) { base::TrimWhitespaceASCII(line, base::TRIM_ALL, &line); @@ -109,35 +140,27 @@ int main(int argc, char* argv[]) { if (line.empty() || line[0] == '#') { continue; } - auto entry = std::make_unique<TopDomainEntry>(); std::vector<std::string> tokens = base::SplitString( line, ",", base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY); - - CHECK_EQ(2u, tokens.size()) << "Invalid line: " << tokens[0]; - const std::string skeleton = tokens[0]; - - // Another site has the same skeleton. This is low proability so stop now. - CHECK(skeletons.find(skeleton) == skeletons.end()) - << "A domain with the same skeleton is already in the list (" - << skeleton << ")."; - - skeletons.insert(skeleton); - - // TODO: Should we lowercase these? - entry->skeleton = skeleton; - - // There might be unicode domains in the list. Store them in punycode in the - // trie. - const GURL domain(std::string("http://") + tokens[1]); - entry->top_domain = domain.host(); - - entry->is_top_500 = is_top_500; - - CheckName(entry->skeleton); - CheckName(entry->top_domain); - - entries.push_back(std::move(entry)); + // Top 500 domains will have full skeletons as well as skeletons without + // label separators (e.g. '.' and '-'). 
+ if (is_top_500) { + CHECK_EQ(3u, tokens.size()) << "Invalid line: " << tokens[0]; + + entries.push_back(MakeEntry(tokens[2], tokens[0], + url_formatter::SkeletonType::kFull, + /*is_top_500=*/true, &all_skeletons)); + entries.push_back(MakeEntry( + tokens[2], tokens[1], url_formatter::SkeletonType::kSeparatorsRemoved, + /*is_top_500=*/true, &all_skeletons)); + } else { + CHECK_EQ(2u, tokens.size()) << "Invalid line: " << tokens[0]; + + entries.push_back(MakeEntry(tokens[1], tokens[0], + url_formatter::SkeletonType::kFull, + /*is_top_500=*/false, &all_skeletons)); + } } base::FilePath template_path = base::FilePath::FromUTF8Unsafe(args[1]); diff --git a/chromium/components/url_formatter/spoof_checks/top_domains/top_domain_util.cc b/chromium/components/url_formatter/spoof_checks/top_domains/top_domain_util.cc index 3559e26732e..f40b4342286 100644 --- a/chromium/components/url_formatter/spoof_checks/top_domains/top_domain_util.cc +++ b/chromium/components/url_formatter/spoof_checks/top_domains/top_domain_util.cc @@ -13,7 +13,7 @@ namespace top_domains { namespace { // Minimum length of the e2LD (the registered domain name without the registry) -// to be considered for an edit distance comparison, including a trailing dot. +// to be considered for an edit distance comparison. // Thus: 'google.com' has of length 6 ("google") and is long enough, while // 'abc.co.uk' has a length of 3 ("abc"), and will not be considered. 
const size_t kMinLengthForEditDistance = 5u; diff --git a/chromium/components/url_formatter/spoof_checks/top_domains/trie_entry.cc b/chromium/components/url_formatter/spoof_checks/top_domains/trie_entry.cc index 13ebcd0b589..a503aa2e6a1 100644 --- a/chromium/components/url_formatter/spoof_checks/top_domains/trie_entry.cc +++ b/chromium/components/url_formatter/spoof_checks/top_domains/trie_entry.cc @@ -9,6 +9,15 @@ namespace url_formatter { +uint8_t BitLength(uint32_t input) { + uint8_t number_of_bits = 0; + while (input != 0) { + number_of_bits++; + input >>= 1; + } + return number_of_bits; +} + namespace top_domains { TopDomainTrieEntry::TopDomainTrieEntry( @@ -27,13 +36,20 @@ std::string TopDomainTrieEntry::name() const { bool TopDomainTrieEntry::WriteEntry( net::huffman_trie::TrieBitBuffer* writer) const { + // Make sure the assigned bit length is enough to encode all SkeletonType + // values. + DCHECK_EQ(kSkeletonTypeBitLength, + BitLength(url_formatter::SkeletonType::kMaxValue)); + if (entry_->skeleton == entry_->top_domain) { writer->WriteBit(1); writer->WriteBit(entry_->is_top_500 ? 1 : 0); + writer->WriteBits(entry_->skeleton_type, kSkeletonTypeBitLength); return true; } writer->WriteBit(0); writer->WriteBit(entry_->is_top_500 ? 1 : 0); + writer->WriteBits(entry_->skeleton_type, kSkeletonTypeBitLength); std::string top_domain = entry_->top_domain; // With the current top 10,000 domains, this optimization reduces the diff --git a/chromium/components/url_formatter/spoof_checks/top_domains/trie_entry.h b/chromium/components/url_formatter/spoof_checks/top_domains/trie_entry.h index 20b7c1aa28d..2a9664b9e9a 100644 --- a/chromium/components/url_formatter/spoof_checks/top_domains/trie_entry.h +++ b/chromium/components/url_formatter/spoof_checks/top_domains/trie_entry.h @@ -13,12 +13,30 @@ namespace url_formatter { +// The |SkeletonType| and |TopDomainEntry| are mirrored in the IDN spoof +// checker. These are used to insert and read nodes from the Trie. 
+// The type of skeleton in the trie node. This type is encoded by +// |kSkeletonTypeBitLength| bits in the trie. +enum SkeletonType { + // The skeleton represents the full domain (e.g. google.corn). + kFull = 0, + // The skeleton represents the domain with '.'s and '-'s removed (e.g. + // googlecorn). + kSeparatorsRemoved = 1, + // Max value used to determine the number of different types. Update this and + // |kSkeletonTypeBitLength| when new SkeletonTypes are added. + kMaxValue = kSeparatorsRemoved +}; + +const uint8_t kSkeletonTypeBitLength = 1; + namespace top_domains { struct TopDomainEntry { std::string skeleton; std::string top_domain; bool is_top_500; + SkeletonType skeleton_type; }; class TopDomainTrieEntry : public net::huffman_trie::TrieEntry { diff --git a/chromium/components/url_formatter/tools/format_url.cc b/chromium/components/url_formatter/tools/format_url.cc index b991476304e..91c60c9229e 100644 --- a/chromium/components/url_formatter/tools/format_url.cc +++ b/chromium/components/url_formatter/tools/format_url.cc @@ -4,8 +4,10 @@ // This binary takes a list of domain names, tries to convert them to unicode // and prints out the result. The list can be passed as a text file or via -// stdin. In both cases, the output is printed as (input_domain, output_domain) -// pairs on separate lines. +// stdin. In both cases, the output is printed as (input_domain, output_domain, +// spoof_check_result) tuples on separate lines. spoof_check_result is the +// string representation of IDNSpoofChecker::Result enum with an additional +// kTopDomainLookalike value. 
#include <cstdlib> #include <fstream> @@ -15,9 +17,15 @@ #include "base/command_line.h" #include "base/i18n/icu_util.h" #include "base/logging.h" +#include "base/notreached.h" #include "base/strings/string_util.h" +#include "base/strings/utf_string_conversions.h" +#include "components/url_formatter/spoof_checks/idn_spoof_checker.h" #include "components/url_formatter/url_formatter.h" +using url_formatter::IDNConversionResult; +using url_formatter::IDNSpoofChecker; + void PrintUsage(const char* process_name) { std::cout << "Usage:" << std::endl; std::cout << process_name << " <file>" << std::endl; @@ -30,6 +38,58 @@ void PrintUsage(const char* process_name) { << "it's printed unchanged." << std::endl; } +std::string SpoofCheckResultToString(IDNSpoofChecker::Result result) { + switch (result) { + case IDNSpoofChecker::Result::kNone: + return "kNone"; + case IDNSpoofChecker::Result::kSafe: + return "kSafe"; + case IDNSpoofChecker::Result::kICUSpoofChecks: + return "kICUSpoofChecks"; + case IDNSpoofChecker::Result::kDeviationCharacters: + return "kDeviationCharacters"; + case IDNSpoofChecker::Result::kTLDSpecificCharacters: + return "kTLDSpecificCharacters"; + case IDNSpoofChecker::Result::kUnsafeMiddleDot: + return "kUnsafeMiddleDot"; + case IDNSpoofChecker::Result::kWholeScriptConfusable: + return "kWholeScriptConfusable"; + case IDNSpoofChecker::Result::kDigitLookalikes: + return "kDigitLookalikes"; + case IDNSpoofChecker::Result::kNonAsciiLatinCharMixedWithNonLatin: + return "kNonAsciiLatinCharMixedWithNonLatin"; + case IDNSpoofChecker::Result::kDangerousPattern: + return "kDangerousPattern"; + default: + NOTREACHED(); + }; + return std::string(); +} + +// Returns the spoof check result as a string. |ascii_domain| must contain +// ASCII characters only. |unicode_domain| is the IDN conversion result +// according to url_formatter. It can be either punycode or unicode. 
+std::string GetSpoofCheckResult(const std::string& ascii_domain, + const base::string16& unicode_domain) { + IDNConversionResult result = + url_formatter::UnsafeIDNToUnicodeWithDetails(ascii_domain); + std::string spoof_check_result = + SpoofCheckResultToString(result.spoof_check_result); + if (result.spoof_check_result == IDNSpoofChecker::Result::kNone) { + // Input was not punycode. + return spoof_check_result; + } + if (result.spoof_check_result != IDNSpoofChecker::Result::kSafe) { + return spoof_check_result; + } + // If the domain passed all spoof checks but |unicode_domain| is still in + // punycode, the domain must be a lookalike of a top domain. + if (base::ASCIIToUTF16(ascii_domain) == unicode_domain) { + return "kTopDomainLookalike"; + } + return spoof_check_result; +} + void Convert(std::istream& input) { base::i18n::InitializeICU(); for (std::string line; std::getline(input, line);) { @@ -41,7 +101,16 @@ void Convert(std::istream& input) { << "This binary only accepts hostnames in ASCII form (punycode for " "IDN): " << line; - std::cout << line << ", " << url_formatter::IDNToUnicode(line) << std::endl; + + // Convert twice, first with spoof checks on, then with spoof checks + // ignored inside GetSpoofCheckResult(). This is because only the call to + // UnsafeIDNToUnicodeWithDetails returns information about spoof check + // results (a quirk of the url_formatter interface). 
+ const base::string16 converted_hostname = url_formatter::IDNToUnicode(line); + const std::string spoof_check_result = + GetSpoofCheckResult(line, converted_hostname); + std::cout << line << ", " << converted_hostname << ", " + << spoof_check_result << std::endl; } } diff --git a/chromium/components/url_formatter/url_formatter.cc b/chromium/components/url_formatter/url_formatter.cc index d7cc2ea441c..faaa7a05a76 100644 --- a/chromium/components/url_formatter/url_formatter.cc +++ b/chromium/components/url_formatter/url_formatter.cc @@ -31,13 +31,23 @@ IDNConversionResult IDNToUnicodeWithAdjustments( base::StringPiece host, base::OffsetAdjuster::Adjustments* adjustments); -bool IDNToUnicodeOneComponent(const base::char16* comp, - size_t comp_len, - base::StringPiece top_level_domain, - base::StringPiece16 top_level_domain_unicode, - bool enable_spoof_checks, - base::string16* out, - bool* has_idn_component); +// Result of converting a single IDN component (i.e. label) to unicode. +struct ComponentResult { + // Set to true if the component is converted to unicode. + bool converted = false; + // Set to true if the component is IDN, even if it's not converted to unicode. + bool has_idn_component = false; + // Result of the IDN spoof check. + IDNSpoofChecker::Result spoof_check_result = IDNSpoofChecker::Result::kNone; +}; + +ComponentResult IDNToUnicodeOneComponent( + const base::char16* comp, + size_t comp_len, + base::StringPiece top_level_domain, + base::StringPiece16 top_level_domain_unicode, + bool ignore_spoof_check_results, + base::string16* out); class AppendComponentTransform { public: @@ -249,17 +259,17 @@ void GetTopLevelDomain(base::StringPiece host, tld16.reserve(top_level_domain->length()); tld16.insert(tld16.end(), top_level_domain->begin(), top_level_domain->end()); - // Convert the TLD to unicode with the spoof checks disabled. 
- bool tld_has_idn_component = false; - IDNToUnicodeOneComponent(tld16.data(), tld16.size(), std::string(), - base::string16(), false /* enable_spoof_checks */, - top_level_domain_unicode, &tld_has_idn_component); + // Convert the TLD to unicode, ignoring the spoof check results. This will + // always decode the input to unicode as long as it's valid punycode. + IDNToUnicodeOneComponent( + tld16.data(), tld16.size(), std::string(), base::string16(), + /*ignore_spoof_check_results=*/true, top_level_domain_unicode); } IDNConversionResult IDNToUnicodeWithAdjustmentsImpl( base::StringPiece host, base::OffsetAdjuster::Adjustments* adjustments, - bool enable_spoof_checks) { + bool ignore_spoof_check_results) { if (adjustments) adjustments->clear(); // Convert the ASCII input to a base::string16 for ICU. @@ -284,19 +294,23 @@ IDNConversionResult IDNToUnicodeWithAdjustmentsImpl( component_end = host16.length(); // For getting the last component. size_t component_length = component_end - component_start; size_t new_component_start = out16.length(); - bool converted_idn = false; + ComponentResult component_result; if (component_end > component_start) { // Add the substring that we just found. 
- bool has_idn_component = false; - converted_idn = IDNToUnicodeOneComponent( + component_result = IDNToUnicodeOneComponent( host16.data() + component_start, component_length, top_level_domain, - top_level_domain_unicode, enable_spoof_checks, &out16, - &has_idn_component); - result.has_idn_component |= has_idn_component; + top_level_domain_unicode, ignore_spoof_check_results, &out16); + result.has_idn_component |= component_result.has_idn_component; + if (component_result.spoof_check_result != + IDNSpoofChecker::Result::kNone && + (result.spoof_check_result == IDNSpoofChecker::Result::kNone || + result.spoof_check_result == IDNSpoofChecker::Result::kSafe)) { + result.spoof_check_result = component_result.spoof_check_result; + } } size_t new_component_length = out16.length() - new_component_start; - if (converted_idn && adjustments) { + if (component_result.converted && adjustments) { adjustments->push_back(base::OffsetAdjuster::Adjustment( component_start, component_length, new_component_length)); } @@ -312,7 +326,8 @@ IDNConversionResult IDNToUnicodeWithAdjustmentsImpl( if (result.has_idn_component) { result.matching_top_domain = g_idn_spoof_checker.Get().GetSimilarTopDomain(out16); - if (enable_spoof_checks && !result.matching_top_domain.domain.empty()) { + if (!ignore_spoof_check_results && + !result.matching_top_domain.domain.empty()) { if (adjustments) adjustments->clear(); result.result = host16; @@ -327,22 +342,25 @@ IDNConversionResult IDNToUnicodeWithAdjustmentsImpl( IDNConversionResult IDNToUnicodeWithAdjustments( base::StringPiece host, base::OffsetAdjuster::Adjustments* adjustments) { - return IDNToUnicodeWithAdjustmentsImpl(host, adjustments, true); + return IDNToUnicodeWithAdjustmentsImpl(host, adjustments, + /*ignore_spoof_check_results=*/false); } IDNConversionResult UnsafeIDNToUnicodeWithAdjustments( base::StringPiece host, base::OffsetAdjuster::Adjustments* adjustments) { - return IDNToUnicodeWithAdjustmentsImpl(host, adjustments, false); + return 
IDNToUnicodeWithAdjustmentsImpl(host, adjustments, + /*ignore_spoof_check_results=*/true); } // Returns true if the given Unicode host component is safe to display to the // user. Note that this function does not deal with pure ASCII domain labels at // all even though it's possible to make up look-alike labels with ASCII // characters alone. -bool IsIDNComponentSafe(base::StringPiece16 label, - base::StringPiece top_level_domain, - base::StringPiece16 top_level_domain_unicode) { +IDNSpoofChecker::Result SpoofCheckIDNComponent( + base::StringPiece16 label, + base::StringPiece top_level_domain, + base::StringPiece16 top_level_domain_unicode) { return g_idn_spoof_checker.Get().SafeToDisplayAsUnicode( label, top_level_domain, top_level_domain_unicode); } @@ -387,25 +405,23 @@ struct UIDNAWrapper { base::LazyInstance<UIDNAWrapper>::Leaky g_uidna = LAZY_INSTANCE_INITIALIZER; // Converts one component (label) of a host (between dots) to Unicode if safe. -// If |enable_spoof_checks| is false and input is valid unicode, skips spoof -// checks and always converts to unicode. -// The result will be APPENDED to the given output string and will be the -// same as the input if it is not IDN in ACE/punycode or the IDN is unsafe to -// display. -// Returns true if conversion was made. Sets |has_idn_component| to true if the -// input has IDN, regardless of whether it was converted to unicode or not. -bool IDNToUnicodeOneComponent(const base::char16* comp, - size_t comp_len, - base::StringPiece top_level_domain, - base::StringPiece16 top_level_domain_unicode, - bool enable_spoof_checks, - base::string16* out, - bool* has_idn_component) { +// If |ignore_spoof_check_results| is true and input is valid unicode, ignores +// spoof check results and always converts the input to unicode. The result will +// be APPENDED to the given output string and will be the same as the input if +// it is not IDN in ACE/punycode or the IDN is unsafe to display. Returns a +// ComponentResult whose |converted| is true if conversion was made. 
Sets |has_idn_component| in the returned ComponentResult to true if the input has +// IDN, regardless of whether it was converted to unicode or not. +ComponentResult IDNToUnicodeOneComponent( + const base::char16* comp, + size_t comp_len, + base::StringPiece top_level_domain, + base::StringPiece16 top_level_domain_unicode, + bool ignore_spoof_check_results, + base::string16* out) { DCHECK(out); - DCHECK(has_idn_component); - *has_idn_component = false; + ComponentResult result; if (comp_len == 0) - return false; + return result; // Early return if the input cannot be an IDN component. // Valid punycode must not end with a dash. @@ -414,7 +430,7 @@ bool IDNToUnicodeOneComponent(const base::char16* comp, memcmp(comp, kIdnPrefix, sizeof(kIdnPrefix)) != 0 || comp[comp_len - 1] == '-') { out->append(comp, comp_len); - return false; + return result; } UIDNA* uidna = g_uidna.Get().value; @@ -435,20 +451,21 @@ bool IDNToUnicodeOneComponent(const base::char16* comp, } while ((status == U_BUFFER_OVERFLOW_ERROR && info.errors == 0)); if (U_SUCCESS(status) && info.errors == 0) { - *has_idn_component = true; + result.has_idn_component = true; // Converted successfully. At this point the length of the output string // is original_length + output_length which may be shorter than the current // length of |out|. Trim |out| and ensure that the converted component can // be safely displayed to the user. 
out->resize(original_length + output_length); - if (!enable_spoof_checks) { - return true; - } - if (IsIDNComponentSafe( - base::StringPiece16(out->data() + original_length, - base::checked_cast<size_t>(output_length)), - top_level_domain, top_level_domain_unicode)) { - return true; + result.spoof_check_result = SpoofCheckIDNComponent( + base::StringPiece16(out->data() + original_length, + base::checked_cast<size_t>(output_length)), + top_level_domain, top_level_domain_unicode); + DCHECK_NE(IDNSpoofChecker::Result::kNone, result.spoof_check_result); + if (ignore_spoof_check_results || + result.spoof_check_result == IDNSpoofChecker::Result::kSafe) { + result.converted = true; + return result; } } @@ -456,7 +473,7 @@ bool IDNToUnicodeOneComponent(const base::char16* comp, // original string and append the literal input. out->resize(original_length); out->append(comp, comp_len); - return false; + return result; } } // namespace @@ -739,8 +756,9 @@ Skeletons GetSkeletons(const base::string16& host) { return g_idn_spoof_checker.Get().GetSkeletons(host); } -TopDomainEntry LookupSkeletonInTopDomains(const std::string& skeleton) { - return g_idn_spoof_checker.Get().LookupSkeletonInTopDomains(skeleton); +TopDomainEntry LookupSkeletonInTopDomains(const std::string& skeleton, + const SkeletonType type) { + return g_idn_spoof_checker.Get().LookupSkeletonInTopDomains(skeleton, type); } } // namespace url_formatter diff --git a/chromium/components/url_formatter/url_formatter.h b/chromium/components/url_formatter/url_formatter.h index 4803b557e8d..59c28bbc35d 100644 --- a/chromium/components/url_formatter/url_formatter.h +++ b/chromium/components/url_formatter/url_formatter.h @@ -56,6 +56,12 @@ struct IDNConversionResult { // E.g. IDNToUnicodeWithDetails("googlé.com") will fill |result| with // "xn--googl-fsa.com" and |matching_top_domain.domain| with "google.com". TopDomainEntry matching_top_domain; + // Result of the spoof check. 
If the domain was converted to unicode, this + // must be kSafe. Otherwise, this will be the failure reason + // for the domain component (i.e. label) that failed the spoof checks. If + // multiple labels fail the checks, this will be the result of the first + // component that failed, counting from the left in the punycode form. + IDNSpoofChecker::Result spoof_check_result = IDNSpoofChecker::Result::kNone; }; // Nothing is omitted. @@ -192,8 +198,14 @@ base::string16 StripWWWFromHost(const GURL& url); Skeletons GetSkeletons(const base::string16& host); // Returns a domain from the top 10K list matching the given skeleton. Used for -// spoof checking. -TopDomainEntry LookupSkeletonInTopDomains(const std::string& skeleton); +// spoof checking. Different types of skeletons are saved in the skeleton trie. +// Providing |type| makes sure the right type of skeletons are looked up. For +// example if |skeleton|="googlecorn", |type|="kFull", no match would be found +// even though the skeleton is saved in the trie, because the type of this +// skeleton in the trie is "kSeparatorsRemoved". +TopDomainEntry LookupSkeletonInTopDomains( + const std::string& skeleton, + const SkeletonType type = SkeletonType::kFull); } // namespace url_formatter |