diff options
author | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2020-10-12 14:27:29 +0200 |
---|---|---|
committer | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2020-10-13 09:35:20 +0000 |
commit | c30a6232df03e1efbd9f3b226777b07e087a1122 (patch) | |
tree | e992f45784689f373bcc38d1b79a239ebe17ee23 /chromium/components/url_formatter/tools | |
parent | 7b5b123ac58f58ffde0f4f6e488bcd09aa4decd3 (diff) | |
download | qtwebengine-chromium-85-based.tar.gz |
BASELINE: Update Chromium to 85.0.4183.140 (85-based)
Change-Id: Iaa42f4680837c57725b1344f108c0196741f6057
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
Diffstat (limited to 'chromium/components/url_formatter/tools')
-rw-r--r-- | chromium/components/url_formatter/tools/format_url.cc | 75 |
1 files changed, 72 insertions, 3 deletions
diff --git a/chromium/components/url_formatter/tools/format_url.cc b/chromium/components/url_formatter/tools/format_url.cc index b991476304e..91c60c9229e 100644 --- a/chromium/components/url_formatter/tools/format_url.cc +++ b/chromium/components/url_formatter/tools/format_url.cc @@ -4,8 +4,10 @@ // This binary takes a list of domain names, tries to convert them to unicode // and prints out the result. The list can be passed as a text file or via -// stdin. In both cases, the output is printed as (input_domain, output_domain) -// pairs on separate lines. +// stdin. In both cases, the output is printed as (input_domain, output_domain, +// spoof_check_result) tuples on separate lines. spoof_check_result is the +// string representation of IDNSpoofChecker::Result enum with an additional +// kTopDomainLookalike value. #include <cstdlib> #include <fstream> @@ -15,9 +17,15 @@ #include "base/command_line.h" #include "base/i18n/icu_util.h" #include "base/logging.h" +#include "base/notreached.h" #include "base/strings/string_util.h" +#include "base/strings/utf_string_conversions.h" +#include "components/url_formatter/spoof_checks/idn_spoof_checker.h" #include "components/url_formatter/url_formatter.h" +using url_formatter::IDNConversionResult; +using url_formatter::IDNSpoofChecker; + void PrintUsage(const char* process_name) { std::cout << "Usage:" << std::endl; std::cout << process_name << " <file>" << std::endl; @@ -30,6 +38,58 @@ void PrintUsage(const char* process_name) { << "it's printed unchanged." 
<< std::endl; } +std::string SpoofCheckResultToString(IDNSpoofChecker::Result result) { + switch (result) { + case IDNSpoofChecker::Result::kNone: + return "kNone"; + case IDNSpoofChecker::Result::kSafe: + return "kSafe"; + case IDNSpoofChecker::Result::kICUSpoofChecks: + return "kICUSpoofChecks"; + case IDNSpoofChecker::Result::kDeviationCharacters: + return "kDeviationCharacters"; + case IDNSpoofChecker::Result::kTLDSpecificCharacters: + return "kTLDSpecificCharacters"; + case IDNSpoofChecker::Result::kUnsafeMiddleDot: + return "kUnsafeMiddleDot"; + case IDNSpoofChecker::Result::kWholeScriptConfusable: + return "kWholeScriptConfusable"; + case IDNSpoofChecker::Result::kDigitLookalikes: + return "kDigitLookalikes"; + case IDNSpoofChecker::Result::kNonAsciiLatinCharMixedWithNonLatin: + return "kNonAsciiLatinCharMixedWithNonLatin"; + case IDNSpoofChecker::Result::kDangerousPattern: + return "kDangerousPattern"; + default: + NOTREACHED(); + }; + return std::string(); +} + +// Returns the spoof check result as a string. |ascii_domain| must contain +// ASCII characters only. |unicode_domain| is the IDN conversion result +// according to url_formatter. It can be either punycode or unicode. +std::string GetSpoofCheckResult(const std::string& ascii_domain, + const base::string16& unicode_domain) { + IDNConversionResult result = + url_formatter::UnsafeIDNToUnicodeWithDetails(ascii_domain); + std::string spoof_check_result = + SpoofCheckResultToString(result.spoof_check_result); + if (result.spoof_check_result == IDNSpoofChecker::Result::kNone) { + // Input was not punycode. + return spoof_check_result; + } + if (result.spoof_check_result != IDNSpoofChecker::Result::kSafe) { + return spoof_check_result; + } + // If the domain passed all spoof checks but |unicode_domain| is still in + // punycode, the domain must be a lookalike of a top domain. 
+ if (base::ASCIIToUTF16(ascii_domain) == unicode_domain) { + return "kTopDomainLookalike"; + } + return spoof_check_result; +} + void Convert(std::istream& input) { base::i18n::InitializeICU(); for (std::string line; std::getline(input, line);) { @@ -41,7 +101,16 @@ void Convert(std::istream& input) { << "This binary only accepts hostnames in ASCII form (punycode for " "IDN): " << line; - std::cout << line << ", " << url_formatter::IDNToUnicode(line) << std::endl; + + // Convert twice, first with spoof checks on, then with spoof checks + // ignored inside GetSpoofCheckResult(). This is because only the call to + // UnsafeIDNToUnicodeWithDetails returns information about spoof check + // results (a quirk of the url_formatter interface). + const base::string16 converted_hostname = url_formatter::IDNToUnicode(line); + const std::string spoof_check_result = + GetSpoofCheckResult(line, converted_hostname); + std::cout << line << ", " << converted_hostname << ", " + << spoof_check_result << std::endl; } } |