summary | refs | log | tree | commit | diff
path: root/chromium/components/url_formatter/tools
diff options
context:
space:
mode:
author Allan Sandfeld Jensen <allan.jensen@qt.io> 2020-10-12 14:27:29 +0200
committer Allan Sandfeld Jensen <allan.jensen@qt.io> 2020-10-13 09:35:20 +0000
commit c30a6232df03e1efbd9f3b226777b07e087a1122 (patch)
tree e992f45784689f373bcc38d1b79a239ebe17ee23 /chromium/components/url_formatter/tools
parent 7b5b123ac58f58ffde0f4f6e488bcd09aa4decd3 (diff)
download qtwebengine-chromium-85-based.tar.gz
BASELINE: Update Chromium to 85.0.4183.140 85-based
Change-Id: Iaa42f4680837c57725b1344f108c0196741f6057
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
Diffstat (limited to 'chromium/components/url_formatter/tools')
-rw-r--r-- chromium/components/url_formatter/tools/format_url.cc 75
1 files changed, 72 insertions, 3 deletions
diff --git a/chromium/components/url_formatter/tools/format_url.cc b/chromium/components/url_formatter/tools/format_url.cc
index b991476304e..91c60c9229e 100644
--- a/chromium/components/url_formatter/tools/format_url.cc
+++ b/chromium/components/url_formatter/tools/format_url.cc
@@ -4,8 +4,10 @@
// This binary takes a list of domain names, tries to convert them to unicode
// and prints out the result. The list can be passed as a text file or via
-// stdin. In both cases, the output is printed as (input_domain, output_domain)
-// pairs on separate lines.
+// stdin. In both cases, the output is printed as (input_domain, output_domain,
+// spoof_check_result) tuples on separate lines. spoof_check_result is the
+// string representation of IDNSpoofChecker::Result enum with an additional
+// kTopDomainLookalike value.
#include <cstdlib>
#include <fstream>
@@ -15,9 +17,15 @@
#include "base/command_line.h"
#include "base/i18n/icu_util.h"
#include "base/logging.h"
+#include "base/notreached.h"
#include "base/strings/string_util.h"
+#include "base/strings/utf_string_conversions.h"
+#include "components/url_formatter/spoof_checks/idn_spoof_checker.h"
#include "components/url_formatter/url_formatter.h"
+using url_formatter::IDNConversionResult;
+using url_formatter::IDNSpoofChecker;
+
void PrintUsage(const char* process_name) {
std::cout << "Usage:" << std::endl;
std::cout << process_name << " <file>" << std::endl;
@@ -30,6 +38,58 @@ void PrintUsage(const char* process_name) {
<< "it's printed unchanged." << std::endl;
}
+std::string SpoofCheckResultToString(IDNSpoofChecker::Result result) {
+ switch (result) {
+ case IDNSpoofChecker::Result::kNone:
+ return "kNone";
+ case IDNSpoofChecker::Result::kSafe:
+ return "kSafe";
+ case IDNSpoofChecker::Result::kICUSpoofChecks:
+ return "kICUSpoofChecks";
+ case IDNSpoofChecker::Result::kDeviationCharacters:
+ return "kDeviationCharacters";
+ case IDNSpoofChecker::Result::kTLDSpecificCharacters:
+ return "kTLDSpecificCharacters";
+ case IDNSpoofChecker::Result::kUnsafeMiddleDot:
+ return "kUnsafeMiddleDot";
+ case IDNSpoofChecker::Result::kWholeScriptConfusable:
+ return "kWholeScriptConfusable";
+ case IDNSpoofChecker::Result::kDigitLookalikes:
+ return "kDigitLookalikes";
+ case IDNSpoofChecker::Result::kNonAsciiLatinCharMixedWithNonLatin:
+ return "kNonAsciiLatinCharMixedWithNonLatin";
+ case IDNSpoofChecker::Result::kDangerousPattern:
+ return "kDangerousPattern";
+ default:
+ NOTREACHED();
+ };
+ return std::string();
+}
+
+// Returns the spoof check result as a string. |ascii_domain| must contain
+// ASCII characters only. |unicode_domain| is the IDN conversion result
+// according to url_formatter. It can be either punycode or unicode.
+std::string GetSpoofCheckResult(const std::string& ascii_domain,
+ const base::string16& unicode_domain) {
+ IDNConversionResult result =
+ url_formatter::UnsafeIDNToUnicodeWithDetails(ascii_domain);
+ std::string spoof_check_result =
+ SpoofCheckResultToString(result.spoof_check_result);
+ if (result.spoof_check_result == IDNSpoofChecker::Result::kNone) {
+ // Input was not punycode.
+ return spoof_check_result;
+ }
+ if (result.spoof_check_result != IDNSpoofChecker::Result::kSafe) {
+ return spoof_check_result;
+ }
+ // If the domain passed all spoof checks but |unicode_domain| is still in
+ // punycode, the domain must be a lookalike of a top domain.
+ if (base::ASCIIToUTF16(ascii_domain) == unicode_domain) {
+ return "kTopDomainLookalike";
+ }
+ return spoof_check_result;
+}
+
void Convert(std::istream& input) {
base::i18n::InitializeICU();
for (std::string line; std::getline(input, line);) {
@@ -41,7 +101,16 @@ void Convert(std::istream& input) {
<< "This binary only accepts hostnames in ASCII form (punycode for "
"IDN): "
<< line;
- std::cout << line << ", " << url_formatter::IDNToUnicode(line) << std::endl;
+
+ // Convert twice, first with spoof checks on, then with spoof checks
+ // ignored inside GetSpoofCheckResult(). This is because only the call to
+ // UnsafeIDNToUnicodeWithDetails returns information about spoof check
+ // results (a quirk of the url_formatter interface).
+ const base::string16 converted_hostname = url_formatter::IDNToUnicode(line);
+ const std::string spoof_check_result =
+ GetSpoofCheckResult(line, converted_hostname);
+ std::cout << line << ", " << converted_hostname << ", "
+ << spoof_check_result << std::endl;
}
}