[Backport] CVE-2019-5839

Make path URL parsing more lax Parsing the path component of a non-special URL like javascript or data should not fail for invalid URL characters like \uFFFF. See this bit in the spec: https://url.spec.whatwg.org/#cannot-be-a-base-url-path-state Note: some failing WPTs are added which are because url parsing replaces invalid characters (e.g. \uFFFF) with the replacement char \uFFFD, when that isn't in the spec. Bug: 925614 Change-Id: Iad9ef7456ddb4d86b1d8d995e2d48fee9483864e Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
author: Charlie Harrison <csharrison@chromium.org> 2019-04-05 13:30:53 +0000
committer: Michael Brüning <michael.bruning@qt.io> 2019-12-04 21:04:56 +0000
commit: 9ccd70b1edbe5226ef6a7f911c560a85d52a3bea (patch)
tree: 546cf88a5217492c2c21e903a71ccd7b0e9ac431
parent: e274a456ab5109712f6ba91e3b9410ab22a439fa (diff)
download: qtwebengine-chromium-9ccd70b1edbe5226ef6a7f911c560a85d52a3bea.tar.gz
2 files changed, 19 insertions, 14 deletions
diff --git a/chromium/url/url_canon_pathurl.cc b/chromium/url/url_canon_pathurl.cc
index 494fbdafb16..36fbce8ee39 100644
--- a/chromium/url/url_canon_pathurl.cc
+++ b/chromium/url/url_canon_pathurl.cc
@@ -16,13 +16,12 @@ namespace {
 // Canonicalize the given |component| from |source| into |output| and
 // |new_component|. If |separator| is non-zero, it is pre-pended to |output|
 // prior to the canonicalized component; i.e. for the '?' or '#' characters.
-template<typename CHAR, typename UCHAR>
-bool DoCanonicalizePathComponent(const CHAR* source,
+template <typename CHAR, typename UCHAR>
+void DoCanonicalizePathComponent(const CHAR* source,
                                  const Component& component,
                                  char separator,
                                  CanonOutput* output,
                                  Component* new_component) {
-  bool success = true;
   if (component.is_valid()) {
     if (separator)
       output->push_back(separator);
@@ -34,7 +33,7 @@ bool DoCanonicalizePathComponent(const CHAR* source,
     for (int i = component.begin; i < end; i++) {
       UCHAR uch = static_cast<UCHAR>(source[i]);
       if (uch < 0x20 || uch >= 0x80)
-        success &= AppendUTF8EscapedChar(source, &i, end, output);
+        AppendUTF8EscapedChar(source, &i, end, output);
       else
         output->push_back(static_cast<char>(uch));
     }
@@ -43,7 +42,6 @@ bool DoCanonicalizePathComponent(const CHAR* source,
     // Empty part.
     new_component->reset();
   }
-  return success;
 }
 
 template <typename CHAR, typename UCHAR>
@@ -63,12 +61,15 @@ bool DoCanonicalizePathURL(const URLComponentSource<CHAR>& source,
   new_parsed->port.reset();
   // We allow path URLs to have the path, query and fragment components, but we
   // will canonicalize each of the via the weaker path URL rules.
-  success &= DoCanonicalizePathComponent<CHAR, UCHAR>(
-      source.path, parsed.path, '\0', output, &new_parsed->path);
-  success &= DoCanonicalizePathComponent<CHAR, UCHAR>(
-      source.query, parsed.query, '?', output, &new_parsed->query);
-  success &= DoCanonicalizePathComponent<CHAR, UCHAR>(
-      source.ref, parsed.ref, '#', output, &new_parsed->ref);
+  //
+  // Note: parsing the path part should never cause a failure, see
+  // https://url.spec.whatwg.org/#cannot-be-a-base-url-path-state
+  DoCanonicalizePathComponent<CHAR, UCHAR>(source.path, parsed.path, '\0',
+                                           output, &new_parsed->path);
+  DoCanonicalizePathComponent<CHAR, UCHAR>(source.query, parsed.query, '?',
+                                           output, &new_parsed->query);
+  DoCanonicalizePathComponent<CHAR, UCHAR>(source.ref, parsed.ref, '#', output,
+                                           &new_parsed->ref);
 
   return success;
 }
diff --git a/chromium/url/url_canon_unittest.cc b/chromium/url/url_canon_unittest.cc
index 9ca51aaaf37..6fa47a230c3 100644
--- a/chromium/url/url_canon_unittest.cc
+++ b/chromium/url/url_canon_unittest.cc
@@ -1808,9 +1808,13 @@ TEST(URLCanonTest, CanonicalizePathURL) {
     const char* input;
     const char* expected;
   } path_cases[] = {
-    {"javascript:", "javascript:"},
-    {"JavaScript:Foo", "javascript:Foo"},
-    {"Foo:\":This /is interesting;?#", "foo:\":This /is interesting;?#"},
+      {"javascript:", "javascript:"},
+      {"JavaScript:Foo", "javascript:Foo"},
+      {"Foo:\":This /is interesting;?#", "foo:\":This /is interesting;?#"},
+
+      // Validation errors should not cause failure. See
+      // https://crbug.com/925614.
+      {"javascript:\uFFFF", "javascript:%EF%BF%BD"},
   };
 
   for (size_t i = 0; i < arraysize(path_cases); i++) {
author	Charlie Harrison <csharrison@chromium.org>	2019-04-05 13:30:53 +0000
committer	Michael Brüning <michael.bruning@qt.io>	2019-12-04 21:04:56 +0000
commit	9ccd70b1edbe5226ef6a7f911c560a85d52a3bea (patch)
tree	546cf88a5217492c2c21e903a71ccd7b0e9ac431
parent	e274a456ab5109712f6ba91e3b9410ab22a439fa (diff)
download	qtwebengine-chromium-9ccd70b1edbe5226ef6a7f911c560a85d52a3bea.tar.gz