summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristopher Thompson <cthomp@chromium.org>2019-09-23 18:08:01 +0000
committerMichal Klocek <michal.klocek@qt.io>2020-01-16 12:16:08 +0000
commit05833b160db844fd8f286fe034cb1dd1edaf8601 (patch)
tree613ec51a99ab416c6144eb768021b7a51acf42a7
parentd5651e21821474110e252779d7885da4ea2ee329 (diff)
downloadqtwebengine-chromium-05833b160db844fd8f286fe034cb1dd1edaf8601.tar.gz
[Backport] CVE-2019-13739
Add more charsets to URL unescape banned list Adds remaining characters from the Default Ignorable and Formatting character sets to the URL unescape ban list. Bug: 824715 Change-Id: I118b2aa71877a830f32a8bff76e3f7f28c9760b0 Reviewed-by: Michael Brüning <michael.bruning@qt.io>
-rw-r--r--chromium/net/base/escape.cc59
-rw-r--r--chromium/net/base/escape_unittest.cc42
2 files changed, 98 insertions, 3 deletions
diff --git a/chromium/net/base/escape.cc b/chromium/net/base/escape.cc
index 3521f9e00c9..c9f4aec37eb 100644
--- a/chromium/net/base/escape.cc
+++ b/chromium/net/base/escape.cc
@@ -193,8 +193,6 @@ bool ShouldUnescapeCodePoint(UnescapeRule::Type rules, uint32_t code_point) {
// TODO(https://crbug.com/829873): Try to make this use icu, both to
// protect against regressions as the Unicode standard is updated and to
// reduce the number of long lists of characters.
- // TODO(https://crbug.com/824715): Add default ignorable and formatting
- // code points.
return !(
// Per http://tools.ietf.org/html/rfc3987#section-4.1, certain BiDi
// control characters are not allowed to appear unescaped in URLs.
@@ -241,7 +239,62 @@ bool ShouldUnescapeCodePoint(UnescapeRule::Type rules, uint32_t code_point) {
code_point == 0x2029 || // PARAGRAPH SEPARATOR (%E2%80%A9)
code_point == 0x202F || // NARROW NO-BREAK SPACE (%E2%80%AF)
code_point == 0x205F || // MEDIUM MATHEMATICAL SPACE (%E2%81%9F)
- code_point == 0x3000); // IDEOGRAPHIC SPACE (%E3%80%80)
+ code_point == 0x3000 || // IDEOGRAPHIC SPACE (%E3%80%80)
+
+ // Default Ignorable ([:Default_Ignorable_Code_Point=Yes:]) and Format
+ // characters ([:Cf:]) are also banned (see crbug.com/824715).
+ code_point == 0x00AD || // SOFT HYPHEN (%C2%AD)
+ code_point == 0x034F || // COMBINING GRAPHEME JOINER (%CD%8F)
+ // Arabic number formatting
+ (code_point >= 0x0600 && code_point <= 0x0605) ||
+ // U+061C is already banned as a BiDi control character.
+ code_point == 0x06DD || // ARABIC END OF AYAH (%DB%9D)
+ code_point == 0x070F || // SYRIAC ABBREVIATION MARK (%DC%8F)
+ code_point == 0x08E2 || // ARABIC DISPUTED END OF AYAH (%E0%A3%A2)
+ code_point == 0x115F || // HANGUL CHOSEONG FILLER (%E1%85%9F)
+ code_point == 0x1160 || // HANGUL JUNGSEONG FILLER (%E1%85%A0)
+ code_point == 0x17B4 || // KHMER VOWEL INHERENT AQ (%E1%9E%B4)
+ code_point == 0x17B5 || // KHMER VOWEL INHERENT AA (%E1%9E%B5)
+ code_point == 0x180B || // MONGOLIAN FREE VARIATION SELECTOR ONE
+ // (%E1%A0%8B)
+ code_point == 0x180C || // MONGOLIAN FREE VARIATION SELECTOR TWO
+ // (%E1%A0%8C)
+ code_point == 0x180D || // MONGOLIAN FREE VARIATION SELECTOR THREE
+ // (%E1%A0%8D)
+ code_point == 0x180E || // MONGOLIAN VOWEL SEPARATOR (%E1%A0%8E)
+ code_point == 0x200B || // ZERO WIDTH SPACE (%E2%80%8B)
+ code_point == 0x200C || // ZERO WIDTH NON-JOINER (%E2%80%8C)
+ code_point == 0x200D || // ZERO WIDTH JOINER (%E2%80%8D)
+ // U+200E, U+200F, U+202A--202E, and U+2066--2069 are already banned as
+ // BiDi control characters.
+ code_point == 0x2060 || // WORD JOINER (%E2%81%A0)
+ code_point == 0x2061 || // FUNCTION APPLICATION (%E2%81%A1)
+ code_point == 0x2062 || // INVISIBLE TIMES (%E2%81%A2)
+ code_point == 0x2063 || // INVISIBLE SEPARATOR (%E2%81%A3)
+ code_point == 0x2064 || // INVISIBLE PLUS (%E2%81%A4)
+ code_point == 0x2065 || // null (%E2%81%A5)
+ // 0x2066--0x2069 are already banned as BiDi control characters.
+ // General Punctuation - Deprecated (U+206A--206F)
+ (code_point >= 0x206A && code_point <= 0x206F) ||
+ code_point == 0x3164 || // HANGUL FILLER (%E3%85%A4)
+ (code_point >= 0xFFF0 && code_point <= 0xFFF8) || // null
+ // Variation selectors (%EF%B8%80 -- %EF%B8%8F)
+ (code_point >= 0xFE00 && code_point <= 0xFE0F) ||
+ code_point == 0xFEFF || // ZERO WIDTH NO-BREAK SPACE (%EF%BB%BF)
+ code_point == 0xFFA0 || // HALFWIDTH HANGUL FILLER (%EF%BE%A0)
+ code_point == 0xFFF9 || // INTERLINEAR ANNOTATION ANCHOR (%EF%BF%B9)
+ code_point == 0xFFFA || // INTERLINEAR ANNOTATION SEPARATOR (%EF%BF%BA)
+ code_point == 0xFFFB || // INTERLINEAR ANNOTATION TERMINATOR (%EF%BF%BB)
+ code_point == 0x110BD || // KAITHI NUMBER SIGN (%F0%91%82%BD)
+ code_point == 0x110CD || // KAITHI NUMBER SIGN ABOVE (%F0%91%83%8D)
+ // Egyptian hieroglyph formatting (%F0%93%90%B0 -- %F0%93%90%B8)
+ (code_point >= 0x13430 && code_point <= 0x13438) ||
+ // Shorthand format controls (%F0%9B%B2%A0 -- %F0%9B%B2%A3)
+ (code_point >= 0x1BCA0 && code_point <= 0x1BCA3) ||
+ // Beams and slurs (%F0%9D%85%B3 -- %F0%9D%85%BA)
+ (code_point >= 0x1D173 && code_point <= 0x1D17A) ||
+ // Tags, Variation Selectors, nulls
+ (code_point >= 0xE0000 && code_point <= 0xE0FFF));
}
// Unescapes |escaped_text| according to |rules|, returning the resulting
diff --git a/chromium/net/base/escape_unittest.cc b/chromium/net/base/escape_unittest.cc
index 57be3713769..a6dd0d5d048 100644
--- a/chromium/net/base/escape_unittest.cc
+++ b/chromium/net/base/escape_unittest.cc
@@ -204,6 +204,48 @@ TEST(EscapeTest, UnescapeURLComponent) {
{"(%E2%80%AF)(%E2%81%9F)(%E3%80%80)", UnescapeRule::NORMAL,
"(%E2%80%AF)(%E2%81%9F)(%E3%80%80)"},
+ // Default Ignorable and Formatting characters should not be unescaped.
+ {"(%E2%81%A5)(%EF%BF%B0)(%EF%BF%B8)", UnescapeRule::NORMAL,
+ "(%E2%81%A5)(%EF%BF%B0)(%EF%BF%B8)"},
+ {"(%F3%A0%82%80)(%F3%A0%83%BF)(%F3%A0%87%B0)", UnescapeRule::NORMAL,
+ "(%F3%A0%82%80)(%F3%A0%83%BF)(%F3%A0%87%B0)"},
+ {"(%F3%A0%BF%BF)(%C2%AD)(%CD%8F)", UnescapeRule::NORMAL,
+ "(%F3%A0%BF%BF)(%C2%AD)(%CD%8F)"},
+ {"(%D8%80%20)(%D8%85)(%DB%9D)(%DC%8F)(%E0%A3%A2)", UnescapeRule::NORMAL,
+ "(%D8%80%20)(%D8%85)(%DB%9D)(%DC%8F)(%E0%A3%A2)"},
+ {"(%E1%85%9F)(%E1%85%A0)(%E1%9E%B4)(%E1%9E%B5)", UnescapeRule::NORMAL,
+ "(%E1%85%9F)(%E1%85%A0)(%E1%9E%B4)(%E1%9E%B5)"},
+ {"(%E1%A0%8B)(%E1%A0%8C)(%E1%A0%8D)(%E1%A0%8E)", UnescapeRule::NORMAL,
+ "(%E1%A0%8B)(%E1%A0%8C)(%E1%A0%8D)(%E1%A0%8E)"},
+ {"(%E2%80%8B)(%E2%80%8C)(%E2%80%8D)(%E2%81%A0)", UnescapeRule::NORMAL,
+ "(%E2%80%8B)(%E2%80%8C)(%E2%80%8D)(%E2%81%A0)"},
+ {"(%E2%81%A1)(%E2%81%A2)(%E2%81%A3)(%E2%81%A4)", UnescapeRule::NORMAL,
+ "(%E2%81%A1)(%E2%81%A2)(%E2%81%A3)(%E2%81%A4)"},
+ {"(%E3%85%A4)(%EF%BB%BF)(%EF%BE%A0)(%EF%BF%B9)", UnescapeRule::NORMAL,
+ "(%E3%85%A4)(%EF%BB%BF)(%EF%BE%A0)(%EF%BF%B9)"},
+ {"(%EF%BF%BB)(%F0%91%82%BD)(%F0%91%83%8D)", UnescapeRule::NORMAL,
+ "(%EF%BF%BB)(%F0%91%82%BD)(%F0%91%83%8D)"},
+ {"(%F0%93%90%B0)(%F0%93%90%B8)", UnescapeRule::NORMAL,
+ "(%F0%93%90%B0)(%F0%93%90%B8)"},
+ // General Punctuation - Deprecated (U+206A--206F)
+ {"(%E2%81%AA)(%E2%81%AD)(%E2%81%AF)", UnescapeRule::NORMAL,
+ "(%E2%81%AA)(%E2%81%AD)(%E2%81%AF)"},
+ // Variation selectors (U+FE00--FE0F)
+ {"(%EF%B8%80)(%EF%B8%8C)(%EF%B8%8D)", UnescapeRule::NORMAL,
+ "(%EF%B8%80)(%EF%B8%8C)(%EF%B8%8D)"},
+ // Shorthand format controls (U+1BCA0--1BCA3)
+ {"(%F0%9B%B2%A0)(%F0%9B%B2%A1)(%F0%9B%B2%A3)", UnescapeRule::NORMAL,
+ "(%F0%9B%B2%A0)(%F0%9B%B2%A1)(%F0%9B%B2%A3)"},
+ // Musical symbols beams and slurs (U+1D173--1D17A)
+ {"(%F0%9D%85%B3)(%F0%9D%85%B9)(%F0%9D%85%BA)", UnescapeRule::NORMAL,
+ "(%F0%9D%85%B3)(%F0%9D%85%B9)(%F0%9D%85%BA)"},
+ // Tags block (U+E0000--E007F), includes unassigned points
+ {"(%F3%A0%80%80)(%F3%A0%80%81)(%F3%A0%81%8F)", UnescapeRule::NORMAL,
+ "(%F3%A0%80%80)(%F3%A0%80%81)(%F3%A0%81%8F)"},
+ // Ideographic-specific variation selectors (U+E0100--E01EF)
+ {"(%F3%A0%84%80)(%F3%A0%84%90)(%F3%A0%87%AF)", UnescapeRule::NORMAL,
+ "(%F3%A0%84%80)(%F3%A0%84%90)(%F3%A0%87%AF)"},
+
// Two spoofing characters in a row should not be unescaped.
{"%D8%9C%D8%9C", UnescapeRule::NORMAL, "%D8%9C%D8%9C"},
// Non-spoofing characters surrounded by spoofing characters should be