summaryrefslogtreecommitdiff
path: root/libgo/go/regexp
diff options
context:
space:
mode:
Diffstat (limited to 'libgo/go/regexp')
-rw-r--r--libgo/go/regexp/all_test.go15
-rw-r--r--libgo/go/regexp/onepass.go7
-rw-r--r--libgo/go/regexp/regexp.go2
-rw-r--r--libgo/go/regexp/syntax/doc.go48
-rw-r--r--libgo/go/regexp/syntax/parse.go41
-rw-r--r--libgo/go/regexp/syntax/parse_test.go13
-rw-r--r--libgo/go/regexp/syntax/regexp.go2
7 files changed, 95 insertions, 33 deletions
diff --git a/libgo/go/regexp/all_test.go b/libgo/go/regexp/all_test.go
index 301a1dfcd83..01ea3742a8b 100644
--- a/libgo/go/regexp/all_test.go
+++ b/libgo/go/regexp/all_test.go
@@ -6,6 +6,7 @@ package regexp
import (
"reflect"
+ "regexp/syntax"
"strings"
"testing"
)
@@ -473,9 +474,19 @@ func TestSplit(t *testing.T) {
}
}
-// This ran out of stack before issue 7608 was fixed.
+// Check that one-pass cutoff does trigger.
func TestOnePassCutoff(t *testing.T) {
- MustCompile(`^(?:x{1,1000}){1,1000}$`)
+ re, err := syntax.Parse(`^x{1,1000}y{1,1000}$`, syntax.Perl)
+ if err != nil {
+ t.Fatalf("parse: %v", err)
+ }
+ p, err := syntax.Compile(re.Simplify())
+ if err != nil {
+ t.Fatalf("compile: %v", err)
+ }
+ if compileOnePass(p) != notOnePass {
+ t.Fatalf("makeOnePass succeeded; wanted notOnePass")
+ }
}
func BenchmarkLiteral(b *testing.B) {
diff --git a/libgo/go/regexp/onepass.go b/libgo/go/regexp/onepass.go
index 501fb28af66..e6f42856387 100644
--- a/libgo/go/regexp/onepass.go
+++ b/libgo/go/regexp/onepass.go
@@ -1,4 +1,6 @@
// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
package regexp
@@ -9,9 +11,6 @@ import (
"unicode"
)
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
// "One-pass" regexp execution.
// Some regexps can be analyzed to determine that they never need
// backtracking: they are guaranteed to run in one pass over the string
@@ -484,7 +483,7 @@ func makeOnePass(p *onePassProg) *onePassProg {
}
}
if p != notOnePass {
- for i, _ := range p.Inst {
+ for i := range p.Inst {
p.Inst[i].Rune = onePassRunes[i]
}
}
diff --git a/libgo/go/regexp/regexp.go b/libgo/go/regexp/regexp.go
index 0b8336a04fb..b615acdf0e5 100644
--- a/libgo/go/regexp/regexp.go
+++ b/libgo/go/regexp/regexp.go
@@ -452,7 +452,7 @@ func (re *Regexp) ReplaceAllString(src, repl string) string {
return string(b)
}
-// ReplaceAllStringLiteral returns a copy of src, replacing matches of the Regexp
+// ReplaceAllLiteralString returns a copy of src, replacing matches of the Regexp
// with the replacement string repl. The replacement repl is substituted directly,
// without using Expand.
func (re *Regexp) ReplaceAllLiteralString(src, repl string) string {
diff --git a/libgo/go/regexp/syntax/doc.go b/libgo/go/regexp/syntax/doc.go
index 8e72c90d3eb..e5e71f14f59 100644
--- a/libgo/go/regexp/syntax/doc.go
+++ b/libgo/go/regexp/syntax/doc.go
@@ -21,8 +21,8 @@ Single characters:
[^xyz] negated character class
\d Perl character class
\D negated Perl character class
- [:alpha:] ASCII character class
- [:^alpha:] negated ASCII character class
+ [[:alpha:]] ASCII character class
+ [[:^alpha:]] negated ASCII character class
\pN Unicode character class (one-letter name)
\p{Greek} Unicode character class
\PN negated Unicode character class (one-letter name)
@@ -46,14 +46,14 @@ Repetitions:
x{n,}? n or more x, prefer fewer
x{n}? exactly n x
-Implementation restriction: The counting forms x{n} etc. (but not the other
-forms x* etc.) have an upper limit of n=1000. Negative or higher explicit
-counts yield the parse error ErrInvalidRepeatSize.
+Implementation restriction: The counting forms x{n,m}, x{n,}, and x{n}
+reject forms that create a minimum or maximum repetition count above 1000.
+Unlimited repetitions are not subject to this restriction.
Grouping:
(re) numbered capturing group (submatch)
(?P<name>re) named & numbered capturing group (submatch)
- (?:re) non-capturing group (submatch)
+ (?:re) non-capturing group
(?flags) set flags within current group; non-capturing
(?flags:re) set flags during re; non-capturing
@@ -69,7 +69,7 @@ Empty strings:
$ at end of text (like \z not \Z) or line (flag m=true)
\A at beginning of text
\b at ASCII word boundary (\w on one side and \W, \A, or \z on the other)
- \B not an ASCII word boundary
+ \B not at ASCII word boundary
\z at end of text
Escape sequences:
@@ -103,29 +103,29 @@ Named character classes as character class elements:
[\p{Name}] named Unicode property inside character class (== \p{Name})
[^\p{Name}] named Unicode property inside negated character class (== \P{Name})
-Perl character classes:
+Perl character classes (all ASCII-only):
\d digits (== [0-9])
\D not digits (== [^0-9])
\s whitespace (== [\t\n\f\r ])
\S not whitespace (== [^\t\n\f\r ])
- \w ASCII word characters (== [0-9A-Za-z_])
- \W not ASCII word characters (== [^0-9A-Za-z_])
+ \w word characters (== [0-9A-Za-z_])
+ \W not word characters (== [^0-9A-Za-z_])
ASCII character classes:
- [:alnum:] alphanumeric (== [0-9A-Za-z])
- [:alpha:] alphabetic (== [A-Za-z])
- [:ascii:] ASCII (== [\x00-\x7F])
- [:blank:] blank (== [\t ])
- [:cntrl:] control (== [\x00-\x1F\x7F])
- [:digit:] digits (== [0-9])
- [:graph:] graphical (== [!-~] == [A-Za-z0-9!"#$%&'()*+,\-./:;<=>?@[\\\]^_`{|}~])
- [:lower:] lower case (== [a-z])
- [:print:] printable (== [ -~] == [ [:graph:]])
- [:punct:] punctuation (== [!-/:-@[-`{-~])
- [:space:] whitespace (== [\t\n\v\f\r ])
- [:upper:] upper case (== [A-Z])
- [:word:] word characters (== [0-9A-Za-z_])
- [:xdigit:] hex digit (== [0-9A-Fa-f])
+ [[:alnum:]] alphanumeric (== [0-9A-Za-z])
+ [[:alpha:]] alphabetic (== [A-Za-z])
+ [[:ascii:]] ASCII (== [\x00-\x7F])
+ [[:blank:]] blank (== [\t ])
+ [[:cntrl:]] control (== [\x00-\x1F\x7F])
+ [[:digit:]] digits (== [0-9])
+ [[:graph:]] graphical (== [!-~] == [A-Za-z0-9!"#$%&'()*+,\-./:;<=>?@[\\\]^_`{|}~])
+ [[:lower:]] lower case (== [a-z])
+ [[:print:]] printable (== [ -~] == [ [:graph:]])
+ [[:punct:]] punctuation (== [!-/:-@[-`{-~])
+ [[:space:]] whitespace (== [\t\n\v\f\r ])
+ [[:upper:]] upper case (== [A-Z])
+ [[:word:]] word characters (== [0-9A-Za-z_])
+ [[:xdigit:]] hex digit (== [0-9A-Fa-f])
*/
package syntax
diff --git a/libgo/go/regexp/syntax/parse.go b/libgo/go/regexp/syntax/parse.go
index cb25dca3956..d579a4069b1 100644
--- a/libgo/go/regexp/syntax/parse.go
+++ b/libgo/go/regexp/syntax/parse.go
@@ -244,6 +244,7 @@ func (p *parser) repeat(op Op, min, max int, before, after, lastRepeat string) (
if sub.Op >= opPseudo {
return "", &Error{ErrMissingRepeatArgument, before[:len(before)-len(after)]}
}
+
re := p.newRegexp(op)
re.Min = min
re.Max = max
@@ -251,9 +252,47 @@ func (p *parser) repeat(op Op, min, max int, before, after, lastRepeat string) (
re.Sub = re.Sub0[:1]
re.Sub[0] = sub
p.stack[n-1] = re
+
+ if op == OpRepeat && (min >= 2 || max >= 2) && !repeatIsValid(re, 1000) {
+ return "", &Error{ErrInvalidRepeatSize, before[:len(before)-len(after)]}
+ }
+
return after, nil
}
+// repeatIsValid reports whether the repetition re is valid.
+// Valid means that the combination of the top-level repetition
+// and any inner repetitions does not exceed n copies of the
+// innermost thing.
+// This function rewalks the regexp tree and is called for every repetition,
+// so we have to worry about inducing quadratic behavior in the parser.
+// We avoid this by only calling repeatIsValid when min or max >= 2.
+// In that case the depth of any >= 2 nesting can only get to 9 without
+// triggering a parse error, so each subtree can only be rewalked 9 times.
+func repeatIsValid(re *Regexp, n int) bool {
+ if re.Op == OpRepeat {
+ m := re.Max
+ if m == 0 {
+ return true
+ }
+ if m < 0 {
+ m = re.Min
+ }
+ if m > n {
+ return false
+ }
+ if m > 0 {
+ n /= m
+ }
+ }
+ for _, sub := range re.Sub {
+ if !repeatIsValid(sub, n) {
+ return false
+ }
+ }
+ return true
+}
+
// concat replaces the top of the stack (above the topmost '|' or '(') with its concatenation.
func (p *parser) concat() *Regexp {
p.maybeConcat(-1, 0)
@@ -1639,7 +1678,7 @@ const (
// minimum and maximum runes involved in folding.
// checked during test.
minFold = 0x0041
- maxFold = 0x1044f
+ maxFold = 0x118df
)
// appendFoldedRange returns the result of appending the range lo-hi
diff --git a/libgo/go/regexp/syntax/parse_test.go b/libgo/go/regexp/syntax/parse_test.go
index f3089294c6a..c4a1117ff86 100644
--- a/libgo/go/regexp/syntax/parse_test.go
+++ b/libgo/go/regexp/syntax/parse_test.go
@@ -200,6 +200,10 @@ var parseTests = []parseTest{
`cat{rep{2,2 lit{x}}alt{emp{}cc{0x30-0x39}}}`},
{`x{2}y|x{2}[0-9]y`,
`cat{rep{2,2 lit{x}}alt{lit{y}cat{cc{0x30-0x39}lit{y}}}}`},
+
+ // Valid repetitions.
+ {`((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}))`, ``},
+ {`((((((((((x{1}){2}){2}){2}){2}){2}){2}){2}){2}){2})`, ``},
}
const testFlags = MatchNL | PerlX | UnicodeGroups
@@ -262,6 +266,10 @@ func testParseDump(t *testing.T, tests []parseTest, flags Flags) {
t.Errorf("Parse(%#q): %v", tt.Regexp, err)
continue
}
+ if tt.Dump == "" {
+ // It parsed. That's all we care about.
+ continue
+ }
d := dump(re)
if d != tt.Dump {
t.Errorf("Parse(%#q).Dump() = %#q want %#q", tt.Regexp, d, tt.Dump)
@@ -470,6 +478,7 @@ var invalidRegexps = []string{
`(?i)[a-Z]`,
`a{100000}`,
`a{100000,}`,
+ "((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}){2})",
}
var onlyPerl = []string{
@@ -527,6 +536,10 @@ func TestToStringEquivalentParse(t *testing.T) {
t.Errorf("Parse(%#q): %v", tt.Regexp, err)
continue
}
+ if tt.Dump == "" {
+ // It parsed. That's all we care about.
+ continue
+ }
d := dump(re)
if d != tt.Dump {
t.Errorf("Parse(%#q).Dump() = %#q want %#q", tt.Regexp, d, tt.Dump)
diff --git a/libgo/go/regexp/syntax/regexp.go b/libgo/go/regexp/syntax/regexp.go
index 329a90e0129..cea7d9e04fe 100644
--- a/libgo/go/regexp/syntax/regexp.go
+++ b/libgo/go/regexp/syntax/regexp.go
@@ -39,7 +39,7 @@ const (
OpEmptyMatch // matches empty string
OpLiteral // matches Runes sequence
OpCharClass // matches Runes interpreted as range pair list
- OpAnyCharNotNL // matches any character
+ OpAnyCharNotNL // matches any character except newline
OpAnyChar // matches any character
OpBeginLine // matches empty string at beginning of line
OpEndLine // matches empty string at end of line