summaryrefslogtreecommitdiff
path: root/libgo/go/regexp
diff options
context:
space:
mode:
authorian <ian@138bc75d-0d04-0410-961f-82ee72b054a4>2015-01-15 00:27:56 +0000
committerian <ian@138bc75d-0d04-0410-961f-82ee72b054a4>2015-01-15 00:27:56 +0000
commitf11c215565ea0c863197b9666581d69c225619c2 (patch)
tree58a1724fee16d2b03c65678c4dd9b50bb97137a9 /libgo/go/regexp
parent2bbc72db3287d2bff87b9640e85830337aac0672 (diff)
downloadgcc-f11c215565ea0c863197b9666581d69c225619c2.tar.gz
libgo, compiler: Upgrade libgo to Go 1.4, except for runtime.
This upgrades all of libgo other than the runtime package to the Go 1.4 release. In Go 1.4 much of the runtime was rewritten into Go. Merging that code will take more time and will not change the API, so I'm putting it off for now. There are a few runtime changes anyhow, to accomodate other packages that rely on minor modifications to the runtime support. The compiler changes slightly to add a one-bit flag to each type descriptor kind that is stored directly in an interface, which for gccgo is currently only pointer types. Another one-bit flag (gcprog) is reserved because it is used by the gc compiler, but gccgo does not currently use it. There is another error check in the compiler since I ran across it during testing. gotools/: * Makefile.am (go_cmd_go_files): Sort entries. Add generate.go. * Makefile.in: Rebuild. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@219627 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'libgo/go/regexp')
-rw-r--r--libgo/go/regexp/all_test.go15
-rw-r--r--libgo/go/regexp/onepass.go7
-rw-r--r--libgo/go/regexp/regexp.go2
-rw-r--r--libgo/go/regexp/syntax/doc.go48
-rw-r--r--libgo/go/regexp/syntax/parse.go41
-rw-r--r--libgo/go/regexp/syntax/parse_test.go13
-rw-r--r--libgo/go/regexp/syntax/regexp.go2
7 files changed, 95 insertions, 33 deletions
diff --git a/libgo/go/regexp/all_test.go b/libgo/go/regexp/all_test.go
index 301a1dfcd83..01ea3742a8b 100644
--- a/libgo/go/regexp/all_test.go
+++ b/libgo/go/regexp/all_test.go
@@ -6,6 +6,7 @@ package regexp
import (
"reflect"
+ "regexp/syntax"
"strings"
"testing"
)
@@ -473,9 +474,19 @@ func TestSplit(t *testing.T) {
}
}
-// This ran out of stack before issue 7608 was fixed.
+// Check that one-pass cutoff does trigger.
func TestOnePassCutoff(t *testing.T) {
- MustCompile(`^(?:x{1,1000}){1,1000}$`)
+ re, err := syntax.Parse(`^x{1,1000}y{1,1000}$`, syntax.Perl)
+ if err != nil {
+ t.Fatalf("parse: %v", err)
+ }
+ p, err := syntax.Compile(re.Simplify())
+ if err != nil {
+ t.Fatalf("compile: %v", err)
+ }
+ if compileOnePass(p) != notOnePass {
+ t.Fatalf("makeOnePass succeeded; wanted notOnePass")
+ }
}
func BenchmarkLiteral(b *testing.B) {
diff --git a/libgo/go/regexp/onepass.go b/libgo/go/regexp/onepass.go
index 501fb28af66..e6f42856387 100644
--- a/libgo/go/regexp/onepass.go
+++ b/libgo/go/regexp/onepass.go
@@ -1,4 +1,6 @@
// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
package regexp
@@ -9,9 +11,6 @@ import (
"unicode"
)
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
// "One-pass" regexp execution.
// Some regexps can be analyzed to determine that they never need
// backtracking: they are guaranteed to run in one pass over the string
@@ -484,7 +483,7 @@ func makeOnePass(p *onePassProg) *onePassProg {
}
}
if p != notOnePass {
- for i, _ := range p.Inst {
+ for i := range p.Inst {
p.Inst[i].Rune = onePassRunes[i]
}
}
diff --git a/libgo/go/regexp/regexp.go b/libgo/go/regexp/regexp.go
index 0b8336a04fb..b615acdf0e5 100644
--- a/libgo/go/regexp/regexp.go
+++ b/libgo/go/regexp/regexp.go
@@ -452,7 +452,7 @@ func (re *Regexp) ReplaceAllString(src, repl string) string {
return string(b)
}
-// ReplaceAllStringLiteral returns a copy of src, replacing matches of the Regexp
+// ReplaceAllLiteralString returns a copy of src, replacing matches of the Regexp
// with the replacement string repl. The replacement repl is substituted directly,
// without using Expand.
func (re *Regexp) ReplaceAllLiteralString(src, repl string) string {
diff --git a/libgo/go/regexp/syntax/doc.go b/libgo/go/regexp/syntax/doc.go
index 8e72c90d3eb..e5e71f14f59 100644
--- a/libgo/go/regexp/syntax/doc.go
+++ b/libgo/go/regexp/syntax/doc.go
@@ -21,8 +21,8 @@ Single characters:
[^xyz] negated character class
\d Perl character class
\D negated Perl character class
- [:alpha:] ASCII character class
- [:^alpha:] negated ASCII character class
+ [[:alpha:]] ASCII character class
+ [[:^alpha:]] negated ASCII character class
\pN Unicode character class (one-letter name)
\p{Greek} Unicode character class
\PN negated Unicode character class (one-letter name)
@@ -46,14 +46,14 @@ Repetitions:
x{n,}? n or more x, prefer fewer
x{n}? exactly n x
-Implementation restriction: The counting forms x{n} etc. (but not the other
-forms x* etc.) have an upper limit of n=1000. Negative or higher explicit
-counts yield the parse error ErrInvalidRepeatSize.
+Implementation restriction: The counting forms x{n,m}, x{n,}, and x{n}
+reject forms that create a minimum or maximum repetition count above 1000.
+Unlimited repetitions are not subject to this restriction.
Grouping:
(re) numbered capturing group (submatch)
(?P<name>re) named & numbered capturing group (submatch)
- (?:re) non-capturing group (submatch)
+ (?:re) non-capturing group
(?flags) set flags within current group; non-capturing
(?flags:re) set flags during re; non-capturing
@@ -69,7 +69,7 @@ Empty strings:
$ at end of text (like \z not \Z) or line (flag m=true)
\A at beginning of text
\b at ASCII word boundary (\w on one side and \W, \A, or \z on the other)
- \B not an ASCII word boundary
+ \B not at ASCII word boundary
\z at end of text
Escape sequences:
@@ -103,29 +103,29 @@ Named character classes as character class elements:
[\p{Name}] named Unicode property inside character class (== \p{Name})
[^\p{Name}] named Unicode property inside negated character class (== \P{Name})
-Perl character classes:
+Perl character classes (all ASCII-only):
\d digits (== [0-9])
\D not digits (== [^0-9])
\s whitespace (== [\t\n\f\r ])
\S not whitespace (== [^\t\n\f\r ])
- \w ASCII word characters (== [0-9A-Za-z_])
- \W not ASCII word characters (== [^0-9A-Za-z_])
+ \w word characters (== [0-9A-Za-z_])
+ \W not word characters (== [^0-9A-Za-z_])
ASCII character classes:
- [:alnum:] alphanumeric (== [0-9A-Za-z])
- [:alpha:] alphabetic (== [A-Za-z])
- [:ascii:] ASCII (== [\x00-\x7F])
- [:blank:] blank (== [\t ])
- [:cntrl:] control (== [\x00-\x1F\x7F])
- [:digit:] digits (== [0-9])
- [:graph:] graphical (== [!-~] == [A-Za-z0-9!"#$%&'()*+,\-./:;<=>?@[\\\]^_`{|}~])
- [:lower:] lower case (== [a-z])
- [:print:] printable (== [ -~] == [ [:graph:]])
- [:punct:] punctuation (== [!-/:-@[-`{-~])
- [:space:] whitespace (== [\t\n\v\f\r ])
- [:upper:] upper case (== [A-Z])
- [:word:] word characters (== [0-9A-Za-z_])
- [:xdigit:] hex digit (== [0-9A-Fa-f])
+ [[:alnum:]] alphanumeric (== [0-9A-Za-z])
+ [[:alpha:]] alphabetic (== [A-Za-z])
+ [[:ascii:]] ASCII (== [\x00-\x7F])
+ [[:blank:]] blank (== [\t ])
+ [[:cntrl:]] control (== [\x00-\x1F\x7F])
+ [[:digit:]] digits (== [0-9])
+ [[:graph:]] graphical (== [!-~] == [A-Za-z0-9!"#$%&'()*+,\-./:;<=>?@[\\\]^_`{|}~])
+ [[:lower:]] lower case (== [a-z])
+ [[:print:]] printable (== [ -~] == [ [:graph:]])
+ [[:punct:]] punctuation (== [!-/:-@[-`{-~])
+ [[:space:]] whitespace (== [\t\n\v\f\r ])
+ [[:upper:]] upper case (== [A-Z])
+ [[:word:]] word characters (== [0-9A-Za-z_])
+ [[:xdigit:]] hex digit (== [0-9A-Fa-f])
*/
package syntax
diff --git a/libgo/go/regexp/syntax/parse.go b/libgo/go/regexp/syntax/parse.go
index cb25dca3956..d579a4069b1 100644
--- a/libgo/go/regexp/syntax/parse.go
+++ b/libgo/go/regexp/syntax/parse.go
@@ -244,6 +244,7 @@ func (p *parser) repeat(op Op, min, max int, before, after, lastRepeat string) (
if sub.Op >= opPseudo {
return "", &Error{ErrMissingRepeatArgument, before[:len(before)-len(after)]}
}
+
re := p.newRegexp(op)
re.Min = min
re.Max = max
@@ -251,9 +252,47 @@ func (p *parser) repeat(op Op, min, max int, before, after, lastRepeat string) (
re.Sub = re.Sub0[:1]
re.Sub[0] = sub
p.stack[n-1] = re
+
+ if op == OpRepeat && (min >= 2 || max >= 2) && !repeatIsValid(re, 1000) {
+ return "", &Error{ErrInvalidRepeatSize, before[:len(before)-len(after)]}
+ }
+
return after, nil
}
+// repeatIsValid reports whether the repetition re is valid.
+// Valid means that the combination of the top-level repetition
+// and any inner repetitions does not exceed n copies of the
+// innermost thing.
+// This function rewalks the regexp tree and is called for every repetition,
+// so we have to worry about inducing quadratic behavior in the parser.
+// We avoid this by only calling repeatIsValid when min or max >= 2.
+// In that case the depth of any >= 2 nesting can only get to 9 without
+// triggering a parse error, so each subtree can only be rewalked 9 times.
+func repeatIsValid(re *Regexp, n int) bool {
+ if re.Op == OpRepeat {
+ m := re.Max
+ if m == 0 {
+ return true
+ }
+ if m < 0 {
+ m = re.Min
+ }
+ if m > n {
+ return false
+ }
+ if m > 0 {
+ n /= m
+ }
+ }
+ for _, sub := range re.Sub {
+ if !repeatIsValid(sub, n) {
+ return false
+ }
+ }
+ return true
+}
+
// concat replaces the top of the stack (above the topmost '|' or '(') with its concatenation.
func (p *parser) concat() *Regexp {
p.maybeConcat(-1, 0)
@@ -1639,7 +1678,7 @@ const (
// minimum and maximum runes involved in folding.
// checked during test.
minFold = 0x0041
- maxFold = 0x1044f
+ maxFold = 0x118df
)
// appendFoldedRange returns the result of appending the range lo-hi
diff --git a/libgo/go/regexp/syntax/parse_test.go b/libgo/go/regexp/syntax/parse_test.go
index f3089294c6a..c4a1117ff86 100644
--- a/libgo/go/regexp/syntax/parse_test.go
+++ b/libgo/go/regexp/syntax/parse_test.go
@@ -200,6 +200,10 @@ var parseTests = []parseTest{
`cat{rep{2,2 lit{x}}alt{emp{}cc{0x30-0x39}}}`},
{`x{2}y|x{2}[0-9]y`,
`cat{rep{2,2 lit{x}}alt{lit{y}cat{cc{0x30-0x39}lit{y}}}}`},
+
+ // Valid repetitions.
+ {`((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}))`, ``},
+ {`((((((((((x{1}){2}){2}){2}){2}){2}){2}){2}){2}){2})`, ``},
}
const testFlags = MatchNL | PerlX | UnicodeGroups
@@ -262,6 +266,10 @@ func testParseDump(t *testing.T, tests []parseTest, flags Flags) {
t.Errorf("Parse(%#q): %v", tt.Regexp, err)
continue
}
+ if tt.Dump == "" {
+ // It parsed. That's all we care about.
+ continue
+ }
d := dump(re)
if d != tt.Dump {
t.Errorf("Parse(%#q).Dump() = %#q want %#q", tt.Regexp, d, tt.Dump)
@@ -470,6 +478,7 @@ var invalidRegexps = []string{
`(?i)[a-Z]`,
`a{100000}`,
`a{100000,}`,
+ "((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}){2})",
}
var onlyPerl = []string{
@@ -527,6 +536,10 @@ func TestToStringEquivalentParse(t *testing.T) {
t.Errorf("Parse(%#q): %v", tt.Regexp, err)
continue
}
+ if tt.Dump == "" {
+ // It parsed. That's all we care about.
+ continue
+ }
d := dump(re)
if d != tt.Dump {
t.Errorf("Parse(%#q).Dump() = %#q want %#q", tt.Regexp, d, tt.Dump)
diff --git a/libgo/go/regexp/syntax/regexp.go b/libgo/go/regexp/syntax/regexp.go
index 329a90e0129..cea7d9e04fe 100644
--- a/libgo/go/regexp/syntax/regexp.go
+++ b/libgo/go/regexp/syntax/regexp.go
@@ -39,7 +39,7 @@ const (
OpEmptyMatch // matches empty string
OpLiteral // matches Runes sequence
OpCharClass // matches Runes interpreted as range pair list
- OpAnyCharNotNL // matches any character
+ OpAnyCharNotNL // matches any character except newline
OpAnyChar // matches any character
OpBeginLine // matches empty string at beginning of line
OpEndLine // matches empty string at end of line