summaryrefslogtreecommitdiff
path: root/src/regexp/regexp.go
diff options
context:
space:
mode:
author Martin Möhrmann <moehrmann@google.com> 2017-03-04 07:18:26 +0100
committer Brad Fitzpatrick <bradfitz@golang.org> 2017-03-23 00:08:20 +0000
commit e74c6cd3c05fda74fc8cac7a24b22b8b55a2239d (patch)
tree 9e22276360a350d1771840bccd91679480f809ee /src/regexp/regexp.go
parent 8a16d7d40a371e61f6d30604224039cf9a46d106 (diff)
download go-git-e74c6cd3c05fda74fc8cac7a24b22b8b55a2239d.tar.gz
regexp: add ASCII fast path for context methods
The step method implementations check directly if the next rune
only needs one byte to be decoded and avoid calling utf8.DecodeRune
for such ASCII characters.

Introduce the same fast path optimization for rune decoding
for the context methods.

Results for regexp benchmarks that use the context methods:

name                            old time/op  new time/op  delta
AnchoredLiteralShortNonMatch-4  97.5ns ± 1%  94.8ns ± 2%  -2.80%  (p=0.000 n=45+43)
AnchoredShortMatch-4             163ns ± 1%   160ns ± 1%  -1.84%  (p=0.000 n=46+47)
NotOnePassShortA-4               742ns ± 2%   742ns ± 2%    ~     (p=0.440 n=49+50)
NotOnePassShortB-4               535ns ± 1%   533ns ± 2%  -0.37%  (p=0.005 n=46+48)
OnePassLongPrefix-4              169ns ± 2%   166ns ± 2%  -2.06%  (p=0.000 n=50+49)

Change-Id: Ib302d9e8c63333f02695369fcf9963974362e335
Reviewed-on: https://go-review.googlesource.com/38256
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Diffstat (limited to 'src/regexp/regexp.go')
-rw-r--r-- src/regexp/regexp.go 32
1 files changed, 24 insertions, 8 deletions
diff --git a/src/regexp/regexp.go b/src/regexp/regexp.go
index 01093d4bd0..4b34d53c8a 100644
--- a/src/regexp/regexp.go
+++ b/src/regexp/regexp.go
@@ -313,11 +313,19 @@ func (i *inputString) index(re *Regexp, pos int) int {
func (i *inputString) context(pos int) syntax.EmptyOp {
r1, r2 := endOfText, endOfText
- if pos > 0 && pos <= len(i.str) {
- r1, _ = utf8.DecodeLastRuneInString(i.str[:pos])
+ // 0 < pos && pos <= len(i.str)
+ if uint(pos-1) < uint(len(i.str)) {
+ r1 = rune(i.str[pos-1])
+ if r1 >= utf8.RuneSelf {
+ r1, _ = utf8.DecodeLastRuneInString(i.str[:pos])
+ }
}
- if pos < len(i.str) {
- r2, _ = utf8.DecodeRuneInString(i.str[pos:])
+ // 0 <= pos && pos < len(i.str)
+ if uint(pos) < uint(len(i.str)) {
+ r2 = rune(i.str[pos])
+ if r2 >= utf8.RuneSelf {
+ r2, _ = utf8.DecodeRuneInString(i.str[pos:])
+ }
}
return syntax.EmptyOpContext(r1, r2)
}
@@ -352,11 +360,19 @@ func (i *inputBytes) index(re *Regexp, pos int) int {
func (i *inputBytes) context(pos int) syntax.EmptyOp {
r1, r2 := endOfText, endOfText
- if pos > 0 && pos <= len(i.str) {
- r1, _ = utf8.DecodeLastRune(i.str[:pos])
+ // 0 < pos && pos <= len(i.str)
+ if uint(pos-1) < uint(len(i.str)) {
+ r1 = rune(i.str[pos-1])
+ if r1 >= utf8.RuneSelf {
+ r1, _ = utf8.DecodeLastRune(i.str[:pos])
+ }
}
- if pos < len(i.str) {
- r2, _ = utf8.DecodeRune(i.str[pos:])
+ // 0 <= pos && pos < len(i.str)
+ if uint(pos) < uint(len(i.str)) {
+ r2 = rune(i.str[pos])
+ if r2 >= utf8.RuneSelf {
+ r2, _ = utf8.DecodeRune(i.str[pos:])
+ }
}
return syntax.EmptyOpContext(r1, r2)
}