diff options
author | Martin Möhrmann <moehrmann@google.com> | 2017-03-04 07:18:26 +0100 |
---|---|---|
committer | Brad Fitzpatrick <bradfitz@golang.org> | 2017-03-23 00:08:20 +0000 |
commit | e74c6cd3c05fda74fc8cac7a24b22b8b55a2239d (patch) | |
tree | 9e22276360a350d1771840bccd91679480f809ee /src/regexp/regexp.go | |
parent | 8a16d7d40a371e61f6d30604224039cf9a46d106 (diff) | |
download | go-git-e74c6cd3c05fda74fc8cac7a24b22b8b55a2239d.tar.gz |
regexp: add ASCII fast path for context methods
The step method implementations check directly if the next rune
only needs one byte to be decoded and avoid calling utf8.DecodeRune
for such ASCII characters.
Introduce the same fast path optimization for rune decoding
for the context methods.
Results for regexp benchmarks that use the context methods:
name                            old time/op  new time/op  delta
AnchoredLiteralShortNonMatch-4  97.5ns ± 1%  94.8ns ± 2%  -2.80%  (p=0.000 n=45+43)
AnchoredShortMatch-4             163ns ± 1%   160ns ± 1%  -1.84%  (p=0.000 n=46+47)
NotOnePassShortA-4               742ns ± 2%   742ns ± 2%     ~    (p=0.440 n=49+50)
NotOnePassShortB-4               535ns ± 1%   533ns ± 2%  -0.37%  (p=0.005 n=46+48)
OnePassLongPrefix-4              169ns ± 2%   166ns ± 2%  -2.06%  (p=0.000 n=50+49)
Change-Id: Ib302d9e8c63333f02695369fcf9963974362e335
Reviewed-on: https://go-review.googlesource.com/38256
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Diffstat (limited to 'src/regexp/regexp.go')
-rw-r--r-- | src/regexp/regexp.go | 32 |
1 files changed, 24 insertions, 8 deletions
diff --git a/src/regexp/regexp.go b/src/regexp/regexp.go
index 01093d4bd0..4b34d53c8a 100644
--- a/src/regexp/regexp.go
+++ b/src/regexp/regexp.go
@@ -313,11 +313,19 @@ func (i *inputString) index(re *Regexp, pos int) int {
 
 func (i *inputString) context(pos int) syntax.EmptyOp {
 	r1, r2 := endOfText, endOfText
-	if pos > 0 && pos <= len(i.str) {
-		r1, _ = utf8.DecodeLastRuneInString(i.str[:pos])
+	// 0 < pos && pos <= len(i.str)
+	if uint(pos-1) < uint(len(i.str)) {
+		r1 = rune(i.str[pos-1])
+		if r1 >= utf8.RuneSelf {
+			r1, _ = utf8.DecodeLastRuneInString(i.str[:pos])
+		}
 	}
-	if pos < len(i.str) {
-		r2, _ = utf8.DecodeRuneInString(i.str[pos:])
+	// 0 <= pos && pos < len(i.str)
+	if uint(pos) < uint(len(i.str)) {
+		r2 = rune(i.str[pos])
+		if r2 >= utf8.RuneSelf {
+			r2, _ = utf8.DecodeRuneInString(i.str[pos:])
+		}
 	}
 	return syntax.EmptyOpContext(r1, r2)
 }
@@ -352,11 +360,19 @@ func (i *inputBytes) index(re *Regexp, pos int) int {
 
 func (i *inputBytes) context(pos int) syntax.EmptyOp {
 	r1, r2 := endOfText, endOfText
-	if pos > 0 && pos <= len(i.str) {
-		r1, _ = utf8.DecodeLastRune(i.str[:pos])
+	// 0 < pos && pos <= len(i.str)
+	if uint(pos-1) < uint(len(i.str)) {
+		r1 = rune(i.str[pos-1])
+		if r1 >= utf8.RuneSelf {
+			r1, _ = utf8.DecodeLastRune(i.str[:pos])
+		}
 	}
-	if pos < len(i.str) {
-		r2, _ = utf8.DecodeRune(i.str[pos:])
+	// 0 <= pos && pos < len(i.str)
+	if uint(pos) < uint(len(i.str)) {
+		r2 = rune(i.str[pos])
+		if r2 >= utf8.RuneSelf {
+			r2, _ = utf8.DecodeRune(i.str[pos:])
+		}
 	}
 	return syntax.EmptyOpContext(r1, r2)
 }