regexp: add ASCII fast path for context methods

The step method implementations check directly if the next rune
only needs one byte to be decoded and avoid calling utf8.DecodeRune
for such ASCII characters.

Introduce the same fast path optimization for rune decoding
for the context methods.

Results for regexp benchmarks that use the context methods:

name                            old time/op  new time/op  delta
AnchoredLiteralShortNonMatch-4  97.5ns ± 1%  94.8ns ± 2%  -2.80%  (p=0.000 n=45+43)
AnchoredShortMatch-4             163ns ± 1%   160ns ± 1%  -1.84%  (p=0.000 n=46+47)
NotOnePassShortA-4               742ns ± 2%   742ns ± 2%    ~     (p=0.440 n=49+50)
NotOnePassShortB-4               535ns ± 1%   533ns ± 2%  -0.37%  (p=0.005 n=46+48)
OnePassLongPrefix-4              169ns ± 2%   166ns ± 2%  -2.06%  (p=0.000 n=50+49)

Change-Id: Ib302d9e8c63333f02695369fcf9963974362e335
Reviewed-on: https://go-review.googlesource.com/38256
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
author: Martin Möhrmann <moehrmann@google.com> 2017-03-04 07:18:26 +0100
committer: Brad Fitzpatrick <bradfitz@golang.org> 2017-03-23 00:08:20 +0000
commit: e74c6cd3c05fda74fc8cac7a24b22b8b55a2239d (patch)
tree: 9e22276360a350d1771840bccd91679480f809ee /src/regexp/regexp.go
parent: 8a16d7d40a371e61f6d30604224039cf9a46d106 (diff)
download: go-git-e74c6cd3c05fda74fc8cac7a24b22b8b55a2239d.tar.gz
1 files changed, 24 insertions, 8 deletions
diff --git a/src/regexp/regexp.go b/src/regexp/regexp.go
index 01093d4bd0..4b34d53c8a 100644
--- a/src/regexp/regexp.go
+++ b/src/regexp/regexp.go
@@ -313,11 +313,19 @@ func (i *inputString) index(re *Regexp, pos int) int {
 
 func (i *inputString) context(pos int) syntax.EmptyOp {
 	r1, r2 := endOfText, endOfText
-	if pos > 0 && pos <= len(i.str) {
-		r1, _ = utf8.DecodeLastRuneInString(i.str[:pos])
+	// 0 < pos && pos <= len(i.str)
+	if uint(pos-1) < uint(len(i.str)) {
+		r1 = rune(i.str[pos-1])
+		if r1 >= utf8.RuneSelf {
+			r1, _ = utf8.DecodeLastRuneInString(i.str[:pos])
+		}
 	}
-	if pos < len(i.str) {
-		r2, _ = utf8.DecodeRuneInString(i.str[pos:])
+	// 0 <= pos && pos < len(i.str)
+	if uint(pos) < uint(len(i.str)) {
+		r2 = rune(i.str[pos])
+		if r2 >= utf8.RuneSelf {
+			r2, _ = utf8.DecodeRuneInString(i.str[pos:])
+		}
 	}
 	return syntax.EmptyOpContext(r1, r2)
 }
@@ -352,11 +360,19 @@ func (i *inputBytes) index(re *Regexp, pos int) int {
 
 func (i *inputBytes) context(pos int) syntax.EmptyOp {
 	r1, r2 := endOfText, endOfText
-	if pos > 0 && pos <= len(i.str) {
-		r1, _ = utf8.DecodeLastRune(i.str[:pos])
+	// 0 < pos && pos <= len(i.str)
+	if uint(pos-1) < uint(len(i.str)) {
+		r1 = rune(i.str[pos-1])
+		if r1 >= utf8.RuneSelf {
+			r1, _ = utf8.DecodeLastRune(i.str[:pos])
+		}
 	}
-	if pos < len(i.str) {
-		r2, _ = utf8.DecodeRune(i.str[pos:])
+	// 0 <= pos && pos < len(i.str)
+	if uint(pos) < uint(len(i.str)) {
+		r2 = rune(i.str[pos])
+		if r2 >= utf8.RuneSelf {
+			r2, _ = utf8.DecodeRune(i.str[pos:])
+		}
 	}
 	return syntax.EmptyOpContext(r1, r2)
 }
author	Martin Möhrmann <moehrmann@google.com>	2017-03-04 07:18:26 +0100
committer	Brad Fitzpatrick <bradfitz@golang.org>	2017-03-23 00:08:20 +0000
commit	e74c6cd3c05fda74fc8cac7a24b22b8b55a2239d (patch)
tree	9e22276360a350d1771840bccd91679480f809ee /src/regexp/regexp.go
parent	8a16d7d40a371e61f6d30604224039cf9a46d106 (diff)
download	go-git-e74c6cd3c05fda74fc8cac7a24b22b8b55a2239d.tar.gz