diff options
author | Martin Möhrmann <moehrmann@google.com> | 2017-03-06 09:34:39 +0100 |
---|---|---|
committer | Martin Möhrmann <moehrmann@google.com> | 2017-04-04 06:26:11 +0000 |
commit | bebfd4ba415cbfee578f64177fe1c59dab5a1df8 (patch) | |
tree | aea39e71a38f793ff5b62c628af35c1ba0ecf2ea /src/strings/strings_test.go | |
parent | 5cadc91b3ced9614b1055c448f7784a15907fff5 (diff) | |
download | go-git-bebfd4ba415cbfee578f64177fe1c59dab5a1df8.tar.gz |
strings: speed up Fields
- use a string lookup to detect if a single byte is a space character
- determine the exact number of fields for ASCII strings and
a possibly underestimated number of fields for non-ASCII strings
by doing a separate byte-by-byte scan of the input string
before collecting the fields in an extra pass
- provide a fast path for ASCII-only strings when collecting the fields
- avoid utf8.DecodeRuneInString and unicode.IsSpace for ASCII characters
Used golang.org/cl/33108 from Joe Tsai as a starting point.
name old time/op new time/op delta
Fields/ASCII/16 284ns ± 1% 116ns ± 2% -59.30% (p=0.000 n=9+10)
Fields/ASCII/256 3.81µs ± 1% 0.80µs ± 1% -79.10% (p=0.000 n=10+10)
Fields/ASCII/4096 61.4µs ± 1% 12.3µs ± 1% -79.96% (p=0.000 n=10+9)
Fields/ASCII/65536 982µs ± 1% 235µs ± 0% -76.04% (p=0.000 n=10+9)
Fields/ASCII/1048576 16.7ms ± 2% 5.4ms ± 1% -67.52% (p=0.000 n=10+10)
Fields/Mixed/16 314ns ± 1% 168ns ± 1% -46.33% (p=0.000 n=9+10)
Fields/Mixed/256 3.92µs ± 1% 1.17µs ± 1% -70.19% (p=0.000 n=10+10)
Fields/Mixed/4096 69.1µs ± 1% 19.0µs ± 1% -72.53% (p=0.000 n=10+10)
Fields/Mixed/65536 1.12ms ± 1% 0.39ms ± 0% -65.37% (p=0.000 n=10+9)
Fields/Mixed/1048576 19.0ms ± 2% 7.3ms ± 4% -61.75% (p=0.000 n=10+9)
name old speed new speed delta
Fields/ASCII/16 56.3MB/s ± 1% 138.1MB/s ± 2% +145.31% (p=0.000 n=9+10)
Fields/ASCII/256 67.1MB/s ± 1% 321.0MB/s ± 1% +378.26% (p=0.000 n=10+10)
Fields/ASCII/4096 66.7MB/s ± 1% 333.0MB/s ± 1% +398.97% (p=0.000 n=10+9)
Fields/ASCII/65536 66.7MB/s ± 1% 278.4MB/s ± 0% +317.39% (p=0.000 n=10+9)
Fields/ASCII/1048576 62.7MB/s ± 2% 192.9MB/s ± 1% +207.82% (p=0.000 n=10+10)
Fields/Mixed/16 51.0MB/s ± 2% 94.9MB/s ± 1% +85.87% (p=0.000 n=10+10)
Fields/Mixed/256 65.4MB/s ± 1% 219.2MB/s ± 1% +235.33% (p=0.000 n=10+10)
Fields/Mixed/4096 59.3MB/s ± 1% 215.7MB/s ± 1% +263.98% (p=0.000 n=10+10)
Fields/Mixed/65536 58.6MB/s ± 1% 169.1MB/s ± 0% +188.73% (p=0.000 n=10+9)
Fields/Mixed/1048576 55.1MB/s ± 2% 144.0MB/s ± 4% +161.44% (p=0.000 n=10+9)
Updates #19789
Updates #17856
Change-Id: If2ce1479542702e9cd65a82a462ba55ac8eb3876
Reviewed-on: https://go-review.googlesource.com/37959
Run-TryBot: Martin Möhrmann <moehrmann@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>
Diffstat (limited to 'src/strings/strings_test.go')
-rw-r--r-- | src/strings/strings_test.go | 54 |
1 files changed, 47 insertions, 7 deletions
diff --git a/src/strings/strings_test.go b/src/strings/strings_test.go index 97041eb9ac..58314a6868 100644 --- a/src/strings/strings_test.go +++ b/src/strings/strings_test.go @@ -452,6 +452,7 @@ var fieldstests = []FieldsTest{ {"", []string{}}, {" ", []string{}}, {" \t ", []string{}}, + {"\u2000", []string{}}, {" abc ", []string{"abc"}}, {"1 2 3 4", []string{"1", "2", "3", "4"}}, {"1 2 3 4", []string{"1", "2", "3", "4"}}, @@ -459,6 +460,9 @@ var fieldstests = []FieldsTest{ {"1\u20002\u20013\u20024", []string{"1", "2", "3", "4"}}, {"\u2000\u2001\u2002", []string{}}, {"\n™\t™\n", []string{"™", "™"}}, + {"\n\u20001™2\u2000 \u2001 ™", []string{"1™2", "™"}}, + {"\n1\uFFFD \uFFFD2\u20003\uFFFD4", []string{"1\uFFFD", "\uFFFD2", "3\uFFFD4"}}, + {"1\xFF\u2000\xFF2\xFF \xFF", []string{"1\xFF", "\xFF2\xFF", "\xFF"}}, {faces, []string{faces}}, } @@ -1473,19 +1477,55 @@ var makeFieldsInput = func() string { return string(x) } -var fieldsInput = makeFieldsInput() +var makeFieldsInputASCII = func() string { + x := make([]byte, 1<<20) + // Input is ~10% space, rest ASCII non-space. 
+ for i := range x { + if rand.Intn(10) == 0 { + x[i] = ' ' + } else { + x[i] = 'x' + } + } + return string(x) +} + +var stringdata = []struct{ name, data string }{ + {"ASCII", makeFieldsInputASCII()}, + {"Mixed", makeFieldsInput()}, +} func BenchmarkFields(b *testing.B) { - b.SetBytes(int64(len(fieldsInput))) - for i := 0; i < b.N; i++ { - Fields(fieldsInput) + for _, sd := range stringdata { + b.Run(sd.name, func(b *testing.B) { + for j := 1 << 4; j <= 1<<20; j <<= 4 { + b.Run(fmt.Sprintf("%d", j), func(b *testing.B) { + b.ReportAllocs() + b.SetBytes(int64(j)) + data := sd.data[:j] + for i := 0; i < b.N; i++ { + Fields(data) + } + }) + } + }) } } func BenchmarkFieldsFunc(b *testing.B) { - b.SetBytes(int64(len(fieldsInput))) - for i := 0; i < b.N; i++ { - FieldsFunc(fieldsInput, unicode.IsSpace) + for _, sd := range stringdata { + b.Run(sd.name, func(b *testing.B) { + for j := 1 << 4; j <= 1<<20; j <<= 4 { + b.Run(fmt.Sprintf("%d", j), func(b *testing.B) { + b.ReportAllocs() + b.SetBytes(int64(j)) + data := sd.data[:j] + for i := 0; i < b.N; i++ { + FieldsFunc(data, unicode.IsSpace) + } + }) + } + }) } } |