diff options
author | Ian Lance Taylor <iant@golang.org> | 2017-09-14 17:11:35 +0000 |
---|---|---|
committer | Ian Lance Taylor <ian@gcc.gnu.org> | 2017-09-14 17:11:35 +0000 |
commit | bc998d034f45d1828a8663b2eed928faf22a7d01 (patch) | |
tree | 8d262a22ca7318f4bcd64269fe8fe9e45bcf8d0f /libgo/go/strings | |
parent | a41a6142df74219f596e612d3a7775f68ca6e96f (diff) | |
download | gcc-bc998d034f45d1828a8663b2eed928faf22a7d01.tar.gz |
libgo: update to go1.9
Reviewed-on: https://go-review.googlesource.com/63753
From-SVN: r252767
Diffstat (limited to 'libgo/go/strings')
-rw-r--r-- | libgo/go/strings/example_test.go | 47 | ||||
-rw-r--r-- | libgo/go/strings/replace_test.go | 41 | ||||
-rw-r--r-- | libgo/go/strings/strings.go | 274 | ||||
-rw-r--r-- | libgo/go/strings/strings_amd64.go | 43 | ||||
-rw-r--r-- | libgo/go/strings/strings_generic.go | 22 | ||||
-rw-r--r-- | libgo/go/strings/strings_s390x.go | 32 | ||||
-rw-r--r-- | libgo/go/strings/strings_test.go | 103 |
7 files changed, 452 insertions, 110 deletions
diff --git a/libgo/go/strings/example_test.go b/libgo/go/strings/example_test.go index 3f9d63b5a4a..e9621522ef2 100644 --- a/libgo/go/strings/example_test.go +++ b/libgo/go/strings/example_test.go @@ -23,6 +23,16 @@ func ExampleFieldsFunc() { // Output: Fields are: ["foo1" "bar2" "baz3"] } +func ExampleCompare() { + fmt.Println(strings.Compare("a", "b")) + fmt.Println(strings.Compare("a", "a")) + fmt.Println(strings.Compare("b", "a")) + // Output: + // -1 + // 0 + // 1 +} + func ExampleContains() { fmt.Println(strings.Contains("seafood", "foo")) fmt.Println(strings.Contains("seafood", "bar")) @@ -47,6 +57,16 @@ func ExampleContainsAny() { // false } +func ExampleContainsRune() { + // Finds whether a string contains a particular Unicode code point. + // The code point for the lowercase letter "a", for example, is 97. + fmt.Println(strings.ContainsRune("aardvark", 97)) + fmt.Println(strings.ContainsRune("timeout", 97)) + // Output: + // true + // false +} + func ExampleCount() { fmt.Println(strings.Count("cheese", "e")) fmt.Println(strings.Count("five", "")) // before & after each rune @@ -109,6 +129,15 @@ func ExampleIndexAny() { // -1 } +func ExampleIndexByte() { + fmt.Println(strings.IndexByte("golang", 'g')) + fmt.Println(strings.IndexByte("gophers", 'h')) + fmt.Println(strings.IndexByte("golang", 'x')) + // Output: + // 0 + // 3 + // -1 +} func ExampleIndexRune() { fmt.Println(strings.IndexRune("chicken", 'k')) fmt.Println(strings.IndexRune("chicken", 'd')) @@ -127,6 +156,16 @@ func ExampleLastIndex() { // -1 } +func ExampleLastIndexAny() { + fmt.Println(strings.LastIndexAny("go gopher", "go")) + fmt.Println(strings.LastIndexAny("go gopher", "rodent")) + fmt.Println(strings.LastIndexAny("go gopher", "fail")) + // Output: + // 4 + // 8 + // -1 +} + func ExampleJoin() { s := []string{"foo", "bar", "baz"} fmt.Println(strings.Join(s, ", ")) @@ -195,6 +234,14 @@ func ExampleTrim() { // Output: ["Achtung! Achtung"] } +func ExampleTrimFunc() { + f := func(c rune) bool { + return !unicode.IsLetter(c) && !unicode.IsNumber(c) + } + fmt.Printf("[%q]", strings.TrimFunc(" Achtung1! Achtung2,...", f)) + // Output: ["Achtung1! Achtung2"] +} + func ExampleMap() { rot13 := func(r rune) rune { switch { diff --git a/libgo/go/strings/replace_test.go b/libgo/go/strings/replace_test.go index 77e48b988bc..34b5badfadf 100644 --- a/libgo/go/strings/replace_test.go +++ b/libgo/go/strings/replace_test.go @@ -540,3 +540,44 @@ func BenchmarkByteByteMap(b *testing.B) { Map(fn, str) } } + +var mapdata = []struct{ name, data string }{ + {"ASCII", "a b c d e f g h i j k l m n o p q r s t u v w x y z"}, + {"Greek", "α β γ δ ε ζ η θ ι κ λ μ ν ξ ο π ρ ς σ τ υ φ χ ψ ω"}, +} + +func BenchmarkMap(b *testing.B) { + mapidentity := func(r rune) rune { + return r + } + + b.Run("identity", func(b *testing.B) { + for _, md := range mapdata { + b.Run(md.name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + Map(mapidentity, md.data) + } + }) + } + }) + + mapchange := func(r rune) rune { + if 'a' <= r && r <= 'z' { + return r + 'A' - 'a' + } + if 'α' <= r && r <= 'ω' { + return r + 'Α' - 'α' + } + return r + } + + b.Run("change", func(b *testing.B) { + for _, md := range mapdata { + b.Run(md.name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + Map(mapchange, md.data) + } + }) + } + }) +} diff --git a/libgo/go/strings/strings.go b/libgo/go/strings/strings.go index 60a281a6ac5..0c836c09d46 100644 --- a/libgo/go/strings/strings.go +++ b/libgo/go/strings/strings.go @@ -72,22 +72,20 @@ func hashStrRev(sep string) (uint32, uint32) { return hash, pow } -// Count counts the number of non-overlapping instances of sep in s. -// If sep is an empty string, Count returns 1 + the number of Unicode code points in s. -func Count(s, sep string) int { - n := 0 - // special cases - if len(sep) == 0 { +// countGeneric implements Count. +func countGeneric(s, substr string) int { + // special case + if len(substr) == 0 { return utf8.RuneCountInString(s) + 1 } - offset := 0 + n := 0 for { - i := Index(s[offset:], sep) + i := Index(s, substr) if i == -1 { return n } n++ - offset += i + len(sep) + s = s[i+len(substr):] } } @@ -106,16 +104,16 @@ func ContainsRune(s string, r rune) bool { return IndexRune(s, r) >= 0 } -// LastIndex returns the index of the last instance of sep in s, or -1 if sep is not present in s. -func LastIndex(s, sep string) int { - n := len(sep) +// LastIndex returns the index of the last instance of substr in s, or -1 if substr is not present in s. +func LastIndex(s, substr string) int { + n := len(substr) switch { case n == 0: return len(s) case n == 1: - return LastIndexByte(s, sep[0]) + return LastIndexByte(s, substr[0]) case n == len(s): - if sep == s { + if substr == s { return 0 } return -1 @@ -123,20 +121,20 @@ func LastIndex(s, sep string) int { return -1 } // Rabin-Karp search from the end of the string - hashsep, pow := hashStrRev(sep) + hashss, pow := hashStrRev(substr) last := len(s) - n var h uint32 for i := len(s) - 1; i >= last; i-- { h = h*primeRK + uint32(s[i]) } - if h == hashsep && s[last:] == sep { + if h == hashss && s[last:] == substr { return last } for i := last - 1; i >= 0; i-- { h *= primeRK h += uint32(s[i]) h -= pow * uint32(s[i+n]) - if h == hashsep && s[i:i+n] == sep { + if h == hashss && s[i:i+n] == substr { return i } } @@ -240,61 +238,187 @@ func genSplit(s, sep string, sepSave, n int) []string { if n < 0 { n = Count(s, sep) + 1 } - c := sep[0] - start := 0 + a := make([]string, n) - na := 0 - for i := 0; i+len(sep) <= len(s) && na+1 < n; i++ { - if s[i] == c && (len(sep) == 1 || s[i:i+len(sep)] == sep) { - a[na] = s[start : i+sepSave] - na++ - start = i + len(sep) - i += len(sep) - 1 - } + n-- + i := 0 + for i < n { + m := Index(s, sep) + if m < 0 { + break + } + a[i] = s[:m+sepSave] + s = s[m+len(sep):] + i++ } - a[na] = s[start:] - return a[0 : na+1] + a[i] = s + return a[:i+1] } // SplitN slices s into substrings separated by sep and returns a slice of // the substrings between those separators. -// If sep is empty, SplitN splits after each UTF-8 sequence. +// // The count determines the number of substrings to return: // n > 0: at most n substrings; the last substring will be the unsplit remainder. // n == 0: the result is nil (zero substrings) // n < 0: all substrings +// +// Edge cases for s and sep (for example, empty strings) are handled +// as described in the documentation for Split. func SplitN(s, sep string, n int) []string { return genSplit(s, sep, 0, n) } // SplitAfterN slices s into substrings after each instance of sep and // returns a slice of those substrings. -// If sep is empty, SplitAfterN splits after each UTF-8 sequence. +// // The count determines the number of substrings to return: // n > 0: at most n substrings; the last substring will be the unsplit remainder. // n == 0: the result is nil (zero substrings) // n < 0: all substrings +// +// Edge cases for s and sep (for example, empty strings) are handled +// as described in the documentation for SplitAfter. func SplitAfterN(s, sep string, n int) []string { return genSplit(s, sep, len(sep), n) } // Split slices s into all substrings separated by sep and returns a slice of // the substrings between those separators. -// If sep is empty, Split splits after each UTF-8 sequence. +// +// If s does not contain sep and sep is not empty, Split returns a +// slice of length 1 whose only element is s. +// +// If sep is empty, Split splits after each UTF-8 sequence. If both s +// and sep are empty, Split returns an empty slice. +// // It is equivalent to SplitN with a count of -1. func Split(s, sep string) []string { return genSplit(s, sep, 0, -1) } // SplitAfter slices s into all substrings after each instance of sep and // returns a slice of those substrings. -// If sep is empty, SplitAfter splits after each UTF-8 sequence. +// +// If s does not contain sep and sep is not empty, SplitAfter returns +// a slice of length 1 whose only element is s. +// +// If sep is empty, SplitAfter splits after each UTF-8 sequence. If +// both s and sep are empty, SplitAfter returns an empty slice. +// // It is equivalent to SplitAfterN with a count of -1. func SplitAfter(s, sep string) []string { return genSplit(s, sep, len(sep), -1) } +var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1} + // Fields splits the string s around each instance of one or more consecutive white space // characters, as defined by unicode.IsSpace, returning an array of substrings of s or an // empty list if s contains only white space. func Fields(s string) []string { - return FieldsFunc(s, unicode.IsSpace) + // First count the fields. + // This is an exact count if s is ASCII, otherwise it is an approximation. + n := 0 + wasSpace := 1 + // setBits is used to track which bits are set in the bytes of s. + setBits := uint8(0) + for i := 0; i < len(s); i++ { + r := s[i] + setBits |= r + isSpace := int(asciiSpace[r]) + n += wasSpace & ^isSpace + wasSpace = isSpace + } + + if setBits < utf8.RuneSelf { // ASCII fast path + a := make([]string, n) + na := 0 + fieldStart := 0 + i := 0 + // Skip spaces in the front of the input. + for i < len(s) && asciiSpace[s[i]] != 0 { + i++ + } + fieldStart = i + for i < len(s) { + if asciiSpace[s[i]] == 0 { + i++ + continue + } + a[na] = s[fieldStart:i] + na++ + i++ + // Skip spaces in between fields. + for i < len(s) && asciiSpace[s[i]] != 0 { + i++ + } + fieldStart = i + } + if fieldStart < len(s) { // Last field might end at EOF. + a[na] = s[fieldStart:] + } + return a + } + + // Some runes in the input string are not ASCII. + // Same general approach as in the ASCII path but + // uses DecodeRuneInString and unicode.IsSpace if + // a non-ASCII rune needs to be decoded and checked + // if it corresponds to a space. + a := make([]string, 0, n) + fieldStart := 0 + i := 0 + // Skip spaces in the front of the input. + for i < len(s) { + if c := s[i]; c < utf8.RuneSelf { + if asciiSpace[c] == 0 { + break + } + i++ + } else { + r, w := utf8.DecodeRuneInString(s[i:]) + if !unicode.IsSpace(r) { + break + } + i += w + } + } + fieldStart = i + for i < len(s) { + if c := s[i]; c < utf8.RuneSelf { + if asciiSpace[c] == 0 { + i++ + continue + } + a = append(a, s[fieldStart:i]) + i++ + } else { + r, w := utf8.DecodeRuneInString(s[i:]) + if !unicode.IsSpace(r) { + i += w + continue + } + a = append(a, s[fieldStart:i]) + i += w + } + // Skip spaces in between fields. + for i < len(s) { + if c := s[i]; c < utf8.RuneSelf { + if asciiSpace[c] == 0 { + break + } + i++ + } else { + r, w := utf8.DecodeRuneInString(s[i:]) + if !unicode.IsSpace(r) { + break + } + i += w + } + } + fieldStart = i + } + if fieldStart < len(s) { // Last field might end at EOF. + a = append(a, s[fieldStart:]) + } + return a } // FieldsFunc splits the string s at each run of Unicode code points c satisfying f(c) @@ -383,40 +507,71 @@ func Map(mapping func(rune) rune, s string) string { // In the worst case, the string can grow when mapped, making // things unpleasant. But it's so rare we barge in assuming it's // fine. It could also shrink but that falls out naturally. - maxbytes := len(s) // length of b - nbytes := 0 // number of bytes encoded in b + // The output buffer b is initialized on demand, the first // time a character differs. var b []byte + // nbytes is the number of bytes encoded in b. + var nbytes int for i, c := range s { r := mapping(c) - if b == nil { - if r == c { - continue - } - b = make([]byte, maxbytes) - nbytes = copy(b, s[:i]) + if r == c { + continue } + + b = make([]byte, len(s)+utf8.UTFMax) + nbytes = copy(b, s[:i]) if r >= 0 { - wid := 1 - if r >= utf8.RuneSelf { - wid = utf8.RuneLen(r) + if r <= utf8.RuneSelf { + b[nbytes] = byte(r) + nbytes++ + } else { + nbytes += utf8.EncodeRune(b[nbytes:], r) } - if nbytes+wid > maxbytes { - // Grow the buffer. - maxbytes = maxbytes*2 + utf8.UTFMax - nb := make([]byte, maxbytes) - copy(nb, b[0:nbytes]) - b = nb - } - nbytes += utf8.EncodeRune(b[nbytes:maxbytes], r) } + + if c == utf8.RuneError { + // RuneError is the result of either decoding + // an invalid sequence or '\uFFFD'. Determine + // the correct number of bytes we need to advance. + _, w := utf8.DecodeRuneInString(s[i:]) + i += w + } else { + i += utf8.RuneLen(c) + } + + s = s[i:] + break } + if b == nil { return s } - return string(b[0:nbytes]) + + for _, c := range s { + r := mapping(c) + + // common case + if (0 <= r && r <= utf8.RuneSelf) && nbytes < len(b) { + b[nbytes] = byte(r) + nbytes++ + continue + } + + // b is not big enough or r is not a ASCII rune. + if r >= 0 { + if nbytes+utf8.UTFMax >= len(b) { + // Grow the buffer. + nb := make([]byte, 2*len(b)) + copy(nb, b[:nbytes]) + b = nb + } + nbytes += utf8.EncodeRune(b[nbytes:], r) + } + } + + return string(b[:nbytes]) } // Repeat returns a new string consisting of count copies of the string s. @@ -561,17 +716,10 @@ func LastIndexFunc(s string, f func(rune) bool) int { // truth==false, the sense of the predicate function is // inverted. func indexFunc(s string, f func(rune) bool, truth bool) int { - start := 0 - for start < len(s) { - wid := 1 - r := rune(s[start]) - if r >= utf8.RuneSelf { - r, wid = utf8.DecodeRuneInString(s[start:]) - } + for i, r := range s { if f(r) == truth { - return start + return i } - start += wid } return -1 } diff --git a/libgo/go/strings/strings_amd64.go b/libgo/go/strings/strings_amd64.go index e55afd53d01..9648912fd5b 100644 --- a/libgo/go/strings/strings_amd64.go +++ b/libgo/go/strings/strings_amd64.go @@ -6,44 +6,46 @@ package strings +import "internal/cpu" + //go:noescape // indexShortStr returns the index of the first instance of c in s, or -1 if c is not present in s. // indexShortStr requires 2 <= len(c) <= shortStringLen -func indexShortStr(s, c string) int // ../runtime/asm_$GOARCH.s -func supportAVX2() bool // ../runtime/asm_$GOARCH.s +func indexShortStr(s, c string) int // ../runtime/asm_amd64.s +func countByte(s string, c byte) int // ../runtime/asm_amd64.s var shortStringLen int func init() { - if supportAVX2() { + if cpu.X86.HasAVX2 { shortStringLen = 63 } else { shortStringLen = 31 } } -// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s. -func Index(s, sep string) int { - n := len(sep) +// Index returns the index of the first instance of substr in s, or -1 if substr is not present in s. +func Index(s, substr string) int { + n := len(substr) switch { case n == 0: return 0 case n == 1: - return IndexByte(s, sep[0]) + return IndexByte(s, substr[0]) case n == len(s): - if sep == s { + if substr == s { return 0 } return -1 case n > len(s): return -1 case n <= shortStringLen: - // Use brute force when s and sep both are small + // Use brute force when s and substr both are small if len(s) <= 64 { - return indexShortStr(s, sep) + return indexShortStr(s, substr) } - c := sep[0] + c := substr[0] i := 0 t := s[:len(s)-n+1] fails := 0 @@ -57,7 +59,7 @@ func Index(s, sep string) int { } i += o } - if s[i:i+n] == sep { + if s[i:i+n] == substr { return i } fails++ @@ -66,7 +68,7 @@ func Index(s, sep string) int { // Too many means more that 1 error per 8 characters. // Allow some errors in the beginning. if fails > (i+16)/8 { - r := indexShortStr(s[i:], sep) + r := indexShortStr(s[i:], substr) if r >= 0 { return r + i } @@ -76,12 +78,12 @@ func Index(s, sep string) int { return -1 } // Rabin-Karp search - hashsep, pow := hashStr(sep) + hashss, pow := hashStr(substr) var h uint32 for i := 0; i < n; i++ { h = h*primeRK + uint32(s[i]) } - if h == hashsep && s[:n] == sep { + if h == hashss && s[:n] == substr { return 0 } for i := n; i < len(s); { @@ -89,9 +91,18 @@ func Index(s, sep string) int { h += uint32(s[i]) h -= pow * uint32(s[i-n]) i++ - if h == hashsep && s[i-n:i] == sep { + if h == hashss && s[i-n:i] == substr { return i - n } } return -1 } + +// Count counts the number of non-overlapping instances of substr in s. +// If substr is an empty string, Count returns 1 + the number of Unicode code points in s. +func Count(s, substr string) int { + if len(substr) == 1 && cpu.X86.HasPOPCNT { + return countByte(s, byte(substr[0])) + } + return countGeneric(s, substr) +} diff --git a/libgo/go/strings/strings_generic.go b/libgo/go/strings/strings_generic.go index a3ad515444b..9844201db30 100644 --- a/libgo/go/strings/strings_generic.go +++ b/libgo/go/strings/strings_generic.go @@ -9,16 +9,16 @@ package strings // TODO: implements short string optimization on non amd64 platforms // and get rid of strings_amd64.go -// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s. -func Index(s, sep string) int { - n := len(sep) +// Index returns the index of the first instance of substr in s, or -1 if substr is not present in s. +func Index(s, substr string) int { + n := len(substr) switch { case n == 0: return 0 case n == 1: - return IndexByte(s, sep[0]) + return IndexByte(s, substr[0]) case n == len(s): - if sep == s { + if substr == s { return 0 } return -1 @@ -26,12 +26,12 @@ func Index(s, sep string) int { return -1 } // Rabin-Karp search - hashsep, pow := hashStr(sep) + hashss, pow := hashStr(substr) var h uint32 for i := 0; i < n; i++ { h = h*primeRK + uint32(s[i]) } - if h == hashsep && s[:n] == sep { + if h == hashss && s[:n] == substr { return 0 } for i := n; i < len(s); { @@ -39,9 +39,15 @@ func Index(s, sep string) int { h += uint32(s[i]) h -= pow * uint32(s[i-n]) i++ - if h == hashsep && s[i-n:i] == sep { + if h == hashss && s[i-n:i] == substr { return i - n } } return -1 } + +// Count counts the number of non-overlapping instances of substr in s. +// If substr is an empty string, Count returns 1 + the number of Unicode code points in s. +func Count(s, substr string) int { + return countGeneric(s, substr) +} diff --git a/libgo/go/strings/strings_s390x.go b/libgo/go/strings/strings_s390x.go index b47702fd51a..b05fb2b025a 100644 --- a/libgo/go/strings/strings_s390x.go +++ b/libgo/go/strings/strings_s390x.go @@ -26,27 +26,27 @@ func init() { } } -// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s. -func Index(s, sep string) int { - n := len(sep) +// Index returns the index of the first instance of substr in s, or -1 if substr is not present in s. +func Index(s, substr string) int { + n := len(substr) switch { case n == 0: return 0 case n == 1: - return IndexByte(s, sep[0]) + return IndexByte(s, substr[0]) case n == len(s): - if sep == s { + if substr == s { return 0 } return -1 case n > len(s): return -1 case n <= shortStringLen: - // Use brute force when s and sep both are small + // Use brute force when s and substr both are small if len(s) <= 64 { - return indexShortStr(s, sep) + return indexShortStr(s, substr) } - c := sep[0] + c := substr[0] i := 0 t := s[:len(s)-n+1] fails := 0 @@ -60,7 +60,7 @@ func Index(s, sep string) int { } i += o } - if s[i:i+n] == sep { + if s[i:i+n] == substr { return i } fails++ @@ -69,7 +69,7 @@ func Index(s, sep string) int { // Too many means more that 1 error per 8 characters. // Allow some errors in the beginning. if fails > (i+16)/8 { - r := indexShortStr(s[i:], sep) + r := indexShortStr(s[i:], substr) if r >= 0 { return r + i } @@ -79,12 +79,12 @@ func Index(s, sep string) int { return -1 } // Rabin-Karp search - hashsep, pow := hashStr(sep) + hashss, pow := hashStr(substr) var h uint32 for i := 0; i < n; i++ { h = h*primeRK + uint32(s[i]) } - if h == hashsep && s[:n] == sep { + if h == hashss && s[:n] == substr { return 0 } for i := n; i < len(s); { @@ -92,9 +92,15 @@ func Index(s, sep string) int { h += uint32(s[i]) h -= pow * uint32(s[i-n]) i++ - if h == hashsep && s[i-n:i] == sep { + if h == hashss && s[i-n:i] == substr { return i - n } } return -1 } + +// Count counts the number of non-overlapping instances of substr in s. +// If substr is an empty string, Count returns 1 + the number of Unicode code points in s. +func Count(s, substr string) int { + return countGeneric(s, substr) +} diff --git a/libgo/go/strings/strings_test.go b/libgo/go/strings/strings_test.go index 449fb502d64..0fddaf0e4a6 100644 --- a/libgo/go/strings/strings_test.go +++ b/libgo/go/strings/strings_test.go @@ -456,6 +456,7 @@ var fieldstests = []FieldsTest{ {"", []string{}}, {" ", []string{}}, {" \t ", []string{}}, + {"\u2000", []string{}}, {" abc ", []string{"abc"}}, {"1 2 3 4", []string{"1", "2", "3", "4"}}, {"1 2 3 4", []string{"1", "2", "3", "4"}}, @@ -463,6 +464,9 @@ var fieldstests = []FieldsTest{ {"1\u20002\u20013\u20024", []string{"1", "2", "3", "4"}}, {"\u2000\u2001\u2002", []string{}}, {"\n™\t™\n", []string{"™", "™"}}, + {"\n\u20001™2\u2000 \u2001 ™", []string{"1™2", "™"}}, + {"\n1\uFFFD \uFFFD2\u20003\uFFFD4", []string{"1\uFFFD", "\uFFFD2", "3\uFFFD4"}}, + {"1\xFF\u2000\xFF2\xFF \xFF", []string{"1\xFF", "\xFF2\xFF", "\xFF"}}, {faces, []string{faces}}, } @@ -629,6 +633,19 @@ func TestMap(t *testing.T) { (*reflect.StringHeader)(unsafe.Pointer(&m)).Data { t.Error("unexpected copy during identity map") } + + // 7. Handle invalid UTF-8 sequence + replaceNotLatin := func(r rune) rune { + if unicode.Is(unicode.Latin, r) { + return r + } + return '?' + } + m = Map(replaceNotLatin, "Hello\255World") + expect = "Hello?World" + if m != expect { + t.Errorf("replace invalid sequence: expected %q got %q", expect, m) + } } func TestToUpper(t *testing.T) { runStringTests(t, ToUpper, "ToUpper", upperTests) } @@ -1444,6 +1461,24 @@ func BenchmarkCountTortureOverlapping(b *testing.B) { } } +func BenchmarkCountByte(b *testing.B) { + indexSizes := []int{10, 32, 4 << 10, 4 << 20, 64 << 20} + benchStr := Repeat(benchmarkString, + (indexSizes[len(indexSizes)-1]+len(benchmarkString)-1)/len(benchmarkString)) + benchFunc := func(b *testing.B, benchStr string) { + b.SetBytes(int64(len(benchStr))) + for i := 0; i < b.N; i++ { + Count(benchStr, "=") + } + } + for _, size := range indexSizes { + b.Run(fmt.Sprintf("%d", size), func(b *testing.B) { + benchFunc(b, benchStr[:size]) + }) + } + +} + var makeFieldsInput = func() string { x := make([]byte, 1<<20) // Input is ~10% space, ~10% 2-byte UTF-8, rest ASCII non-space. @@ -1464,40 +1499,88 @@ var makeFieldsInput = func() string { return string(x) } -var fieldsInput = makeFieldsInput() +var makeFieldsInputASCII = func() string { + x := make([]byte, 1<<20) + // Input is ~10% space, rest ASCII non-space. + for i := range x { + if rand.Intn(10) == 0 { + x[i] = ' ' + } else { + x[i] = 'x' + } + } + return string(x) +} + +var stringdata = []struct{ name, data string }{ + {"ASCII", makeFieldsInputASCII()}, + {"Mixed", makeFieldsInput()}, +} func BenchmarkFields(b *testing.B) { - b.SetBytes(int64(len(fieldsInput))) - for i := 0; i < b.N; i++ { - Fields(fieldsInput) + for _, sd := range stringdata { + b.Run(sd.name, func(b *testing.B) { + for j := 1 << 4; j <= 1<<20; j <<= 4 { + b.Run(fmt.Sprintf("%d", j), func(b *testing.B) { + b.ReportAllocs() + b.SetBytes(int64(j)) + data := sd.data[:j] + for i := 0; i < b.N; i++ { + Fields(data) + } + }) + } + }) } } func BenchmarkFieldsFunc(b *testing.B) { - b.SetBytes(int64(len(fieldsInput))) - for i := 0; i < b.N; i++ { - FieldsFunc(fieldsInput, unicode.IsSpace) + for _, sd := range stringdata { + b.Run(sd.name, func(b *testing.B) { + for j := 1 << 4; j <= 1<<20; j <<= 4 { + b.Run(fmt.Sprintf("%d", j), func(b *testing.B) { + b.ReportAllocs() + b.SetBytes(int64(j)) + data := sd.data[:j] + for i := 0; i < b.N; i++ { + FieldsFunc(data, unicode.IsSpace) + } + }) + } + }) } } -func BenchmarkSplit1(b *testing.B) { +func BenchmarkSplitEmptySeparator(b *testing.B) { for i := 0; i < b.N; i++ { Split(benchInputHard, "") } } -func BenchmarkSplit2(b *testing.B) { +func BenchmarkSplitSingleByteSeparator(b *testing.B) { for i := 0; i < b.N; i++ { Split(benchInputHard, "/") } } -func BenchmarkSplit3(b *testing.B) { +func BenchmarkSplitMultiByteSeparator(b *testing.B) { for i := 0; i < b.N; i++ { Split(benchInputHard, "hello") } } +func BenchmarkSplitNSingleByteSeparator(b *testing.B) { + for i := 0; i < b.N; i++ { + SplitN(benchInputHard, "/", 10) + } +} + +func BenchmarkSplitNMultiByteSeparator(b *testing.B) { + for i := 0; i < b.N; i++ { + SplitN(benchInputHard, "hello", 10) + } +} + func BenchmarkRepeat(b *testing.B) { for i := 0; i < b.N; i++ { Repeat("-", 80) |