summaryrefslogtreecommitdiff
path: root/libgo/go/strings
diff options
context:
space:
mode:
authorIan Lance Taylor <iant@golang.org>2017-09-14 17:11:35 +0000
committerIan Lance Taylor <ian@gcc.gnu.org>2017-09-14 17:11:35 +0000
commitbc998d034f45d1828a8663b2eed928faf22a7d01 (patch)
tree8d262a22ca7318f4bcd64269fe8fe9e45bcf8d0f /libgo/go/strings
parenta41a6142df74219f596e612d3a7775f68ca6e96f (diff)
downloadgcc-bc998d034f45d1828a8663b2eed928faf22a7d01.tar.gz
libgo: update to go1.9
Reviewed-on: https://go-review.googlesource.com/63753 From-SVN: r252767
Diffstat (limited to 'libgo/go/strings')
-rw-r--r--libgo/go/strings/example_test.go47
-rw-r--r--libgo/go/strings/replace_test.go41
-rw-r--r--libgo/go/strings/strings.go274
-rw-r--r--libgo/go/strings/strings_amd64.go43
-rw-r--r--libgo/go/strings/strings_generic.go22
-rw-r--r--libgo/go/strings/strings_s390x.go32
-rw-r--r--libgo/go/strings/strings_test.go103
7 files changed, 452 insertions, 110 deletions
diff --git a/libgo/go/strings/example_test.go b/libgo/go/strings/example_test.go
index 3f9d63b5a4a..e9621522ef2 100644
--- a/libgo/go/strings/example_test.go
+++ b/libgo/go/strings/example_test.go
@@ -23,6 +23,16 @@ func ExampleFieldsFunc() {
// Output: Fields are: ["foo1" "bar2" "baz3"]
}
+func ExampleCompare() {
+ fmt.Println(strings.Compare("a", "b"))
+ fmt.Println(strings.Compare("a", "a"))
+ fmt.Println(strings.Compare("b", "a"))
+ // Output:
+ // -1
+ // 0
+ // 1
+}
+
func ExampleContains() {
fmt.Println(strings.Contains("seafood", "foo"))
fmt.Println(strings.Contains("seafood", "bar"))
@@ -47,6 +57,16 @@ func ExampleContainsAny() {
// false
}
+func ExampleContainsRune() {
+ // Finds whether a string contains a particular Unicode code point.
+ // The code point for the lowercase letter "a", for example, is 97.
+ fmt.Println(strings.ContainsRune("aardvark", 97))
+ fmt.Println(strings.ContainsRune("timeout", 97))
+ // Output:
+ // true
+ // false
+}
+
func ExampleCount() {
fmt.Println(strings.Count("cheese", "e"))
fmt.Println(strings.Count("five", "")) // before & after each rune
@@ -109,6 +129,15 @@ func ExampleIndexAny() {
// -1
}
+func ExampleIndexByte() {
+ fmt.Println(strings.IndexByte("golang", 'g'))
+ fmt.Println(strings.IndexByte("gophers", 'h'))
+ fmt.Println(strings.IndexByte("golang", 'x'))
+ // Output:
+ // 0
+ // 3
+ // -1
+}
func ExampleIndexRune() {
fmt.Println(strings.IndexRune("chicken", 'k'))
fmt.Println(strings.IndexRune("chicken", 'd'))
@@ -127,6 +156,16 @@ func ExampleLastIndex() {
// -1
}
+func ExampleLastIndexAny() {
+ fmt.Println(strings.LastIndexAny("go gopher", "go"))
+ fmt.Println(strings.LastIndexAny("go gopher", "rodent"))
+ fmt.Println(strings.LastIndexAny("go gopher", "fail"))
+ // Output:
+ // 4
+ // 8
+ // -1
+}
+
func ExampleJoin() {
s := []string{"foo", "bar", "baz"}
fmt.Println(strings.Join(s, ", "))
@@ -195,6 +234,14 @@ func ExampleTrim() {
// Output: ["Achtung! Achtung"]
}
+func ExampleTrimFunc() {
+ f := func(c rune) bool {
+ return !unicode.IsLetter(c) && !unicode.IsNumber(c)
+ }
+ fmt.Printf("[%q]", strings.TrimFunc(" Achtung1! Achtung2,...", f))
+ // Output: ["Achtung1! Achtung2"]
+}
+
func ExampleMap() {
rot13 := func(r rune) rune {
switch {
diff --git a/libgo/go/strings/replace_test.go b/libgo/go/strings/replace_test.go
index 77e48b988bc..34b5badfadf 100644
--- a/libgo/go/strings/replace_test.go
+++ b/libgo/go/strings/replace_test.go
@@ -540,3 +540,44 @@ func BenchmarkByteByteMap(b *testing.B) {
Map(fn, str)
}
}
+
+var mapdata = []struct{ name, data string }{
+ {"ASCII", "a b c d e f g h i j k l m n o p q r s t u v w x y z"},
+ {"Greek", "α β γ δ ε ζ η θ ι κ λ μ ν ξ ο π ρ ς σ τ υ φ χ ψ ω"},
+}
+
+func BenchmarkMap(b *testing.B) {
+ mapidentity := func(r rune) rune {
+ return r
+ }
+
+ b.Run("identity", func(b *testing.B) {
+ for _, md := range mapdata {
+ b.Run(md.name, func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ Map(mapidentity, md.data)
+ }
+ })
+ }
+ })
+
+ mapchange := func(r rune) rune {
+ if 'a' <= r && r <= 'z' {
+ return r + 'A' - 'a'
+ }
+ if 'α' <= r && r <= 'ω' {
+ return r + 'Α' - 'α'
+ }
+ return r
+ }
+
+ b.Run("change", func(b *testing.B) {
+ for _, md := range mapdata {
+ b.Run(md.name, func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ Map(mapchange, md.data)
+ }
+ })
+ }
+ })
+}
diff --git a/libgo/go/strings/strings.go b/libgo/go/strings/strings.go
index 60a281a6ac5..0c836c09d46 100644
--- a/libgo/go/strings/strings.go
+++ b/libgo/go/strings/strings.go
@@ -72,22 +72,20 @@ func hashStrRev(sep string) (uint32, uint32) {
return hash, pow
}
-// Count counts the number of non-overlapping instances of sep in s.
-// If sep is an empty string, Count returns 1 + the number of Unicode code points in s.
-func Count(s, sep string) int {
- n := 0
- // special cases
- if len(sep) == 0 {
+// countGeneric implements Count.
+func countGeneric(s, substr string) int {
+ // special case
+ if len(substr) == 0 {
return utf8.RuneCountInString(s) + 1
}
- offset := 0
+ n := 0
for {
- i := Index(s[offset:], sep)
+ i := Index(s, substr)
if i == -1 {
return n
}
n++
- offset += i + len(sep)
+ s = s[i+len(substr):]
}
}
@@ -106,16 +104,16 @@ func ContainsRune(s string, r rune) bool {
return IndexRune(s, r) >= 0
}
-// LastIndex returns the index of the last instance of sep in s, or -1 if sep is not present in s.
-func LastIndex(s, sep string) int {
- n := len(sep)
+// LastIndex returns the index of the last instance of substr in s, or -1 if substr is not present in s.
+func LastIndex(s, substr string) int {
+ n := len(substr)
switch {
case n == 0:
return len(s)
case n == 1:
- return LastIndexByte(s, sep[0])
+ return LastIndexByte(s, substr[0])
case n == len(s):
- if sep == s {
+ if substr == s {
return 0
}
return -1
@@ -123,20 +121,20 @@ func LastIndex(s, sep string) int {
return -1
}
// Rabin-Karp search from the end of the string
- hashsep, pow := hashStrRev(sep)
+ hashss, pow := hashStrRev(substr)
last := len(s) - n
var h uint32
for i := len(s) - 1; i >= last; i-- {
h = h*primeRK + uint32(s[i])
}
- if h == hashsep && s[last:] == sep {
+ if h == hashss && s[last:] == substr {
return last
}
for i := last - 1; i >= 0; i-- {
h *= primeRK
h += uint32(s[i])
h -= pow * uint32(s[i+n])
- if h == hashsep && s[i:i+n] == sep {
+ if h == hashss && s[i:i+n] == substr {
return i
}
}
@@ -240,61 +238,187 @@ func genSplit(s, sep string, sepSave, n int) []string {
if n < 0 {
n = Count(s, sep) + 1
}
- c := sep[0]
- start := 0
+
a := make([]string, n)
- na := 0
- for i := 0; i+len(sep) <= len(s) && na+1 < n; i++ {
- if s[i] == c && (len(sep) == 1 || s[i:i+len(sep)] == sep) {
- a[na] = s[start : i+sepSave]
- na++
- start = i + len(sep)
- i += len(sep) - 1
- }
+ n--
+ i := 0
+ for i < n {
+ m := Index(s, sep)
+ if m < 0 {
+ break
+ }
+ a[i] = s[:m+sepSave]
+ s = s[m+len(sep):]
+ i++
}
- a[na] = s[start:]
- return a[0 : na+1]
+ a[i] = s
+ return a[:i+1]
}
// SplitN slices s into substrings separated by sep and returns a slice of
// the substrings between those separators.
-// If sep is empty, SplitN splits after each UTF-8 sequence.
+//
// The count determines the number of substrings to return:
// n > 0: at most n substrings; the last substring will be the unsplit remainder.
// n == 0: the result is nil (zero substrings)
// n < 0: all substrings
+//
+// Edge cases for s and sep (for example, empty strings) are handled
+// as described in the documentation for Split.
func SplitN(s, sep string, n int) []string { return genSplit(s, sep, 0, n) }
// SplitAfterN slices s into substrings after each instance of sep and
// returns a slice of those substrings.
-// If sep is empty, SplitAfterN splits after each UTF-8 sequence.
+//
// The count determines the number of substrings to return:
// n > 0: at most n substrings; the last substring will be the unsplit remainder.
// n == 0: the result is nil (zero substrings)
// n < 0: all substrings
+//
+// Edge cases for s and sep (for example, empty strings) are handled
+// as described in the documentation for SplitAfter.
func SplitAfterN(s, sep string, n int) []string {
return genSplit(s, sep, len(sep), n)
}
// Split slices s into all substrings separated by sep and returns a slice of
// the substrings between those separators.
-// If sep is empty, Split splits after each UTF-8 sequence.
+//
+// If s does not contain sep and sep is not empty, Split returns a
+// slice of length 1 whose only element is s.
+//
+// If sep is empty, Split splits after each UTF-8 sequence. If both s
+// and sep are empty, Split returns an empty slice.
+//
// It is equivalent to SplitN with a count of -1.
func Split(s, sep string) []string { return genSplit(s, sep, 0, -1) }
// SplitAfter slices s into all substrings after each instance of sep and
// returns a slice of those substrings.
-// If sep is empty, SplitAfter splits after each UTF-8 sequence.
+//
+// If s does not contain sep and sep is not empty, SplitAfter returns
+// a slice of length 1 whose only element is s.
+//
+// If sep is empty, SplitAfter splits after each UTF-8 sequence. If
+// both s and sep are empty, SplitAfter returns an empty slice.
+//
// It is equivalent to SplitAfterN with a count of -1.
func SplitAfter(s, sep string) []string {
return genSplit(s, sep, len(sep), -1)
}
+var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1}
+
// Fields splits the string s around each instance of one or more consecutive white space
// characters, as defined by unicode.IsSpace, returning an array of substrings of s or an
// empty list if s contains only white space.
func Fields(s string) []string {
- return FieldsFunc(s, unicode.IsSpace)
+ // First count the fields.
+ // This is an exact count if s is ASCII, otherwise it is an approximation.
+ n := 0
+ wasSpace := 1
+ // setBits is used to track which bits are set in the bytes of s.
+ setBits := uint8(0)
+ for i := 0; i < len(s); i++ {
+ r := s[i]
+ setBits |= r
+ isSpace := int(asciiSpace[r])
+ n += wasSpace & ^isSpace
+ wasSpace = isSpace
+ }
+
+ if setBits < utf8.RuneSelf { // ASCII fast path
+ a := make([]string, n)
+ na := 0
+ fieldStart := 0
+ i := 0
+ // Skip spaces in the front of the input.
+ for i < len(s) && asciiSpace[s[i]] != 0 {
+ i++
+ }
+ fieldStart = i
+ for i < len(s) {
+ if asciiSpace[s[i]] == 0 {
+ i++
+ continue
+ }
+ a[na] = s[fieldStart:i]
+ na++
+ i++
+ // Skip spaces in between fields.
+ for i < len(s) && asciiSpace[s[i]] != 0 {
+ i++
+ }
+ fieldStart = i
+ }
+ if fieldStart < len(s) { // Last field might end at EOF.
+ a[na] = s[fieldStart:]
+ }
+ return a
+ }
+
+ // Some runes in the input string are not ASCII.
+ // Same general approach as in the ASCII path but
+ // uses DecodeRuneInString and unicode.IsSpace if
+ // a non-ASCII rune needs to be decoded and checked
+ // if it corresponds to a space.
+ a := make([]string, 0, n)
+ fieldStart := 0
+ i := 0
+ // Skip spaces in the front of the input.
+ for i < len(s) {
+ if c := s[i]; c < utf8.RuneSelf {
+ if asciiSpace[c] == 0 {
+ break
+ }
+ i++
+ } else {
+ r, w := utf8.DecodeRuneInString(s[i:])
+ if !unicode.IsSpace(r) {
+ break
+ }
+ i += w
+ }
+ }
+ fieldStart = i
+ for i < len(s) {
+ if c := s[i]; c < utf8.RuneSelf {
+ if asciiSpace[c] == 0 {
+ i++
+ continue
+ }
+ a = append(a, s[fieldStart:i])
+ i++
+ } else {
+ r, w := utf8.DecodeRuneInString(s[i:])
+ if !unicode.IsSpace(r) {
+ i += w
+ continue
+ }
+ a = append(a, s[fieldStart:i])
+ i += w
+ }
+ // Skip spaces in between fields.
+ for i < len(s) {
+ if c := s[i]; c < utf8.RuneSelf {
+ if asciiSpace[c] == 0 {
+ break
+ }
+ i++
+ } else {
+ r, w := utf8.DecodeRuneInString(s[i:])
+ if !unicode.IsSpace(r) {
+ break
+ }
+ i += w
+ }
+ }
+ fieldStart = i
+ }
+ if fieldStart < len(s) { // Last field might end at EOF.
+ a = append(a, s[fieldStart:])
+ }
+ return a
}
// FieldsFunc splits the string s at each run of Unicode code points c satisfying f(c)
@@ -383,40 +507,71 @@ func Map(mapping func(rune) rune, s string) string {
// In the worst case, the string can grow when mapped, making
// things unpleasant. But it's so rare we barge in assuming it's
// fine. It could also shrink but that falls out naturally.
- maxbytes := len(s) // length of b
- nbytes := 0 // number of bytes encoded in b
+
// The output buffer b is initialized on demand, the first
// time a character differs.
var b []byte
+ // nbytes is the number of bytes encoded in b.
+ var nbytes int
for i, c := range s {
r := mapping(c)
- if b == nil {
- if r == c {
- continue
- }
- b = make([]byte, maxbytes)
- nbytes = copy(b, s[:i])
+ if r == c {
+ continue
}
+
+ b = make([]byte, len(s)+utf8.UTFMax)
+ nbytes = copy(b, s[:i])
if r >= 0 {
- wid := 1
- if r >= utf8.RuneSelf {
- wid = utf8.RuneLen(r)
+ if r <= utf8.RuneSelf {
+ b[nbytes] = byte(r)
+ nbytes++
+ } else {
+ nbytes += utf8.EncodeRune(b[nbytes:], r)
}
- if nbytes+wid > maxbytes {
- // Grow the buffer.
- maxbytes = maxbytes*2 + utf8.UTFMax
- nb := make([]byte, maxbytes)
- copy(nb, b[0:nbytes])
- b = nb
- }
- nbytes += utf8.EncodeRune(b[nbytes:maxbytes], r)
}
+
+ if c == utf8.RuneError {
+ // RuneError is the result of either decoding
+ // an invalid sequence or '\uFFFD'. Determine
+ // the correct number of bytes we need to advance.
+ _, w := utf8.DecodeRuneInString(s[i:])
+ i += w
+ } else {
+ i += utf8.RuneLen(c)
+ }
+
+ s = s[i:]
+ break
}
+
if b == nil {
return s
}
- return string(b[0:nbytes])
+
+ for _, c := range s {
+ r := mapping(c)
+
+ // common case
+ if (0 <= r && r <= utf8.RuneSelf) && nbytes < len(b) {
+ b[nbytes] = byte(r)
+ nbytes++
+ continue
+ }
+
+ // b is not big enough or r is not a ASCII rune.
+ if r >= 0 {
+ if nbytes+utf8.UTFMax >= len(b) {
+ // Grow the buffer.
+ nb := make([]byte, 2*len(b))
+ copy(nb, b[:nbytes])
+ b = nb
+ }
+ nbytes += utf8.EncodeRune(b[nbytes:], r)
+ }
+ }
+
+ return string(b[:nbytes])
}
// Repeat returns a new string consisting of count copies of the string s.
@@ -561,17 +716,10 @@ func LastIndexFunc(s string, f func(rune) bool) int {
// truth==false, the sense of the predicate function is
// inverted.
func indexFunc(s string, f func(rune) bool, truth bool) int {
- start := 0
- for start < len(s) {
- wid := 1
- r := rune(s[start])
- if r >= utf8.RuneSelf {
- r, wid = utf8.DecodeRuneInString(s[start:])
- }
+ for i, r := range s {
if f(r) == truth {
- return start
+ return i
}
- start += wid
}
return -1
}
diff --git a/libgo/go/strings/strings_amd64.go b/libgo/go/strings/strings_amd64.go
index e55afd53d01..9648912fd5b 100644
--- a/libgo/go/strings/strings_amd64.go
+++ b/libgo/go/strings/strings_amd64.go
@@ -6,44 +6,46 @@
package strings
+import "internal/cpu"
+
//go:noescape
// indexShortStr returns the index of the first instance of c in s, or -1 if c is not present in s.
// indexShortStr requires 2 <= len(c) <= shortStringLen
-func indexShortStr(s, c string) int // ../runtime/asm_$GOARCH.s
-func supportAVX2() bool // ../runtime/asm_$GOARCH.s
+func indexShortStr(s, c string) int // ../runtime/asm_amd64.s
+func countByte(s string, c byte) int // ../runtime/asm_amd64.s
var shortStringLen int
func init() {
- if supportAVX2() {
+ if cpu.X86.HasAVX2 {
shortStringLen = 63
} else {
shortStringLen = 31
}
}
-// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
-func Index(s, sep string) int {
- n := len(sep)
+// Index returns the index of the first instance of substr in s, or -1 if substr is not present in s.
+func Index(s, substr string) int {
+ n := len(substr)
switch {
case n == 0:
return 0
case n == 1:
- return IndexByte(s, sep[0])
+ return IndexByte(s, substr[0])
case n == len(s):
- if sep == s {
+ if substr == s {
return 0
}
return -1
case n > len(s):
return -1
case n <= shortStringLen:
- // Use brute force when s and sep both are small
+ // Use brute force when s and substr both are small
if len(s) <= 64 {
- return indexShortStr(s, sep)
+ return indexShortStr(s, substr)
}
- c := sep[0]
+ c := substr[0]
i := 0
t := s[:len(s)-n+1]
fails := 0
@@ -57,7 +59,7 @@ func Index(s, sep string) int {
}
i += o
}
- if s[i:i+n] == sep {
+ if s[i:i+n] == substr {
return i
}
fails++
@@ -66,7 +68,7 @@ func Index(s, sep string) int {
// Too many means more that 1 error per 8 characters.
// Allow some errors in the beginning.
if fails > (i+16)/8 {
- r := indexShortStr(s[i:], sep)
+ r := indexShortStr(s[i:], substr)
if r >= 0 {
return r + i
}
@@ -76,12 +78,12 @@ func Index(s, sep string) int {
return -1
}
// Rabin-Karp search
- hashsep, pow := hashStr(sep)
+ hashss, pow := hashStr(substr)
var h uint32
for i := 0; i < n; i++ {
h = h*primeRK + uint32(s[i])
}
- if h == hashsep && s[:n] == sep {
+ if h == hashss && s[:n] == substr {
return 0
}
for i := n; i < len(s); {
@@ -89,9 +91,18 @@ func Index(s, sep string) int {
h += uint32(s[i])
h -= pow * uint32(s[i-n])
i++
- if h == hashsep && s[i-n:i] == sep {
+ if h == hashss && s[i-n:i] == substr {
return i - n
}
}
return -1
}
+
+// Count counts the number of non-overlapping instances of substr in s.
+// If substr is an empty string, Count returns 1 + the number of Unicode code points in s.
+func Count(s, substr string) int {
+ if len(substr) == 1 && cpu.X86.HasPOPCNT {
+ return countByte(s, byte(substr[0]))
+ }
+ return countGeneric(s, substr)
+}
diff --git a/libgo/go/strings/strings_generic.go b/libgo/go/strings/strings_generic.go
index a3ad515444b..9844201db30 100644
--- a/libgo/go/strings/strings_generic.go
+++ b/libgo/go/strings/strings_generic.go
@@ -9,16 +9,16 @@ package strings
// TODO: implements short string optimization on non amd64 platforms
// and get rid of strings_amd64.go
-// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
-func Index(s, sep string) int {
- n := len(sep)
+// Index returns the index of the first instance of substr in s, or -1 if substr is not present in s.
+func Index(s, substr string) int {
+ n := len(substr)
switch {
case n == 0:
return 0
case n == 1:
- return IndexByte(s, sep[0])
+ return IndexByte(s, substr[0])
case n == len(s):
- if sep == s {
+ if substr == s {
return 0
}
return -1
@@ -26,12 +26,12 @@ func Index(s, sep string) int {
return -1
}
// Rabin-Karp search
- hashsep, pow := hashStr(sep)
+ hashss, pow := hashStr(substr)
var h uint32
for i := 0; i < n; i++ {
h = h*primeRK + uint32(s[i])
}
- if h == hashsep && s[:n] == sep {
+ if h == hashss && s[:n] == substr {
return 0
}
for i := n; i < len(s); {
@@ -39,9 +39,15 @@ func Index(s, sep string) int {
h += uint32(s[i])
h -= pow * uint32(s[i-n])
i++
- if h == hashsep && s[i-n:i] == sep {
+ if h == hashss && s[i-n:i] == substr {
return i - n
}
}
return -1
}
+
+// Count counts the number of non-overlapping instances of substr in s.
+// If substr is an empty string, Count returns 1 + the number of Unicode code points in s.
+func Count(s, substr string) int {
+ return countGeneric(s, substr)
+}
diff --git a/libgo/go/strings/strings_s390x.go b/libgo/go/strings/strings_s390x.go
index b47702fd51a..b05fb2b025a 100644
--- a/libgo/go/strings/strings_s390x.go
+++ b/libgo/go/strings/strings_s390x.go
@@ -26,27 +26,27 @@ func init() {
}
}
-// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
-func Index(s, sep string) int {
- n := len(sep)
+// Index returns the index of the first instance of substr in s, or -1 if substr is not present in s.
+func Index(s, substr string) int {
+ n := len(substr)
switch {
case n == 0:
return 0
case n == 1:
- return IndexByte(s, sep[0])
+ return IndexByte(s, substr[0])
case n == len(s):
- if sep == s {
+ if substr == s {
return 0
}
return -1
case n > len(s):
return -1
case n <= shortStringLen:
- // Use brute force when s and sep both are small
+ // Use brute force when s and substr both are small
if len(s) <= 64 {
- return indexShortStr(s, sep)
+ return indexShortStr(s, substr)
}
- c := sep[0]
+ c := substr[0]
i := 0
t := s[:len(s)-n+1]
fails := 0
@@ -60,7 +60,7 @@ func Index(s, sep string) int {
}
i += o
}
- if s[i:i+n] == sep {
+ if s[i:i+n] == substr {
return i
}
fails++
@@ -69,7 +69,7 @@ func Index(s, sep string) int {
// Too many means more that 1 error per 8 characters.
// Allow some errors in the beginning.
if fails > (i+16)/8 {
- r := indexShortStr(s[i:], sep)
+ r := indexShortStr(s[i:], substr)
if r >= 0 {
return r + i
}
@@ -79,12 +79,12 @@ func Index(s, sep string) int {
return -1
}
// Rabin-Karp search
- hashsep, pow := hashStr(sep)
+ hashss, pow := hashStr(substr)
var h uint32
for i := 0; i < n; i++ {
h = h*primeRK + uint32(s[i])
}
- if h == hashsep && s[:n] == sep {
+ if h == hashss && s[:n] == substr {
return 0
}
for i := n; i < len(s); {
@@ -92,9 +92,15 @@ func Index(s, sep string) int {
h += uint32(s[i])
h -= pow * uint32(s[i-n])
i++
- if h == hashsep && s[i-n:i] == sep {
+ if h == hashss && s[i-n:i] == substr {
return i - n
}
}
return -1
}
+
+// Count counts the number of non-overlapping instances of substr in s.
+// If substr is an empty string, Count returns 1 + the number of Unicode code points in s.
+func Count(s, substr string) int {
+ return countGeneric(s, substr)
+}
diff --git a/libgo/go/strings/strings_test.go b/libgo/go/strings/strings_test.go
index 449fb502d64..0fddaf0e4a6 100644
--- a/libgo/go/strings/strings_test.go
+++ b/libgo/go/strings/strings_test.go
@@ -456,6 +456,7 @@ var fieldstests = []FieldsTest{
{"", []string{}},
{" ", []string{}},
{" \t ", []string{}},
+ {"\u2000", []string{}},
{" abc ", []string{"abc"}},
{"1 2 3 4", []string{"1", "2", "3", "4"}},
{"1 2 3 4", []string{"1", "2", "3", "4"}},
@@ -463,6 +464,9 @@ var fieldstests = []FieldsTest{
{"1\u20002\u20013\u20024", []string{"1", "2", "3", "4"}},
{"\u2000\u2001\u2002", []string{}},
{"\n™\t™\n", []string{"™", "™"}},
+ {"\n\u20001™2\u2000 \u2001 ™", []string{"1™2", "™"}},
+ {"\n1\uFFFD \uFFFD2\u20003\uFFFD4", []string{"1\uFFFD", "\uFFFD2", "3\uFFFD4"}},
+ {"1\xFF\u2000\xFF2\xFF \xFF", []string{"1\xFF", "\xFF2\xFF", "\xFF"}},
{faces, []string{faces}},
}
@@ -629,6 +633,19 @@ func TestMap(t *testing.T) {
(*reflect.StringHeader)(unsafe.Pointer(&m)).Data {
t.Error("unexpected copy during identity map")
}
+
+ // 7. Handle invalid UTF-8 sequence
+ replaceNotLatin := func(r rune) rune {
+ if unicode.Is(unicode.Latin, r) {
+ return r
+ }
+ return '?'
+ }
+ m = Map(replaceNotLatin, "Hello\255World")
+ expect = "Hello?World"
+ if m != expect {
+ t.Errorf("replace invalid sequence: expected %q got %q", expect, m)
+ }
}
func TestToUpper(t *testing.T) { runStringTests(t, ToUpper, "ToUpper", upperTests) }
@@ -1444,6 +1461,24 @@ func BenchmarkCountTortureOverlapping(b *testing.B) {
}
}
+func BenchmarkCountByte(b *testing.B) {
+ indexSizes := []int{10, 32, 4 << 10, 4 << 20, 64 << 20}
+ benchStr := Repeat(benchmarkString,
+ (indexSizes[len(indexSizes)-1]+len(benchmarkString)-1)/len(benchmarkString))
+ benchFunc := func(b *testing.B, benchStr string) {
+ b.SetBytes(int64(len(benchStr)))
+ for i := 0; i < b.N; i++ {
+ Count(benchStr, "=")
+ }
+ }
+ for _, size := range indexSizes {
+ b.Run(fmt.Sprintf("%d", size), func(b *testing.B) {
+ benchFunc(b, benchStr[:size])
+ })
+ }
+
+}
+
var makeFieldsInput = func() string {
x := make([]byte, 1<<20)
// Input is ~10% space, ~10% 2-byte UTF-8, rest ASCII non-space.
@@ -1464,40 +1499,88 @@ var makeFieldsInput = func() string {
return string(x)
}
-var fieldsInput = makeFieldsInput()
+var makeFieldsInputASCII = func() string {
+ x := make([]byte, 1<<20)
+ // Input is ~10% space, rest ASCII non-space.
+ for i := range x {
+ if rand.Intn(10) == 0 {
+ x[i] = ' '
+ } else {
+ x[i] = 'x'
+ }
+ }
+ return string(x)
+}
+
+var stringdata = []struct{ name, data string }{
+ {"ASCII", makeFieldsInputASCII()},
+ {"Mixed", makeFieldsInput()},
+}
func BenchmarkFields(b *testing.B) {
- b.SetBytes(int64(len(fieldsInput)))
- for i := 0; i < b.N; i++ {
- Fields(fieldsInput)
+ for _, sd := range stringdata {
+ b.Run(sd.name, func(b *testing.B) {
+ for j := 1 << 4; j <= 1<<20; j <<= 4 {
+ b.Run(fmt.Sprintf("%d", j), func(b *testing.B) {
+ b.ReportAllocs()
+ b.SetBytes(int64(j))
+ data := sd.data[:j]
+ for i := 0; i < b.N; i++ {
+ Fields(data)
+ }
+ })
+ }
+ })
}
}
func BenchmarkFieldsFunc(b *testing.B) {
- b.SetBytes(int64(len(fieldsInput)))
- for i := 0; i < b.N; i++ {
- FieldsFunc(fieldsInput, unicode.IsSpace)
+ for _, sd := range stringdata {
+ b.Run(sd.name, func(b *testing.B) {
+ for j := 1 << 4; j <= 1<<20; j <<= 4 {
+ b.Run(fmt.Sprintf("%d", j), func(b *testing.B) {
+ b.ReportAllocs()
+ b.SetBytes(int64(j))
+ data := sd.data[:j]
+ for i := 0; i < b.N; i++ {
+ FieldsFunc(data, unicode.IsSpace)
+ }
+ })
+ }
+ })
}
}
-func BenchmarkSplit1(b *testing.B) {
+func BenchmarkSplitEmptySeparator(b *testing.B) {
for i := 0; i < b.N; i++ {
Split(benchInputHard, "")
}
}
-func BenchmarkSplit2(b *testing.B) {
+func BenchmarkSplitSingleByteSeparator(b *testing.B) {
for i := 0; i < b.N; i++ {
Split(benchInputHard, "/")
}
}
-func BenchmarkSplit3(b *testing.B) {
+func BenchmarkSplitMultiByteSeparator(b *testing.B) {
for i := 0; i < b.N; i++ {
Split(benchInputHard, "hello")
}
}
+func BenchmarkSplitNSingleByteSeparator(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ SplitN(benchInputHard, "/", 10)
+ }
+}
+
+func BenchmarkSplitNMultiByteSeparator(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ SplitN(benchInputHard, "hello", 10)
+ }
+}
+
func BenchmarkRepeat(b *testing.B) {
for i := 0; i < b.N; i++ {
Repeat("-", 80)