From fbc7e9ddbe572fe493d0c88f3e42f9bb74ffea02 Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Tue, 25 Oct 2011 22:22:09 -0700 Subject: bytes, strings: use rune Various rune-based APIs change. R=golang-dev, r CC=golang-dev http://codereview.appspot.com/5306044 --- src/pkg/strings/reader.go | 6 +-- src/pkg/strings/replace_test.go | 6 +-- src/pkg/strings/strings.go | 95 +++++++++++++++++++++-------------------- src/pkg/strings/strings_test.go | 76 ++++++++++++++++----------------- 4 files changed, 92 insertions(+), 91 deletions(-) (limited to 'src/pkg/strings') diff --git a/src/pkg/strings/reader.go b/src/pkg/strings/reader.go index eb515de00..f4385a437 100644 --- a/src/pkg/strings/reader.go +++ b/src/pkg/strings/reader.go @@ -60,16 +60,16 @@ func (r *Reader) UnreadByte() os.Error { // If no bytes are available, the error returned is os.EOF. // If the bytes are an erroneous UTF-8 encoding, it // consumes one byte and returns U+FFFD, 1. -func (r *Reader) ReadRune() (rune int, size int, err os.Error) { +func (r *Reader) ReadRune() (ch rune, size int, err os.Error) { if r.i >= len(r.s) { return 0, 0, os.EOF } r.prevRune = r.i if c := r.s[r.i]; c < utf8.RuneSelf { r.i++ - return int(c), 1, nil + return rune(c), 1, nil } - rune, size = utf8.DecodeRuneInString(r.s[r.i:]) + ch, size = utf8.DecodeRuneInString(r.s[r.i:]) r.i += size return } diff --git a/src/pkg/strings/replace_test.go b/src/pkg/strings/replace_test.go index e337856c6..23c7e2e53 100644 --- a/src/pkg/strings/replace_test.go +++ b/src/pkg/strings/replace_test.go @@ -159,12 +159,12 @@ func BenchmarkByteByteReplaces(b *testing.B) { // BenchmarkByteByteMap compares byteByteImpl against Map. func BenchmarkByteByteMap(b *testing.B) { str := Repeat("a", 100) + Repeat("b", 100) - fn := func(r int) int { + fn := func(r rune) rune { switch r { case 'a': - return int('A') + return 'A' case 'b': - return int('B') + return 'B' } return r } diff --git a/src/pkg/strings/strings.go b/src/pkg/strings/strings.go index 58301febd..4f6e8a6fe 100644 --- a/src/pkg/strings/strings.go +++ b/src/pkg/strings/strings.go @@ -21,11 +21,12 @@ func explode(s string, n int) []string { n = l } a := make([]string, n) - var size, rune int + var size int + var ch rune i, cur := 0, 0 for ; i+1 < n; i++ { - rune, size = utf8.DecodeRuneInString(s[cur:]) - a[i] = string(rune) + ch, size = utf8.DecodeRuneInString(s[cur:]) + a[i] = string(ch) cur += size } // add the rest, if there is any @@ -117,11 +118,11 @@ func LastIndex(s, sep string) int { } // IndexRune returns the index of the first instance of the Unicode code point -// rune, or -1 if rune is not present in s. -func IndexRune(s string, rune int) int { +// r, or -1 if rune is not present in s. +func IndexRune(s string, r rune) int { switch { - case rune < 0x80: - b := byte(rune) + case r < 0x80: + b := byte(r) for i := 0; i < len(s); i++ { if s[i] == b { return i @@ -129,7 +130,7 @@ func IndexRune(s string, rune int) int { } default: for i, c := range s { - if c == rune { + if c == r { return i } } @@ -241,7 +242,7 @@ func Fields(s string) []string { // FieldsFunc splits the string s at each run of Unicode code points c satisfying f(c) // and returns an array of slices of s. If all code points in s satisfy f(c) or the // string is empty, an empty slice is returned. -func FieldsFunc(s string, f func(int) bool) []string { +func FieldsFunc(s string, f func(rune) bool) []string { // First count the fields. n := 0 inField := false @@ -310,7 +311,7 @@ func HasSuffix(s, suffix string) bool { // Map returns a copy of the string s with all its characters modified // according to the mapping function. If mapping returns a negative value, the character is // dropped from the string with no replacement. -func Map(mapping func(rune int) int, s string) string { +func Map(mapping func(rune) rune, s string) string { // In the worst case, the string can grow when mapped, making // things unpleasant. But it's so rare we barge in assuming it's // fine. It could also shrink but that falls out naturally. @@ -321,18 +322,18 @@ func Map(mapping func(rune int) int, s string) string { var b []byte for i, c := range s { - rune := mapping(c) + r := mapping(c) if b == nil { - if rune == c { + if r == c { continue } b = make([]byte, maxbytes) nbytes = copy(b, s[:i]) } - if rune >= 0 { + if r >= 0 { wid := 1 - if rune >= utf8.RuneSelf { - wid = utf8.RuneLen(rune) + if r >= utf8.RuneSelf { + wid = utf8.RuneLen(r) } if nbytes+wid > maxbytes { // Grow the buffer. @@ -341,7 +342,7 @@ func Map(mapping func(rune int) int, s string) string { copy(nb, b[0:nbytes]) b = nb } - nbytes += utf8.EncodeRune(b[nbytes:maxbytes], rune) + nbytes += utf8.EncodeRune(b[nbytes:maxbytes], r) } } if b == nil { @@ -375,44 +376,44 @@ func ToTitle(s string) string { return Map(unicode.ToTitle, s) } // ToUpperSpecial returns a copy of the string s with all Unicode letters mapped to their // upper case, giving priority to the special casing rules. func ToUpperSpecial(_case unicode.SpecialCase, s string) string { - return Map(func(r int) int { return _case.ToUpper(r) }, s) + return Map(func(r rune) rune { return _case.ToUpper(r) }, s) } // ToLowerSpecial returns a copy of the string s with all Unicode letters mapped to their // lower case, giving priority to the special casing rules. func ToLowerSpecial(_case unicode.SpecialCase, s string) string { - return Map(func(r int) int { return _case.ToLower(r) }, s) + return Map(func(r rune) rune { return _case.ToLower(r) }, s) } // ToTitleSpecial returns a copy of the string s with all Unicode letters mapped to their // title case, giving priority to the special casing rules. func ToTitleSpecial(_case unicode.SpecialCase, s string) string { - return Map(func(r int) int { return _case.ToTitle(r) }, s) + return Map(func(r rune) rune { return _case.ToTitle(r) }, s) } // isSeparator reports whether the rune could mark a word boundary. // TODO: update when package unicode captures more of the properties. -func isSeparator(rune int) bool { +func isSeparator(r rune) bool { // ASCII alphanumerics and underscore are not separators - if rune <= 0x7F { + if r <= 0x7F { switch { - case '0' <= rune && rune <= '9': + case '0' <= r && r <= '9': return false - case 'a' <= rune && rune <= 'z': + case 'a' <= r && r <= 'z': return false - case 'A' <= rune && rune <= 'Z': + case 'A' <= r && r <= 'Z': return false - case rune == '_': + case r == '_': return false } return true } // Letters and digits are not separators - if unicode.IsLetter(rune) || unicode.IsDigit(rune) { + if unicode.IsLetter(r) || unicode.IsDigit(r) { return false } // Otherwise, all we can do for now is treat spaces as separators. - return unicode.IsSpace(rune) + return unicode.IsSpace(r) } // BUG(r): The rule Title uses for word boundaries does not handle Unicode punctuation properly. @@ -423,9 +424,9 @@ func Title(s string) string { // Use a closure here to remember state. // Hackish but effective. Depends on Map scanning in order and calling // the closure once per rune. - prev := ' ' + prev := rune(' ') return Map( - func(r int) int { + func(r rune) rune { if isSeparator(prev) { prev = r return unicode.ToTitle(r) @@ -438,7 +439,7 @@ func Title(s string) string { // TrimLeftFunc returns a slice of the string s with all leading // Unicode code points c satisfying f(c) removed. -func TrimLeftFunc(s string, f func(r int) bool) string { +func TrimLeftFunc(s string, f func(rune) bool) string { i := indexFunc(s, f, false) if i == -1 { return "" @@ -448,7 +449,7 @@ func TrimLeftFunc(s string, f func(r int) bool) string { // TrimRightFunc returns a slice of the string s with all trailing // Unicode code points c satisfying f(c) removed. -func TrimRightFunc(s string, f func(r int) bool) string { +func TrimRightFunc(s string, f func(rune) bool) string { i := lastIndexFunc(s, f, false) if i >= 0 && s[i] >= utf8.RuneSelf { _, wid := utf8.DecodeRuneInString(s[i:]) @@ -461,34 +462,34 @@ func TrimRightFunc(s string, f func(r int) bool) string { // TrimFunc returns a slice of the string s with all leading // and trailing Unicode code points c satisfying f(c) removed. -func TrimFunc(s string, f func(r int) bool) string { +func TrimFunc(s string, f func(rune) bool) string { return TrimRightFunc(TrimLeftFunc(s, f), f) } // IndexFunc returns the index into s of the first Unicode // code point satisfying f(c), or -1 if none do. -func IndexFunc(s string, f func(r int) bool) int { +func IndexFunc(s string, f func(rune) bool) int { return indexFunc(s, f, true) } // LastIndexFunc returns the index into s of the last // Unicode code point satisfying f(c), or -1 if none do. -func LastIndexFunc(s string, f func(r int) bool) int { +func LastIndexFunc(s string, f func(rune) bool) int { return lastIndexFunc(s, f, true) } // indexFunc is the same as IndexFunc except that if // truth==false, the sense of the predicate function is // inverted. -func indexFunc(s string, f func(r int) bool, truth bool) int { +func indexFunc(s string, f func(rune) bool, truth bool) int { start := 0 for start < len(s) { wid := 1 - rune := int(s[start]) - if rune >= utf8.RuneSelf { - rune, wid = utf8.DecodeRuneInString(s[start:]) + r := rune(s[start]) + if r >= utf8.RuneSelf { + r, wid = utf8.DecodeRuneInString(s[start:]) } - if f(rune) == truth { + if f(r) == truth { return start } start += wid @@ -499,19 +500,19 @@ func indexFunc(s string, f func(r int) bool, truth bool) int { // lastIndexFunc is the same as LastIndexFunc except that if // truth==false, the sense of the predicate function is // inverted. -func lastIndexFunc(s string, f func(r int) bool, truth bool) int { +func lastIndexFunc(s string, f func(rune) bool, truth bool) int { for i := len(s); i > 0; { - rune, size := utf8.DecodeLastRuneInString(s[0:i]) + r, size := utf8.DecodeLastRuneInString(s[0:i]) i -= size - if f(rune) == truth { + if f(r) == truth { return i } } return -1 } -func makeCutsetFunc(cutset string) func(rune int) bool { - return func(rune int) bool { return IndexRune(cutset, rune) != -1 } +func makeCutsetFunc(cutset string) func(rune) bool { + return func(r rune) bool { return IndexRune(cutset, r) != -1 } } // Trim returns a slice of the string s with all leading and @@ -589,15 +590,15 @@ func Replace(s, old, new string, n int) string { func EqualFold(s, t string) bool { for s != "" && t != "" { // Extract first rune from each string. - var sr, tr int + var sr, tr rune if s[0] < utf8.RuneSelf { - sr, s = int(s[0]), s[1:] + sr, s = rune(s[0]), s[1:] } else { r, size := utf8.DecodeRuneInString(s) sr, s = r, s[size:] } if t[0] < utf8.RuneSelf { - tr, t = int(t[0]), t[1:] + tr, t = rune(t[0]), t[1:] } else { r, size := utf8.DecodeRuneInString(t) tr, t = r, t[size:] diff --git a/src/pkg/strings/strings_test.go b/src/pkg/strings/strings_test.go index 0859ddd96..4132996c1 100644 --- a/src/pkg/strings/strings_test.go +++ b/src/pkg/strings/strings_test.go @@ -122,7 +122,7 @@ func TestLastIndexAny(t *testing.T) { runIndexTests(t, LastIndexAny, "LastIndexA var indexRuneTests = []struct { s string - rune int + rune rune out int }{ {"a A x", 'A', 2}, @@ -312,7 +312,7 @@ var FieldsFuncTests = []FieldsTest{ } func TestFieldsFunc(t *testing.T) { - pred := func(c int) bool { return c == 'X' } + pred := func(c rune) bool { return c == 'X' } for _, tt := range FieldsFuncTests { a := FieldsFunc(tt.s, pred) if !eq(a, tt.a) { @@ -374,31 +374,31 @@ var trimSpaceTests = []StringTest{ {"x ☺ ", "x ☺"}, } -func tenRunes(rune int) string { - r := make([]int, 10) +func tenRunes(ch rune) string { + r := make([]rune, 10) for i := range r { - r[i] = rune + r[i] = ch } return string(r) } // User-defined self-inverse mapping function -func rot13(rune int) int { - step := 13 - if rune >= 'a' && rune <= 'z' { - return ((rune - 'a' + step) % 26) + 'a' +func rot13(r rune) rune { + step := rune(13) + if r >= 'a' && r <= 'z' { + return ((r - 'a' + step) % 26) + 'a' } - if rune >= 'A' && rune <= 'Z' { - return ((rune - 'A' + step) % 26) + 'A' + if r >= 'A' && r <= 'Z' { + return ((r - 'A' + step) % 26) + 'A' } - return rune + return r } func TestMap(t *testing.T) { // Run a couple of awful growth/shrinkage tests a := tenRunes('a') // 1. Grow. This triggers two reallocations in Map. - maxRune := func(rune int) int { return unicode.MaxRune } + maxRune := func(rune) rune { return unicode.MaxRune } m := Map(maxRune, a) expect := tenRunes(unicode.MaxRune) if m != expect { @@ -406,7 +406,7 @@ func TestMap(t *testing.T) { } // 2. Shrink - minRune := func(rune int) int { return 'a' } + minRune := func(rune) rune { return 'a' } m = Map(minRune, tenRunes(unicode.MaxRune)) expect = a if m != expect { @@ -428,9 +428,9 @@ func TestMap(t *testing.T) { } // 5. Drop - dropNotLatin := func(rune int) int { - if unicode.Is(unicode.Latin, rune) { - return rune + dropNotLatin := func(r rune) rune { + if unicode.Is(unicode.Latin, r) { + return r } return -1 } @@ -441,8 +441,8 @@ func TestMap(t *testing.T) { } // 6. Identity - identity := func(rune int) int { - return rune + identity := func(r rune) rune { + return r } orig := "Input string that we expect not to be copied." m = Map(identity, orig) @@ -457,8 +457,8 @@ func TestToUpper(t *testing.T) { runStringTests(t, ToUpper, "ToUpper", upperTest func TestToLower(t *testing.T) { runStringTests(t, ToLower, "ToLower", lowerTests) } func BenchmarkMapNoChanges(b *testing.B) { - identity := func(rune int) int { - return rune + identity := func(r rune) rune { + return r } for i := 0; i < b.N; i++ { Map(identity, "Some string that won't be modified.") @@ -536,7 +536,7 @@ func TestTrim(t *testing.T) { } type predicate struct { - f func(r int) bool + f func(rune) bool name string } @@ -544,7 +544,7 @@ var isSpace = predicate{unicode.IsSpace, "IsSpace"} var isDigit = predicate{unicode.IsDigit, "IsDigit"} var isUpper = predicate{unicode.IsUpper, "IsUpper"} var isValidRune = predicate{ - func(r int) bool { + func(r rune) bool { return r != utf8.RuneError }, "IsValidRune", @@ -552,7 +552,7 @@ var isValidRune = predicate{ func not(p predicate) predicate { return predicate{ - func(r int) bool { + func(r rune) bool { return !p.f(r) }, "not " + p.name, @@ -645,9 +645,9 @@ func TestCaseConsistency(t *testing.T) { if testing.Short() { numRunes = 1000 } - a := make([]int, numRunes) + a := make([]rune, numRunes) for i := range a { - a[i] = i + a[i] = rune(i) } s := string(a) // convert the cases. @@ -706,7 +706,7 @@ func TestRepeat(t *testing.T) { } } -func runesEqual(a, b []int) bool { +func runesEqual(a, b []rune) bool { if len(a) != len(b) { return false } @@ -720,30 +720,30 @@ func runesEqual(a, b []int) bool { var RunesTests = []struct { in string - out []int + out []rune lossy bool }{ - {"", []int{}, false}, - {" ", []int{32}, false}, - {"ABC", []int{65, 66, 67}, false}, - {"abc", []int{97, 98, 99}, false}, - {"\u65e5\u672c\u8a9e", []int{26085, 26412, 35486}, false}, - {"ab\x80c", []int{97, 98, 0xFFFD, 99}, true}, - {"ab\xc0c", []int{97, 98, 0xFFFD, 99}, true}, + {"", []rune{}, false}, + {" ", []rune{32}, false}, + {"ABC", []rune{65, 66, 67}, false}, + {"abc", []rune{97, 98, 99}, false}, + {"\u65e5\u672c\u8a9e", []rune{26085, 26412, 35486}, false}, + {"ab\x80c", []rune{97, 98, 0xFFFD, 99}, true}, + {"ab\xc0c", []rune{97, 98, 0xFFFD, 99}, true}, } func TestRunes(t *testing.T) { for _, tt := range RunesTests { - a := []int(tt.in) + a := []rune(tt.in) if !runesEqual(a, tt.out) { - t.Errorf("[]int(%q) = %v; want %v", tt.in, a, tt.out) + t.Errorf("[]rune(%q) = %v; want %v", tt.in, a, tt.out) continue } if !tt.lossy { // can only test reassembly if we didn't lose information s := string(a) if s != tt.in { - t.Errorf("string([]int(%q)) = %x; want %x", tt.in, s, tt.in) + t.Errorf("string([]rune(%q)) = %x; want %x", tt.in, s, tt.in) } } } -- cgit v1.2.1