diff options
Diffstat (limited to 'libgo/go/strconv/quote.go')
-rw-r--r-- | libgo/go/strconv/quote.go | 181 |
1 files changed, 136 insertions, 45 deletions
diff --git a/libgo/go/strconv/quote.go b/libgo/go/strconv/quote.go index 61dbcae70f4..8a73f9d3b28 100644 --- a/libgo/go/strconv/quote.go +++ b/libgo/go/strconv/quote.go @@ -5,17 +5,15 @@ package strconv import ( - "bytes" - "strings" - "unicode" "unicode/utf8" ) const lowerhex = "0123456789abcdef" func quoteWith(s string, quote byte, ASCIIonly bool) string { - var buf bytes.Buffer - buf.WriteByte(quote) + var runeTmp [utf8.UTFMax]byte + buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations. + buf = append(buf, quote) for width := 0; len(s) > 0; s = s[width:] { r := rune(s[0]) width = 1 @@ -23,71 +21,72 @@ func quoteWith(s string, quote byte, ASCIIonly bool) string { r, width = utf8.DecodeRuneInString(s) } if width == 1 && r == utf8.RuneError { - buf.WriteString(`\x`) - buf.WriteByte(lowerhex[s[0]>>4]) - buf.WriteByte(lowerhex[s[0]&0xF]) + buf = append(buf, `\x`...) + buf = append(buf, lowerhex[s[0]>>4]) + buf = append(buf, lowerhex[s[0]&0xF]) continue } if r == rune(quote) || r == '\\' { // always backslashed - buf.WriteByte('\\') - buf.WriteByte(byte(r)) + buf = append(buf, '\\') + buf = append(buf, byte(r)) continue } if ASCIIonly { - if r <= unicode.MaxASCII && unicode.IsPrint(r) { - buf.WriteRune(r) + if r < utf8.RuneSelf && IsPrint(r) { + buf = append(buf, byte(r)) continue } - } else if unicode.IsPrint(r) { - buf.WriteRune(r) + } else if IsPrint(r) { + n := utf8.EncodeRune(runeTmp[:], r) + buf = append(buf, runeTmp[:n]...) continue } switch r { case '\a': - buf.WriteString(`\a`) + buf = append(buf, `\a`...) case '\b': - buf.WriteString(`\b`) + buf = append(buf, `\b`...) case '\f': - buf.WriteString(`\f`) + buf = append(buf, `\f`...) case '\n': - buf.WriteString(`\n`) + buf = append(buf, `\n`...) case '\r': - buf.WriteString(`\r`) + buf = append(buf, `\r`...) case '\t': - buf.WriteString(`\t`) + buf = append(buf, `\t`...) case '\v': - buf.WriteString(`\v`) + buf = append(buf, `\v`...) default: switch { case r < ' ': - buf.WriteString(`\x`) - buf.WriteByte(lowerhex[s[0]>>4]) - buf.WriteByte(lowerhex[s[0]&0xF]) - case r > unicode.MaxRune: + buf = append(buf, `\x`...) + buf = append(buf, lowerhex[s[0]>>4]) + buf = append(buf, lowerhex[s[0]&0xF]) + case r > utf8.MaxRune: r = 0xFFFD fallthrough case r < 0x10000: - buf.WriteString(`\u`) + buf = append(buf, `\u`...) for s := 12; s >= 0; s -= 4 { - buf.WriteByte(lowerhex[r>>uint(s)&0xF]) + buf = append(buf, lowerhex[r>>uint(s)&0xF]) } default: - buf.WriteString(`\U`) + buf = append(buf, `\U`...) for s := 28; s >= 0; s -= 4 { - buf.WriteByte(lowerhex[r>>uint(s)&0xF]) + buf = append(buf, lowerhex[r>>uint(s)&0xF]) } } } } - buf.WriteByte(quote) - return buf.String() + buf = append(buf, quote) + return string(buf) } // Quote returns a double-quoted Go string literal representing s. The // returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for // control characters and non-printable characters as defined by -// unicode.IsPrint. +// IsPrint. func Quote(s string) string { return quoteWith(s, '"', false) } @@ -100,8 +99,7 @@ func AppendQuote(dst []byte, s string) []byte { // QuoteToASCII returns a double-quoted Go string literal representing s. // The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for -// non-ASCII characters and non-printable characters as defined by -// unicode.IsPrint. +// non-ASCII characters and non-printable characters as defined by IsPrint. func QuoteToASCII(s string) string { return quoteWith(s, '"', true) } @@ -114,8 +112,7 @@ func AppendQuoteToASCII(dst []byte, s string) []byte { // QuoteRune returns a single-quoted Go character literal representing the // rune. The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) -// for control characters and non-printable characters as defined by -// unicode.IsPrint. +// for control characters and non-printable characters as defined by IsPrint. func QuoteRune(r rune) string { // TODO: avoid the allocation here. return quoteWith(string(r), '\'', false) @@ -130,7 +127,7 @@ func AppendQuoteRune(dst []byte, r rune) []byte { // QuoteRuneToASCII returns a single-quoted Go character literal representing // the rune. The returned string uses Go escape sequences (\t, \n, \xFF, // \u0100) for non-ASCII characters and non-printable characters as defined -// by unicode.IsPrint. +// by IsPrint. func QuoteRuneToASCII(r rune) string { // TODO: avoid the allocation here. return quoteWith(string(r), '\'', true) @@ -245,7 +242,7 @@ func UnquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, value = v break } - if v > unicode.MaxRune { + if v > utf8.MaxRune { err = ErrSyntax return } @@ -304,7 +301,7 @@ func Unquote(s string) (t string, err error) { s = s[1 : n-1] if quote == '`' { - if strings.Contains(s, "`") { + if contains(s, '`') { return "", ErrSyntax } return s, nil @@ -312,12 +309,12 @@ func Unquote(s string) (t string, err error) { if quote != '"' && quote != '\'' { return "", ErrSyntax } - if strings.Index(s, "\n") >= 0 { + if contains(s, '\n') { return "", ErrSyntax } // Is it trivial? Avoid allocation. - if strings.Index(s, `\`) < 0 && strings.IndexRune(s, rune(quote)) < 0 { + if !contains(s, '\\') && !contains(s, quote) { switch quote { case '"': return s, nil @@ -329,7 +326,8 @@ func Unquote(s string) (t string, err error) { } } - var buf bytes.Buffer + var runeTmp [utf8.UTFMax]byte + buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations. for len(s) > 0 { c, multibyte, ss, err := UnquoteChar(s, quote) if err != nil { @@ -337,14 +335,107 @@ func Unquote(s string) (t string, err error) { } s = ss if c < utf8.RuneSelf || !multibyte { - buf.WriteByte(byte(c)) + buf = append(buf, byte(c)) } else { - buf.WriteString(string(c)) + n := utf8.EncodeRune(runeTmp[:], c) + buf = append(buf, runeTmp[:n]...) } if quote == '\'' && len(s) != 0 { // single-quoted must be single character return "", ErrSyntax } } - return buf.String(), nil + return string(buf), nil +} + +// contains reports whether the string contains the byte c. +func contains(s string, c byte) bool { + for i := 0; i < len(s); i++ { + if s[i] == c { + return true + } + } + return false +} + +// bsearch16 returns the smallest i such that a[i] >= x. +// If there is no such i, bsearch16 returns len(a). +func bsearch16(a []uint16, x uint16) int { + i, j := 0, len(a) + for i < j { + h := i + (j-i)/2 + if a[h] < x { + i = h + 1 + } else { + j = h + } + } + return i +} + +// bsearch32 returns the smallest i such that a[i] >= x. +// If there is no such i, bsearch32 returns len(a). +func bsearch32(a []uint32, x uint32) int { + i, j := 0, len(a) + for i < j { + h := i + (j-i)/2 + if a[h] < x { + i = h + 1 + } else { + j = h + } + } + return i +} + +// TODO: IsPrint is a local implementation of unicode.IsPrint, verified by the tests +// to give the same answer. It allows this package not to depend on unicode, +// and therefore not pull in all the Unicode tables. If the linker were better +// at tossing unused tables, we could get rid of this implementation. +// That would be nice. + +// IsPrint reports whether the rune is defined as printable by Go, with +// the same definition as unicode.IsPrint: letters, numbers, punctuation, +// symbols and ASCII space. +func IsPrint(r rune) bool { + // Fast check for Latin-1 + if r <= 0xFF { + if 0x20 <= r && r <= 0x7E { + // All the ASCII is printable from space through DEL-1. + return true + } + if 0xA1 <= r && r <= 0xFF { + // Similarly for ¡ through ÿ... + return r != 0xAD // ...except for the bizarre soft hyphen. + } + return false + } + + // Same algorithm, either on uint16 or uint32 value. + // First, find first i such that isPrint[i] >= x. + // This is the index of either the start or end of a pair that might span x. + // The start is even (isPrint[i&^1]) and the end is odd (isPrint[i|1]). + // If we find x in a range, make sure x is not in isNotPrint list. + + if 0 <= r && r < 1<<16 { + rr, isPrint, isNotPrint := uint16(r), isPrint16, isNotPrint16 + i := bsearch16(isPrint, rr) + if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr { + return false + } + j := bsearch16(isNotPrint, rr) + return j >= len(isNotPrint) || isNotPrint[j] != rr + } + + rr, isPrint, isNotPrint := uint32(r), isPrint32, isNotPrint32 + i := bsearch32(isPrint, rr) + if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr { + return false + } + if r >= 0x20000 { + return true + } + r -= 0x10000 + j := bsearch16(isNotPrint, uint16(r)) + return j >= len(isNotPrint) || isNotPrint[j] != uint16(r) } |