summaryrefslogtreecommitdiff
path: root/libgo/go/strconv/quote.go
diff options
context:
space:
mode:
Diffstat (limited to 'libgo/go/strconv/quote.go')
-rw-r--r--libgo/go/strconv/quote.go181
1 files changed, 136 insertions, 45 deletions
diff --git a/libgo/go/strconv/quote.go b/libgo/go/strconv/quote.go
index 61dbcae70f4..8a73f9d3b28 100644
--- a/libgo/go/strconv/quote.go
+++ b/libgo/go/strconv/quote.go
@@ -5,17 +5,15 @@
package strconv
import (
- "bytes"
- "strings"
- "unicode"
"unicode/utf8"
)
const lowerhex = "0123456789abcdef"
func quoteWith(s string, quote byte, ASCIIonly bool) string {
- var buf bytes.Buffer
- buf.WriteByte(quote)
+ var runeTmp [utf8.UTFMax]byte
+ buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
+ buf = append(buf, quote)
for width := 0; len(s) > 0; s = s[width:] {
r := rune(s[0])
width = 1
@@ -23,71 +21,72 @@ func quoteWith(s string, quote byte, ASCIIonly bool) string {
r, width = utf8.DecodeRuneInString(s)
}
if width == 1 && r == utf8.RuneError {
- buf.WriteString(`\x`)
- buf.WriteByte(lowerhex[s[0]>>4])
- buf.WriteByte(lowerhex[s[0]&0xF])
+ buf = append(buf, `\x`...)
+ buf = append(buf, lowerhex[s[0]>>4])
+ buf = append(buf, lowerhex[s[0]&0xF])
continue
}
if r == rune(quote) || r == '\\' { // always backslashed
- buf.WriteByte('\\')
- buf.WriteByte(byte(r))
+ buf = append(buf, '\\')
+ buf = append(buf, byte(r))
continue
}
if ASCIIonly {
- if r <= unicode.MaxASCII && unicode.IsPrint(r) {
- buf.WriteRune(r)
+ if r < utf8.RuneSelf && IsPrint(r) {
+ buf = append(buf, byte(r))
continue
}
- } else if unicode.IsPrint(r) {
- buf.WriteRune(r)
+ } else if IsPrint(r) {
+ n := utf8.EncodeRune(runeTmp[:], r)
+ buf = append(buf, runeTmp[:n]...)
continue
}
switch r {
case '\a':
- buf.WriteString(`\a`)
+ buf = append(buf, `\a`...)
case '\b':
- buf.WriteString(`\b`)
+ buf = append(buf, `\b`...)
case '\f':
- buf.WriteString(`\f`)
+ buf = append(buf, `\f`...)
case '\n':
- buf.WriteString(`\n`)
+ buf = append(buf, `\n`...)
case '\r':
- buf.WriteString(`\r`)
+ buf = append(buf, `\r`...)
case '\t':
- buf.WriteString(`\t`)
+ buf = append(buf, `\t`...)
case '\v':
- buf.WriteString(`\v`)
+ buf = append(buf, `\v`...)
default:
switch {
case r < ' ':
- buf.WriteString(`\x`)
- buf.WriteByte(lowerhex[s[0]>>4])
- buf.WriteByte(lowerhex[s[0]&0xF])
- case r > unicode.MaxRune:
+ buf = append(buf, `\x`...)
+ buf = append(buf, lowerhex[s[0]>>4])
+ buf = append(buf, lowerhex[s[0]&0xF])
+ case r > utf8.MaxRune:
r = 0xFFFD
fallthrough
case r < 0x10000:
- buf.WriteString(`\u`)
+ buf = append(buf, `\u`...)
for s := 12; s >= 0; s -= 4 {
- buf.WriteByte(lowerhex[r>>uint(s)&0xF])
+ buf = append(buf, lowerhex[r>>uint(s)&0xF])
}
default:
- buf.WriteString(`\U`)
+ buf = append(buf, `\U`...)
for s := 28; s >= 0; s -= 4 {
- buf.WriteByte(lowerhex[r>>uint(s)&0xF])
+ buf = append(buf, lowerhex[r>>uint(s)&0xF])
}
}
}
}
- buf.WriteByte(quote)
- return buf.String()
+ buf = append(buf, quote)
+ return string(buf)
}
// Quote returns a double-quoted Go string literal representing s. The
// returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
// control characters and non-printable characters as defined by
-// unicode.IsPrint.
+// IsPrint.
func Quote(s string) string {
return quoteWith(s, '"', false)
}
@@ -100,8 +99,7 @@ func AppendQuote(dst []byte, s string) []byte {
// QuoteToASCII returns a double-quoted Go string literal representing s.
// The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
-// non-ASCII characters and non-printable characters as defined by
-// unicode.IsPrint.
+// non-ASCII characters and non-printable characters as defined by IsPrint.
func QuoteToASCII(s string) string {
return quoteWith(s, '"', true)
}
@@ -114,8 +112,7 @@ func AppendQuoteToASCII(dst []byte, s string) []byte {
// QuoteRune returns a single-quoted Go character literal representing the
// rune. The returned string uses Go escape sequences (\t, \n, \xFF, \u0100)
-// for control characters and non-printable characters as defined by
-// unicode.IsPrint.
+// for control characters and non-printable characters as defined by IsPrint.
func QuoteRune(r rune) string {
// TODO: avoid the allocation here.
return quoteWith(string(r), '\'', false)
@@ -130,7 +127,7 @@ func AppendQuoteRune(dst []byte, r rune) []byte {
// QuoteRuneToASCII returns a single-quoted Go character literal representing
// the rune. The returned string uses Go escape sequences (\t, \n, \xFF,
// \u0100) for non-ASCII characters and non-printable characters as defined
-// by unicode.IsPrint.
+// by IsPrint.
func QuoteRuneToASCII(r rune) string {
// TODO: avoid the allocation here.
return quoteWith(string(r), '\'', true)
@@ -245,7 +242,7 @@ func UnquoteChar(s string, quote byte) (value rune, multibyte bool, tail string,
value = v
break
}
- if v > unicode.MaxRune {
+ if v > utf8.MaxRune {
err = ErrSyntax
return
}
@@ -304,7 +301,7 @@ func Unquote(s string) (t string, err error) {
s = s[1 : n-1]
if quote == '`' {
- if strings.Contains(s, "`") {
+ if contains(s, '`') {
return "", ErrSyntax
}
return s, nil
@@ -312,12 +309,12 @@ func Unquote(s string) (t string, err error) {
if quote != '"' && quote != '\'' {
return "", ErrSyntax
}
- if strings.Index(s, "\n") >= 0 {
+ if contains(s, '\n') {
return "", ErrSyntax
}
// Is it trivial? Avoid allocation.
- if strings.Index(s, `\`) < 0 && strings.IndexRune(s, rune(quote)) < 0 {
+ if !contains(s, '\\') && !contains(s, quote) {
switch quote {
case '"':
return s, nil
@@ -329,7 +326,8 @@ func Unquote(s string) (t string, err error) {
}
}
- var buf bytes.Buffer
+ var runeTmp [utf8.UTFMax]byte
+ buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
for len(s) > 0 {
c, multibyte, ss, err := UnquoteChar(s, quote)
if err != nil {
@@ -337,14 +335,107 @@ func Unquote(s string) (t string, err error) {
}
s = ss
if c < utf8.RuneSelf || !multibyte {
- buf.WriteByte(byte(c))
+ buf = append(buf, byte(c))
} else {
- buf.WriteString(string(c))
+ n := utf8.EncodeRune(runeTmp[:], c)
+ buf = append(buf, runeTmp[:n]...)
}
if quote == '\'' && len(s) != 0 {
// single-quoted must be single character
return "", ErrSyntax
}
}
- return buf.String(), nil
+ return string(buf), nil
+}
+
+// contains reports whether the string contains the byte c.
+func contains(s string, c byte) bool {
+ for i := 0; i < len(s); i++ {
+ if s[i] == c {
+ return true
+ }
+ }
+ return false
+}
+
+// bsearch16 returns the smallest i such that a[i] >= x.
+// If there is no such i, bsearch16 returns len(a).
+func bsearch16(a []uint16, x uint16) int {
+ i, j := 0, len(a)
+ for i < j {
+ h := i + (j-i)/2
+ if a[h] < x {
+ i = h + 1
+ } else {
+ j = h
+ }
+ }
+ return i
+}
+
+// bsearch32 returns the smallest i such that a[i] >= x.
+// If there is no such i, bsearch32 returns len(a).
+func bsearch32(a []uint32, x uint32) int {
+ i, j := 0, len(a)
+ for i < j {
+ h := i + (j-i)/2
+ if a[h] < x {
+ i = h + 1
+ } else {
+ j = h
+ }
+ }
+ return i
+}
+
+// TODO: IsPrint is a local implementation of unicode.IsPrint, verified by the tests
+// to give the same answer. It allows this package not to depend on unicode,
+// and therefore not pull in all the Unicode tables. If the linker were better
+// at tossing unused tables, we could get rid of this implementation.
+// That would be nice.
+
+// IsPrint reports whether the rune is defined as printable by Go, with
+// the same definition as unicode.IsPrint: letters, numbers, punctuation,
+// symbols and ASCII space.
+func IsPrint(r rune) bool {
+ // Fast check for Latin-1
+ if r <= 0xFF {
+ if 0x20 <= r && r <= 0x7E {
+ // All the ASCII is printable from space through DEL-1.
+ return true
+ }
+ if 0xA1 <= r && r <= 0xFF {
+ // Similarly for ¡ through ÿ...
+ return r != 0xAD // ...except for the bizarre soft hyphen.
+ }
+ return false
+ }
+
+ // Same algorithm, either on uint16 or uint32 value.
+ // First, find first i such that isPrint[i] >= x.
+ // This is the index of either the start or end of a pair that might span x.
+ // The start is even (isPrint[i&^1]) and the end is odd (isPrint[i|1]).
+ // If we find x in a range, make sure x is not in isNotPrint list.
+
+ if 0 <= r && r < 1<<16 {
+ rr, isPrint, isNotPrint := uint16(r), isPrint16, isNotPrint16
+ i := bsearch16(isPrint, rr)
+ if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr {
+ return false
+ }
+ j := bsearch16(isNotPrint, rr)
+ return j >= len(isNotPrint) || isNotPrint[j] != rr
+ }
+
+ rr, isPrint, isNotPrint := uint32(r), isPrint32, isNotPrint32
+ i := bsearch32(isPrint, rr)
+ if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr {
+ return false
+ }
+ if r >= 0x20000 {
+ return true
+ }
+ r -= 0x10000
+ j := bsearch16(isNotPrint, uint16(r))
+ return j >= len(isNotPrint) || isNotPrint[j] != uint16(r)
}