diff options
author | Rob Pike <r@golang.org> | 2011-06-07 12:23:08 +0000 |
---|---|---|
committer | Rob Pike <r@golang.org> | 2011-06-07 12:23:08 +0000 |
commit | 7e17253a2a164f0b8ec58e4cb3478becc6304da7 (patch) | |
tree | 47b315035f019ca4e1240f29d04a503ce937e4fb /src/pkg/strconv | |
parent | eb2769ffbce4296c3447867620c6c641c686198e (diff) | |
download | go-7e17253a2a164f0b8ec58e4cb3478becc6304da7.tar.gz |
strconv: change Quote to be Unicode-friendly,
add QuoteToASCII.
The Quote and QuoteRune functions now let printable
runes (as defined by unicode.IsPrint) through. When
true 7-bit clean stuff is necessary, there are now two
new functions: QuoteToASCII and QuoteRuneToASCII.
Printf("%q") uses Quote. To get the old behavior, it
will now be necessary to say
Printf("%s", strconv.QuoteToASCII(s))
but that should rarely be necessary.
R=golang-dev, gri, r
CC=golang-dev
http://codereview.appspot.com/4561061
Diffstat (limited to 'src/pkg/strconv')
-rw-r--r-- | src/pkg/strconv/quote.go | 119 | ||||
-rw-r--r-- | src/pkg/strconv/quote_test.go | 62 |
2 files changed, 117 insertions, 64 deletions
diff --git a/src/pkg/strconv/quote.go b/src/pkg/strconv/quote.go index bbc0b2658..98b19d3a2 100644 --- a/src/pkg/strconv/quote.go +++ b/src/pkg/strconv/quote.go @@ -14,56 +14,68 @@ import ( const lowerhex = "0123456789abcdef" -func quoteWith(s string, quote byte) string { +func quoteWith(s string, quote byte, ASCIIonly bool) string { var buf bytes.Buffer buf.WriteByte(quote) - for ; len(s) > 0; s = s[1:] { - switch c := s[0]; { - case c == quote: + for width := 0; len(s) > 0; s = s[width:] { + rune := int(s[0]) + width = 1 + if rune >= utf8.RuneSelf { + rune, width = utf8.DecodeRuneInString(s) + } + if width == 1 && rune == utf8.RuneError { + goto printEscX + } + if rune == int(quote) || rune == '\\' { // always backslashed buf.WriteByte('\\') - buf.WriteByte(quote) - case c == '\\': - buf.WriteString(`\\`) - case ' ' <= c && c <= '~': - buf.WriteString(string(c)) - case c == '\a': + buf.WriteByte(byte(rune)) + continue + } + if ASCIIonly { + if rune <= unicode.MaxASCII && unicode.IsPrint(rune) { + buf.WriteRune(rune) + continue + } + } else if unicode.IsPrint(rune) { + buf.WriteRune(rune) + continue + } + switch rune { + case '\a': buf.WriteString(`\a`) - case c == '\b': + case '\b': buf.WriteString(`\b`) - case c == '\f': + case '\f': buf.WriteString(`\f`) - case c == '\n': + case '\n': buf.WriteString(`\n`) - case c == '\r': + case '\r': buf.WriteString(`\r`) - case c == '\t': + case '\t': buf.WriteString(`\t`) - case c == '\v': + case '\v': buf.WriteString(`\v`) - - case c >= utf8.RuneSelf && utf8.FullRuneInString(s): - r, size := utf8.DecodeRuneInString(s) - if r == utf8.RuneError && size == 1 { - goto EscX - } - s = s[size-1:] // next iteration will slice off 1 more - if r < 0x10000 { + default: + switch { + case rune < ' ': + printEscX: + buf.WriteString(`\x`) + buf.WriteByte(lowerhex[s[0]>>4]) + buf.WriteByte(lowerhex[s[0]&0xF]) + case rune > unicode.MaxRune: + rune = 0xFFFD + fallthrough + case rune < 0x10000: buf.WriteString(`\u`) - for j := uint(0); j < 4; j++ { - buf.WriteByte(lowerhex[(r>>(12-4*j))&0xF]) + for s := 12; s >= 0; s -= 4 { + buf.WriteByte(lowerhex[rune>>uint(s)&0xF]) } - } else { + default: buf.WriteString(`\U`) - for j := uint(0); j < 8; j++ { - buf.WriteByte(lowerhex[(r>>(28-4*j))&0xF]) + for s := 28; s >= 0; s -= 4 { + buf.WriteByte(lowerhex[rune>>uint(s)&0xF]) } } - - default: - EscX: - buf.WriteString(`\x`) - buf.WriteByte(lowerhex[c>>4]) - buf.WriteByte(lowerhex[c&0xF]) } } buf.WriteByte(quote) @@ -71,21 +83,38 @@ func quoteWith(s string, quote byte) string { } -// Quote returns a double-quoted Go string literal -// representing s. The returned string uses Go escape -// sequences (\t, \n, \xFF, \u0100) for control characters -// and non-ASCII characters. +// Quote returns a double-quoted Go string literal representing s. The +// returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for +// control characters and non-printable characters as defined by +// unicode.IsPrint. func Quote(s string) string { - return quoteWith(s, '"') + return quoteWith(s, '"', false) +} + +// QuoteToASCII returns a double-quoted Go string literal representing s. +// The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for +// non-ASCII characters and non-printable characters as defined by +// unicode.IsPrint. +func QuoteToASCII(s string) string { + return quoteWith(s, '"', true) } -// QuoteRune returns a single-quoted Go character literal -// representing the rune. The returned string uses Go escape -// sequences (\t, \n, \xFF, \u0100) for control characters -// and non-ASCII characters. +// QuoteRune returns a single-quoted Go character literal representing the +// rune. The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) +// for control characters and non-printable characters as defined by +// unicode.IsPrint. func QuoteRune(rune int) string { // TODO: avoid the allocation here. - return quoteWith(string(rune), '\'') + return quoteWith(string(rune), '\'', false) +} + +// QuoteRuneToASCII returns a single-quoted Go character literal representing +// the rune. The returned string uses Go escape sequences (\t, \n, \xFF, +// \u0100) for non-ASCII characters and non-printable characters as defined +// by unicode.IsPrint. +func QuoteRuneToASCII(rune int) string { + // TODO: avoid the allocation here. + return quoteWith(string(rune), '\'', true) } // CanBackquote returns whether the string s would be diff --git a/src/pkg/strconv/quote_test.go b/src/pkg/strconv/quote_test.go index 3232d611c..4d615db44 100644 --- a/src/pkg/strconv/quote_test.go +++ b/src/pkg/strconv/quote_test.go @@ -11,17 +11,18 @@ import ( ) type quoteTest struct { - in string - out string + in string + out string + ascii string } var quotetests = []quoteTest{ - {"\a\b\f\r\n\t\v", `"\a\b\f\r\n\t\v"`}, - {"\\", `"\\"`}, - {"abc\xffdef", `"abc\xffdef"`}, - {"\u263a", `"\u263a"`}, - {"\U0010ffff", `"\U0010ffff"`}, - {"\x04", `"\x04"`}, + {"\a\b\f\r\n\t\v", `"\a\b\f\r\n\t\v"`, `"\a\b\f\r\n\t\v"`}, + {"\\", `"\\"`, `"\\"`}, + {"abc\xffdef", `"abc\xffdef"`, `"abc\xffdef"`}, + {"\u263a", `"☺"`, `"\u263a"`}, + {"\U0010ffff", `"\U0010ffff"`, `"\U0010ffff"`}, + {"\x04", `"\x04"`, `"\x04"`}, } func TestQuote(t *testing.T) { @@ -32,20 +33,30 @@ func TestQuote(t *testing.T) { } } +func TestQuoteToASCII(t *testing.T) { + for _, tt := range quotetests { + if out := QuoteToASCII(tt.in); out != tt.ascii { + t.Errorf("QuoteToASCII(%s) = %s, want %s", tt.in, out, tt.ascii) + } + } +} + type quoteRuneTest struct { - in int - out string + in int + out string + ascii string } var quoterunetests = []quoteRuneTest{ - {'a', `'a'`}, - {'\a', `'\a'`}, - {'\\', `'\\'`}, - {0xFF, `'\u00ff'`}, - {0x263a, `'\u263a'`}, - {0x0010ffff, `'\U0010ffff'`}, - {0x0010ffff + 1, `'\ufffd'`}, - {0x04, `'\x04'`}, + {'a', `'a'`, `'a'`}, + {'\a', `'\a'`, `'\a'`}, + {'\\', `'\\'`, `'\\'`}, + {0xFF, `'ÿ'`, `'\u00ff'`}, + {0x263a, `'☺'`, `'\u263a'`}, + {0xfffd, `'�'`, `'\ufffd'`}, + {0x0010ffff, `'\U0010ffff'`, `'\U0010ffff'`}, + {0x0010ffff + 1, `'�'`, `'\ufffd'`}, + {0x04, `'\x04'`, `'\x04'`}, } func TestQuoteRune(t *testing.T) { @@ -56,6 +67,14 @@ func TestQuoteRune(t *testing.T) { } } +func TestQuoteRuneToASCII(t *testing.T) { + for _, tt := range quoterunetests { + if out := QuoteRuneToASCII(tt.in); out != tt.ascii { + t.Errorf("QuoteRuneToASCII(%U) = %s, want %s", tt.in, out, tt.ascii) + } + } +} + type canBackquoteTest struct { in string out bool @@ -110,7 +129,12 @@ func TestCanBackquote(t *testing.T) { } } -var unquotetests = []quoteTest{ +type unQuoteTest struct { + in string + out string +} + +var unquotetests = []unQuoteTest{ {`""`, ""}, {`"a"`, "a"}, {`"abc"`, "abc"}, |