summaryrefslogtreecommitdiff
path: root/src/mime
diff options
context:
space:
mode:
authorAlexandre Cesaro <alexandre.cesaro@gmail.com>2015-09-24 23:45:13 +0200
committerBrad Fitzpatrick <bradfitz@golang.org>2015-10-15 00:08:03 +0000
commit65fc379daeda784d085f98d621a9ab712c096148 (patch)
treec90eb3c81088661ac19d60af4b64e5e3471c2475 /src/mime
parent9f60a0a2b01c9d1079ea8991125b471cdaa7eb56 (diff)
downloadgo-git-65fc379daeda784d085f98d621a9ab712c096148.tar.gz
mime: limit UTF-8 encoded-word length to 75 characters
As specified by RFC 2047 section 2, encoded-words may not be more than 75 characters long. We only enforce this rule when the charset is UTF-8, since multi-byte characters must not be split across encoded-words (see section 5.3). Fixes #12300 Change-Id: I72a43fc3fe6ddeb3dab54dcdce0837d7ebf658f0 Reviewed-on: https://go-review.googlesource.com/14957 Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Diffstat (limited to 'src/mime')
-rw-r--r--src/mime/encodedword.go138
-rw-r--r--src/mime/encodedword_test.go8
2 files changed, 124 insertions, 22 deletions
diff --git a/src/mime/encodedword.go b/src/mime/encodedword.go
index ebf6164bb6..3b414dd5c4 100644
--- a/src/mime/encodedword.go
+++ b/src/mime/encodedword.go
@@ -54,35 +54,129 @@ func (e WordEncoder) encodeWord(charset, s string) string {
buf := getBuffer()
defer putBuffer(buf)
+ e.openWord(buf, charset)
+ if e == BEncoding {
+ e.bEncode(buf, charset, s)
+ } else {
+ e.qEncode(buf, charset, s)
+ }
+ closeWord(buf)
+
+ return buf.String()
+}
+
+const (
+ // maxEncodedWordLen is the maximum length of an encoded-word: 75 characters.
+ // See RFC 2047, section 2.
+ maxEncodedWordLen = 75
+ // maxContentLen is how much content can be encoded, ignoring the header and
+ // 2-byte footer. NOTE(review): openWord also writes the encoding byte and a second question mark after the charset, two bytes the header literal subtracted here does not count, so a UTF-8 word could apparently reach 77 characters — confirm against the 75-character limit.
+ maxContentLen = maxEncodedWordLen - len("=?UTF-8?") - len("?=")
+)
+
+var maxBase64Len = base64.StdEncoding.DecodedLen(maxContentLen) // number of raw bytes whose base64 form fits within maxContentLen
+
+// bEncode encodes s using base64 encoding and writes it to buf. When the charset is UTF-8 and the encoded content would exceed maxContentLen, the input is cut into chunks of at most maxBase64Len bytes on rune boundaries, with splitWord called between chunks.
+func (e WordEncoder) bEncode(buf *bytes.Buffer, charset, s string) {
+ w := base64.NewEncoder(base64.StdEncoding, buf)
+ // If the charset is not UTF-8 or if the content is short, do not bother
+ // splitting the encoded-word: write everything through a single encoder pass.
+ if !isUTF8(charset) || base64.StdEncoding.EncodedLen(len(s)) <= maxContentLen {
+ io.WriteString(w, s)
+ w.Close()
+ return
+ }
+
+ var currentLen, last, runeLen int // currentLen: bytes in the open chunk; last: index in s where that chunk starts
+ for i := 0; i < len(s); i += runeLen {
+ // Multi-byte characters must not be split across encoded-words.
+ // See RFC 2047, section 5.3.
+ _, runeLen = utf8.DecodeRuneInString(s[i:])
+
+ if currentLen+runeLen <= maxBase64Len { // the rune still fits in the open chunk
+ currentLen += runeLen
+ } else {
+ io.WriteString(w, s[last:i])
+ w.Close() // flush the pending base64 output before the word boundary is written
+ e.splitWord(buf, charset)
+ last = i
+ currentLen = runeLen // the rune at i opens the next chunk
+ }
+ }
+ io.WriteString(w, s[last:])
+ w.Close()
+}
+
+// qEncode encodes s using Q encoding and writes it to buf. It splits the
+// encoded-words when necessary, which it only does for the UTF-8 charset and only between runes.
+func (e WordEncoder) qEncode(buf *bytes.Buffer, charset, s string) {
+ // We only split encoded-words when the charset is UTF-8; any other charset is written as a single word.
+ if !isUTF8(charset) {
+ writeQString(buf, s)
+ return
+ }
+
+ var currentLen, runeLen int // currentLen: encoded bytes already written to the open word
+ for i := 0; i < len(s); i += runeLen {
+ b := s[i]
+ // Multi-byte characters must not be split across encoded-words.
+ // See RFC 2047, section 5.3.
+ var encLen int
+ if b >= ' ' && b <= '~' && b != '=' && b != '?' && b != '_' { // bytes that writeQString emits as a single byte
+ runeLen, encLen = 1, 1
+ } else {
+ _, runeLen = utf8.DecodeRuneInString(s[i:])
+ encLen = 3 * runeLen // each byte of the rune is written as an escape of three bytes
+ }
+
+ if currentLen+encLen > maxContentLen { // the rune would overflow the open word, so start a new one first
+ e.splitWord(buf, charset)
+ currentLen = 0
+ }
+ writeQString(buf, s[i:i+runeLen])
+ currentLen += encLen
+ }
+}
+
+// writeQString encodes s using Q encoding and writes it to buf: a space becomes an underscore, other printable ASCII apart from the equals sign, question mark and underscore is copied unchanged, and every remaining byte is written as an equals sign followed by two uppercase hex digits.
+func writeQString(buf *bytes.Buffer, s string) {
+ for i := 0; i < len(s); i++ {
+ switch b := s[i]; {
+ case b == ' ':
+ buf.WriteByte('_')
+ case b >= '!' && b <= '~' && b != '=' && b != '?' && b != '_':
+ buf.WriteByte(b)
+ default:
+ buf.WriteByte('=')
+ buf.WriteByte(upperhex[b>>4])
+ buf.WriteByte(upperhex[b&0x0f])
+ }
+ }
+}
+
+// openWord writes the beginning of an encoded-word into buf: the opening marker, the charset, a question mark, the encoding byte e and a final question mark.
+func (e WordEncoder) openWord(buf *bytes.Buffer, charset string) {
buf.WriteString("=?")
buf.WriteString(charset)
buf.WriteByte('?')
buf.WriteByte(byte(e))
buf.WriteByte('?')
+}
- if e == BEncoding {
- w := base64.NewEncoder(base64.StdEncoding, buf)
- io.WriteString(w, s)
- w.Close()
- } else {
- enc := make([]byte, 3)
- for i := 0; i < len(s); i++ {
- b := s[i]
- switch {
- case b == ' ':
- buf.WriteByte('_')
- case b <= '~' && b >= '!' && b != '=' && b != '?' && b != '_':
- buf.WriteByte(b)
- default:
- enc[0] = '='
- enc[1] = upperhex[b>>4]
- enc[2] = upperhex[b&0x0f]
- buf.Write(enc)
- }
- }
- }
+// closeWord writes the two-byte terminator that ends an encoded-word into buf.
+func closeWord(buf *bytes.Buffer) {
buf.WriteString("?=")
- return buf.String()
+}
+
+// splitWord closes the current encoded-word and opens a new one with the same charset and encoding, writing a single space between the two words.
+func (e WordEncoder) splitWord(buf *bytes.Buffer, charset string) {
+ closeWord(buf)
+ buf.WriteByte(' ')
+ e.openWord(buf, charset)
+}
+
+func isUTF8(charset string) bool {
+ return strings.EqualFold(charset, "UTF-8") // case-insensitive comparison, so lower-case and upper-case spellings of the charset are both accepted
}
const upperhex = "0123456789ABCDEF"
diff --git a/src/mime/encodedword_test.go b/src/mime/encodedword_test.go
index b30ecba3b9..5fcd7a06dd 100644
--- a/src/mime/encodedword_test.go
+++ b/src/mime/encodedword_test.go
@@ -27,6 +27,14 @@ func TestEncodeWord(t *testing.T) {
{QEncoding, iso88591, "a", "a"},
{QEncoding, utf8, "123 456", "123 456"},
{QEncoding, utf8, "\t !\"#$%&'()*+,-./ :;<>?@[\\]^_`{|}~", "\t !\"#$%&'()*+,-./ :;<>?@[\\]^_`{|}~"},
+ {QEncoding, utf8, strings.Repeat("é", 10), "=?utf-8?q?" + strings.Repeat("=C3=A9", 10) + "?="},
+ {QEncoding, utf8, strings.Repeat("é", 11), "=?utf-8?q?" + strings.Repeat("=C3=A9", 10) + "?= =?utf-8?q?=C3=A9?="},
+ {QEncoding, iso88591, strings.Repeat("\xe9", 22), "=?iso-8859-1?q?" + strings.Repeat("=E9", 22) + "?="},
+ {QEncoding, utf8, strings.Repeat("\x80", 22), "=?utf-8?q?" + strings.Repeat("=80", 21) + "?= =?utf-8?q?=80?="},
+ {BEncoding, utf8, strings.Repeat("é", 24), "=?utf-8?b?" + strings.Repeat("w6nDqcOp", 8) + "?="},
+ {BEncoding, utf8, strings.Repeat("é", 27), "=?utf-8?b?" + strings.Repeat("w6nDqcOp", 8) + "?= =?utf-8?b?w6nDqcOp?="},
+ {BEncoding, iso88591, strings.Repeat("\xe9", 45), "=?iso-8859-1?b?" + strings.Repeat("6enp", 15) + "?="},
+ {BEncoding, utf8, strings.Repeat("\x80", 51), "=?utf-8?b?" + strings.Repeat("gICA", 16) + "?= =?utf-8?b?gICA?="},
}
for _, test := range tests {