summaryrefslogtreecommitdiff
path: root/src/compress/lzw
diff options
context:
space:
mode:
authorNigel Tao <nigeltao@golang.org>2021-10-09 16:52:19 +1100
committerNigel Tao <nigeltao@golang.org>2021-10-12 11:00:47 +0000
commit9c1dbdf60edbffeff10f58af21fa055eb0fdd29f (patch)
tree428cc9cf958129b4d03a5f5491d74d9a116433a5 /src/compress/lzw
parentd887d3be5f8f143f2475ddb2ea6c48ceb32def17 (diff)
downloadgo-git-9c1dbdf60edbffeff10f58af21fa055eb0fdd29f.tar.gz
compress/lzw: output a Clear code first, per GIF spec
The TestStartsWithClearCode test is new, but if it existed beforehand, the want strings would be "\x81" and "Hi\x81" without a starting "\x80". Fixes #26108 Fixes #33748 Updates makeworld-the-better-one/didder#7 Updates nothings/stb#1222 Change-Id: I35ac0ed862ba6ee921ba9aee257bc19828abaa82 Reviewed-on: https://go-review.googlesource.com/c/go/+/354710 Trust: Nigel Tao <nigeltao@golang.org> Run-TryBot: Nigel Tao <nigeltao@golang.org> TryBot-Result: Go Bot <gobot@golang.org> Reviewed-by: Andrew Gerrand <adg@golang.org>
Diffstat (limited to 'src/compress/lzw')
-rw-r--r--src/compress/lzw/writer.go20
-rw-r--r--src/compress/lzw/writer_test.go28
2 files changed, 47 insertions, 1 deletions
diff --git a/src/compress/lzw/writer.go b/src/compress/lzw/writer.go
index 552bdc2ce1..cf06ea80c7 100644
--- a/src/compress/lzw/writer.go
+++ b/src/compress/lzw/writer.go
@@ -137,7 +137,19 @@ func (w *Writer) Write(p []byte) (n int, err error) {
n = len(p)
code := w.savedCode
if code == invalidCode {
- // The first code sent is always a literal code.
+ // This is the first write; send a clear code.
+ // https://www.w3.org/Graphics/GIF/spec-gif89a.txt Appendix F
+ // "Variable-Length-Code LZW Compression" says that "Encoders should
+ // output a Clear code as the first code of each image data stream".
+ //
+ // LZW compression isn't only used by GIF, but it's cheap to follow
+ // that directive unconditionally.
+ clear := uint32(1) << w.litWidth
+ if err := w.write(w, clear); err != nil {
+ return 0, err
+ }
+ // After the starting clear code, the next code sent (for non-empty
+ // input) is always a literal code.
code, p = uint32(p[0]), p[1:]
}
loop:
@@ -202,6 +214,12 @@ func (w *Writer) Close() error {
if err := w.incHi(); err != nil && err != errOutOfCodes {
return err
}
+ } else {
+ // Write the starting clear code, as w.Write did not.
+ clear := uint32(1) << w.litWidth
+ if err := w.write(w, clear); err != nil {
+ return err
+ }
}
// Write the eof code.
eof := uint32(1)<<w.litWidth + 1
diff --git a/src/compress/lzw/writer_test.go b/src/compress/lzw/writer_test.go
index 9f59c8bb18..edf683a512 100644
--- a/src/compress/lzw/writer_test.go
+++ b/src/compress/lzw/writer_test.go
@@ -168,6 +168,34 @@ func TestSmallLitWidth(t *testing.T) {
}
}
+func TestStartsWithClearCode(t *testing.T) {
+ // A literal width of 7 bits means that the code width starts at 8 bits,
+ // which makes it easier to visually inspect the output (provided that the
+ // output is short so codes don't get longer). Each byte is a code:
+ // - ASCII bytes are literal codes,
+ // - 0x80 is the clear code,
+ // - 0x81 is the end code.
+ // - 0x82 and above are copy codes (unused in this test case).
+ for _, empty := range []bool{false, true} {
+ var buf bytes.Buffer
+ w := NewWriter(&buf, LSB, 7)
+ if !empty {
+ w.Write([]byte("Hi"))
+ }
+ w.Close()
+ got := buf.String()
+
+ want := "\x80\x81"
+ if !empty {
+ want = "\x80Hi\x81"
+ }
+
+ if got != want {
+ t.Errorf("empty=%t: got %q, want %q", empty, got, want)
+ }
+ }
+}
+
func BenchmarkEncoder(b *testing.B) {
buf, err := os.ReadFile("../testdata/e.txt")
if err != nil {