diff options
Diffstat (limited to 'libgo/go/bufio')
-rw-r--r-- | libgo/go/bufio/bufio.go | 32 | ||||
-rw-r--r-- | libgo/go/bufio/bufio_test.go | 19 | ||||
-rw-r--r-- | libgo/go/bufio/scan.go | 27 | ||||
-rw-r--r-- | libgo/go/bufio/scan_test.go | 120 |
4 files changed, 169 insertions, 29 deletions
diff --git a/libgo/go/bufio/bufio.go b/libgo/go/bufio/bufio.go index 61ef2619100..d3c68fe6fe5 100644 --- a/libgo/go/bufio/bufio.go +++ b/libgo/go/bufio/bufio.go @@ -30,8 +30,8 @@ var ( // Reader implements buffering for an io.Reader object. type Reader struct { buf []byte - rd io.Reader - r, w int + rd io.Reader // reader provided by the client + r, w int // buf read and write positions err error lastByte int lastRuneSize int @@ -131,18 +131,17 @@ func (b *Reader) Peek(n int) ([]byte, error) { for b.w-b.r < n && b.err == nil { b.fill() // b.w-b.r < len(b.buf) => buffer is not full } - m := b.w - b.r - if m > n { - m = n - } + var err error - if m < n { + if avail := b.w - b.r; avail < n { + // not enough data in buffer + n = avail err = b.readErr() if err == nil { err = ErrBufferFull } } - return b.buf[b.r : b.r+m], err + return b.buf[b.r : b.r+n], err } // Read reads data into p. @@ -173,15 +172,13 @@ func (b *Reader) Read(p []byte) (n int, err error) { return n, b.readErr() } b.fill() // buffer is empty - if b.w == b.r { + if b.r == b.w { return 0, b.readErr() } } - if n > b.w-b.r { - n = b.w - b.r - } - copy(p[0:n], b.buf[b.r:]) + // copy as much as we can + n = copy(p, b.buf[b.r:b.w]) b.r += n b.lastByte = int(b.buf[b.r-1]) b.lastRuneSize = -1 @@ -288,7 +285,7 @@ func (b *Reader) ReadSlice(delim byte) (line []byte, err error) { } // Buffer full? - if n := b.Buffered(); n >= len(b.buf) { + if b.Buffered() >= len(b.buf) { b.r = b.w line = b.buf err = ErrBufferFull @@ -301,6 +298,7 @@ func (b *Reader) ReadSlice(delim byte) (line []byte, err error) { // Handle last byte, if any. if i := len(line) - 1; i >= 0 { b.lastByte = int(line[i]) + b.lastRuneSize = -1 } return @@ -458,11 +456,13 @@ func (b *Reader) WriteTo(w io.Writer) (n int64, err error) { return n, b.readErr() } +var errNegativeWrite = errors.New("bufio: writer returned negative count from Write") + // writeBuf writes the Reader's buffer to the writer. func (b *Reader) writeBuf(w io.Writer) (int64, error) { n, err := w.Write(b.buf[b.r:b.w]) - if n < b.r-b.w { - panic(errors.New("bufio: writer did not write all data")) + if n < 0 { + panic(errNegativeWrite) } b.r += n return int64(n), err diff --git a/libgo/go/bufio/bufio_test.go b/libgo/go/bufio/bufio_test.go index 76d3c8eade8..550dac9173f 100644 --- a/libgo/go/bufio/bufio_test.go +++ b/libgo/go/bufio/bufio_test.go @@ -31,9 +31,6 @@ func newRot13Reader(r io.Reader) *rot13Reader { func (r13 *rot13Reader) Read(p []byte) (int, error) { n, err := r13.r.Read(p) - if err != nil { - return n, err - } for i := 0; i < n; i++ { c := p[i] | 0x20 // lowercase byte if 'a' <= c && c <= 'm' { @@ -42,7 +39,7 @@ func (r13 *rot13Reader) Read(p []byte) (int, error) { p[i] -= 13 } } - return n, nil + return n, err } // Call ReadByte to accumulate the text of a file @@ -438,7 +435,7 @@ func TestUnreadRuneError(t *testing.T) { if err != nil { t.Error("unexpected error on ReadRune (2):", err) } - for _ = range buf { + for range buf { _, err = r.ReadByte() if err != nil { t.Error("unexpected error on ReadByte (2):", err) @@ -463,6 +460,18 @@ func TestUnreadRuneError(t *testing.T) { if r.UnreadRune() == nil { t.Error("expected error after UnreadByte (3)") } + // Test error after ReadSlice. + _, _, err = r.ReadRune() // reset state + if err != nil { + t.Error("unexpected error on ReadRune (4):", err) + } + _, err = r.ReadSlice(0) + if err != io.EOF { + t.Error("unexpected error on ReadSlice (4):", err) + } + if r.UnreadRune() == nil { + t.Error("expected error after ReadSlice (4)") + } } func TestUnreadRuneAtEOF(t *testing.T) { diff --git a/libgo/go/bufio/scan.go b/libgo/go/bufio/scan.go index 715ce071e3b..364d1596139 100644 --- a/libgo/go/bufio/scan.go +++ b/libgo/go/bufio/scan.go @@ -36,6 +36,7 @@ type Scanner struct { start int // First non-processed byte in buf. end int // End of data in buf. err error // Sticky error. + empties int // Count of successive empty tokens. } // SplitFunc is the signature of the split function used to tokenize the @@ -64,8 +65,9 @@ var ( ) const ( - // Maximum size used to buffer a token. The actual maximum token size - // may be smaller as the buffer may need to include, for instance, a newline. + // MaxScanTokenSize is the maximum size used to buffer a token. + // The actual maximum token size may be smaller as the buffer + // may need to include, for instance, a newline. MaxScanTokenSize = 64 * 1024 ) @@ -107,11 +109,15 @@ func (s *Scanner) Text() string { // After Scan returns false, the Err method will return any error that // occurred during scanning, except that if it was io.EOF, Err // will return nil. +// Split panics if the split function returns 100 empty tokens without +// advancing the input. This is a common error mode for scanners. func (s *Scanner) Scan() bool { // Loop until we have a token. for { // See if we can get a token with what we already have. - if s.end > s.start { + // If we've run out of data but have an error, give the split function + // a chance to recover any remaining, possibly empty token. + if s.end > s.start || s.err != nil { advance, token, err := s.split(s.buf[s.start:s.end], s.err != nil) if err != nil { s.setErr(err) @@ -122,6 +128,15 @@ func (s *Scanner) Scan() bool { } s.token = token if token != nil { + if s.err == nil || advance > 0 { + s.empties = 0 + } else { + // Returning tokens not advancing input at EOF. + s.empties++ + if s.empties > 100 { + panic("bufio.Scan: 100 empty tokens without progressing") + } + } return true } } @@ -169,6 +184,7 @@ func (s *Scanner) Scan() bool { break } if n > 0 { + s.empties = 0 break } loop++ @@ -326,9 +342,6 @@ func ScanWords(data []byte, atEOF bool) (advance int, token []byte, err error) { break } } - if atEOF && len(data) == 0 { - return 0, nil, nil - } // Scan until space, marking end of word. for width, i := 0, start; i < len(data); i += width { var r rune @@ -342,5 +355,5 @@ func ScanWords(data []byte, atEOF bool) (advance int, token []byte, err error) { return len(data), data[start:], nil } // Request more data. - return 0, nil, nil + return start, nil, nil } diff --git a/libgo/go/bufio/scan_test.go b/libgo/go/bufio/scan_test.go index 0db7cad2047..eea87cbf7b3 100644 --- a/libgo/go/bufio/scan_test.go +++ b/libgo/go/bufio/scan_test.go @@ -15,6 +15,8 @@ import ( "unicode/utf8" ) +const smallMaxTokenSize = 256 // Much smaller for more efficient testing. + // Test white space table matches the Unicode definition. func TestSpace(t *testing.T) { for r := rune(0); r <= utf8.MaxRune; r++ { @@ -172,7 +174,6 @@ func genLine(buf *bytes.Buffer, lineNum, n int, addNewline bool) { // Test the line splitter, including some carriage returns but no long lines. func TestScanLongLines(t *testing.T) { - const smallMaxTokenSize = 256 // Much smaller for more efficient testing. // Build a buffer of lots of line lengths up to but not exceeding smallMaxTokenSize. tmp := new(bytes.Buffer) buf := new(bytes.Buffer) @@ -404,3 +405,120 @@ func TestBadReader(t *testing.T) { t.Errorf("unexpected error: %v", err) } } + +func TestScanWordsExcessiveWhiteSpace(t *testing.T) { + const word = "ipsum" + s := strings.Repeat(" ", 4*smallMaxTokenSize) + word + scanner := NewScanner(strings.NewReader(s)) + scanner.MaxTokenSize(smallMaxTokenSize) + scanner.Split(ScanWords) + if !scanner.Scan() { + t.Fatalf("scan failed: %v", scanner.Err()) + } + if token := scanner.Text(); token != word { + t.Fatalf("unexpected token: %v", token) + } +} + +// Test that empty tokens, including at end of line or end of file, are found by the scanner. +// Issue 8672: Could miss final empty token. + +func commaSplit(data []byte, atEOF bool) (advance int, token []byte, err error) { + for i := 0; i < len(data); i++ { + if data[i] == ',' { + return i + 1, data[:i], nil + } + } + if !atEOF { + return 0, nil, nil + } + return 0, data, nil +} + +func TestEmptyTokens(t *testing.T) { + s := NewScanner(strings.NewReader("1,2,3,")) + values := []string{"1", "2", "3", ""} + s.Split(commaSplit) + var i int + for i = 0; i < len(values); i++ { + if !s.Scan() { + break + } + if s.Text() != values[i] { + t.Errorf("%d: expected %q got %q", i, values[i], s.Text()) + } + } + if i != len(values) { + t.Errorf("got %d fields, expected %d", i, len(values)) + } + if err := s.Err(); err != nil { + t.Fatal(err) + } +} + +func loopAtEOFSplit(data []byte, atEOF bool) (advance int, token []byte, err error) { + if len(data) > 0 { + return 1, data[:1], nil + } + return 0, data, nil +} + +func TestDontLoopForever(t *testing.T) { + s := NewScanner(strings.NewReader("abc")) + s.Split(loopAtEOFSplit) + // Expect a panic + defer func() { + err := recover() + if err == nil { + t.Fatal("should have panicked") + } + if msg, ok := err.(string); !ok || !strings.Contains(msg, "empty tokens") { + panic(err) + } + }() + for count := 0; s.Scan(); count++ { + if count > 1000 { + t.Fatal("looping") + } + } + if s.Err() != nil { + t.Fatal("after scan:", s.Err()) + } +} + +func TestBlankLines(t *testing.T) { + s := NewScanner(strings.NewReader(strings.Repeat("\n", 1000))) + for count := 0; s.Scan(); count++ { + if count > 2000 { + t.Fatal("looping") + } + } + if s.Err() != nil { + t.Fatal("after scan:", s.Err()) + } +} + +type countdown int + +func (c *countdown) split(data []byte, atEOF bool) (advance int, token []byte, err error) { + if *c > 0 { + *c-- + return 1, data[:1], nil + } + return 0, nil, nil +} + +// Check that the looping-at-EOF check doesn't trigger for merely empty tokens. +func TestEmptyLinesOK(t *testing.T) { + c := countdown(10000) + s := NewScanner(strings.NewReader(strings.Repeat("\n", 10000))) + s.Split(c.split) + for s.Scan() { + } + if s.Err() != nil { + t.Fatal("after scan:", s.Err()) + } + if c != 0 { + t.Fatalf("stopped with %d left to process", c) + } +} |