Diffstat (limited to 'libgo/go/bufio')
-rw-r--r--  libgo/go/bufio/bufio.go        32
-rw-r--r--  libgo/go/bufio/bufio_test.go   19
-rw-r--r--  libgo/go/bufio/scan.go         27
-rw-r--r--  libgo/go/bufio/scan_test.go   120
4 files changed, 169 insertions, 29 deletions
diff --git a/libgo/go/bufio/bufio.go b/libgo/go/bufio/bufio.go
index 61ef2619100..d3c68fe6fe5 100644
--- a/libgo/go/bufio/bufio.go
+++ b/libgo/go/bufio/bufio.go
@@ -30,8 +30,8 @@ var (
// Reader implements buffering for an io.Reader object.
type Reader struct {
buf []byte
- rd io.Reader
- r, w int
+ rd io.Reader // reader provided by the client
+ r, w int // buf read and write positions
err error
lastByte int
lastRuneSize int
@@ -131,18 +131,17 @@ func (b *Reader) Peek(n int) ([]byte, error) {
for b.w-b.r < n && b.err == nil {
b.fill() // b.w-b.r < len(b.buf) => buffer is not full
}
- m := b.w - b.r
- if m > n {
- m = n
- }
+
var err error
- if m < n {
+ if avail := b.w - b.r; avail < n {
+ // not enough data in buffer
+ n = avail
err = b.readErr()
if err == nil {
err = ErrBufferFull
}
}
- return b.buf[b.r : b.r+m], err
+ return b.buf[b.r : b.r+n], err
}
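
Illustrative note (not part of the patch): with the rewritten Peek, a request for more bytes than the input can supply still returns whatever could be buffered, together with the error that cut the read short. A minimal sketch using only the public bufio API:

package main

import (
	"bufio"
	"fmt"
	"strings"
)

func main() {
	r := bufio.NewReader(strings.NewReader("abc"))

	// Only three bytes exist, so Peek returns the short slice plus the
	// error that stopped filling the buffer (io.EOF here). Asking for
	// more than the buffer can ever hold would yield ErrBufferFull.
	b, err := r.Peek(5)
	fmt.Printf("%q %v\n", b, err) // "abc" EOF
}
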
// Read reads data into p.
@@ -173,15 +172,13 @@ func (b *Reader) Read(p []byte) (n int, err error) {
return n, b.readErr()
}
b.fill() // buffer is empty
- if b.w == b.r {
+ if b.r == b.w {
return 0, b.readErr()
}
}
- if n > b.w-b.r {
- n = b.w - b.r
- }
- copy(p[0:n], b.buf[b.r:])
+ // copy as much as we can
+ n = copy(p, b.buf[b.r:b.w])
b.r += n
b.lastByte = int(b.buf[b.r-1])
b.lastRuneSize = -1
@@ -288,7 +285,7 @@ func (b *Reader) ReadSlice(delim byte) (line []byte, err error) {
}
// Buffer full?
- if n := b.Buffered(); n >= len(b.buf) {
+ if b.Buffered() >= len(b.buf) {
b.r = b.w
line = b.buf
err = ErrBufferFull
@@ -301,6 +298,7 @@ func (b *Reader) ReadSlice(delim byte) (line []byte, err error) {
// Handle last byte, if any.
if i := len(line) - 1; i >= 0 {
b.lastByte = int(line[i])
+ b.lastRuneSize = -1
}
return
@@ -458,11 +456,13 @@ func (b *Reader) WriteTo(w io.Writer) (n int64, err error) {
return n, b.readErr()
}
+var errNegativeWrite = errors.New("bufio: writer returned negative count from Write")
+
// writeBuf writes the Reader's buffer to the writer.
func (b *Reader) writeBuf(w io.Writer) (int64, error) {
n, err := w.Write(b.buf[b.r:b.w])
- if n < b.r-b.w {
- panic(errors.New("bufio: writer did not write all data"))
+ if n < 0 {
+ panic(errNegativeWrite)
}
b.r += n
return int64(n), err
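
Illustrative note (not part of the patch): the new check enforces the io.Writer contract, which says Write must never report a negative count. A hedged sketch with a deliberately broken, hypothetical writer that would trip the new panic during WriteTo:

package main

import (
	"bufio"
	"fmt"
	"strings"
)

// badWriter is a hypothetical io.Writer that violates the contract by
// reporting a negative byte count.
type badWriter struct{}

func (badWriter) Write(p []byte) (int, error) { return -1, nil }

func main() {
	defer func() {
		// Expected: bufio: writer returned negative count from Write
		fmt.Println("recovered:", recover())
	}()

	r := bufio.NewReader(strings.NewReader("hello"))
	r.Peek(1)              // make sure some data is buffered
	r.WriteTo(badWriter{}) // writeBuf sees n < 0 and panics
}
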
diff --git a/libgo/go/bufio/bufio_test.go b/libgo/go/bufio/bufio_test.go
index 76d3c8eade8..550dac9173f 100644
--- a/libgo/go/bufio/bufio_test.go
+++ b/libgo/go/bufio/bufio_test.go
@@ -31,9 +31,6 @@ func newRot13Reader(r io.Reader) *rot13Reader {
func (r13 *rot13Reader) Read(p []byte) (int, error) {
n, err := r13.r.Read(p)
- if err != nil {
- return n, err
- }
for i := 0; i < n; i++ {
c := p[i] | 0x20 // lowercase byte
if 'a' <= c && c <= 'm' {
@@ -42,7 +39,7 @@ func (r13 *rot13Reader) Read(p []byte) (int, error) {
p[i] -= 13
}
}
- return n, nil
+ return n, err
}
// Call ReadByte to accumulate the text of a file
@@ -438,7 +435,7 @@ func TestUnreadRuneError(t *testing.T) {
if err != nil {
t.Error("unexpected error on ReadRune (2):", err)
}
- for _ = range buf {
+ for range buf {
_, err = r.ReadByte()
if err != nil {
t.Error("unexpected error on ReadByte (2):", err)
@@ -463,6 +460,18 @@ func TestUnreadRuneError(t *testing.T) {
if r.UnreadRune() == nil {
t.Error("expected error after UnreadByte (3)")
}
+ // Test error after ReadSlice.
+ _, _, err = r.ReadRune() // reset state
+ if err != nil {
+ t.Error("unexpected error on ReadRune (4):", err)
+ }
+ _, err = r.ReadSlice(0)
+ if err != io.EOF {
+ t.Error("unexpected error on ReadSlice (4):", err)
+ }
+ if r.UnreadRune() == nil {
+ t.Error("expected error after ReadSlice (4)")
+ }
}
func TestUnreadRuneAtEOF(t *testing.T) {
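
Illustrative note (not part of the patch): the new test exercises the lastRuneSize reset in ReadSlice. Once ReadSlice has consumed bytes, the saved rune state is stale, so UnreadRune must refuse. A small sketch:

package main

import (
	"bufio"
	"fmt"
	"strings"
)

func main() {
	r := bufio.NewReader(strings.NewReader("héllo world"))

	if _, _, err := r.ReadRune(); err != nil {
		panic(err)
	}
	// ReadSlice advances the reader behind ReadRune's back, so the saved
	// rune size is no longer meaningful; the fix clears it.
	if _, err := r.ReadSlice(' '); err != nil {
		panic(err)
	}
	fmt.Println(r.UnreadRune()) // bufio: invalid use of UnreadRune
}
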
diff --git a/libgo/go/bufio/scan.go b/libgo/go/bufio/scan.go
index 715ce071e3b..364d1596139 100644
--- a/libgo/go/bufio/scan.go
+++ b/libgo/go/bufio/scan.go
@@ -36,6 +36,7 @@ type Scanner struct {
start int // First non-processed byte in buf.
end int // End of data in buf.
err error // Sticky error.
+ empties int // Count of successive empty tokens.
}
// SplitFunc is the signature of the split function used to tokenize the
@@ -64,8 +65,9 @@ var (
)
const (
- // Maximum size used to buffer a token. The actual maximum token size
- // may be smaller as the buffer may need to include, for instance, a newline.
+ // MaxScanTokenSize is the maximum size used to buffer a token.
+ // The actual maximum token size may be smaller as the buffer
+ // may need to include, for instance, a newline.
MaxScanTokenSize = 64 * 1024
)
@@ -107,11 +109,15 @@ func (s *Scanner) Text() string {
// After Scan returns false, the Err method will return any error that
// occurred during scanning, except that if it was io.EOF, Err
// will return nil.
+// Scan panics if the split function returns 100 empty tokens without
+// advancing the input. This is a common error mode for scanners.
func (s *Scanner) Scan() bool {
// Loop until we have a token.
for {
// See if we can get a token with what we already have.
- if s.end > s.start {
+ // If we've run out of data but have an error, give the split function
+ // a chance to recover any remaining, possibly empty token.
+ if s.end > s.start || s.err != nil {
advance, token, err := s.split(s.buf[s.start:s.end], s.err != nil)
if err != nil {
s.setErr(err)
@@ -122,6 +128,15 @@ func (s *Scanner) Scan() bool {
}
s.token = token
if token != nil {
+ if s.err == nil || advance > 0 {
+ s.empties = 0
+ } else {
+ // Returning tokens not advancing input at EOF.
+ s.empties++
+ if s.empties > 100 {
+ panic("bufio.Scan: 100 empty tokens without progressing")
+ }
+ }
return true
}
}
@@ -169,6 +184,7 @@ func (s *Scanner) Scan() bool {
break
}
if n > 0 {
+ s.empties = 0
break
}
loop++
@@ -326,9 +342,6 @@ func ScanWords(data []byte, atEOF bool) (advance int, token []byte, err error) {
break
}
}
- if atEOF && len(data) == 0 {
- return 0, nil, nil
- }
// Scan until space, marking end of word.
for width, i := 0, start; i < len(data); i += width {
var r rune
@@ -342,5 +355,5 @@ func ScanWords(data []byte, atEOF bool) (advance int, token []byte, err error) {
return len(data), data[start:], nil
}
// Request more data.
- return 0, nil, nil
+ return start, nil, nil
}
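
Illustrative note (not part of the patch): by returning start instead of 0 when it needs more data, ScanWords tells the Scanner that the leading spaces are already consumed, so a run of whitespace larger than the maximum token size no longer aborts the scan. A minimal sketch:

package main

import (
	"bufio"
	"fmt"
	"strings"
)

func main() {
	// Four times the default 64K token limit of nothing but spaces,
	// followed by a single word.
	input := strings.Repeat(" ", 4*bufio.MaxScanTokenSize) + "ipsum"

	s := bufio.NewScanner(strings.NewReader(input))
	s.Split(bufio.ScanWords)
	for s.Scan() {
		fmt.Println(s.Text()) // ipsum
	}
	if err := s.Err(); err != nil {
		// Before this change: "bufio.Scanner: token too long".
		fmt.Println("scan error:", err)
	}
}
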
diff --git a/libgo/go/bufio/scan_test.go b/libgo/go/bufio/scan_test.go
index 0db7cad2047..eea87cbf7b3 100644
--- a/libgo/go/bufio/scan_test.go
+++ b/libgo/go/bufio/scan_test.go
@@ -15,6 +15,8 @@ import (
"unicode/utf8"
)
+const smallMaxTokenSize = 256 // Much smaller for more efficient testing.
+
// Test white space table matches the Unicode definition.
func TestSpace(t *testing.T) {
for r := rune(0); r <= utf8.MaxRune; r++ {
@@ -172,7 +174,6 @@ func genLine(buf *bytes.Buffer, lineNum, n int, addNewline bool) {
// Test the line splitter, including some carriage returns but no long lines.
func TestScanLongLines(t *testing.T) {
- const smallMaxTokenSize = 256 // Much smaller for more efficient testing.
// Build a buffer of lots of line lengths up to but not exceeding smallMaxTokenSize.
tmp := new(bytes.Buffer)
buf := new(bytes.Buffer)
@@ -404,3 +405,120 @@ func TestBadReader(t *testing.T) {
t.Errorf("unexpected error: %v", err)
}
}
+
+func TestScanWordsExcessiveWhiteSpace(t *testing.T) {
+ const word = "ipsum"
+ s := strings.Repeat(" ", 4*smallMaxTokenSize) + word
+ scanner := NewScanner(strings.NewReader(s))
+ scanner.MaxTokenSize(smallMaxTokenSize)
+ scanner.Split(ScanWords)
+ if !scanner.Scan() {
+ t.Fatalf("scan failed: %v", scanner.Err())
+ }
+ if token := scanner.Text(); token != word {
+ t.Fatalf("unexpected token: %v", token)
+ }
+}
+
+// Test that empty tokens, including at end of line or end of file, are found by the scanner.
+// Issue 8672: Could miss final empty token.
+
+func commaSplit(data []byte, atEOF bool) (advance int, token []byte, err error) {
+ for i := 0; i < len(data); i++ {
+ if data[i] == ',' {
+ return i + 1, data[:i], nil
+ }
+ }
+ if !atEOF {
+ return 0, nil, nil
+ }
+ return 0, data, nil
+}
+
+func TestEmptyTokens(t *testing.T) {
+ s := NewScanner(strings.NewReader("1,2,3,"))
+ values := []string{"1", "2", "3", ""}
+ s.Split(commaSplit)
+ var i int
+ for i = 0; i < len(values); i++ {
+ if !s.Scan() {
+ break
+ }
+ if s.Text() != values[i] {
+ t.Errorf("%d: expected %q got %q", i, values[i], s.Text())
+ }
+ }
+ if i != len(values) {
+ t.Errorf("got %d fields, expected %d", i, len(values))
+ }
+ if err := s.Err(); err != nil {
+ t.Fatal(err)
+ }
+}
+
+func loopAtEOFSplit(data []byte, atEOF bool) (advance int, token []byte, err error) {
+ if len(data) > 0 {
+ return 1, data[:1], nil
+ }
+ return 0, data, nil
+}
+
+func TestDontLoopForever(t *testing.T) {
+ s := NewScanner(strings.NewReader("abc"))
+ s.Split(loopAtEOFSplit)
+ // Expect a panic
+ defer func() {
+ err := recover()
+ if err == nil {
+ t.Fatal("should have panicked")
+ }
+ if msg, ok := err.(string); !ok || !strings.Contains(msg, "empty tokens") {
+ panic(err)
+ }
+ }()
+ for count := 0; s.Scan(); count++ {
+ if count > 1000 {
+ t.Fatal("looping")
+ }
+ }
+ if s.Err() != nil {
+ t.Fatal("after scan:", s.Err())
+ }
+}
+
+func TestBlankLines(t *testing.T) {
+ s := NewScanner(strings.NewReader(strings.Repeat("\n", 1000)))
+ for count := 0; s.Scan(); count++ {
+ if count > 2000 {
+ t.Fatal("looping")
+ }
+ }
+ if s.Err() != nil {
+ t.Fatal("after scan:", s.Err())
+ }
+}
+
+type countdown int
+
+func (c *countdown) split(data []byte, atEOF bool) (advance int, token []byte, err error) {
+ if *c > 0 {
+ *c--
+ return 1, data[:1], nil
+ }
+ return 0, nil, nil
+}
+
+// Check that the looping-at-EOF check doesn't trigger for merely empty tokens.
+func TestEmptyLinesOK(t *testing.T) {
+ c := countdown(10000)
+ s := NewScanner(strings.NewReader(strings.Repeat("\n", 10000)))
+ s.Split(c.split)
+ for s.Scan() {
+ }
+ if s.Err() != nil {
+ t.Fatal("after scan:", s.Err())
+ }
+ if c != 0 {
+ t.Fatalf("stopped with %d left to process", c)
+ }
+}