Diffstat (limited to 'libgo/go/golang.org/x/mod/modfile/read.go')
-rw-r--r--  libgo/go/golang.org/x/mod/modfile/read.go | 319
1 file changed, 179 insertions(+), 140 deletions(-)
diff --git a/libgo/go/golang.org/x/mod/modfile/read.go b/libgo/go/golang.org/x/mod/modfile/read.go
index 616d00efdb1..c1f2008ee4c 100644
--- a/libgo/go/golang.org/x/mod/modfile/read.go
+++ b/libgo/go/golang.org/x/mod/modfile/read.go
@@ -2,13 +2,11 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// Module file parser.
-// This is a simplified copy of Google's buildifier parser.
-
package modfile
import (
"bytes"
+ "errors"
"fmt"
"os"
"strconv"
@@ -323,18 +321,17 @@ func (x *RParen) Span() (start, end Position) {
// An input represents a single input file being parsed.
type input struct {
// Lexing state.
- filename string // name of input file, for errors
- complete []byte // entire input
- remaining []byte // remaining input
- token []byte // token being scanned
- lastToken string // most recently returned token, for error messages
- pos Position // current input position
- comments []Comment // accumulated comments
- endRule int // position of end of current rule
+ filename string // name of input file, for errors
+ complete []byte // entire input
+ remaining []byte // remaining input
+ tokenStart []byte // token being scanned to end of input
+ token token // next token to be returned by lex, peek
+ pos Position // current input position
+ comments []Comment // accumulated comments
// Parser state.
- file *FileSyntax // returned top-level syntax tree
- parseError error // error encountered during parsing
+ file *FileSyntax // returned top-level syntax tree
+ parseErrors ErrorList // errors encountered during parsing
// Comment assignment state.
pre []Expr // all expressions, in preorder traversal
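
The lexer no longer tracks lastToken and endRule for a yacc-style interface; instead it keeps exactly one pre-read token in in.token so the parser can peek without consuming. A minimal standalone sketch of that prime/peek/lex cycle, using hypothetical names rather than the package's API:

    package main

    import "fmt"

    // lookahead sketches the single-token lookahead pattern: read primes
    // one token ahead, peek inspects it without consuming, lex consumes
    // it and re-primes. Runes stand in for real tokens here.
    type lookahead struct {
        src  []rune
        next rune // the primed "token"; -1 means EOF
    }

    func newLookahead(s string) *lookahead {
        l := &lookahead{src: []rune(s)}
        l.read() // prime the lexer, mirroring in.readToken() in parse()
        return l
    }

    func (l *lookahead) read() {
        if len(l.src) == 0 {
            l.next = -1
            return
        }
        l.next = l.src[0]
        l.src = l.src[1:]
    }

    // peek returns the primed token without consuming it.
    func (l *lookahead) peek() rune { return l.next }

    // lex consumes the primed token and primes the next one.
    func (l *lookahead) lex() rune {
        r := l.next
        l.read()
        return r
    }

    func main() {
        l := newLookahead("ab")
        fmt.Println(string(l.lex()), string(l.lex()), l.peek()) // a b -1
    }
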
@@ -352,25 +349,32 @@ func newInput(filename string, data []byte) *input {
// parse parses the input file.
func parse(file string, data []byte) (f *FileSyntax, err error) {
- in := newInput(file, data)
// The parser panics for both routine errors like syntax errors
// and for programmer bugs like array index errors.
// Turn both into error returns. Catching bug panics is
// especially important when processing many files.
+ in := newInput(file, data)
defer func() {
- if e := recover(); e != nil {
- if e == in.parseError {
- err = in.parseError
- } else {
- err = fmt.Errorf("%s:%d:%d: internal error: %v", in.filename, in.pos.Line, in.pos.LineRune, e)
- }
+ if e := recover(); e != nil && e != &in.parseErrors {
+ in.parseErrors = append(in.parseErrors, Error{
+ Filename: in.filename,
+ Pos: in.pos,
+ Err: fmt.Errorf("internal error: %v", e),
+ })
+ }
+ if err == nil && len(in.parseErrors) > 0 {
+ err = in.parseErrors
}
}()
+ // Prime the lexer by reading in the first token. It will be available
+ // in the next peek() or lex() call.
+ in.readToken()
+
// Invoke the parser.
in.parseFile()
- if in.parseError != nil {
- return nil, in.parseError
+ if len(in.parseErrors) > 0 {
+ return nil, in.parseErrors
}
in.file.Name = in.filename
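
parse converts both reported syntax errors and genuine bug panics into error returns. Error panics with &in.parseErrors as a sentinel; the deferred recover treats that exact pointer as an expected abort and anything else as an internal error. A self-contained sketch of the pattern, with simplified names (the real ErrorList holds modfile.Error values, not plain errors):

    package main

    import (
        "errors"
        "fmt"
    )

    type ErrorList []error

    type parser struct {
        errs ErrorList
    }

    // fail records an error and aborts parsing by panicking with a
    // pointer to the shared list, which acts as the sentinel.
    func (p *parser) fail(msg string) {
        p.errs = append(p.errs, errors.New(msg))
        panic(&p.errs)
    }

    func (p *parser) parse(crash bool) (err error) {
        defer func() {
            if e := recover(); e != nil && e != &p.errs {
                // Not our sentinel: a programmer bug surfaced as a panic.
                p.errs = append(p.errs, fmt.Errorf("internal error: %v", e))
            }
            if err == nil && len(p.errs) > 0 {
                err = errors.Join(p.errs...)
            }
        }()
        if crash {
            var s []int
            _ = s[3] // index out of range: simulates a bug panic
        }
        p.fail("syntax error")
        return nil
    }

    func main() {
        fmt.Println((&parser{}).parse(false)) // syntax error
        fmt.Println((&parser{}).parse(true))  // internal error: runtime error: ...
    }
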
@@ -381,14 +385,14 @@ func parse(file string, data []byte) (f *FileSyntax, err error) {
}
// Error is called to report an error.
-// The reason s is often "syntax error".
// Error does not return: it panics.
func (in *input) Error(s string) {
- if s == "syntax error" && in.lastToken != "" {
- s += " near " + in.lastToken
- }
- in.parseError = fmt.Errorf("%s:%d:%d: %v", in.filename, in.pos.Line, in.pos.LineRune, s)
- panic(in.parseError)
+ in.parseErrors = append(in.parseErrors, Error{
+ Filename: in.filename,
+ Pos: in.pos,
+ Err: errors.New(s),
+ })
+ panic(&in.parseErrors)
}
// eof reports whether the input has reached end of file.
@@ -434,46 +438,68 @@ func (in *input) readRune() int {
return int(r)
}
-type symType struct {
+type token struct {
+ kind tokenKind
pos Position
endPos Position
text string
}
+type tokenKind int
+
+const (
+ _EOF tokenKind = -(iota + 1)
+ _EOLCOMMENT
+ _IDENT
+ _STRING
+ _COMMENT
+
+ // newlines and punctuation tokens are allowed as ASCII codes.
+)
+
+func (k tokenKind) isComment() bool {
+ return k == _COMMENT || k == _EOLCOMMENT
+}
+
+// isEOL returns whether a token terminates a line.
+func (k tokenKind) isEOL() bool {
+ return k == _EOF || k == _EOLCOMMENT || k == '\n'
+}
+
// startToken marks the beginning of the next input token.
-// It must be followed by a call to endToken, once the token has
+// It must be followed by a call to endToken, once the token's text has
// been consumed using readRune.
-func (in *input) startToken(sym *symType) {
- in.token = in.remaining
- sym.text = ""
- sym.pos = in.pos
+func (in *input) startToken() {
+ in.tokenStart = in.remaining
+ in.token.text = ""
+ in.token.pos = in.pos
}
// endToken marks the end of an input token.
-// It records the actual token string in sym.text if the caller
-// has not done that already.
-func (in *input) endToken(sym *symType) {
- if sym.text == "" {
- tok := string(in.token[:len(in.token)-len(in.remaining)])
- sym.text = tok
- in.lastToken = sym.text
- }
- sym.endPos = in.pos
+// It records the actual token string in tok.text.
+func (in *input) endToken(kind tokenKind) {
+ in.token.kind = kind
+ text := string(in.tokenStart[:len(in.tokenStart)-len(in.remaining)])
+ in.token.text = text
+ in.token.endPos = in.pos
+}
+
+// peek returns the kind of the next token returned by lex.

+func (in *input) peek() tokenKind {
+ return in.token.kind
}
// lex is called from the parser to obtain the next input token.
-// It returns the token value (either a rune like '+' or a symbolic token _FOR)
-// and sets val to the data associated with the token.
-// For all our input tokens, the associated data is
-// val.Pos (the position where the token begins)
-// and val.Token (the input string corresponding to the token).
-func (in *input) lex(sym *symType) int {
+func (in *input) lex() token {
+ tok := in.token
+ in.readToken()
+ return tok
+}
+
+// readToken lexes the next token from the text and stores it in in.token.
+func (in *input) readToken() {
// Skip past spaces, stopping at non-space or EOF.
- countNL := 0 // number of newlines we've skipped past
for !in.eof() {
- // Skip over spaces. Count newlines so we can give the parser
- // information about where top-level blank lines are,
- // for top-level comment assignment.
c := in.peekRune()
if c == ' ' || c == '\t' || c == '\r' {
in.readRune()
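
The new tokenKind constants are all negative, so every non-negative value remains free to carry a raw punctuation rune such as '(' or '\n' as its own kind. A standalone copy of the scheme, for illustration only:

    package main

    import "fmt"

    type tokenKind int

    // Symbolic kinds count down from -1; punctuation tokens reuse their
    // ASCII codes directly, so the two ranges can never collide.
    const (
        _EOF tokenKind = -(iota + 1) // -1
        _EOLCOMMENT                  // -2
        _IDENT                       // -3
        _STRING                      // -4
        _COMMENT                     // -5
    )

    // isEOL reports whether a token terminates a line, mixing symbolic
    // kinds with the raw '\n' rune in one comparison.
    func (k tokenKind) isEOL() bool {
        return k == _EOF || k == _EOLCOMMENT || k == '\n'
    }

    func main() {
        for _, k := range []tokenKind{_EOF, _EOLCOMMENT, '\n', '('} {
            fmt.Printf("kind %d isEOL=%v\n", k, k.isEOL())
        }
    }
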
@@ -482,7 +508,7 @@ func (in *input) lex(sym *symType) int {
// Comment runs to end of line.
if in.peekPrefix("//") {
- in.startToken(sym)
+ in.startToken()
// Is this comment the only thing on its line?
// Find the last \n before this // and see if it's all
@@ -495,30 +521,23 @@ func (in *input) lex(sym *symType) int {
// Consume comment.
for len(in.remaining) > 0 && in.readRune() != '\n' {
}
- in.endToken(sym)
-
- sym.text = strings.TrimRight(sym.text, "\n")
- in.lastToken = "comment"
// If we are at top level (not in a statement), hand the comment to
// the parser as a _COMMENT token. The grammar is written
// to handle top-level comments itself.
if !suffix {
- // Not in a statement. Tell parser about top-level comment.
- return _COMMENT
+ in.endToken(_COMMENT)
+ return
}
// Otherwise, save comment for later attachment to syntax tree.
- if countNL > 1 {
- in.comments = append(in.comments, Comment{sym.pos, "", false})
- }
- in.comments = append(in.comments, Comment{sym.pos, sym.text, suffix})
- countNL = 1
- return _EOL
+ in.endToken(_EOLCOMMENT)
+ in.comments = append(in.comments, Comment{in.token.pos, in.token.text, suffix})
+ return
}
if in.peekPrefix("/*") {
- in.Error(fmt.Sprintf("mod files must use // comments (not /* */ comments)"))
+ in.Error("mod files must use // comments (not /* */ comments)")
}
// Found non-space non-comment.
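
Standalone comments reach the parser as _COMMENT tokens and become Before comments of the following statement, while suffix comments become _EOLCOMMENT and are stashed in in.comments for assignComments to attach later. A small example against the real package, assuming the golang.org/x/mod dependency is available:

    package main

    import (
        "fmt"

        "golang.org/x/mod/modfile"
    )

    func main() {
        // The first comment is alone on its line (_COMMENT); the second
        // trails code on the same line (_EOLCOMMENT).
        src := []byte("// standalone\nmodule example.com/m // suffix\n")
        f, err := modfile.Parse("go.mod", src, nil)
        if err != nil {
            panic(err)
        }
        cg := f.Syntax.Stmt[0].Comment()
        fmt.Println(len(cg.Before), len(cg.Suffix)) // 1 1
    }
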
@@ -526,35 +545,27 @@ func (in *input) lex(sym *symType) int {
}
// Found the beginning of the next token.
- in.startToken(sym)
- defer in.endToken(sym)
+ in.startToken()
// End of file.
if in.eof() {
- in.lastToken = "EOF"
- return _EOF
+ in.endToken(_EOF)
+ return
}
// Punctuation tokens.
switch c := in.peekRune(); c {
- case '\n':
+ case '\n', '(', ')', '[', ']', '{', '}', ',':
in.readRune()
- return c
-
- case '(':
- in.readRune()
- return c
-
- case ')':
- in.readRune()
- return c
+ in.endToken(tokenKind(c))
+ return
case '"', '`': // quoted string
quote := c
in.readRune()
for {
if in.eof() {
- in.pos = sym.pos
+ in.pos = in.token.pos
in.Error("unexpected EOF in string")
}
if in.peekRune() == '\n' {
@@ -566,14 +577,14 @@ func (in *input) lex(sym *symType) int {
}
if c == '\\' && quote != '`' {
if in.eof() {
- in.pos = sym.pos
+ in.pos = in.token.pos
in.Error("unexpected EOF in string")
}
in.readRune()
}
}
- in.endToken(sym)
- return _STRING
+ in.endToken(_STRING)
+ return
}
// Checked all punctuation. Must be identifier token.
@@ -587,17 +598,23 @@ func (in *input) lex(sym *symType) int {
break
}
if in.peekPrefix("/*") {
- in.Error(fmt.Sprintf("mod files must use // comments (not /* */ comments)"))
+ in.Error("mod files must use // comments (not /* */ comments)")
}
in.readRune()
}
- return _IDENT
+ in.endToken(_IDENT)
}
// isIdent reports whether c is an identifier rune.
-// We treat nearly all runes as identifier runes.
+// We treat most printable runes as identifier runes, except for a handful of
+// ASCII punctuation characters.
func isIdent(c int) bool {
- return c != 0 && !unicode.IsSpace(rune(c))
+ switch r := rune(c); r {
+ case ' ', '(', ')', '[', ']', '{', '}', ',':
+ return false
+ default:
+ return !unicode.IsSpace(r) && unicode.IsPrint(r)
+ }
}
// Comment assignment.
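
A runnable comparison of the old and new isIdent (copies for illustration; neither is exported): the new version additionally rejects unprintable runes and the ASCII punctuation that now lexes as its own token kind.

    package main

    import (
        "fmt"
        "unicode"
    )

    func isIdentOld(c int) bool {
        return c != 0 && !unicode.IsSpace(rune(c))
    }

    func isIdentNew(c int) bool {
        switch r := rune(c); r {
        case ' ', '(', ')', '[', ']', '{', '}', ',':
            return false
        default:
            return !unicode.IsSpace(r) && unicode.IsPrint(r)
        }
    }

    func main() {
        // 'é' and '-' stay identifier runes; '(' and the unprintable
        // '\x01' were identifier runes before but are rejected now.
        for _, c := range []rune{'a', 'é', '-', '(', '\x01'} {
            fmt.Printf("%q old=%v new=%v\n", c, isIdentOld(int(c)), isIdentNew(int(c)))
        }
    }
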
@@ -668,7 +685,7 @@ func (in *input) assignComments() {
for _, x := range in.pre {
start, _ := x.Span()
if debug {
- fmt.Printf("pre %T :%d:%d #%d\n", x, start.Line, start.LineRune, start.Byte)
+ fmt.Fprintf(os.Stderr, "pre %T :%d:%d #%d\n", x, start.Line, start.LineRune, start.Byte)
}
xcom := x.Comment()
for len(line) > 0 && start.Byte >= line[0].Start.Byte {
@@ -695,7 +712,7 @@ func (in *input) assignComments() {
start, end := x.Span()
if debug {
- fmt.Printf("post %T :%d:%d #%d :%d:%d #%d\n", x, start.Line, start.LineRune, start.Byte, end.Line, end.LineRune, end.Byte)
+ fmt.Fprintf(os.Stderr, "post %T :%d:%d #%d :%d:%d #%d\n", x, start.Line, start.LineRune, start.Byte, end.Line, end.LineRune, end.Byte)
}
// Do not assign suffix comments to end of line block or whole file.
@@ -745,29 +762,29 @@ func reverseComments(list []Comment) {
func (in *input) parseFile() {
in.file = new(FileSyntax)
- var sym symType
var cb *CommentBlock
for {
- tok := in.lex(&sym)
- switch tok {
+ switch in.peek() {
case '\n':
+ in.lex()
if cb != nil {
in.file.Stmt = append(in.file.Stmt, cb)
cb = nil
}
case _COMMENT:
+ tok := in.lex()
if cb == nil {
- cb = &CommentBlock{Start: sym.pos}
+ cb = &CommentBlock{Start: tok.pos}
}
com := cb.Comment()
- com.Before = append(com.Before, Comment{Start: sym.pos, Token: sym.text})
+ com.Before = append(com.Before, Comment{Start: tok.pos, Token: tok.text})
case _EOF:
if cb != nil {
in.file.Stmt = append(in.file.Stmt, cb)
}
return
default:
- in.parseStmt(&sym)
+ in.parseStmt()
if cb != nil {
in.file.Stmt[len(in.file.Stmt)-1].Comment().Before = cb.Before
cb = nil
@@ -776,60 +793,88 @@ func (in *input) parseFile() {
}
}
-func (in *input) parseStmt(sym *symType) {
- start := sym.pos
- end := sym.endPos
- token := []string{sym.text}
+func (in *input) parseStmt() {
+ tok := in.lex()
+ start := tok.pos
+ end := tok.endPos
+ tokens := []string{tok.text}
for {
- tok := in.lex(sym)
- switch tok {
- case '\n', _EOF, _EOL:
+ tok := in.lex()
+ switch {
+ case tok.kind.isEOL():
in.file.Stmt = append(in.file.Stmt, &Line{
Start: start,
- Token: token,
+ Token: tokens,
End: end,
})
return
- case '(':
- in.file.Stmt = append(in.file.Stmt, in.parseLineBlock(start, token, sym))
- return
+
+ case tok.kind == '(':
+ if next := in.peek(); next.isEOL() {
+ // Start of block: no more tokens on this line.
+ in.file.Stmt = append(in.file.Stmt, in.parseLineBlock(start, tokens, tok))
+ return
+ } else if next == ')' {
+ rparen := in.lex()
+ if in.peek().isEOL() {
+ // Empty block.
+ in.lex()
+ in.file.Stmt = append(in.file.Stmt, &LineBlock{
+ Start: start,
+ Token: tokens,
+ LParen: LParen{Pos: tok.pos},
+ RParen: RParen{Pos: rparen.pos},
+ })
+ return
+ }
+ // '( )' in the middle of the line, not a block.
+ tokens = append(tokens, tok.text, rparen.text)
+ } else {
+ // '(' in the middle of the line, not a block.
+ tokens = append(tokens, tok.text)
+ }
+
default:
- token = append(token, sym.text)
- end = sym.endPos
+ tokens = append(tokens, tok.text)
+ end = tok.endPos
}
}
}
-func (in *input) parseLineBlock(start Position, token []string, sym *symType) *LineBlock {
+func (in *input) parseLineBlock(start Position, token []string, lparen token) *LineBlock {
x := &LineBlock{
Start: start,
Token: token,
- LParen: LParen{Pos: sym.pos},
+ LParen: LParen{Pos: lparen.pos},
}
var comments []Comment
for {
- tok := in.lex(sym)
- switch tok {
- case _EOL:
- // ignore
+ switch in.peek() {
+ case _EOLCOMMENT:
+ // Suffix comment, will be attached later by assignComments.
+ in.lex()
case '\n':
+ // Blank line. Add an empty comment to preserve it.
+ in.lex()
if len(comments) == 0 && len(x.Line) > 0 || len(comments) > 0 && comments[len(comments)-1].Token != "" {
comments = append(comments, Comment{})
}
case _COMMENT:
- comments = append(comments, Comment{Start: sym.pos, Token: sym.text})
+ tok := in.lex()
+ comments = append(comments, Comment{Start: tok.pos, Token: tok.text})
case _EOF:
in.Error(fmt.Sprintf("syntax error (unterminated block started at %s:%d:%d)", in.filename, x.Start.Line, x.Start.LineRune))
case ')':
+ rparen := in.lex()
x.RParen.Before = comments
- x.RParen.Pos = sym.pos
- tok = in.lex(sym)
- if tok != '\n' && tok != _EOF && tok != _EOL {
+ x.RParen.Pos = rparen.pos
+ if !in.peek().isEOL() {
in.Error("syntax error (expected newline after closing paren)")
}
+ in.lex()
return x
default:
- l := in.parseLine(sym)
+ l := in.parseLine()
x.Line = append(x.Line, l)
l.Comment().Before = comments
comments = nil
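
parseStmt now looks one token past '(' to decide between a block and ordinary parentheses: '(' at end of line opens a block, '( )' followed by end of line is an empty block, and anything else leaves the parens as plain tokens on the line. Exercising the common block case through the public API (assumes golang.org/x/mod is available):

    package main

    import (
        "fmt"

        "golang.org/x/mod/modfile"
    )

    func main() {
        // '(' followed by end of line starts a require block; the ')'
        // on its own line closes it.
        src := []byte("module example.com/m\n\nrequire (\n\texample.com/a v1.0.0\n)\n")
        f, err := modfile.Parse("go.mod", src, nil)
        if err != nil {
            panic(err)
        }
        for _, r := range f.Require {
            fmt.Println(r.Mod.Path, r.Mod.Version) // example.com/a v1.0.0
        }
    }
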
@@ -837,35 +882,29 @@ func (in *input) parseLineBlock(start Position, token []string, sym *symType) *L
}
}
-func (in *input) parseLine(sym *symType) *Line {
- start := sym.pos
- end := sym.endPos
- token := []string{sym.text}
+func (in *input) parseLine() *Line {
+ tok := in.lex()
+ if tok.kind.isEOL() {
+ in.Error("internal parse error: parseLine at end of line")
+ }
+ start := tok.pos
+ end := tok.endPos
+ tokens := []string{tok.text}
for {
- tok := in.lex(sym)
- switch tok {
- case '\n', _EOF, _EOL:
+ tok := in.lex()
+ if tok.kind.isEOL() {
return &Line{
Start: start,
- Token: token,
+ Token: tokens,
End: end,
InBlock: true,
}
- default:
- token = append(token, sym.text)
- end = sym.endPos
}
+ tokens = append(tokens, tok.text)
+ end = tok.endPos
}
}
-const (
- _EOF = -(1 + iota)
- _EOL
- _IDENT
- _STRING
- _COMMENT
-)
-
var (
slashSlash = []byte("//")
moduleStr = []byte("module")