From 80182d45b5d2ff86da7b6587a2a09d8924dd0a95 Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Sun, 18 Oct 2020 20:26:46 -0400 Subject: go/build: refactor per-file info & reader Make code cleaner and a bit more adaptable: instead of an ever-growing list of arguments and results for readImports, put everything in a fileInfo struct, and rename function to readGoInfo. (Not a goInfo struct because it gets used for non-Go source files as well, but that processing is much simpler.) The refactoring simplifies the embed work in the next CL, but this CL makes no semantic changes. For #41191. Change-Id: Id2de2a3b8d351adc1c919dcf79dfbe79fc3d5301 Reviewed-on: https://go-review.googlesource.com/c/go/+/243940 Trust: Russ Cox Run-TryBot: Russ Cox TryBot-Result: Go Bot Reviewed-by: Jay Conrod --- src/go/build/read.go | 81 +++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 61 insertions(+), 20 deletions(-) (limited to 'src/go/build/read.go') diff --git a/src/go/build/read.go b/src/go/build/read.go index 29b8cdc786..7c81097c33 100644 --- a/src/go/build/read.go +++ b/src/go/build/read.go @@ -7,7 +7,11 @@ package build import ( "bufio" "errors" + "fmt" + "go/ast" + "go/parser" "io" + "strconv" "unicode/utf8" ) @@ -147,15 +151,11 @@ func (r *importReader) readIdent() { // readString reads a quoted string literal from the input. // If an identifier is not present, readString records a syntax error. 
-func (r *importReader) readString(save *[]string) { +func (r *importReader) readString() { switch r.nextByte(true) { case '`': - start := len(r.buf) - 1 for r.err == nil { if r.nextByte(false) == '`' { - if save != nil { - *save = append(*save, string(r.buf[start:])) - } break } if r.eof { @@ -163,13 +163,9 @@ func (r *importReader) readString(save *[]string) { } } case '"': - start := len(r.buf) - 1 for r.err == nil { c := r.nextByte(false) if c == '"' { - if save != nil { - *save = append(*save, string(r.buf[start:])) - } break } if r.eof || c == '\n' { @@ -186,17 +182,17 @@ func (r *importReader) readString(save *[]string) { // readImport reads an import clause - optional identifier followed by quoted string - // from the input. -func (r *importReader) readImport(imports *[]string) { +func (r *importReader) readImport() { c := r.peekByte(true) if c == '.' { r.peek = 0 } else if isIdent(c) { r.readIdent() } - r.readString(imports) + r.readString() } -// readComments is like ioutil.ReadAll, except that it only reads the leading +// readComments is like io.ReadAll, except that it only reads the leading // block of comments in the file. func readComments(f io.Reader) ([]byte, error) { r := &importReader{b: bufio.NewReader(f)} @@ -208,9 +204,14 @@ func readComments(f io.Reader) ([]byte, error) { return r.buf, r.err } -// readImports is like ioutil.ReadAll, except that it expects a Go file as input -// and stops reading the input once the imports have completed. -func readImports(f io.Reader, reportSyntaxError bool, imports *[]string) ([]byte, error) { +// readGoInfo expects a Go file as input and reads the file up to and including the import section. +// It records what it learned in *info. +// If info.fset is non-nil, readGoInfo parses the file and sets info.parsed, info.parseErr, +// and info.imports. +// +// It only returns an error if there are problems reading the file, +// not for syntax errors in the file itself. 
+func readGoInfo(f io.Reader, info *fileInfo) error { r := &importReader{b: bufio.NewReader(f)} r.readKeyword("package") @@ -220,28 +221,68 @@ func readImports(f io.Reader, reportSyntaxError bool, imports *[]string) ([]byte if r.peekByte(true) == '(' { r.nextByte(false) for r.peekByte(true) != ')' && r.err == nil { - r.readImport(imports) + r.readImport() } r.nextByte(false) } else { - r.readImport(imports) + r.readImport() } } + info.header = r.buf + // If we stopped successfully before EOF, we read a byte that told us we were done. // Return all but that last byte, which would cause a syntax error if we let it through. if r.err == nil && !r.eof { - return r.buf[:len(r.buf)-1], nil + info.header = r.buf[:len(r.buf)-1] } // If we stopped for a syntax error, consume the whole file so that // we are sure we don't change the errors that go/parser returns. - if r.err == errSyntax && !reportSyntaxError { + if r.err == errSyntax { r.err = nil for r.err == nil && !r.eof { r.readByte() } + info.header = r.buf + } + if r.err != nil { + return r.err } - return r.buf, r.err + if info.fset == nil { + return nil + } + + // Parse file header & record imports. 
+ info.parsed, info.parseErr = parser.ParseFile(info.fset, info.name, info.header, parser.ImportsOnly|parser.ParseComments) + if info.parseErr != nil { + return nil + } + + for _, decl := range info.parsed.Decls { + d, ok := decl.(*ast.GenDecl) + if !ok { + continue + } + for _, dspec := range d.Specs { + spec, ok := dspec.(*ast.ImportSpec) + if !ok { + continue + } + quoted := spec.Path.Value + path, err := strconv.Unquote(quoted) + if err != nil { + return fmt.Errorf("parser returned invalid quoted string: <%s>", quoted) + } + + doc := spec.Doc + if doc == nil && len(d.Specs) == 1 { + doc = d.Doc + } + info.imports = append(info.imports, fileImport{path, spec.Pos(), doc}) + } + } + + return nil } -- cgit v1.2.1 From 4c7a18d74ae4d574313c01e95de66be06e3a3e82 Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Mon, 19 Oct 2020 07:46:09 -0400 Subject: go/build: recognize and report //go:embed lines The code in this CL does the work of looking for the "embed" import and then finding and recording the arguments to //go:embed lines in Go source files. The go command will use this information to prepare information about embedded files to pass to the compiler. The tests of the Package fields end up being end-to-end via the go command (added in the CL with the go command changes), like all the other Package fields. For #41191. 
Change-Id: I0c87b71ca809c0031603cc403c030d3088299e6b Reviewed-on: https://go-review.googlesource.com/c/go/+/243941 Trust: Russ Cox Trust: Jay Conrod Run-TryBot: Russ Cox Reviewed-by: Jay Conrod --- src/go/build/read.go | 189 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 188 insertions(+), 1 deletion(-) (limited to 'src/go/build/read.go') diff --git a/src/go/build/read.go b/src/go/build/read.go index 7c81097c33..6806a51c24 100644 --- a/src/go/build/read.go +++ b/src/go/build/read.go @@ -12,6 +12,8 @@ import ( "go/parser" "io" "strconv" + "strings" + "unicode" "unicode/utf8" ) @@ -61,6 +63,29 @@ func (r *importReader) readByte() byte { return c } +// readByteNoBuf is like readByte but doesn't buffer the byte. +// It exhausts r.buf before reading from r.b. +func (r *importReader) readByteNoBuf() byte { + if len(r.buf) > 0 { + c := r.buf[0] + r.buf = r.buf[1:] + return c + } + c, err := r.b.ReadByte() + if err == nil && c == 0 { + err = errNUL + } + if err != nil { + if err == io.EOF { + r.eof = true + } else if r.err == nil { + r.err = err + } + c = 0 + } + return c +} + // peekByte returns the next byte from the input reader but does not advance beyond it. // If skipSpace is set, peekByte skips leading spaces and comments. func (r *importReader) peekByte(skipSpace bool) byte { @@ -121,6 +146,74 @@ func (r *importReader) nextByte(skipSpace bool) byte { return c } +var goEmbed = []byte("go:embed") + +// findEmbed advances the input reader to the next //go:embed comment. +// It reports whether it found a comment. +// (Otherwise it found an error or EOF.) +func (r *importReader) findEmbed(first bool) bool { + // The import block scan stopped after a non-space character, + // so the reader is not at the start of a line on the first call. + // After that, each //go:embed extraction leaves the reader + // at the end of a line. 
+ startLine := !first + var c byte + for r.err == nil && !r.eof { + c = r.readByteNoBuf() + Reswitch: + switch c { + default: + startLine = false + + case '\n': + startLine = true + + case ' ', '\t': + // leave startLine alone + + case '/': + c = r.readByteNoBuf() + switch c { + default: + startLine = false + goto Reswitch + + case '*': + var c1 byte + for (c != '*' || c1 != '/') && r.err == nil { + if r.eof { + r.syntaxError() + } + c, c1 = c1, r.readByteNoBuf() + } + startLine = false + + case '/': + if startLine { + // Try to read this as a //go:embed comment. + for i := range goEmbed { + c = r.readByteNoBuf() + if c != goEmbed[i] { + goto SkipSlashSlash + } + } + c = r.readByteNoBuf() + if c == ' ' || c == '\t' { + // Found one! + return true + } + } + SkipSlashSlash: + for c != '\n' && r.err == nil && !r.eof { + c = r.readByteNoBuf() + } + startLine = true + } + } + } + return false +} + // readKeyword reads the given keyword from the input. // If the keyword is not present, readKeyword records a syntax error. func (r *importReader) readKeyword(kw string) { @@ -207,7 +300,7 @@ func readComments(f io.Reader) ([]byte, error) { // readGoInfo expects a Go file as input and reads the file up to and including the import section. // It records what it learned in *info. // If info.fset is non-nil, readGoInfo parses the file and sets info.parsed, info.parseErr, -// and info.imports. +// info.imports, info.embeds, and info.embedErr. // // It only returns an error if there are problems reading the file, // not for syntax errors in the file itself. 
@@ -260,6 +353,7 @@ func readGoInfo(f io.Reader, info *fileInfo) error { return nil } + hasEmbed := false for _, decl := range info.parsed.Decls { d, ok := decl.(*ast.GenDecl) if !ok { @@ -275,6 +369,9 @@ func readGoInfo(f io.Reader, info *fileInfo) error { if err != nil { return fmt.Errorf("parser returned invalid quoted string: <%s>", quoted) } + if path == "embed" { + hasEmbed = true + } doc := spec.Doc if doc == nil && len(d.Specs) == 1 { @@ -284,5 +381,95 @@ func readGoInfo(f io.Reader, info *fileInfo) error { } } + // If the file imports "embed", + // we have to look for //go:embed comments + // in the remainder of the file. + // The compiler will enforce the mapping of comments to + // declared variables. We just need to know the patterns. + // If there were //go:embed comments earlier in the file + // (near the package statement or imports), the compiler + // will reject them. They can be (and have already been) ignored. + if hasEmbed { + var line []byte + for first := true; r.findEmbed(first); first = false { + line = line[:0] + for { + c := r.readByteNoBuf() + if c == '\n' || r.err != nil || r.eof { + break + } + line = append(line, c) + } + // Add args if line is well-formed. + // Ignore badly-formed lines - the compiler will report them when it finds them, + // and we can pretend they are not there to help go list succeed with what it knows. + args, err := parseGoEmbed(string(line)) + if err == nil { + info.embeds = append(info.embeds, args...) + } + } + } + return nil } + +// parseGoEmbed parses the text following "//go:embed" to extract the glob patterns. +// It accepts unquoted space-separated patterns as well as double-quoted and back-quoted Go strings. +// There is a copy of this code in cmd/compile/internal/gc/noder.go as well. 
// parseGoEmbed parses the text following "//go:embed" to extract the glob patterns.
// It accepts unquoted space-separated patterns as well as double-quoted and back-quoted Go strings.
// There is a copy of this code in cmd/compile/internal/gc/noder.go as well.
//
// Arguments are separated by white space (as defined by unicode.IsSpace):
//
//   - An unquoted argument runs up to the next white space and is used verbatim.
//   - A back-quoted argument runs up to the next back quote and is used verbatim.
//   - A double-quoted argument is decoded with strconv.Unquote, so the usual
//     Go escape sequences (including \") are honored.
//
// Every argument must be followed by white space or the end of the text.
// parseGoEmbed returns a nil list and a non-nil error if a quoted argument is
// unterminated or malformed, or if a quoted argument is immediately followed
// by a non-space character. Empty or all-space input yields a nil list and a
// nil error.
func parseGoEmbed(args string) ([]string, error) {
	var list []string
	for args = strings.TrimSpace(args); args != ""; args = strings.TrimSpace(args) {
		var path string
		switch args[0] {
		default:
			// Unquoted pattern: everything up to the next white space,
			// or the rest of the text if there is none.
			i := strings.IndexFunc(args, unicode.IsSpace)
			if i < 0 {
				i = len(args)
			}
			path, args = args[:i], args[i:]

		case '`':
			// Raw string: no escapes, so the next back quote ends it.
			i := strings.IndexByte(args[1:], '`')
			if i < 0 {
				return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
			}
			path, args = args[1:1+i], args[1+i+1:]

		case '"':
			// Interpreted string: locate the closing quote, stepping over
			// backslash escapes so that \" does not terminate the literal.
			end := -1
			for i := 1; i < len(args); i++ {
				if args[i] == '\\' {
					i++ // skip the escaped byte
					continue
				}
				if args[i] == '"' {
					end = i
					break
				}
			}
			if end < 0 {
				return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
			}
			q, err := strconv.Unquote(args[:end+1])
			if err != nil {
				return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args[:end+1])
			}
			path, args = q, args[end+1:]
		}

		// Whatever follows the argument must be white space (or nothing);
		// this rejects input such as "a"b where two arguments run together.
		if args != "" {
			r, _ := utf8.DecodeRuneInString(args)
			if !unicode.IsSpace(r) {
				return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
			}
		}
		list = append(list, path)
	}
	return list, nil
}