build_read.go

     1  // Copyright 2012 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
// This file is a lightly modified copy of go/build/read.go with unused parts
// removed.
     7  
     8  package modindex
     9  
    10  import (
    11  	"bufio"
    12  	"bytes"
    13  	"errors"
    14  	"fmt"
    15  	"go/ast"
    16  	"go/build"
    17  	"go/parser"
    18  	"go/scanner"
    19  	"go/token"
    20  	"io"
    21  	"strconv"
    22  	"strings"
    23  	"unicode"
    24  	"unicode/utf8"
    25  )
    26  
// importReader reads Go source one byte at a time on behalf of readComments
// and readGoInfo, keeping every byte it consumes in buf.
type importReader struct {
	b    *bufio.Reader  // buffered input; newImportReader drops a leading UTF-8 BOM from it
	buf  []byte         // every byte read from b so far, in input order
	peek byte           // byte cached by peekByte and not yet consumed; 0 when there is none
	err  error          // first error recorded: a read error, errNUL or errSyntax (never io.EOF)
	eof  bool           // set once b has reported io.EOF
	nerr int            // peekByte calls made after err was set; peekByte panics past 10000
	pos  token.Position // set by newImportReader to the file name, line 1, column 1
}
    36  
// bom is the UTF-8 encoding of the byte order mark (U+FEFF).
var bom = []byte{0xef, 0xbb, 0xbf}
    38  
    39  func newImportReader(name string, r io.Reader) *importReader {
    40  	b := bufio.NewReader(r)
    41  	// Remove leading UTF-8 BOM.
    42  	// Per https://golang.org/ref/spec#Source_code_representation:
    43  	// a compiler may ignore a UTF-8-encoded byte order mark (U+FEFF)
    44  	// if it is the first Unicode code point in the source text.
    45  	if leadingBytes, err := b.Peek(3); err == nil && bytes.Equal(leadingBytes, bom) {
    46  		b.Discard(3)
    47  	}
    48  	return &importReader{
    49  		b: b,
    50  		pos: token.Position{
    51  			Filename: name,
    52  			Line:     1,
    53  			Column:   1,
    54  		},
    55  	}
    56  }
    57  
    58  func isIdent(c byte) bool {
    59  	return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' || c == '_' || c >= utf8.RuneSelf
    60  }
    61  
// Errors that the import reader records in importReader.err.
var (
	// errSyntax is the error recorded by importReader.syntaxError.
	errSyntax = errors.New("syntax error")
	// errNUL is the error recorded by importReader.readByte for a zero byte.
	errNUL    = errors.New("unexpected NUL in input")
)
    66  
    67  // syntaxError records a syntax error, but only if an I/O error has not already been recorded.
    68  func (r *importReader) syntaxError() {
    69  	if r.err == nil {
    70  		r.err = errSyntax
    71  	}
    72  }
    73  
    74  // readByte reads the next byte from the input, saves it in buf, and returns it.
    75  // If an error occurs, readByte records the error in r.err and returns 0.
    76  func (r *importReader) readByte() byte {
    77  	c, err := r.b.ReadByte()
    78  	if err == nil {
    79  		r.buf = append(r.buf, c)
    80  		if c == 0 {
    81  			err = errNUL
    82  		}
    83  	}
    84  	if err != nil {
    85  		if err == io.EOF {
    86  			r.eof = true
    87  		} else if r.err == nil {
    88  			r.err = err
    89  		}
    90  		c = 0
    91  	}
    92  	return c
    93  }
    94  
    95  // readRest reads the entire rest of the file into r.buf.
    96  func (r *importReader) readRest() {
    97  	for {
    98  		if len(r.buf) == cap(r.buf) {
    99  			// Grow the buffer
   100  			r.buf = append(r.buf, 0)[:len(r.buf)]
   101  		}
   102  		n, err := r.b.Read(r.buf[len(r.buf):cap(r.buf)])
   103  		r.buf = r.buf[:len(r.buf)+n]
   104  		if err != nil {
   105  			if err == io.EOF {
   106  				r.eof = true
   107  			} else if r.err == nil {
   108  				r.err = err
   109  			}
   110  			break
   111  		}
   112  	}
   113  }
   114  
// peekByte returns the next byte from the input reader but does not advance beyond it.
// If skipSpace is set, peekByte skips leading spaces and comments.
//
// The byte it returns is cached in r.peek, so the next peekByte or nextByte
// call starts from the same byte. If an error is already recorded in r.err
// on entry, peekByte returns 0 without reading. It also returns 0 whenever
// readByte does (end of input, read error, NUL byte).
//
// With skipSpace set, a '/' that does not start a comment and a block comment
// still open at end of input are both recorded as syntax errors.
//
// peekByte panics once it has been called more than 10000 times after an
// error was recorded, to stop a caller that keeps looping on a failed reader.
func (r *importReader) peekByte(skipSpace bool) byte {
	if r.err != nil {
		// Already failed: count the call and report 0 without touching the input.
		if r.nerr++; r.nerr > 10000 {
			panic("go/build: import reader looping")
		}
		return 0
	}

	// Use r.peek as first input byte.
	// Don't just return r.peek here: it might have been left by peekByte(false)
	// and this might be peekByte(true).
	c := r.peek
	if c == 0 {
		c = r.readByte()
	}
	for r.err == nil && !r.eof {
		if skipSpace {
			// For the purposes of this reader, semicolons are never necessary to
			// understand the input and are treated as spaces.
			switch c {
			case ' ', '\f', '\t', '\r', '\n', ';':
				c = r.readByte()
				continue

			case '/':
				c = r.readByte()
				if c == '/' {
					// Line comment: read up to the newline, or stop on EOF or error.
					for c != '\n' && r.err == nil && !r.eof {
						c = r.readByte()
					}
				} else if c == '*' {
					// Block comment: c and c1 hold the last two bytes read;
					// the loop ends once they are '*' followed by '/'.
					var c1 byte
					for (c != '*' || c1 != '/') && r.err == nil {
						if r.eof {
							// Input ended inside the comment.
							r.syntaxError()
						}
						c, c1 = c1, r.readByte()
					}
				} else {
					// A lone '/' cannot start a token this reader accepts.
					r.syntaxError()
				}
				// Read the byte after the comment and look at it again.
				c = r.readByte()
				continue
			}
		}
		// Either not skipping, or c is neither space nor the start of a comment.
		break
	}
	r.peek = c
	return r.peek
}
   167  
   168  // nextByte is like peekByte but advances beyond the returned byte.
   169  func (r *importReader) nextByte(skipSpace bool) byte {
   170  	c := r.peekByte(skipSpace)
   171  	r.peek = 0
   172  	return c
   173  }
   174  
   175  // readKeyword reads the given keyword from the input.
   176  // If the keyword is not present, readKeyword records a syntax error.
   177  func (r *importReader) readKeyword(kw string) {
   178  	r.peekByte(true)
   179  	for i := 0; i < len(kw); i++ {
   180  		if r.nextByte(false) != kw[i] {
   181  			r.syntaxError()
   182  			return
   183  		}
   184  	}
   185  	if isIdent(r.peekByte(false)) {
   186  		r.syntaxError()
   187  	}
   188  }
   189  
   190  // readIdent reads an identifier from the input.
   191  // If an identifier is not present, readIdent records a syntax error.
   192  func (r *importReader) readIdent() {
   193  	c := r.peekByte(true)
   194  	if !isIdent(c) {
   195  		r.syntaxError()
   196  		return
   197  	}
   198  	for isIdent(r.peekByte(false)) {
   199  		r.peek = 0
   200  	}
   201  }
   202  
   203  // readString reads a quoted string literal from the input.
   204  // If an identifier is not present, readString records a syntax error.
   205  func (r *importReader) readString() {
   206  	switch r.nextByte(true) {
   207  	case '`':
   208  		for r.err == nil {
   209  			if r.nextByte(false) == '`' {
   210  				break
   211  			}
   212  			if r.eof {
   213  				r.syntaxError()
   214  			}
   215  		}
   216  	case '"':
   217  		for r.err == nil {
   218  			c := r.nextByte(false)
   219  			if c == '"' {
   220  				break
   221  			}
   222  			if r.eof || c == '\n' {
   223  				r.syntaxError()
   224  			}
   225  			if c == '\\' {
   226  				r.nextByte(false)
   227  			}
   228  		}
   229  	default:
   230  		r.syntaxError()
   231  	}
   232  }
   233  
   234  // readImport reads an import clause - optional identifier followed by quoted string -
   235  // from the input.
   236  func (r *importReader) readImport() {
   237  	c := r.peekByte(true)
   238  	if c == '.' {
   239  		r.peek = 0
   240  	} else if isIdent(c) {
   241  		r.readIdent()
   242  	}
   243  	r.readString()
   244  }
   245  
   246  // readComments is like io.ReadAll, except that it only reads the leading
   247  // block of comments in the file.
   248  func readComments(f io.Reader) ([]byte, error) {
   249  	r := newImportReader("", f)
   250  	r.peekByte(true)
   251  	if r.err == nil && !r.eof {
   252  		// Didn't reach EOF, so must have found a non-space byte. Remove it.
   253  		r.buf = r.buf[:len(r.buf)-1]
   254  	}
   255  	return r.buf, r.err
   256  }
   257  
// readGoInfo expects a Go file as input and reads the file up to and including the import section.
// It records what it learned in *info.
// If info.fset is non-nil, readGoInfo parses the file and sets info.parsed, info.parseErr,
// info.imports and info.embeds.
//
// info.name is used as the file name. On return info.header holds the bytes
// that were read: the file up to the end of the import section, or the whole
// file if the reader stopped on a syntax error. When info.fset is non-nil,
// "//go:" comments that precede the package clause are appended to
// info.directives.
//
// It only returns an error if there are problems reading the file,
// not for syntax errors in the file itself. The one other error it returns is
// for an import path that the parser accepted but strconv.Unquote rejects.
func readGoInfo(f io.Reader, info *fileInfo) error {
	r := newImportReader(info.name, f)

	r.readKeyword("package")
	r.readIdent()
	// Every import declaration starts with 'i'; any other byte ends the import section.
	for r.peekByte(true) == 'i' {
		r.readKeyword("import")
		if r.peekByte(true) == '(' {
			// Parenthesized group: consume '(', the specs, then ')'.
			r.nextByte(false)
			for r.peekByte(true) != ')' && r.err == nil {
				r.readImport()
			}
			r.nextByte(false)
		} else {
			r.readImport()
		}
	}

	info.header = r.buf

	// If we stopped successfully before EOF, we read a byte that told us we were done.
	// Return all but that last byte, which would cause a syntax error if we let it through.
	if r.err == nil && !r.eof {
		info.header = r.buf[:len(r.buf)-1]
	}

	// If we stopped for a syntax error, consume the whole file so that
	// we are sure we don't change the errors that go/parser returns.
	if r.err == errSyntax {
		r.err = nil
		r.readRest()
		info.header = r.buf
	}
	// Anything still recorded here is a read error or errNUL.
	if r.err != nil {
		return r.err
	}

	// Without a file set the caller only wants info.header.
	if info.fset == nil {
		return nil
	}

	// Parse file header & record imports.
	info.parsed, info.parseErr = parser.ParseFile(info.fset, info.name, info.header, parser.ImportsOnly|parser.ParseComments)
	if info.parseErr != nil {
		// Parse errors are reported through info.parseErr, not the return value.
		return nil
	}

	hasEmbed := false
	for _, decl := range info.parsed.Decls {
		d, ok := decl.(*ast.GenDecl)
		if !ok {
			continue
		}
		for _, dspec := range d.Specs {
			spec, ok := dspec.(*ast.ImportSpec)
			if !ok {
				continue
			}
			quoted := spec.Path.Value
			path, err := strconv.Unquote(quoted)
			if err != nil {
				return fmt.Errorf("parser returned invalid quoted string: <%s>", quoted)
			}
			if !isValidImport(path) {
				// The parser used to return a parse error for invalid import paths, but
				// no longer does, so check for and create the error here instead.
				info.parseErr = scanner.Error{Pos: info.fset.Position(spec.Pos()), Msg: "invalid import path: " + path}
				info.imports = nil
				return nil
			}
			if path == "embed" {
				hasEmbed = true
			}

			// A declaration with a single spec lends its doc comment to that spec.
			doc := spec.Doc
			if doc == nil && len(d.Specs) == 1 {
				doc = d.Doc
			}
			info.imports = append(info.imports, fileImport{path, spec.Pos(), doc})
		}
	}

	// Extract directives.
	for _, group := range info.parsed.Comments {
		// Only comment groups that start before the package clause are considered.
		if group.Pos() >= info.parsed.Package {
			break
		}
		for _, c := range group.List {
			if strings.HasPrefix(c.Text, "//go:") {
				info.directives = append(info.directives, build.Directive{Text: c.Text, Pos: info.fset.Position(c.Slash)})
			}
		}
	}

	// If the file imports "embed",
	// we have to look for //go:embed comments
	// in the remainder of the file.
	// The compiler will enforce the mapping of comments to
	// declared variables. We just need to know the patterns.
	// If there were //go:embed comments earlier in the file
	// (near the package statement or imports), the compiler
	// will reject them. They can be (and have already been) ignored.
	if hasEmbed {
		// NOTE(review): an error that readRest records in r.err here is never
		// returned to the caller — confirm that this is intended.
		r.readRest()
		// Scan the complete contents of r.buf with a separate file set.
		fset := token.NewFileSet()
		file := fset.AddFile(r.pos.Filename, -1, len(r.buf))
		var sc scanner.Scanner
		sc.Init(file, r.buf, nil, scanner.ScanComments)
		for {
			pos, tok, lit := sc.Scan()
			if tok == token.EOF {
				break
			}
			if tok == token.COMMENT && strings.HasPrefix(lit, "//go:embed") {
				// Ignore badly-formed lines - the compiler will report them when it finds them,
				// and we can pretend they are not there to help go list succeed with what it knows.
				embs, err := parseGoEmbed(fset, pos, lit)
				if err == nil {
					info.embeds = append(info.embeds, embs...)
				}
			}
		}
	}

	return nil
}
   391  
   392  // isValidImport checks if the import is a valid import using the more strict
   393  // checks allowed by the implementation restriction in https://go.dev/ref/spec#Import_declarations.
   394  // It was ported from the function of the same name that was removed from the
   395  // parser in CL 424855, when the parser stopped doing these checks.
   396  func isValidImport(s string) bool {
   397  	const illegalChars = `!"#$%&'()*,:;<=>?[\]^{|}` + "`\uFFFD"
   398  	for _, r := range s {
   399  		if !unicode.IsGraphic(r) || unicode.IsSpace(r) || strings.ContainsRune(illegalChars, r) {
   400  			return false
   401  		}
   402  	}
   403  	return s != ""
   404  }
   405  
   406  // parseGoEmbed parses a "//go:embed" to extract the glob patterns.
   407  // It accepts unquoted space-separated patterns as well as double-quoted and back-quoted Go strings.
   408  // This must match the behavior of cmd/compile/internal/noder.go.
   409  func parseGoEmbed(fset *token.FileSet, pos token.Pos, comment string) ([]fileEmbed, error) {
   410  	dir, ok := ast.ParseDirective(pos, comment)
   411  	if !ok || dir.Tool != "go" || dir.Name != "embed" {
   412  		return nil, nil
   413  	}
   414  	args, err := dir.ParseArgs()
   415  	if err != nil {
   416  		return nil, err
   417  	}
   418  	var list []fileEmbed
   419  	for _, arg := range args {
   420  		list = append(list, fileEmbed{arg.Arg, fset.Position(arg.Pos)})
   421  	}
   422  	return list, nil
   423  }
   424
View as plain text