// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package packages

// See doc.go for package documentation and implementation notes.

import (
	"context"
	"fmt"
	"go/ast"
	"go/parser"
	"go/token"
	"go/types"
	"log"
	"os"
	"sync"

	"golang.org/x/tools/go/gcexportdata"
)

// An Options holds the options for a call to Metadata, TypeCheck
// or WholeProgram to load Go packages from source code.
//
// Those functions accept a nil *Options, which is equivalent
// to a pointer to the zero value.
type Options struct {
	// Fset is the file set for the parser
	// to use when loading the program.
	// If nil, TypeCheck and WholeProgram create a new FileSet.
	Fset *token.FileSet

	// Context may be used to cancel a pending call.
	// Context is optional; the default behavior
	// is equivalent to context.Background().
	Context context.Context

	// GOPATH is the effective value of the GOPATH environment variable.
	// If unset, the default is os.Getenv("GOPATH").
	//
	// TODO(adonovan): this is primarily needed for testing, but it
	// is not a build-system portable concept.
	// Replace with flags/cwd/environ pass-through.
	GOPATH string

	// DisableCgo disables cgo-processing of files that import "C",
	// and removes the 'cgo' build tag, which may affect source file selection.
	// By default, TypeCheck and WholeProgram queries process such
	// files, and the resulting Package.Srcs describes the generated
	// files seen by the compiler.
	DisableCgo bool

	// TypeChecker contains options relating to the type checker,
	// such as the Sizes function.
	//
	// The following fields of TypeChecker are overridden by the loader:
	// - Importer: the loader provides the import machinery.
	// - Error: errors are reported to the Error function, below.
	// - FakeImportC: forced to true when DisableCgo is set.
	TypeChecker types.Config

	// Error is called for each error encountered during package loading.
	// Implementations must be concurrency-safe.
	// If nil, the default implementation prints errors to os.Stderr.
	// Errors are additionally recorded in each Package.
	// Error is not used in Metadata mode.
	Error func(error)

	// ParseFile is called to read and parse each file.
	// Implementations must be concurrency-safe.
	// If nil, the default implementation uses parser.ParseFile
	// with the AllErrors and ParseComments modes.
	// A client may supply a custom implementation to,
	// for example, provide alternative contents for files
	// modified in a text editor but unsaved,
	// or to selectively eliminate unwanted function
	// bodies to reduce the load on the type-checker.
	// ParseFile is not used in Metadata mode.
	ParseFile func(fset *token.FileSet, filename string) (*ast.File, error)
}

// Metadata reports the metadata for the Go packages denoted by
// patterns, without parsing or type-checking any source file.
//
// The result holds the roots of the "import graph": a directed
// acyclic graph whose edges are the Package.Imports maps and whose
// transitive closure covers every dependency of the initial packages.
//
// Patterns use the notation of the underlying build system
// (currently "go build"; others, such as Bazel, may follow).
// The mapping from patterns to packages is complex and many-to-many
// in general, so clients should not try to infer it.
//
// Metadata returns an error if the patterns could not be expanded to
// a set of packages, if they denote no packages, or if the dependency
// graph contains a cycle. In all other cases it returns the loaded
// Packages even when some of them had errors; those errors are
// recorded in each Package.
//
// A nil o is treated as the zero Options.
func Metadata(o *Options, patterns ...string) ([]*Package, error) {
	var opts Options
	if o != nil {
		opts = *o
	}
	ld := loader{mode: metadata, Options: opts}
	return ld.load(patterns...)
}

// TypeCheck reports metadata, syntax trees, and type information
// for the Go packages denoted by patterns.
//
// On top of everything Metadata provides, TypeCheck loads, parses,
// and type-checks each requested package. Such packages are called
// "source packages"; their Package values carry complete syntax and
// type information. Because of limitations in the type checker,
// every package that transitively depends on a source package is
// itself loaded from source.
//
// An immediate dependency of a source package that is not a source
// package gets its type information from export data files written
// by the Go compiler, which may require a partial build. Its Package
// has complete package-level type information (types.Package) but
// no syntax trees.
//
// All other packages, namely the indirect dependencies of the
// packages with complete export data, may carry only partial
// package-level type information, or none.
//
// Example: in the import graph A->B->C->D->E with A and C requested,
//   - A, B, and C are source packages
//     (B because it transitively depends on C, a source package);
//   - D is a complete export data package;
//   - E is a partial export data package.
//
// Every package has an IllTyped flag that is set when it, or any of
// its transitive dependencies, contains an error. A package whose
// IllTyped flag is clear is buildable.
//
// This mode suits compiler-like tools that analyze
// one package at a time.
//
// A nil o is treated as the zero Options.
func TypeCheck(o *Options, patterns ...string) ([]*Package, error) {
	var opts Options
	if o != nil {
		opts = *o
	}
	ld := loader{mode: typeCheck, Options: opts}
	return ld.load(patterns...)
}

// WholeProgram reports metadata, complete syntax trees, and complete
// type information for the Go packages denoted by patterns together
// with the entire transitive closure of their dependencies.
// Every package in the resulting import graph is a source package,
// in the sense defined by the documentation for TypeCheck.
//
// This mode suits whole-program analysis tools.
//
// A nil o is treated as the zero Options.
func WholeProgram(o *Options, patterns ...string) ([]*Package, error) {
	var opts Options
	if o != nil {
		opts = *o
	}
	ld := loader{mode: wholeProgram, Options: opts}
	return ld.load(patterns...)
}

// Package holds the metadata, and optionally syntax trees
// and type information, for a single Go package.
//
// The import graph, Imports, forms a directed acyclic graph over Packages.
// (Cycle-forming edges are not inserted into the map.)
//
// A Package is not mutated once returned.
type Package struct {
	// ID is a unique, opaque identifier for a package,
	// as determined by the underlying workspace.
	//
	// IDs distinguish packages that have the same PkgPath, such as
	// a regular package and the variant of that package built
	// during testing. (IDs also distinguish packages that would be
	// lumped together by the go/build API, such as a regular
	// package and its external tests.)
	//
	// Clients should not interpret the ID string as its
	// structure varies from one build system to another.
	ID string

	// PkgPath is the path of the package as understood
	// by the Go compiler and by reflect.Type.PkgPath.
	//
	// PkgPaths are unique for each package in a given executable
	// program, but are not necessarily unique within a workspace.
	// For example, an importable package (fmt) and its in-package
	// tests (fmt·test) may have the same PkgPath, but those
	// two packages are never linked together.
	PkgPath string

	// Name is the identifier appearing in the package declaration
	// at the start of each source file in this package.
	// The name of an executable is "main".
	Name string

	// IsTest indicates whether this package is a test.
	IsTest bool

	// Srcs is the list of names of this package's Go
	// source files as presented to the compiler.
	// Names aren't guaranteed to be absolute,
	// but they are openable.
	//
	// In Metadata queries, or if DisableCgo is set,
	// Srcs includes the unmodified source files even
	// if they use cgo (import "C").
	// In all other queries, Srcs contains the files
	// resulting from cgo processing.
	Srcs []string

	// OtherSrcs is the list of names of non-Go source files that the package
	// contains. This includes assembly and C source files. The names are
	// "openable" in the same sense as are Srcs above.
	OtherSrcs []string

	// Imports maps each import path to its package.
	// The keys are import paths as they appear in the source files.
	// Imports that are missing or that would form a cycle are omitted.
	Imports map[string]*Package

	// Syntax and type information (only in TypeCheck and WholeProgram modes).
	Fset     *token.FileSet // source position information
	Files    []*ast.File    // syntax trees for the package's Srcs files
	Errors   []error        // non-nil if the package had errors
	Type     *types.Package // type information about the package
	Info     *types.Info    // type-checker deductions
	IllTyped bool           // this package or a dependency has a parse or type error

	// ---- temporary state ----

	// export holds the path to the export data file
	// for this package, if mode == TypeCheck.
	// The export data file contains the package's type information
	// in a compiler-specific format; see
	// golang.org/x/tools/go/{gc,gccgo}exportdata.
	// May be the empty string if the build failed.
	export string

	// The fields below are working state of the loader:
	//
	// - imports maps each import path to the ID of the imported
	//   package; load resolves it into Imports and then clears it.
	// - importErrors records, per import path, why an import was left
	//   out of Imports (missing package or import cycle); it is cleared
	//   once the package has been type-checked.
	// - loadOnce ensures loadRecursive processes the package only once.
	// - color is the depth-first traversal state used by load
	//   (0 = new, 1 = in progress, 2 = complete).
	// - needsrc reports whether the package must be loaded from source.
	indirect      bool              // package is a dependency, not explicitly requested
	imports       map[string]string // nominal form of Imports graph
	importErrors  map[string]error  // maps each bad import to its error
	loadOnce      sync.Once
	color         uint8 // for cycle detection
	mark, needsrc bool  // used in TypeCheck mode only
}

// String returns the package's ID, which is how a *Package
// appears when it is formatted in diagnostics.
func (lpkg *Package) String() string {
	return lpkg.ID
}

// loader holds the working state of a single call to load.
type loader struct {
	// mode selects how much information is computed for each package.
	mode mode
	// cgo is the negation of Options.DisableCgo.
	// load sets it only in typeCheck and wholeProgram modes,
	// so it remains false in metadata mode.
	cgo bool
	// Options is a copy of the caller's options,
	// with defaults filled in by load.
	Options
	exportMu sync.Mutex // enforces mutual exclusion of exportdata operations
}

// The mode determines which packages are visited
// and the level of information reported about each one.
// Modes are ordered by increasing detail, so two modes may be
// compared with the ordering operators.
type mode uint8

// The constants are declared with an explicit type so that each one
// is a mode value rather than an untyped integer constant.
const (
	metadata     mode = iota // package metadata only; nothing is parsed
	typeCheck                // source for requested packages, export data for their dependencies
	wholeProgram             // source for every package in the import graph
)

// load is the shared implementation of Metadata, TypeCheck and
// WholeProgram. It fills in defaults for unset options, obtains the
// list of packages from golistPackages, materializes the import graph
// as Package.Imports, and, unless the mode is metadata, loads
// packages from source starting at the initial packages.
//
// It returns the initial packages (those not marked indirect), or an
// error if the query failed or produced no packages.
func (ld *loader) load(patterns ...string) ([]*Package, error) {
	if ld.Context == nil {
		ld.Context = context.Background()
	}

	// The following defaults matter only when source files are
	// parsed and type-checked, so they are skipped in metadata mode.
	if ld.mode > metadata {
		if ld.Fset == nil {
			ld.Fset = token.NewFileSet()
		}

		ld.cgo = !ld.DisableCgo

		if ld.Error == nil {
			ld.Error = func(e error) {
				fmt.Fprintln(os.Stderr, e)
			}
		}

		if ld.ParseFile == nil {
			ld.ParseFile = func(fset *token.FileSet, filename string) (*ast.File, error) {
				const mode = parser.AllErrors | parser.ParseComments
				return parser.ParseFile(fset, filename, nil, mode)
			}
		}
	}

	if ld.GOPATH == "" {
		ld.GOPATH = os.Getenv("GOPATH")
	}

	// Do the metadata query and partial build.
	// TODO(adonovan): support alternative build systems at this seam.
	// NOTE(review): the fourth argument is true only in typeCheck mode;
	// presumably it requests export data. Confirm against golistPackages.
	list, err := golistPackages(ld.Context, ld.GOPATH, ld.cgo, ld.mode == typeCheck, patterns)
	if err != nil {
		return nil, err
	}
	// pkgs indexes every listed package by ID so that the nominal
	// imports (path -> ID) can be resolved to *Package below.
	pkgs := make(map[string]*Package)
	var initial []*Package
	for _, pkg := range list {
		pkgs[pkg.ID] = pkg

		// Record the set of initial packages
		// corresponding to the patterns.
		if !pkg.indirect {
			initial = append(initial, pkg)

			// In typeCheck mode the requested packages are
			// always loaded from source.
			if ld.mode == typeCheck {
				pkg.needsrc = true
			}
		}
	}
	if len(pkgs) == 0 {
		return nil, fmt.Errorf("no packages to load")
	}

	// Materialize the import graph.

	const (
		white = 0 // new
		grey  = 1 // in progress
		black = 2 // complete
	)

	// visit traverses the import graph, depth-first,
	// and materializes the graph as Package.Imports.
	//
	// Valid imports are saved in the Package.Imports map.
	// Invalid imports (cycles and missing nodes) are saved in the importErrors map.
	// Thus, even in the presence of both kinds of errors, the Imports graph remains a DAG.
	//
	// visit returns whether the package is initial or has a transitive
	// dependency on an initial package. These are the only packages
	// for which we load source code in typeCheck mode.
	var stack []*Package
	var visit func(lpkg *Package) bool
	visit = func(lpkg *Package) bool {
		switch lpkg.color {
		case black:
			return lpkg.needsrc
		case grey:
			// Edges to grey nodes are rejected as import cycles
			// before visit is called on them (see below).
			panic("internal error: grey node")
		}
		lpkg.color = grey
		stack = append(stack, lpkg) // push

		imports := make(map[string]*Package)
		for importPath, id := range lpkg.imports {
			var importErr error
			imp := pkgs[id]
			if imp == nil {
				// (includes package "C" when DisableCgo)
				importErr = fmt.Errorf("missing package: %q", id)
			} else if imp.color == grey {
				// imp is on the current traversal stack, so
				// this edge would close a cycle.
				importErr = fmt.Errorf("import cycle: %s", stack)
			}
			if importErr != nil {
				if lpkg.importErrors == nil {
					lpkg.importErrors = make(map[string]error)
				}
				lpkg.importErrors[importPath] = importErr
				continue
			}

			// needsrc propagates from a dependency to its importers.
			if visit(imp) {
				lpkg.needsrc = true
			}
			imports[importPath] = imp
		}
		lpkg.imports = nil // no longer needed
		lpkg.Imports = imports

		stack = stack[:len(stack)-1] // pop
		lpkg.color = black

		return lpkg.needsrc
	}

	// For each initial package, create its import DAG.
	for _, lpkg := range initial {
		visit(lpkg)
	}

	// Load some/all packages from source, starting at
	// the initial packages (roots of the import DAG).
	// One goroutine is started per initial package; load
	// returns only after all of them have finished.
	if ld.mode != metadata {
		var wg sync.WaitGroup
		for _, lpkg := range initial {
			wg.Add(1)
			go func(lpkg *Package) {
				ld.loadRecursive(lpkg)
				wg.Done()
			}(lpkg)
		}
		wg.Wait()
	}

	return initial, nil
}

// loadRecursive loads, parses, and type-checks lpkg after doing the
// same for each of its direct dependencies, which are processed
// concurrently. Packages are therefore handled in topological order.
//
// The work for a given package is performed at most once, guarded by
// its loadOnce field; a call returns only after that work is done.
//
// Precondition: ld.mode != Metadata.
// In typeCheck mode, only needsrc packages are loaded.
func (ld *loader) loadRecursive(lpkg *Package) {
	work := func() {
		// Start one goroutine per direct dependency
		// and wait for all of them to finish.
		var deps sync.WaitGroup
		deps.Add(len(lpkg.Imports))
		for _, dep := range lpkg.Imports {
			dep := dep // each goroutine needs its own copy
			go func() {
				defer deps.Done()
				ld.loadRecursive(dep)
			}()
		}
		deps.Wait()

		// All dependencies are ready; now load the package itself.
		ld.loadPackage(lpkg)
	}
	lpkg.loadOnce.Do(work)
}

// loadPackage loads, parses, and type-checks the
// files of the specified package, if needed.
// It must be called only once per Package,
// after immediate dependencies are loaded.
// Precondition: ld.mode != Metadata.
//
// On return from the source-loading path, lpkg's Fset, Files, Type,
// Info, Errors and IllTyped fields are populated. In typeCheck mode,
// packages without needsrc are left untouched (their type information
// is read lazily from export data by importers).
func (ld *loader) loadPackage(lpkg *Package) {
	if lpkg.PkgPath == "unsafe" {
		// Fill in the blanks to avoid surprises.
		lpkg.Type = types.Unsafe
		lpkg.Fset = ld.Fset
		lpkg.Files = []*ast.File{}
		lpkg.Info = new(types.Info)
		return
	}

	if ld.mode == typeCheck && !lpkg.needsrc {
		return // not a source package
	}

	// appendError reports err to the client's Error function and
	// records it in lpkg.Errors. Any error other than a soft
	// types.Error also marks the package as ill-typed.
	hardErrors := false
	appendError := func(err error) {
		if terr, ok := err.(types.Error); ok && terr.Soft {
			// Don't mark the package as bad.
		} else {
			hardErrors = true
		}
		ld.Error(err)
		lpkg.Errors = append(lpkg.Errors, err)
	}

	files, errs := ld.parseFiles(lpkg.Srcs)
	for _, err := range errs {
		appendError(err)
	}

	lpkg.Fset = ld.Fset
	lpkg.Files = files

	// Call NewPackage directly with explicit name.
	// This avoids skew between golist and go/types when the files'
	// package declarations are inconsistent.
	lpkg.Type = types.NewPackage(lpkg.PkgPath, lpkg.Name)

	lpkg.Info = &types.Info{
		Types:      make(map[ast.Expr]types.TypeAndValue),
		Defs:       make(map[*ast.Ident]types.Object),
		Uses:       make(map[*ast.Ident]types.Object),
		Implicits:  make(map[ast.Node]types.Object),
		Scopes:     make(map[ast.Node]*types.Scope),
		Selections: make(map[*ast.SelectorExpr]*types.Selection),
	}

	// Copy the prototype types.Config as it must vary across Packages.
	tc := ld.TypeChecker // copy
	if !ld.cgo {
		tc.FakeImportC = true
	}
	// The importer resolves import paths through lpkg.Imports, so
	// that each package sees its own view of the import graph.
	tc.Importer = importerFunc(func(path string) (*types.Package, error) {
		if path == "unsafe" {
			return types.Unsafe, nil
		}

		// The imports map is keyed by import path.
		imp := lpkg.Imports[path]
		if imp == nil {
			if err := lpkg.importErrors[path]; err != nil {
				return nil, err
			}
			// There was skew between the metadata and the
			// import declarations, likely due to an edit
			// race, or because the ParseFile feature was
			// used to supply alternative file contents.
			return nil, fmt.Errorf("no metadata for %s", path)
		}
		if imp.Type != nil && imp.Type.Complete() {
			return imp.Type, nil
		}
		// Non-source dependencies in typeCheck mode get their
		// types from export data, loaded on first use.
		if ld.mode == typeCheck && !imp.needsrc {
			return ld.loadFromExportData(imp)
		}
		// A source dependency should already have complete type
		// information, since dependencies are loaded first.
		log.Fatalf("internal error: nil Pkg importing %q from %q", path, lpkg)
		panic("unreachable")
	})
	tc.Error = appendError

	// type-check
	// (The result of Files is ignored: errors reach appendError via tc.Error.)
	types.NewChecker(&tc, ld.Fset, lpkg.Type, lpkg.Info).Files(lpkg.Files)

	lpkg.importErrors = nil // no longer needed

	// If !Cgo, the type-checker uses FakeImportC mode, so
	// it doesn't invoke the importer for import "C",
	// nor report an error for the import,
	// or for any undefined C.f reference.
	// We must detect this explicitly and correctly
	// mark the package as IllTyped (by reporting an error).
	// TODO(adonovan): if these errors are annoying,
	// we could just set IllTyped quietly.
	if tc.FakeImportC {
	outer:
		for _, f := range lpkg.Files {
			for _, imp := range f.Imports {
				if imp.Path.Value == `"C"` {
					appendError(fmt.Errorf(`%s: import "C" ignored`,
						lpkg.Fset.Position(imp.Pos())))
					break outer // one error per package suffices
				}
			}
		}
	}

	// Record accumulated errors.
	// A package is also ill-typed if any direct dependency is;
	// since dependencies are processed first, this makes the
	// flag transitive.
	for _, imp := range lpkg.Imports {
		if imp.IllTyped {
			hardErrors = true
			break
		}
	}

	lpkg.IllTyped = hardErrors
}

// importerFunc adapts an ordinary function to the single-method
// types.Importer interface.
type importerFunc func(path string) (*types.Package, error)

// Import implements types.Importer by calling f with path.
func (f importerFunc) Import(path string) (*types.Package, error) {
	pkg, err := f(path)
	return pkg, err
}

// ioLimit is a counting semaphore that limits the number of
// parallel I/O calls per process to its capacity (20).
// A goroutine sends a value to acquire a slot (blocking while
// all slots are taken) and receives one to release it.
var ioLimit = make(chan bool, 20)

// parseFiles reads and parses the named Go source files concurrently
// using ld.ParseFile. It returns the syntax trees of the files that
// could be at least partially parsed, and the I/O and parse errors
// that were encountered. Both lists follow the order of filenames,
// with failed (nil) entries dropped.
//
// Because files are scanned in parallel, the token.Pos
// positions of the resulting ast.Files are not ordered.
func (ld *loader) parseFiles(filenames []string) ([]*ast.File, []error) {
	// Each goroutine writes only its own slot, so the
	// WaitGroup is the only synchronization required.
	type outcome struct {
		file *ast.File
		err  error
	}
	count := len(filenames)
	outcomes := make([]outcome, count)

	var pending sync.WaitGroup
	pending.Add(count)
	for idx, name := range filenames {
		idx, name := idx, name // each goroutine needs its own copies
		go func() {
			defer pending.Done()
			ioLimit <- true              // acquire an I/O slot
			defer func() { <-ioLimit }() // release it
			// ParseFile may return both an AST and an error.
			slot := &outcomes[idx]
			slot.file, slot.err = ld.ParseFile(ld.Fset, name)
		}()
	}
	pending.Wait()

	// Collect the non-nil results, preserving order.
	files := make([]*ast.File, 0, count)
	errs := make([]error, 0, count)
	for _, res := range outcomes {
		if res.file != nil {
			files = append(files, res.file)
		}
		if res.err != nil {
			errs = append(errs, res.err)
		}
	}
	return files, errs
}

// loadFromExportData returns type information for the specified
// package, loading it from an export data file on the first request.
//
// It returns an error if the package has no export data file, or if
// the file cannot be opened or decoded; in those cases lpkg.IllTyped
// is left set. It terminates the program if lpkg has no PkgPath.
func (ld *loader) loadFromExportData(lpkg *Package) (*types.Package, error) {
	if lpkg.PkgPath == "" {
		log.Fatalf("internal error: Package %s has no PkgPath", lpkg)
	}

	// Because gcexportdata.Read has the potential to create or
	// modify the types.Package for each node in the transitive
	// closure of dependencies of lpkg, all exportdata operations
	// must be sequential. (Finer-grained locking would require
	// changes to the gcexportdata API.)
	//
	// The exportMu lock guards the Package.Type field and the
	// types.Package it points to, for each Package in the graph.
	//
	// Not all accesses to Package.Type need to be protected by exportMu:
	// graph ordering ensures that direct dependencies of source
	// packages are fully loaded before the importer reads their Type field.
	ld.exportMu.Lock()
	defer ld.exportMu.Unlock()

	if tpkg := lpkg.Type; tpkg != nil && tpkg.Complete() {
		return tpkg, nil // cache hit
	}

	// Assume failure until the export data has been read successfully.
	lpkg.IllTyped = true // fail safe

	if lpkg.export == "" {
		// Errors while building export data will have been printed to stderr.
		return nil, fmt.Errorf("no export data file")
	}
	f, err := os.Open(lpkg.export)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	// Read gc export data.
	//
	// We don't currently support gccgo export data because all
	// underlying workspaces use the gc toolchain. (Even build
	// systems that support gccgo don't use it for workspace
	// queries.)
	r, err := gcexportdata.NewReader(f)
	if err != nil {
		return nil, fmt.Errorf("reading %s: %v", lpkg.export, err)
	}

	// Build the view.
	//
	// The gcexportdata machinery has no concept of package ID.
	// It identifies packages by their PkgPath, which although not
	// globally unique is unique within the scope of one invocation
	// of the linker, type-checker, or gcexportdata.
	//
	// So, we must build a PkgPath-keyed view of the global
	// (conceptually ID-keyed) cache of packages and pass it to
	// gcexportdata, then copy back to the global cache any newly
	// created entries in the view map. The view must contain every
	// existing package that might possibly be mentioned by the
	// current package---its reflexive transitive closure.
	//
	// (Yes, reflexive: although loadRecursive processes source
	// packages in topological order, export data packages are
	// processed only lazily within Importer calls. In the graph
	// A->B->C, A->C where A is a source package and B and C are
	// export data packages, processing of the A->B and A->C import
	// edges may occur in either order, depending on the sequence
	// of imports within A. If B is processed first, and its export
	// data mentions C, an incomplete package for C will be created
	// before processing of C.)
	// We could do export data processing in topological order using
	// loadRecursive, but there's no parallelism to be gained.
	//
	// TODO(adonovan): it would be simpler and more efficient
	// if the export data machinery invoked a callback to
	// get-or-create a package instead of a map.
	//
	view := make(map[string]*types.Package) // view seen by gcexportdata
	seen := make(map[*Package]bool)         // all visited packages
	var copyback []*Package                 // candidates for copying back to global cache
	var visit func(p *Package)
	visit = func(p *Package) {
		if !seen[p] {
			seen[p] = true
			if p.Type != nil {
				// Already has a types.Package: expose it to gcexportdata.
				view[p.PkgPath] = p.Type
			} else {
				// No types.Package yet: gcexportdata may create one.
				copyback = append(copyback, p)
			}
			for _, p := range p.Imports {
				visit(p)
			}
		}
	}
	visit(lpkg)

	// Parse the export data.
	// (May create/modify packages in view.)
	tpkg, err := gcexportdata.Read(r, ld.Fset, view, lpkg.PkgPath)
	if err != nil {
		return nil, fmt.Errorf("reading %s: %v", lpkg.export, err)
	}

	// For each newly created types.Package in the view,
	// save it in the main graph.
	for _, p := range copyback {
		p.Type = view[p.PkgPath] // may still be nil
	}

	lpkg.Type = tpkg
	lpkg.IllTyped = false

	return tpkg, nil
}

// All returns a newly allocated map, keyed by ID, of every package
// reachable from the initial packages through Package.Imports:
// the initial packages themselves and all their transitive dependencies.
func All(initial []*Package) map[string]*Package {
	reached := make(map[string]*Package)

	// add records pkg and, recursively, everything it imports.
	// Packages whose ID is already present are skipped.
	var add func(pkg *Package)
	add = func(pkg *Package) {
		if _, dup := reached[pkg.ID]; dup {
			return
		}
		reached[pkg.ID] = pkg
		for _, dep := range pkg.Imports {
			add(dep)
		}
	}

	for _, root := range initial {
		add(root)
	}
	return reached
}