Merge pull request #499 from sauyon/extractor-profiling

Extract diagnostic information
2026-01-31 07:12:57 +01:00 · 2021-03-19 05:36:30 -07:00
parent 104f58151c 92c5999c4d
commit d73d0f3b79
20 changed files with 6418 additions and 1270 deletions
--- a/extractor/dbscheme/dbscheme.go
+++ b/extractor/dbscheme/dbscheme.go
@@ -272,6 +272,11 @@ func IntColumn(columnName string) Column {
 	return Column{columnName, INT, false, true}
 }

+// FloatColumn constructs a column with name `columnName` holding floating point number values
+func FloatColumn(columnName string) Column {
+	return Column{columnName, FLOAT, false, true}
+}
+
 // A Table represents a database table
 type Table struct {
 	name    string
--- a/extractor/dbscheme/tables.go
+++ b/extractor/dbscheme/tables.go
@@ -120,6 +120,130 @@ xmllocations(
@xmllocatable = @xmlcharacters | @xmlelement | @xmlcomment | @xmlattribute | @xmldtd | @file | @xmlnamespace;
 `)

+// Compiler diagnostic tables
+var CompilationType = NewPrimaryKeyType("@compilation")
+
+/**
+ * An invocation of the compiler. Note that more than one file may be
+ * compiled per invocation. For example, this command compiles three
+ * source files:
+ *
+ *   go build a.go b.go c.go
+ *
+ * The `id` simply identifies the invocation, while `cwd` is the working
+ * directory from which the compiler was invoked.
+ */
+var CompilationsTable = NewTable("compilations",
+	EntityColumn(CompilationType, "id").Key(),
+	StringColumn("cwd"),
+)
+
+/**
+ * The arguments that were passed to the extractor for a compiler
+ * invocation. If `id` is for the compiler invocation
+ *
+ *   go build a.go b.go c.go
+ *
+ * then typically there will be rows for
+ *
+ * num | arg
+ * --- | ---
+ * 0   | *path to extractor*
+ * 1   | `--`
+ * 2   | a.go
+ * 3   | b.go
+ * 4   | c.go
+ */
+var CompilationArgsTable = NewTable("compilation_args",
+	EntityColumn(CompilationType, "id"),
+	IntColumn("num"),
+	StringColumn("arg"),
+).KeySet("id", "num")
+
+/**
+ * The source files that are compiled by a compiler invocation.
+ * If `id` is for the compiler invocation
+ *
+ *   go build a.go b.go c.go
+ *
+ * then there will be rows for
+ *
+ * num | file
+ * --- | ---
+ * 0   | a.go
+ * 1   | b.go
+ * 2   | c.go
+ */
+var CompilationCompilingFilesTable = NewTable("compilation_compiling_files",
+	EntityColumn(CompilationType, "id"),
+	IntColumn("num"),
+	EntityColumn(FileType, "file"),
+).KeySet("id", "num")
+
+type CompilationTypeKind int
+
+const (
+	FRONTEND_CPU_SECONDS = iota
+	FRONTEND_ELAPSED_SECONDS
+	EXTRACTOR_CPU_SECONDS
+	EXTRACTOR_ELAPSED_SECONDS
+)
+
+/**
+ * The time taken by the extractor for a compiler invocation.
+ *
+ * For each file `num`, there will be rows for
+ *
+ * kind | seconds
+ * ---- | ---
+ * 1    | CPU seconds used by the extractor frontend
+ * 2    | Elapsed seconds during the extractor frontend
+ * 3    | CPU seconds used by the extractor backend
+ * 4    | Elapsed seconds during the extractor backend
+ */
+var CompilationTimeTable = NewTable("compilation_time",
+	EntityColumn(CompilationType, "id"),
+	IntColumn("num"),
+	IntColumn("kind"),
+	FloatColumn("secs"),
+).KeySet("id", "num", "kind")
+
+var DiagnosticType = NewPrimaryKeyType("@diagnostic")
+
+/**
+ * An error or warning generated by the extractor.
+ * The diagnostic message `diagnostic` was generated during compiler
+ * invocation `compilation`, and is the `file_number_diagnostic_number`th
+ * message generated while extracting the `file_number`th file of that
+ * invocation.
+ */
+var DiagnosticForTable = NewTable("diagnostic_for",
+	EntityColumn(DiagnosticType, "diagnostic").Unique(),
+	EntityColumn(CompilationType, "compilation"),
+	IntColumn("file_number"),
+	IntColumn("file_number_diagnostic_number"),
+)
+
+/**
+ * If extraction was successful, then `cpu_seconds` and
+ * `elapsed_seconds` are the CPU time and elapsed time (respectively)
+ * that extraction took for compiler invocation `id`.
+ */
+var CompilationFinishedTable = NewTable("compilation_finished",
+	EntityColumn(CompilationType, "id").Unique(),
+	FloatColumn("cpu_seconds"),
+	FloatColumn("elapsed_seconds"),
+)
+
+var DiagnosticsTable = NewTable("diagnostics",
+	EntityColumn(DiagnosticType, "id").Key(),
+	IntColumn("severity"),
+	StringColumn("error_tag"),
+	StringColumn("error_message"),
+	StringColumn("full_error_message"),
+	EntityColumn(LocationType, "location"),
+)
+
 // ContainerType is the type of files and folders
 var ContainerType = NewUnionType("@container")

@@ -742,6 +866,14 @@ var ErrorTypes = map[packages.ErrorKind]*BranchType{
 	packages.TypeError:    ErrorKind.NewBranch("@typeerror"),
 }

+// ErrorTags is a map from error kinds to the corresponding tag
+var ErrorTags = map[packages.ErrorKind]string{
+	packages.UnknownError: "@unknownerror",
+	packages.ListError:    "@listerror",
+	packages.ParseError:   "@parseerror",
+	packages.TypeError:    "@typeerror",
+}
+
 // LocationsDefaultTable is the table defining location objects
 var LocationsDefaultTable = NewTable("locations_default",
 	EntityColumn(LocationDefaultType, "id").Key(),
--- a/extractor/extractor.go
+++ b/extractor/extractor.go
@@ -1,12 +1,15 @@
 package extractor

 import (
+	"crypto/md5"
+	"encoding/hex"
 	"fmt"
 	"go/ast"
 	"go/constant"
 	"go/scanner"
 	"go/token"
 	"go/types"
+	"io"
 	"io/ioutil"
 	"log"
 	"os"
@@ -25,6 +28,33 @@ import (
 	"golang.org/x/tools/go/packages"
 )

+// MaxGoRoutines is the maximum number of goroutines used to extract files concurrently
+var MaxGoRoutines int
+
+func init() {
+	// this sets the number of threads that the Go runtime will spawn; this is separate
+	// from the number of goroutines that the program spawns, which are scheduled into
+	// the system threads by the Go runtime scheduler
+	threads := os.Getenv("LGTM_THREADS")
+	if maxprocs, err := strconv.Atoi(threads); err == nil && maxprocs > 0 {
+		log.Printf("Max threads set to %d", maxprocs)
+		runtime.GOMAXPROCS(maxprocs)
+	} else if threads != "" {
+		log.Printf("Warning: LGTM_THREADS value %s is not valid, defaulting to using all available threads.", threads)
+	}
+	// if the value is empty or not set, use the Go default, which is the number of cores
+	// available since Go 1.5, but is subject to change
+
+	// an unset, malformed or non-positive goroutine limit falls back to the default of 32
+	limit := util.Getenv("CODEQL_EXTRACTOR_GO_MAX_GOROUTINES", "SEMMLE_MAX_GOROUTINES")
+	if n, err := strconv.Atoi(limit); err != nil || n <= 0 {
+		MaxGoRoutines = 32
+	} else {
+		MaxGoRoutines = n
+		log.Printf("Max goroutines set to %d", MaxGoRoutines)
+	}
+}
+
 // Extract extracts the packages specified by the given patterns
 func Extract(patterns []string) error {
 	return ExtractWithFlags(nil, patterns)
@@ -32,6 +62,11 @@ func Extract(patterns []string) error {

 // ExtractWithFlags extracts the packages specified by the given patterns and build flags
 func ExtractWithFlags(buildFlags []string, patterns []string) error {
+	startTime := time.Now()
+
+	extraction := NewExtraction(buildFlags, patterns)
+	defer extraction.StatWriter.Close()
+
 	modEnabled := os.Getenv("GO111MODULE") != "off"
 	if !modEnabled {
 		log.Println("Go module mode disabled.")
@@ -111,7 +146,7 @@ func ExtractWithFlags(buildFlags []string, patterns []string) error {
 			log.Printf("Warning: encountered errors extracting package `%s`:", pkg.PkgPath)
 			for i, err := range pkg.Errors {
 				log.Printf("  %s", err.Error())
-				extractError(tw, err, lbl, i)
+				extraction.extractError(tw, err, lbl, i)
 			}
 		}
 		log.Printf("Done extracting types for package %s.", pkg.PkgPath)
@@ -129,38 +164,6 @@ func ExtractWithFlags(buildFlags []string, patterns []string) error {

 	log.Println("Starting to extract packages.")

-	// this sets the number of threads that the Go runtime will spawn; this is separate
-	// from the number of goroutines that the program spawns, which are scheduled into
-	// the system threads by the Go runtime scheduler
-	threads := os.Getenv("LGTM_THREADS")
-	if maxprocs, err := strconv.Atoi(threads); err == nil && maxprocs > 0 {
-		log.Printf("Max threads set to %d", maxprocs)
-		runtime.GOMAXPROCS(maxprocs)
-	} else if threads != "" {
-		log.Printf("Warning: LGTM_THREADS value %s is not valid, defaulting to using all available threads.", threads)
-	}
-	// if the value is empty or not set, use the Go default, which is the number of cores
-	// available since Go 1.5, but is subject to change
-
-	var maxgoroutines int
-	if maxgoroutines, err = strconv.Atoi(util.Getenv(
-		"CODEQL_EXTRACTOR_GO_MAX_GOROUTINES",
-		"SEMMLE_MAX_GOROUTINES",
-	)); err != nil {
-		maxgoroutines = 32
-	} else {
-		log.Printf("Max goroutines set to %d", maxgoroutines)
-	}
-
-	var wg sync.WaitGroup
-	// this semaphore is used to limit the number of files that are open at once;
-	// this is to prevent the extractor from running into issues with caps on the
-	// number of open files that can be held by one process
-	fdSem := newSemaphore(100)
-	// this semaphore is used to limit the number of goroutines spawned, so we
-	// don't run into memory issues
-	goroutineSem := newSemaphore(maxgoroutines)
-
 	sep := regexp.QuoteMeta(string(filepath.Separator))
 	// if a path matches this regexp, we don't want to extract this package. Currently, it checks
 	//   - that the path does not contain a `..` segment, and
@@ -178,7 +181,7 @@ func ExtractWithFlags(buildFlags []string, patterns []string) error {
 				continue
 			}

-			extractPackage(pkg, &wg, goroutineSem, fdSem)
+			extraction.extractPackage(pkg)

 			if pkgRoots[pkg.PkgPath] != "" {
 				modPath := filepath.Join(pkgRoots[pkg.PkgPath], "go.mod")
@@ -186,7 +189,7 @@ func ExtractWithFlags(buildFlags []string, patterns []string) error {
 					log.Printf("Extracting %s", modPath)
 					start := time.Now()

-					err := extractGoMod(modPath)
+					err := extraction.extractGoMod(modPath)
 					if err != nil {
 						log.Printf("Failed to extract go.mod: %s", err.Error())
 					}
@@ -202,13 +205,133 @@ func ExtractWithFlags(buildFlags []string, patterns []string) error {
 		log.Printf("Skipping dependency package %s.", pkg.PkgPath)
 	})

-	wg.Wait()
+	extraction.WaitGroup.Wait()

 	log.Println("Done extracting packages.")

+	t := time.Now()
+	elapsed := t.Sub(startTime)
+	dbscheme.CompilationFinishedTable.Emit(extraction.StatWriter, extraction.Label, 0.0, elapsed.Seconds())
+
 	return nil
 }

+// Extraction holds the state shared by all parts of a single extractor invocation
+type Extraction struct {
+	// Lock prevents concurrent writes to the maps and the stat trap writer, which are not thread-safe
+	Lock         sync.Mutex
+	LabelKey     string
+	Label        trap.Label
+	StatWriter   *trap.Writer
+	WaitGroup    sync.WaitGroup
+	GoroutineSem *semaphore
+	FdSem        *semaphore
+	NextFileId   int
+	FileInfo     map[string]*FileInfo
+	SeenGoMods   map[string]bool
+}
+
+type FileInfo struct {
+	Idx     int
+	NextErr int
+}
+
+// GetFileInfo returns the bookkeeping record for `path`, allocating the next file index on first use
+func (extraction *Extraction) GetFileInfo(path string) *FileInfo {
+	info, known := extraction.FileInfo[path]
+	if !known {
+		info = &FileInfo{extraction.NextFileId, 0}
+		extraction.FileInfo[path] = info
+		extraction.NextFileId += 1
+	}
+	return info
+}
+
+func (extraction *Extraction) GetFileIdx(path string) int {
+	return extraction.GetFileInfo(path).Idx
+}
+
+// GetNextErr returns the current error counter of the file at `path` and then increments it
+func (extraction *Extraction) GetNextErr(path string) int {
+	info := extraction.GetFileInfo(path)
+	info.NextErr++
+	return info.NextErr - 1
+}
+
+// NewExtraction creates the stat TRAP writer for one extractor invocation, records the working
+// directory and arguments of the invocation in it, and returns the state shared by the rest of
+// the extraction; the process exits if the writer or the working directory cannot be obtained
+func NewExtraction(buildFlags []string, patterns []string) *Extraction {
+	// the invocation is identified by a digest of its reconstructed command line
+	digest := md5.New()
+	io.WriteString(digest, "go")
+	for _, flag := range buildFlags {
+		io.WriteString(digest, " "+flag)
+	}
+	io.WriteString(digest, " --")
+	for _, pat := range patterns {
+		io.WriteString(digest, " "+pat)
+	}
+	sum := digest.Sum(nil)
+
+	// split compilation files into directories to avoid filling a single directory with too many files
+	pathFmt := fmt.Sprintf("compilations/%s/%s_%%d", hex.EncodeToString(sum[:1]), hex.EncodeToString(sum[1:]))
+
+	// try increasing suffixes until one names a TRAP file that does not exist yet; the counter
+	// is advanced after every attempt, so it ends up one past the suffix used in `path`
+	var path string
+	i := 0
+	for unused := false; !unused; i++ {
+		path = fmt.Sprintf(pathFmt, i)
+		file, err := trap.FileFor(path)
+		if err != nil {
+			log.Fatalf("Error creating trap file: %s\n", err.Error())
+		}
+		unused = !util.FileExists(file)
+	}
+
+	statWriter, err := trap.NewWriter(path, nil)
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	lblKey := fmt.Sprintf("%s_%d;compilation", hex.EncodeToString(sum), i)
+	lbl := statWriter.Labeler.GlobalID(lblKey)
+
+	wd, err := os.Getwd()
+	if err != nil {
+		log.Fatalf("Unable to determine current directory: %s\n", err.Error())
+	}
+
+	dbscheme.CompilationsTable.Emit(statWriter, lbl, wd)
+
+	// the recorded arguments are the extractor path, the build flags, a fake "--" argument to
+	// make it clear that what comes after it are patterns, and finally the patterns themselves
+	args := append([]string{util.GetExtractorPath()}, buildFlags...)
+	args = append(args, "--")
+	args = append(args, patterns...)
+	for num, arg := range args {
+		dbscheme.CompilationArgsTable.Emit(statWriter, lbl, num, arg)
+	}
+
+	return &Extraction{
+		LabelKey:   lblKey,
+		Label:      lbl,
+		StatWriter: statWriter,
+		// this semaphore is used to limit the number of files that are open at once;
+		// this is to prevent the extractor from running into issues with caps on the
+		// number of open files that can be held by one process
+		FdSem: newSemaphore(100),
+		// this semaphore is used to limit the number of goroutines spawned, so we
+		// don't run into memory issues
+		GoroutineSem: newSemaphore(MaxGoRoutines),
+		// file indices are handed out starting from zero
+		NextFileId: 0,
+		FileInfo:   make(map[string]*FileInfo),
+		SeenGoMods: make(map[string]bool),
+	}
+}
+
 // extractUniverseScope extracts symbol table information for the universe scope
 func extractUniverseScope() {
 	tw, err := trap.NewWriter("universe", nil)
@@ -315,9 +438,10 @@ var (
 )

 // extractError extracts the message and location of a frontend error
-func extractError(tw *trap.Writer, err packages.Error, pkglbl trap.Label, idx int) {
+func (extraction *Extraction) extractError(tw *trap.Writer, err packages.Error, pkglbl trap.Label, idx int) {
 	var (
 		lbl  = tw.Labeler.FreshID()
+		tag  = dbscheme.ErrorTags[err.Kind]
 		kind = dbscheme.ErrorTypes[err.Kind].Index()
 		pos  = err.Pos
 		file = ""
@@ -347,23 +471,42 @@ func extractError(tw *trap.Writer, err packages.Error, pkglbl trap.Label, idx in
 	} else if pos != "" && pos != "-" {
 		log.Printf("Warning: malformed error position `%s`", pos)
 	}
-	file = filepath.ToSlash(srcarchive.TransformPath(file))
-	dbscheme.ErrorsTable.Emit(tw, lbl, kind, err.Msg, pos, file, line, col, pkglbl, idx)
+	afile, e := filepath.Abs(file)
+	if e != nil {
+		log.Printf("Warning: failed to get absolute path for for %s", file)
+		afile = file
+	}
+	ffile, e := filepath.EvalSymlinks(afile)
+	if e != nil {
+		log.Printf("Warning: failed to evaluate symlinks for %s", afile)
+		ffile = afile
+	}
+	transformed := filepath.ToSlash(srcarchive.TransformPath(ffile))
+
+	extraction.Lock.Lock()
+	diagLbl := extraction.StatWriter.Labeler.FreshID()
+	dbscheme.DiagnosticsTable.Emit(extraction.StatWriter, diagLbl, 1, tag, err.Msg, err.Msg,
+		emitLocation(
+			extraction.StatWriter, extraction.StatWriter.Labeler.GlobalID(ffile+";sourcefile"),
+			line, col, line, col,
+		))
+	dbscheme.DiagnosticForTable.Emit(extraction.StatWriter, diagLbl, extraction.Label, extraction.GetFileIdx(transformed), extraction.GetNextErr(transformed))
+	extraction.Lock.Unlock()
+	dbscheme.ErrorsTable.Emit(tw, lbl, kind, err.Msg, pos, transformed, line, col, pkglbl, idx)
 }

 // extractPackage extracts AST information for all files in the given package
-func extractPackage(pkg *packages.Package, wg *sync.WaitGroup,
-	goroutineSem *semaphore, fdSem *semaphore) {
+func (extraction *Extraction) extractPackage(pkg *packages.Package) {
 	for _, astFile := range pkg.Syntax {
-		wg.Add(1)
-		goroutineSem.acquire(1)
+		extraction.WaitGroup.Add(1)
+		extraction.GoroutineSem.acquire(1)
 		go func(astFile *ast.File) {
-			err := extractFile(astFile, pkg, fdSem)
+			err := extraction.extractFile(astFile, pkg)
 			if err != nil {
 				log.Fatal(err)
 			}
-			goroutineSem.release(1)
-			wg.Done()
+			extraction.GoroutineSem.release(1)
+			extraction.WaitGroup.Done()
 		}(astFile)
 	}
 }
@@ -379,7 +522,7 @@ func normalizedPath(ast *ast.File, fset *token.FileSet) string {
 }

 // extractFile extracts AST information for the given file
-func extractFile(ast *ast.File, pkg *packages.Package, fdSem *semaphore) error {
+func (extraction *Extraction) extractFile(ast *ast.File, pkg *packages.Package) error {
 	fset := pkg.Fset
 	if ast.Package == token.NoPos {
 		log.Printf("Skipping extracting a file without a 'package' declaration")
@@ -387,26 +530,26 @@ func extractFile(ast *ast.File, pkg *packages.Package, fdSem *semaphore) error {
 	}
 	path := normalizedPath(ast, fset)

-	fdSem.acquire(3)
+	extraction.FdSem.acquire(3)

 	log.Printf("Extracting %s", path)
 	start := time.Now()

-	defer fdSem.release(1)
+	defer extraction.FdSem.release(1)
 	tw, err := trap.NewWriter(path, pkg)
 	if err != nil {
-		fdSem.release(2)
+		extraction.FdSem.release(2)
 		return err
 	}
 	defer tw.Close()

 	err = srcarchive.Add(path)
-	fdSem.release(2)
+	extraction.FdSem.release(2)
 	if err != nil {
 		return err
 	}

-	extractFileInfo(tw, path)
+	extraction.extractFileInfo(tw, path)

 	extractScopes(tw, ast, pkg)

@@ -433,7 +576,7 @@ func stemAndExt(base string) (string, string) {

 // extractFileInfo extracts file-system level information for the given file, populating
 // the `files` and `containerparent` tables
-func extractFileInfo(tw *trap.Writer, file string) {
+func (extraction *Extraction) extractFileInfo(tw *trap.Writer, file string) {
 	path := filepath.ToSlash(srcarchive.TransformPath(file))
 	components := strings.Split(path, "/")
 	parentPath := ""
@@ -454,6 +597,9 @@ func extractFileInfo(tw *trap.Writer, file string) {
 			dbscheme.FilesTable.Emit(tw, lbl, path, stem, ext, 0)
 			dbscheme.ContainerParentTable.Emit(tw, parentLbl, lbl)
 			extractLocation(tw, lbl, 0, 0, 0, 0)
+			extraction.Lock.Lock()
+			dbscheme.CompilationCompilingFilesTable.Emit(extraction.StatWriter, extraction.Label, extraction.GetFileIdx(path), extraction.StatWriter.Labeler.FileLabelFor(tw))
+			extraction.Lock.Unlock()
 			break
 		}
 		lbl := tw.Labeler.GlobalID(path + ";folder")
@@ -470,10 +616,16 @@ func extractFileInfo(tw *trap.Writer, file string) {

 // extractLocation emits a location entity for the given entity
 func extractLocation(tw *trap.Writer, entity trap.Label, sl int, sc int, el int, ec int) {
-	lbl := tw.Labeler.FileLabel()
-	locLbl := tw.Labeler.GlobalID(fmt.Sprintf("loc,{%s},%d,%d,%d,%d", lbl.String(), sl, sc, el, ec))
-	dbscheme.LocationsDefaultTable.Emit(tw, locLbl, lbl, sl, sc, el, ec)
-	dbscheme.HasLocationTable.Emit(tw, entity, locLbl)
+	filelbl := tw.Labeler.FileLabel()
+	dbscheme.HasLocationTable.Emit(tw, entity, emitLocation(tw, filelbl, sl, sc, el, ec))
+}
+
+// emitLocation emits a `locations_default` row for the given span of the file labelled
+// `filelbl`, written to `tw`, and returns the label of the location entity
+func emitLocation(tw *trap.Writer, filelbl trap.Label, sl int, sc int, el int, ec int) trap.Label {
+	locLbl := tw.Labeler.GlobalID(fmt.Sprintf("loc,{%s},%d,%d,%d,%d", filelbl, sl, sc, el, ec))
+	dbscheme.LocationsDefaultTable.Emit(tw, locLbl, filelbl, sl, sc, el, ec)
+	return locLbl
+}

 // extractNodeLocation extracts location information for the given node
--- a/extractor/gomodextractor.go
+++ b/extractor/gomodextractor.go
@@ -14,11 +14,20 @@ import (
 	"github.com/github/codeql-go/extractor/trap"
 )

-func extractGoMod(path string) error {
+func (extraction *Extraction) extractGoMod(path string) error {
 	if normPath, err := filepath.EvalSymlinks(path); err == nil {
 		path = normPath
 	}

+	extraction.Lock.Lock()
+	if extraction.SeenGoMods[path] {
+		extraction.Lock.Unlock()
+		return nil
+	}
+
+	extraction.SeenGoMods[path] = true
+	extraction.Lock.Unlock()
+
 	tw, err := trap.NewWriter(path, nil)
 	if err != nil {
 		return err
@@ -30,7 +39,7 @@ func extractGoMod(path string) error {
 		return err
 	}

-	extractFileInfo(tw, path)
+	extraction.extractFileInfo(tw, path)

 	file, err := os.Open(path)
 	if err != nil {
--- a/extractor/trap/labels.go
+++ b/extractor/trap/labels.go
@@ -70,6 +70,11 @@ func (l *Labeler) FileLabel() Label {
 	return l.fileLabel
 }

+// FileLabelFor returns this labeler's label for the file with which the trap writer `tw` is associated
+func (l *Labeler) FileLabelFor(tw *Writer) Label {
+	return l.GlobalID(tw.path + ";sourcefile")
+}
+
 // LocalID associates a label with the given AST node `nd` and returns it
 func (l *Labeler) LocalID(nd interface{}) Label {
 	label, exists := l.nodeLabels[nd]
--- a/extractor/trap/trapwriter.go
+++ b/extractor/trap/trapwriter.go
@@ -26,14 +26,22 @@ type Writer struct {
 	Package      *packages.Package
 }

+// FileFor returns the path of the TRAP file inside the trap folder that corresponds to `path`
+func FileFor(path string) (string, error) {
+	folder, err := trapFolder()
+	if err != nil {
+		return "", err
+	}
+	return filepath.Join(folder, srcarchive.AppendablePath(path)+".trap.gz"), nil
+}
+
 // NewWriter creates a TRAP file for the given path and returns a writer for
 // writing to it
 func NewWriter(path string, pkg *packages.Package) (*Writer, error) {
-	trapFolder, err := trapFolder()
+	trapFilePath, err := FileFor(path)
 	if err != nil {
 		return nil, err
 	}
-	trapFilePath := filepath.Join(trapFolder, srcarchive.AppendablePath(path)+".trap.gz")
 	trapFileDir := filepath.Dir(trapFilePath)
 	err = os.MkdirAll(trapFileDir, 0755)
 	if err != nil {
@@ -133,6 +141,8 @@ func (tw *Writer) Emit(table string, values []interface{}) error {
 			fmt.Fprintf(tw.zip, "\"%s\"", escapeString(capStringLength(value)))
 		case int:
 			fmt.Fprintf(tw.zip, "%d", value)
+		case float64:
+			fmt.Fprintf(tw.zip, "%e", value)
 		default:
 			return errors.New("Cannot emit value")
 		}
--- a/extractor/util/util.go
+++ b/extractor/util/util.go
@@ -8,6 +8,8 @@ import (
 	"strings"
 )

+var extractorPath string
+
 // Getenv retrieves the value of the environment variable named by the key.
 // If that variable is not present, it iterates over the given aliases until
 // it finds one that is. If none are present, the empty string is returned.
@@ -138,3 +140,21 @@ func RunCmd(cmd *exec.Cmd) bool {

 	return true
 }
+
+// GetExtractorPath returns the go-extractor path under CODEQL_EXTRACTOR_GO_ROOT for the platform
+// named by CODEQL_PLATFORM; it exits with a fatal log message if either variable is not set
+func GetExtractorPath() string {
+	// the result is cached in `extractorPath`, so the environment is only consulted once
+	if extractorPath == "" {
+		root, haveRoot := os.LookupEnv("CODEQL_EXTRACTOR_GO_ROOT")
+		if !haveRoot {
+			log.Fatal("CODEQL_EXTRACTOR_GO_ROOT not set; this binary should be run from the `codeql` CLI.")
+		}
+		platform, havePlatform := os.LookupEnv("CODEQL_PLATFORM")
+		if !havePlatform {
+			log.Fatal("CODEQL_PLATFORM not set; this binary should be run from the `codeql` CLI.")
+		}
+		extractorPath = filepath.Join(root, "tools", platform, "go-extractor")
+	}
+	return extractorPath
+}
--- a/ql/src/go.dbscheme
+++ b/ql/src/go.dbscheme
@@ -110,6 +110,24 @@ xmllocations(

@xmllocatable = @xmlcharacters | @xmlelement | @xmlcomment | @xmlattribute | @xmldtd | @file | @xmlnamespace;

+compilations(unique int id: @compilation, string cwd: string ref);
+
+#keyset[id, num]
+compilation_args(int id: @compilation ref, int num: int ref, string arg: string ref);
+
+#keyset[id, num, kind]
+compilation_time(int id: @compilation ref, int num: int ref, int kind: int ref, float secs: float ref);
+
+diagnostic_for(unique int diagnostic: @diagnostic ref, int compilation: @compilation ref, int file_number: int ref, int file_number_diagnostic_number: int ref);
+
+compilation_finished(unique int id: @compilation ref, float cpu_seconds: float ref, float elapsed_seconds: float ref);
+
+#keyset[id, num]
+compilation_compiling_files(int id: @compilation ref, int num: int ref, int file: @file ref);
+
+diagnostics(unique int id: @diagnostic, int severity: int ref, string error_tag: string ref, string error_message: string ref,
+            string full_error_message: string ref, int location: @location ref);
+
 locations_default(unique int id: @location_default, int file: @file ref, int beginLine: int ref, int beginColumn: int ref,
                  int endLine: int ref, int endColumn: int ref);

--- a/ql/src/go.dbscheme.stats
+++ b/ql/src/go.dbscheme.stats
--- a/ql/test/extractor-tests/diagnostics/CONSISTENCY/UnexpectedFrontendErrors.expected
+++ b/ql/test/extractor-tests/diagnostics/CONSISTENCY/UnexpectedFrontendErrors.expected
@@ -0,0 +1,5 @@
+| broken2/test1.go:4:2:4:2 | undeclared name: fmt |
+| broken2/test1.go:5:2:5:2 | undeclared name: fmt |
+| broken2/test1.go:5:14:5:14 | undeclared name: a |
+| broken2/test.go:3:1:3:1 | expected 'package', found pac |
+| broken/test.go:7:1:7:1 | expected declaration, found This |
--- a/ql/test/extractor-tests/diagnostics/Diagnostics.expected
+++ b/ql/test/extractor-tests/diagnostics/Diagnostics.expected
@@ -0,0 +1,12 @@
+qcompilations
+| compilation in 'diagnostics': go-extractor -mod=vendor -- ./... | broken2/test1.go:0:0:0:0 | broken2/test1.go |
+| compilation in 'diagnostics': go-extractor -mod=vendor -- ./... | broken2/test2.go:0:0:0:0 | broken2/test2.go |
+| compilation in 'diagnostics': go-extractor -mod=vendor -- ./... | broken/test.go:0:0:0:0 | broken/test.go |
+| compilation in 'diagnostics': go-extractor -mod=vendor -- ./... | go.mod:0:0:0:0 | go.mod |
+| compilation in 'diagnostics': go-extractor -mod=vendor -- ./... | notbroken/test.go:0:0:0:0 | notbroken/test.go |
+qdiagnostics
+| broken2/test1.go:4:2:4:2 | error: undeclared name: fmt | compilation in 'diagnostics': go-extractor -mod=vendor -- ./... | broken2/test1.go:0:0:0:0 | broken2/test1.go |
+| broken2/test1.go:5:2:5:2 | error: undeclared name: fmt | compilation in 'diagnostics': go-extractor -mod=vendor -- ./... | broken2/test1.go:0:0:0:0 | broken2/test1.go |
+| broken2/test1.go:5:14:5:14 | error: undeclared name: a | compilation in 'diagnostics': go-extractor -mod=vendor -- ./... | broken2/test1.go:0:0:0:0 | broken2/test1.go |
+| broken/test.go:7:1:7:1 | error: expected declaration, found This | compilation in 'diagnostics': go-extractor -mod=vendor -- ./... | broken/test.go:0:0:0:0 | broken/test.go |
+duplicateerrs
--- a/ql/test/extractor-tests/diagnostics/Diagnostics.ql
+++ b/ql/test/extractor-tests/diagnostics/Diagnostics.ql
@@ -0,0 +1,56 @@
+import go
+
+bindingset[path]
+string baseName(string path) { result = path.regexpCapture(".*(/|\\\\)([^/\\\\]+)(/|\\\\)?$", 2) }
+
+class Compilation extends @compilation {
+  string getArg(int i) { compilation_args(this, i, result) }
+
+  string getCwd() { compilations(this, result) }
+
+  int getNumArgs() { result = count(int i | exists(this.getArg(i))) }
+
+  predicate compilesFile(int i, File f) { compilation_compiling_files(this, i, f) }
+
+  private string getArgsTo(int i) {
+    // use baseName for location-independent tests
+    i = 0 and result = baseName(this.getArg(0))
+    or
+    result = this.getArgsTo(i - 1) + " " + this.getArg(i)
+  }
+
+  string toString() {
+    result =
+      "compilation in '" + baseName(this.getCwd()) + "': " + this.getArgsTo(this.getNumArgs() - 1)
+  }
+}
+
+class Diagnostic extends @diagnostic {
+  predicate diagnosticFor(Compilation c, int fileNum, int idx) {
+    diagnostic_for(this, c, fileNum, idx)
+  }
+
+  Location getLocation() { diagnostics(this, _, _, _, _, result) }
+
+  // string getTag() {
+  //   diagnostics(this, _, result, _, _, _)
+  // }
+  string getMessage() { diagnostics(this, _, _, result, _, _) }
+
+  // string getFullMessage() {
+  //   diagnostics(this, _, _, _, result, _)
+  // }
+  string toString() { result = "error: " + this.getMessage() }
+}
+
+query predicate qcompilations(Compilation c, File f) { c.compilesFile(_, f) }
+
+query predicate qdiagnostics(Diagnostic d, Compilation c, File f) {
+  exists(int fileno | d.diagnosticFor(c, fileno, _) | c.compilesFile(fileno, f))
+}
+
+query predicate duplicateerrs(Diagnostic d, Diagnostic d1, Compilation c, int fileno, int idx) {
+  d != d1 and
+  d.diagnosticFor(c, fileno, idx) and
+  d1.diagnosticFor(c, fileno, idx)
+}
--- a/ql/test/extractor-tests/diagnostics/broken/test.go
+++ b/ql/test/extractor-tests/diagnostics/broken/test.go
@@ -0,0 +1,7 @@
+package main
+
+// autoformat-ignore (gofmt chokes on invalid programs)
+
+// Example file with a syntax error to demonstrate use of consistency queries
+
+This is not a valid Go program
--- a/ql/test/extractor-tests/diagnostics/broken2/test.go
+++ b/ql/test/extractor-tests/diagnostics/broken2/test.go
@@ -0,0 +1,3 @@
+// autoformat-ignore
+
+pac
--- a/ql/test/extractor-tests/diagnostics/broken2/test1.go
+++ b/ql/test/extractor-tests/diagnostics/broken2/test1.go
@@ -0,0 +1,6 @@
+package main
+
+func main() {
+	fmt.Println("a")
+	fmt.Println(a)
+}
--- a/ql/test/extractor-tests/diagnostics/broken2/test2.go
+++ b/ql/test/extractor-tests/diagnostics/broken2/test2.go
@@ -0,0 +1 @@
+package main
--- a/ql/test/extractor-tests/diagnostics/go.mod
+++ b/ql/test/extractor-tests/diagnostics/go.mod
@@ -0,0 +1,3 @@
+module codeql-go-tests/diagnostics
+
+go 1.16
--- a/ql/test/extractor-tests/diagnostics/notbroken/test.go
+++ b/ql/test/extractor-tests/diagnostics/notbroken/test.go
@@ -0,0 +1 @@
+package main
--- a/upgrades/4affa49dbe2bbab1a33f0e3ea6b045116abbcfda/go.dbscheme
+++ b/upgrades/4affa49dbe2bbab1a33f0e3ea6b045116abbcfda/go.dbscheme
@@ -110,6 +110,24 @@ xmllocations(

@xmllocatable = @xmlcharacters | @xmlelement | @xmlcomment | @xmlattribute | @xmldtd | @file | @xmlnamespace;

+compilations(unique int id: @compilation, string cwd: string ref);
+
+#keyset[id, num]
+compilation_args(int id: @compilation ref, int num: int ref, string arg: string ref);
+
+#keyset[id, num, kind]
+compilation_time(int id: @compilation ref, int num: int ref, int kind: int ref, float secs: float ref);
+
+diagnostic_for(unique int diagnostic: @diagnostic ref, int compilation: @compilation ref, int file_number: int ref, int file_number_diagnostic_number: int ref);
+
+compilation_finished(unique int id: @compilation ref, float cpu_seconds: float ref, float elapsed_seconds: float ref);
+
+#keyset[id, num]
+compilation_compiling_files(int id: @compilation ref, int num: int ref, int file: @file ref);
+
+diagnostics(unique int id: @diagnostic, int severity: int ref, string error_tag: string ref, string error_message: string ref,
+            string full_error_message: string ref, int location: @location ref);
+
 locations_default(unique int id: @location_default, int file: @file ref, int beginLine: int ref, int beginColumn: int ref,
                  int endLine: int ref, int endColumn: int ref);

--- a/upgrades/4affa49dbe2bbab1a33f0e3ea6b045116abbcfda/upgrade.properties
+++ b/upgrades/4affa49dbe2bbab1a33f0e3ea6b045116abbcfda/upgrade.properties
@@ -1,2 +1,2 @@
-description: Add XML tables
+description: Add tables for extractor diagnostics and XML
 compatibility: backwards