Go: implement basic overlay extraction

When in overlay mode, extractFile will exit early if the file isn't in
the list of files that changed since the base was extracted.
This commit is contained in:
Nick Rolfe
2025-09-01 16:15:58 +01:00
parent 05e5502680
commit 604df2125d
3 changed files with 90 additions and 14 deletions

View File

@@ -311,6 +311,8 @@ func ExtractWithFlags(buildFlags []string, patterns []string, extractTests bool)
extraction.WaitGroup.Wait()
util.WriteOverlayBaseMetadata()
log.Println("Done extracting packages.")
t := time.Now()
@@ -323,16 +325,17 @@ func ExtractWithFlags(buildFlags []string, patterns []string, extractTests bool)
// Extraction bundles the shared state used while extracting packages.
type Extraction struct {
// A lock for preventing concurrent writes to maps and the stat trap writer, as they are not
// thread-safe
Lock sync.Mutex
LabelKey string
Label trap.Label
StatWriter *trap.Writer
WaitGroup sync.WaitGroup
GoroutineSem *semaphore
FdSem *semaphore
NextFileId int
FileInfo map[string]*FileInfo
SeenGoMods map[string]bool
Lock sync.Mutex
LabelKey string
Label trap.Label
StatWriter *trap.Writer
WaitGroup sync.WaitGroup
GoroutineSem *semaphore
FdSem *semaphore
NextFileId int
FileInfo map[string]*FileInfo
SeenGoMods map[string]bool
// The set of absolute paths of files that changed since the overlay base was extracted.
// It is nil when no overlay change list is available, in which case extractFile does not
// skip any file on this basis.
OverlayChanges map[string]bool
}
type FileInfo struct {
@@ -379,6 +382,21 @@ func NewExtraction(buildFlags []string, patterns []string) *Extraction {
}
sum := hash.Sum(nil)
overlayChangeList := util.GetOverlayChanges()
var overlayChanges map[string]bool
if overlayChangeList == nil {
overlayChanges = nil
} else {
overlayChanges = make(map[string]bool)
for _, changedFilePath := range overlayChangeList {
absPath, err := filepath.Abs(changedFilePath)
if err != nil {
log.Fatalf("Error resolving absolute path of overlay change %s: %s", changedFilePath, err.Error())
}
overlayChanges[absPath] = true
}
}
i := 0
var path string
// split compilation files into directories to avoid filling a single directory with too many files
@@ -438,10 +456,11 @@ func NewExtraction(buildFlags []string, patterns []string) *Extraction {
FdSem: newSemaphore(100),
// this semaphore is used to limit the number of goroutines spawned, so we
// don't run into memory issues
GoroutineSem: newSemaphore(MaxGoRoutines),
NextFileId: 0,
FileInfo: make(map[string]*FileInfo),
SeenGoMods: make(map[string]bool),
GoroutineSem: newSemaphore(MaxGoRoutines),
NextFileId: 0,
FileInfo: make(map[string]*FileInfo),
SeenGoMods: make(map[string]bool),
OverlayChanges: overlayChanges,
}
}
@@ -720,6 +739,10 @@ func (extraction *Extraction) extractFile(ast *ast.File, pkg *packages.Package)
return nil
}
path := normalizedPath(ast, fset)
if extraction.OverlayChanges != nil && !extraction.OverlayChanges[path] {
// This file did not change since the base was extracted
return nil
}
extraction.FdSem.acquire(3)

View File

@@ -7,6 +7,7 @@ go_library(
srcs = [
"extractvendordirs.go",
"logging.go",
"overlays.go",
"registryproxy.go",
"semver.go",
"util.go",

View File

@@ -0,0 +1,52 @@
package util
import (
"encoding/json"
"log"
"os"
)
// GetOverlayChanges returns the list of relative paths of files that have changed (or been
// deleted), as read from the JSON file named by the CODEQL_EXTRACTOR_GO_OVERLAY_CHANGES
// environment variable.
//
// If the variable is not set, we are not extracting an overlay database and the result is `nil`.
// If it is set, the result is always non-nil (possibly empty), even when the JSON document has a
// missing or null "changes" entry, so that callers can distinguish "overlay with no changes" from
// "not an overlay" by comparing the result against `nil`.
//
// Failing to open or decode the file is fatal: the process exits via log.Fatalf.
func GetOverlayChanges() []string {
	overlayChangesJsonPath, present := os.LookupEnv("CODEQL_EXTRACTOR_GO_OVERLAY_CHANGES")
	if !present {
		return nil
	}

	log.Printf("Reading overlay changes from: %s", overlayChangesJsonPath)
	file, err := os.Open(overlayChangesJsonPath)
	if err != nil {
		log.Fatalf("Failed to open overlay changes JSON file: %s", err)
	}
	defer file.Close()

	var overlayData struct {
		Changes []string `json:"changes"`
	}
	if err := json.NewDecoder(file).Decode(&overlayData); err != nil {
		log.Fatalf("Failed to decode overlay changes JSON file: %s", err)
	}

	// A missing or null "changes" entry decodes to a nil slice; returning that as-is would be
	// indistinguishable from "not in overlay mode" and make callers extract every file.
	if overlayData.Changes == nil {
		return []string{}
	}
	return overlayData.Changes
}
// WriteOverlayBaseMetadata creates an empty metadata file at the path named by the
// CODEQL_EXTRACTOR_GO_OVERLAY_BASE_METADATA_OUT environment variable, which indicates that we are
// extracting an overlay base; if the variable is not set, it does nothing.
//
// Failing to create or close the file is fatal: the process exits via log.Fatalf.
func WriteOverlayBaseMetadata() {
	metadataPath, present := os.LookupEnv("CODEQL_EXTRACTOR_GO_OVERLAY_BASE_METADATA_OUT")
	if !present {
		return
	}

	log.Printf("Writing overlay base metadata to: %s", metadataPath)
	// In principle, we could store some metadata here and read it back when extracting the
	// overlay. For now, we don't need to store anything, but the CLI still requires us to write
	// something, so just create an empty file.
	file, err := os.Create(metadataPath)
	if err != nil {
		log.Fatalf("Failed to create overlay base metadata file: %s", err)
	}
	// Report a failed close rather than silently dropping it, consistent with the handling of
	// the create error above.
	if err := file.Close(); err != nil {
		log.Fatalf("Failed to close overlay base metadata file: %s", err)
	}
}