Fix Go extractor to extract root internal test files

When CODEQL_EXTRACTOR_GO_OPTION_EXTRACT_TESTS=true is set, the Go
extractor was incorrectly skipping internal test files (package foo)
at repository roots when the project contains nested test packages.

Root Cause:
The extractor selected package variants by longest ID string, but this
heuristic fails when nested packages have tests. For a package like
"github.com/go-git/go-git/v6", packages.Load returns multiple variants:

1. "github.com/go-git/go-git/v6" (19 files, production only)
2. "github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6.test]"
   (39 files, production + 20 root tests) ← Should select this
3. "github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6/plumbing/format/packfile.test]"
   (19 files, test dependency) ← Was incorrectly selected (longest string)

The old logic selected variant #3 (76 chars) over #2 (68 chars),
causing 20 root test files to be missing from the database.

Fix:
Replace string length comparison with a better heuristic that prefers:
1. Exact test packages (e.g., "pkg [pkg.test]") over nested dependencies
2. Packages with more Syntax nodes (more files to extract)
3. String length as a tiebreaker

This ensures the extractor selects the variant with the most complete
test coverage, particularly for root-level internal tests.

Testing:
- Added comprehensive unit tests covering the selection logic
- Tests simulate the real-world go-git scenario
- All tests pass

Impact:
Root-level external tests (package foo_test) were already extracted
correctly. This fix ensures internal tests (package foo) at the root
are now also extracted when they exist alongside nested test packages.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
Arieh Schneier
2026-05-11 13:42:17 +10:00
parent 36554d160c
commit 3ef4a5836c
2 changed files with 309 additions and 14 deletions

View File

@@ -59,6 +59,44 @@ func init() {
}
}
// isExactTestPackage checks if a package ID represents an exact test match.
// Returns true for IDs like "github.com/foo/bar [github.com/foo/bar.test]"
// Returns false for IDs like "github.com/foo/bar [github.com/foo/bar/nested.test]"
func isExactTestPackage(pkg *packages.Package) bool {
// Test packages have IDs in the format: "pkgpath [pkgpath.test]"
// or for nested test dependencies: "pkgpath [pkgpath/nested.test]"
if !strings.Contains(pkg.ID, " [") {
return false
}
expectedTestID := pkg.PkgPath + " [" + pkg.PkgPath + ".test]"
return pkg.ID == expectedTestID
}
// isBetterPackage determines if pkg is a better choice than current for extraction.
// Preferences:
// 1. Exact test package (e.g., "pkg [pkg.test]") over nested test dependencies
// 2. More Syntax nodes (more files to extract)
// 3. Longer ID string as tiebreaker
func isBetterPackage(pkg, current *packages.Package) bool {
pkgIsExact := isExactTestPackage(pkg)
currentIsExact := isExactTestPackage(current)
// Prefer exact test packages
if pkgIsExact != currentIsExact {
return pkgIsExact
}
// Prefer packages with more syntax nodes (more files)
pkgSyntaxCount := len(pkg.Syntax)
currentSyntaxCount := len(current.Syntax)
if pkgSyntaxCount != currentSyntaxCount {
return pkgSyntaxCount > currentSyntaxCount
}
// Fall back to string length
return len(pkg.ID) > len(current.ID)
}
// ExtractWithFlags extracts the packages specified by the given patterns and build flags
func ExtractWithFlags(buildFlags []string, patterns []string, extractTests bool, sourceRoot string) error {
startTime := time.Now()
@@ -153,20 +191,22 @@ func ExtractWithFlags(buildFlags []string, patterns []string, extractTests bool,
pkgsNotFound := make([]string, 0, len(pkgs))
// Build a map from package paths to their longest IDs--
// Build a map from package paths to their best IDs--
// in the context of a `go test -c` compilation, we will see the same package more than
// once, with IDs like "abc.com/pkgname [abc.com/pkgname.test]" to distinguish the version
// that contains and is used by test code.
// For our purposes it is simplest to just ignore the non-test version, since the test
// version seems to be a superset of it.
longestPackageIds := make(map[string]string)
// We prefer the version with the most complete test coverage, which is typically:
// 1. The exact test package (e.g., "pkg [pkg.test]") over nested test dependencies
// 2. The package with the most Syntax nodes (most files to extract)
// 3. The longest ID string as a tiebreaker
bestPackageIds := make(map[string]*packages.Package)
packages.Visit(pkgs, nil, func(pkg *packages.Package) {
if longestIDSoFar, present := longestPackageIds[pkg.PkgPath]; present {
if len(pkg.ID) > len(longestIDSoFar) {
longestPackageIds[pkg.PkgPath] = pkg.ID
if bestSoFar, present := bestPackageIds[pkg.PkgPath]; present {
if isBetterPackage(pkg, bestSoFar) {
bestPackageIds[pkg.PkgPath] = pkg
}
} else {
longestPackageIds[pkg.PkgPath] = pkg.ID
bestPackageIds[pkg.PkgPath] = pkg
}
})
@@ -257,15 +297,15 @@ func ExtractWithFlags(buildFlags []string, patterns []string, extractTests bool,
// extract AST information for all packages
packages.Visit(pkgs, nil, func(pkg *packages.Package) {
// If this is a variant of a package that also occurs with a longer ID, skip it;
// If this is a variant of a package that also occurs with a better ID, skip it;
// otherwise we would extract the same file more than once including extracting the
// body of methods twice, causing database inconsistencies.
//
// We prefer the version with the longest ID because that is (so far as I know) always
// the version that defines more entities -- the only case I'm aware of being a test
// variant of a package, which includes test-only functions in addition to the complete
// contents of the main variant.
if pkg.ID != longestPackageIds[pkg.PkgPath] {
// We prefer the version with the most complete test coverage, prioritizing:
// 1. Exact test packages (e.g., "pkg [pkg.test]") over nested test dependencies
// 2. Packages with more Syntax nodes (more files to extract)
// 3. Longer ID strings as a tiebreaker
if pkg.ID != bestPackageIds[pkg.PkgPath].ID {
return
}

View File

@@ -0,0 +1,255 @@
package extractor
import (
"go/ast"
"testing"
"golang.org/x/tools/go/packages"
)
func TestIsExactTestPackage(t *testing.T) {
tests := []struct {
name string
pkgID string
pkgPath string
expected bool
}{
{
name: "exact test package",
pkgID: "github.com/foo/bar [github.com/foo/bar.test]",
pkgPath: "github.com/foo/bar",
expected: true,
},
{
name: "nested test package",
pkgID: "github.com/foo/bar [github.com/foo/bar/nested.test]",
pkgPath: "github.com/foo/bar",
expected: false,
},
{
name: "deeply nested test package",
pkgID: "github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6/plumbing/format/packfile.test]",
pkgPath: "github.com/go-git/go-git/v6",
expected: false,
},
{
name: "exact test package with version",
pkgID: "github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6.test]",
pkgPath: "github.com/go-git/go-git/v6",
expected: true,
},
{
name: "non-test package",
pkgID: "github.com/foo/bar",
pkgPath: "github.com/foo/bar",
expected: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
pkg := &packages.Package{
ID: tt.pkgID,
PkgPath: tt.pkgPath,
}
result := isExactTestPackage(pkg)
if result != tt.expected {
t.Errorf("isExactTestPackage(%q) = %v, want %v", tt.pkgID, result, tt.expected)
}
})
}
}
func TestIsBetterPackage(t *testing.T) {
// Helper to create a package with specified properties
makePkg := func(id, path string, syntaxCount int) *packages.Package {
syntax := make([]*ast.File, syntaxCount)
return &packages.Package{
ID: id,
PkgPath: path,
Syntax: syntax,
}
}
tests := []struct {
name string
pkg *packages.Package
current *packages.Package
expected bool // true if pkg is better than current
}{
{
name: "exact test package beats nested test package",
pkg: makePkg(
"github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6.test]",
"github.com/go-git/go-git/v6",
39, // 19 production + 20 test files
),
current: makePkg(
"github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6/plumbing/format/packfile.test]",
"github.com/go-git/go-git/v6",
19, // production files only
),
expected: true,
},
{
name: "nested test package loses to exact test package",
pkg: makePkg(
"github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6/plumbing/format/packfile.test]",
"github.com/go-git/go-git/v6",
19,
),
current: makePkg(
"github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6.test]",
"github.com/go-git/go-git/v6",
39,
),
expected: false,
},
{
name: "more syntax nodes wins when both are exact tests",
pkg: makePkg(
"github.com/foo/bar [github.com/foo/bar.test]",
"github.com/foo/bar",
50,
),
current: makePkg(
"github.com/foo/bar [github.com/foo/bar.test]",
"github.com/foo/bar",
30,
),
expected: true,
},
{
name: "fewer syntax nodes loses when both are exact tests",
pkg: makePkg(
"github.com/foo/bar [github.com/foo/bar.test]",
"github.com/foo/bar",
30,
),
current: makePkg(
"github.com/foo/bar [github.com/foo/bar.test]",
"github.com/foo/bar",
50,
),
expected: false,
},
{
name: "more syntax nodes wins when both are nested tests",
pkg: makePkg(
"github.com/foo/bar [github.com/foo/bar/pkg1.test]",
"github.com/foo/bar",
25,
),
current: makePkg(
"github.com/foo/bar [github.com/foo/bar/pkg2.test]",
"github.com/foo/bar",
20,
),
expected: true,
},
{
name: "longer ID wins when same syntax count",
pkg: makePkg(
"github.com/foo/bar [github.com/foo/bar/verylongpackagename.test]",
"github.com/foo/bar",
20,
),
current: makePkg(
"github.com/foo/bar [github.com/foo/bar/short.test]",
"github.com/foo/bar",
20,
),
expected: true,
},
{
name: "test package beats non-test with same syntax count",
pkg: makePkg(
"github.com/foo/bar [github.com/foo/bar.test]",
"github.com/foo/bar",
20,
),
current: makePkg(
"github.com/foo/bar",
"github.com/foo/bar",
20,
),
expected: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := isBetterPackage(tt.pkg, tt.current)
if result != tt.expected {
t.Errorf("isBetterPackage() = %v, want %v\n pkg: %q (%d syntax nodes)\n current: %q (%d syntax nodes)",
result, tt.expected,
tt.pkg.ID, len(tt.pkg.Syntax),
tt.current.ID, len(tt.current.Syntax))
}
})
}
}
// TestPackageSelectionRealWorld simulates the real-world go-git scenario
func TestPackageSelectionRealWorld(t *testing.T) {
// Simulate the actual packages.Load result for go-git repository
// when EXTRACT_TESTS=true
pkgs := []*packages.Package{
// Production package only
{
ID: "github.com/go-git/go-git/v6",
PkgPath: "github.com/go-git/go-git/v6",
Syntax: make([]*ast.File, 19), // 19 production files
},
// Root test package - this is what we want!
{
ID: "github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6.test]",
PkgPath: "github.com/go-git/go-git/v6",
Syntax: make([]*ast.File, 39), // 19 production + 20 test files
},
// Nested test dependency 1
{
ID: "github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6/plumbing/format/packfile.test]",
PkgPath: "github.com/go-git/go-git/v6",
Syntax: make([]*ast.File, 19), // production files only (dependency)
},
// Nested test dependency 2
{
ID: "github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6/plumbing/object.test]",
PkgPath: "github.com/go-git/go-git/v6",
Syntax: make([]*ast.File, 19), // production files only (dependency)
},
}
// Simulate the bestPackageIds selection logic
bestPackageIds := make(map[string]*packages.Package)
for _, pkg := range pkgs {
if bestSoFar, present := bestPackageIds[pkg.PkgPath]; present {
if isBetterPackage(pkg, bestSoFar) {
bestPackageIds[pkg.PkgPath] = pkg
}
} else {
bestPackageIds[pkg.PkgPath] = pkg
}
}
// Verify the correct package was selected
selected := bestPackageIds["github.com/go-git/go-git/v6"]
expectedID := "github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6.test]"
expectedSyntaxCount := 39
if selected.ID != expectedID {
t.Errorf("Wrong package selected!\n got: %q (%d syntax nodes)\n want: %q (%d syntax nodes)",
selected.ID, len(selected.Syntax),
expectedID, expectedSyntaxCount)
}
if len(selected.Syntax) != expectedSyntaxCount {
t.Errorf("Wrong syntax count: got %d, want %d", len(selected.Syntax), expectedSyntaxCount)
}
// Verify it's recognized as an exact test package
if !isExactTestPackage(selected) {
t.Errorf("Selected package %q should be recognized as exact test package", selected.ID)
}
}