mirror of
https://github.com/github/codeql.git
synced 2026-05-14 11:19:27 +02:00
Fix Go extractor to extract root internal test files
When CODEQL_EXTRACTOR_GO_OPTION_EXTRACT_TESTS=true is set, the Go extractor was incorrectly skipping internal test files (package foo) at repository roots when the project contains nested test packages. Root Cause: The extractor selected package variants by longest ID string, but this heuristic fails when nested packages have tests. For a package like "github.com/go-git/go-git/v6", packages.Load returns multiple variants: 1. "github.com/go-git/go-git/v6" (19 files, production only) 2. "github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6.test]" (39 files, production + 20 root tests) ← Should select this 3. "github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6/plumbing/format/packfile.test]" (19 files, test dependency) ← Was incorrectly selected (longest string) The old logic selected variant #3 (76 chars) over #2 (68 chars), causing 20 root test files to be missing from the database. Fix: Replace string length comparison with a better heuristic that prefers: 1. Exact test packages (e.g., "pkg [pkg.test]") over nested dependencies 2. Packages with more Syntax nodes (more files to extract) 3. String length as a tiebreaker This ensures the extractor selects the variant with the most complete test coverage, particularly for root-level internal tests. Testing: - Added comprehensive unit tests covering the selection logic - Tests simulate the real-world go-git scenario - All tests pass Impact: Root-level external tests (package foo_test) were already extracted correctly. This fix ensures internal tests (package foo) at the root are now also extracted when they exist alongside nested test packages. Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -59,6 +59,44 @@ func init() {
|
||||
}
|
||||
}
|
||||
|
||||
// isExactTestPackage checks if a package ID represents an exact test match.
|
||||
// Returns true for IDs like "github.com/foo/bar [github.com/foo/bar.test]"
|
||||
// Returns false for IDs like "github.com/foo/bar [github.com/foo/bar/nested.test]"
|
||||
func isExactTestPackage(pkg *packages.Package) bool {
|
||||
// Test packages have IDs in the format: "pkgpath [pkgpath.test]"
|
||||
// or for nested test dependencies: "pkgpath [pkgpath/nested.test]"
|
||||
if !strings.Contains(pkg.ID, " [") {
|
||||
return false
|
||||
}
|
||||
expectedTestID := pkg.PkgPath + " [" + pkg.PkgPath + ".test]"
|
||||
return pkg.ID == expectedTestID
|
||||
}
|
||||
|
||||
// isBetterPackage determines if pkg is a better choice than current for extraction.
|
||||
// Preferences:
|
||||
// 1. Exact test package (e.g., "pkg [pkg.test]") over nested test dependencies
|
||||
// 2. More Syntax nodes (more files to extract)
|
||||
// 3. Longer ID string as tiebreaker
|
||||
func isBetterPackage(pkg, current *packages.Package) bool {
|
||||
pkgIsExact := isExactTestPackage(pkg)
|
||||
currentIsExact := isExactTestPackage(current)
|
||||
|
||||
// Prefer exact test packages
|
||||
if pkgIsExact != currentIsExact {
|
||||
return pkgIsExact
|
||||
}
|
||||
|
||||
// Prefer packages with more syntax nodes (more files)
|
||||
pkgSyntaxCount := len(pkg.Syntax)
|
||||
currentSyntaxCount := len(current.Syntax)
|
||||
if pkgSyntaxCount != currentSyntaxCount {
|
||||
return pkgSyntaxCount > currentSyntaxCount
|
||||
}
|
||||
|
||||
// Fall back to string length
|
||||
return len(pkg.ID) > len(current.ID)
|
||||
}
|
||||
|
||||
// ExtractWithFlags extracts the packages specified by the given patterns and build flags
|
||||
func ExtractWithFlags(buildFlags []string, patterns []string, extractTests bool, sourceRoot string) error {
|
||||
startTime := time.Now()
|
||||
@@ -153,20 +191,22 @@ func ExtractWithFlags(buildFlags []string, patterns []string, extractTests bool,
|
||||
|
||||
pkgsNotFound := make([]string, 0, len(pkgs))
|
||||
|
||||
// Build a map from package paths to their longest IDs--
|
||||
// Build a map from package paths to their best IDs--
|
||||
// in the context of a `go test -c` compilation, we will see the same package more than
|
||||
// once, with IDs like "abc.com/pkgname [abc.com/pkgname.test]" to distinguish the version
|
||||
// that contains and is used by test code.
|
||||
// For our purposes it is simplest to just ignore the non-test version, since the test
|
||||
// version seems to be a superset of it.
|
||||
longestPackageIds := make(map[string]string)
|
||||
// We prefer the version with the most complete test coverage, which is typically:
|
||||
// 1. The exact test package (e.g., "pkg [pkg.test]") over nested test dependencies
|
||||
// 2. The package with the most Syntax nodes (most files to extract)
|
||||
// 3. The longest ID string as a tiebreaker
|
||||
bestPackageIds := make(map[string]*packages.Package)
|
||||
packages.Visit(pkgs, nil, func(pkg *packages.Package) {
|
||||
if longestIDSoFar, present := longestPackageIds[pkg.PkgPath]; present {
|
||||
if len(pkg.ID) > len(longestIDSoFar) {
|
||||
longestPackageIds[pkg.PkgPath] = pkg.ID
|
||||
if bestSoFar, present := bestPackageIds[pkg.PkgPath]; present {
|
||||
if isBetterPackage(pkg, bestSoFar) {
|
||||
bestPackageIds[pkg.PkgPath] = pkg
|
||||
}
|
||||
} else {
|
||||
longestPackageIds[pkg.PkgPath] = pkg.ID
|
||||
bestPackageIds[pkg.PkgPath] = pkg
|
||||
}
|
||||
})
|
||||
|
||||
@@ -257,15 +297,15 @@ func ExtractWithFlags(buildFlags []string, patterns []string, extractTests bool,
|
||||
// extract AST information for all packages
|
||||
packages.Visit(pkgs, nil, func(pkg *packages.Package) {
|
||||
|
||||
// If this is a variant of a package that also occurs with a longer ID, skip it;
|
||||
// If this is a variant of a package that also occurs with a better ID, skip it;
|
||||
// otherwise we would extract the same file more than once including extracting the
|
||||
// body of methods twice, causing database inconsistencies.
|
||||
//
|
||||
// We prefer the version with the longest ID because that is (so far as I know) always
|
||||
// the version that defines more entities -- the only case I'm aware of being a test
|
||||
// variant of a package, which includes test-only functions in addition to the complete
|
||||
// contents of the main variant.
|
||||
if pkg.ID != longestPackageIds[pkg.PkgPath] {
|
||||
// We prefer the version with the most complete test coverage, prioritizing:
|
||||
// 1. Exact test packages (e.g., "pkg [pkg.test]") over nested test dependencies
|
||||
// 2. Packages with more Syntax nodes (more files to extract)
|
||||
// 3. Longer ID strings as a tiebreaker
|
||||
if pkg.ID != bestPackageIds[pkg.PkgPath].ID {
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
255
go/extractor/extractor_test.go
Normal file
255
go/extractor/extractor_test.go
Normal file
@@ -0,0 +1,255 @@
|
||||
package extractor
|
||||
|
||||
import (
|
||||
"go/ast"
|
||||
"testing"
|
||||
|
||||
"golang.org/x/tools/go/packages"
|
||||
)
|
||||
|
||||
func TestIsExactTestPackage(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
pkgID string
|
||||
pkgPath string
|
||||
expected bool
|
||||
}{
|
||||
{
|
||||
name: "exact test package",
|
||||
pkgID: "github.com/foo/bar [github.com/foo/bar.test]",
|
||||
pkgPath: "github.com/foo/bar",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "nested test package",
|
||||
pkgID: "github.com/foo/bar [github.com/foo/bar/nested.test]",
|
||||
pkgPath: "github.com/foo/bar",
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
name: "deeply nested test package",
|
||||
pkgID: "github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6/plumbing/format/packfile.test]",
|
||||
pkgPath: "github.com/go-git/go-git/v6",
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
name: "exact test package with version",
|
||||
pkgID: "github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6.test]",
|
||||
pkgPath: "github.com/go-git/go-git/v6",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "non-test package",
|
||||
pkgID: "github.com/foo/bar",
|
||||
pkgPath: "github.com/foo/bar",
|
||||
expected: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
pkg := &packages.Package{
|
||||
ID: tt.pkgID,
|
||||
PkgPath: tt.pkgPath,
|
||||
}
|
||||
result := isExactTestPackage(pkg)
|
||||
if result != tt.expected {
|
||||
t.Errorf("isExactTestPackage(%q) = %v, want %v", tt.pkgID, result, tt.expected)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsBetterPackage(t *testing.T) {
|
||||
// Helper to create a package with specified properties
|
||||
makePkg := func(id, path string, syntaxCount int) *packages.Package {
|
||||
syntax := make([]*ast.File, syntaxCount)
|
||||
return &packages.Package{
|
||||
ID: id,
|
||||
PkgPath: path,
|
||||
Syntax: syntax,
|
||||
}
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
pkg *packages.Package
|
||||
current *packages.Package
|
||||
expected bool // true if pkg is better than current
|
||||
}{
|
||||
{
|
||||
name: "exact test package beats nested test package",
|
||||
pkg: makePkg(
|
||||
"github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6.test]",
|
||||
"github.com/go-git/go-git/v6",
|
||||
39, // 19 production + 20 test files
|
||||
),
|
||||
current: makePkg(
|
||||
"github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6/plumbing/format/packfile.test]",
|
||||
"github.com/go-git/go-git/v6",
|
||||
19, // production files only
|
||||
),
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "nested test package loses to exact test package",
|
||||
pkg: makePkg(
|
||||
"github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6/plumbing/format/packfile.test]",
|
||||
"github.com/go-git/go-git/v6",
|
||||
19,
|
||||
),
|
||||
current: makePkg(
|
||||
"github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6.test]",
|
||||
"github.com/go-git/go-git/v6",
|
||||
39,
|
||||
),
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
name: "more syntax nodes wins when both are exact tests",
|
||||
pkg: makePkg(
|
||||
"github.com/foo/bar [github.com/foo/bar.test]",
|
||||
"github.com/foo/bar",
|
||||
50,
|
||||
),
|
||||
current: makePkg(
|
||||
"github.com/foo/bar [github.com/foo/bar.test]",
|
||||
"github.com/foo/bar",
|
||||
30,
|
||||
),
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "fewer syntax nodes loses when both are exact tests",
|
||||
pkg: makePkg(
|
||||
"github.com/foo/bar [github.com/foo/bar.test]",
|
||||
"github.com/foo/bar",
|
||||
30,
|
||||
),
|
||||
current: makePkg(
|
||||
"github.com/foo/bar [github.com/foo/bar.test]",
|
||||
"github.com/foo/bar",
|
||||
50,
|
||||
),
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
name: "more syntax nodes wins when both are nested tests",
|
||||
pkg: makePkg(
|
||||
"github.com/foo/bar [github.com/foo/bar/pkg1.test]",
|
||||
"github.com/foo/bar",
|
||||
25,
|
||||
),
|
||||
current: makePkg(
|
||||
"github.com/foo/bar [github.com/foo/bar/pkg2.test]",
|
||||
"github.com/foo/bar",
|
||||
20,
|
||||
),
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "longer ID wins when same syntax count",
|
||||
pkg: makePkg(
|
||||
"github.com/foo/bar [github.com/foo/bar/verylongpackagename.test]",
|
||||
"github.com/foo/bar",
|
||||
20,
|
||||
),
|
||||
current: makePkg(
|
||||
"github.com/foo/bar [github.com/foo/bar/short.test]",
|
||||
"github.com/foo/bar",
|
||||
20,
|
||||
),
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "test package beats non-test with same syntax count",
|
||||
pkg: makePkg(
|
||||
"github.com/foo/bar [github.com/foo/bar.test]",
|
||||
"github.com/foo/bar",
|
||||
20,
|
||||
),
|
||||
current: makePkg(
|
||||
"github.com/foo/bar",
|
||||
"github.com/foo/bar",
|
||||
20,
|
||||
),
|
||||
expected: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := isBetterPackage(tt.pkg, tt.current)
|
||||
if result != tt.expected {
|
||||
t.Errorf("isBetterPackage() = %v, want %v\n pkg: %q (%d syntax nodes)\n current: %q (%d syntax nodes)",
|
||||
result, tt.expected,
|
||||
tt.pkg.ID, len(tt.pkg.Syntax),
|
||||
tt.current.ID, len(tt.current.Syntax))
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestPackageSelectionRealWorld simulates the real-world go-git scenario
|
||||
func TestPackageSelectionRealWorld(t *testing.T) {
|
||||
// Simulate the actual packages.Load result for go-git repository
|
||||
// when EXTRACT_TESTS=true
|
||||
pkgs := []*packages.Package{
|
||||
// Production package only
|
||||
{
|
||||
ID: "github.com/go-git/go-git/v6",
|
||||
PkgPath: "github.com/go-git/go-git/v6",
|
||||
Syntax: make([]*ast.File, 19), // 19 production files
|
||||
},
|
||||
// Root test package - this is what we want!
|
||||
{
|
||||
ID: "github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6.test]",
|
||||
PkgPath: "github.com/go-git/go-git/v6",
|
||||
Syntax: make([]*ast.File, 39), // 19 production + 20 test files
|
||||
},
|
||||
// Nested test dependency 1
|
||||
{
|
||||
ID: "github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6/plumbing/format/packfile.test]",
|
||||
PkgPath: "github.com/go-git/go-git/v6",
|
||||
Syntax: make([]*ast.File, 19), // production files only (dependency)
|
||||
},
|
||||
// Nested test dependency 2
|
||||
{
|
||||
ID: "github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6/plumbing/object.test]",
|
||||
PkgPath: "github.com/go-git/go-git/v6",
|
||||
Syntax: make([]*ast.File, 19), // production files only (dependency)
|
||||
},
|
||||
}
|
||||
|
||||
// Simulate the bestPackageIds selection logic
|
||||
bestPackageIds := make(map[string]*packages.Package)
|
||||
for _, pkg := range pkgs {
|
||||
if bestSoFar, present := bestPackageIds[pkg.PkgPath]; present {
|
||||
if isBetterPackage(pkg, bestSoFar) {
|
||||
bestPackageIds[pkg.PkgPath] = pkg
|
||||
}
|
||||
} else {
|
||||
bestPackageIds[pkg.PkgPath] = pkg
|
||||
}
|
||||
}
|
||||
|
||||
// Verify the correct package was selected
|
||||
selected := bestPackageIds["github.com/go-git/go-git/v6"]
|
||||
expectedID := "github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6.test]"
|
||||
expectedSyntaxCount := 39
|
||||
|
||||
if selected.ID != expectedID {
|
||||
t.Errorf("Wrong package selected!\n got: %q (%d syntax nodes)\n want: %q (%d syntax nodes)",
|
||||
selected.ID, len(selected.Syntax),
|
||||
expectedID, expectedSyntaxCount)
|
||||
}
|
||||
|
||||
if len(selected.Syntax) != expectedSyntaxCount {
|
||||
t.Errorf("Wrong syntax count: got %d, want %d", len(selected.Syntax), expectedSyntaxCount)
|
||||
}
|
||||
|
||||
// Verify it's recognized as an exact test package
|
||||
if !isExactTestPackage(selected) {
|
||||
t.Errorf("Selected package %q should be recognized as exact test package", selected.ID)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user