Merge pull request #21826 from AriehSchneier/fix/go-extractor-root-test-files

Go: Fix extractor to extract root internal test files
This commit is contained in:
Owen Mansel-Chan
2026-05-12 10:34:42 +01:00
committed by GitHub
11 changed files with 489 additions and 23 deletions

View File

@@ -1,4 +1,4 @@
load("@rules_go//go:def.bzl", "go_library")
load("@rules_go//go:def.bzl", "go_library", "go_test")
load("@rules_java//java:defs.bzl", "java_library")
load("@rules_pkg//pkg:mappings.bzl", "pkg_files")
@@ -60,3 +60,10 @@ pkg_files(
},
visibility = ["//go:__pkg__"],
)
go_test(
name = "extractor_test",
srcs = ["extractor_test.go"],
embed = [":extractor"],
deps = ["@org_golang_x_tools//go/packages"],
)

View File

@@ -59,6 +59,63 @@ func init() {
}
}
// isExactTestPackage checks if a package ID represents an exact test match.
// Returns true for IDs like "github.com/foo/bar [github.com/foo/bar.test]"
// Returns false for IDs like "github.com/foo/bar [github.com/foo/bar/nested.test]"
func isExactTestPackage(pkg *packages.Package) bool {
// Test packages have IDs in the format: "pkgpath [pkgpath.test]"
// or for nested test dependencies: "pkgpath [pkgpath/nested.test]"
expectedTestID := pkg.PkgPath + " [" + pkg.PkgPath + ".test]"
return pkg.ID == expectedTestID
}
// isBetterPackage determines if pkg is a better choice than current for extraction.
// Preferences:
// 1. Exact test package (e.g., "pkg [pkg.test]") over nested test dependencies
// 2. More Syntax nodes (more files to extract)
// 3. Longer ID string as tiebreaker
func isBetterPackage(pkg, current *packages.Package) bool {
pkgIsExact := isExactTestPackage(pkg)
currentIsExact := isExactTestPackage(current)
// Prefer exact test packages
if pkgIsExact != currentIsExact {
return pkgIsExact
}
// Prefer packages with more syntax nodes (more files)
pkgSyntaxCount := len(pkg.Syntax)
currentSyntaxCount := len(current.Syntax)
if pkgSyntaxCount != currentSyntaxCount {
return pkgSyntaxCount > currentSyntaxCount
}
// Fall back to string length
return len(pkg.ID) > len(current.ID)
}
// selectBestPackages builds a map from package paths to their best package variants.
// In the context of a `go test -c` compilation, we see the same package more than
// once, with IDs like "abc.com/pkgname [abc.com/pkgname.test]" to distinguish the version
// that contains and is used by test code.
// We prefer the version with the most complete test coverage, which is typically:
// 1. The exact test package (e.g., "pkg [pkg.test]") over nested test dependencies
// 2. The package with the most Syntax nodes (most files to extract)
// 3. The longest ID string as a tiebreaker
func selectBestPackages(pkgs []*packages.Package) map[string]*packages.Package {
bestPackageIds := make(map[string]*packages.Package)
packages.Visit(pkgs, nil, func(pkg *packages.Package) {
if bestSoFar, present := bestPackageIds[pkg.PkgPath]; present {
if isBetterPackage(pkg, bestSoFar) {
bestPackageIds[pkg.PkgPath] = pkg
}
} else {
bestPackageIds[pkg.PkgPath] = pkg
}
})
return bestPackageIds
}
// ExtractWithFlags extracts the packages specified by the given patterns and build flags
func ExtractWithFlags(buildFlags []string, patterns []string, extractTests bool, sourceRoot string) error {
startTime := time.Now()
@@ -153,22 +210,8 @@ func ExtractWithFlags(buildFlags []string, patterns []string, extractTests bool,
pkgsNotFound := make([]string, 0, len(pkgs))
// Build a map from package paths to their longest IDs--
// in the context of a `go test -c` compilation, we will see the same package more than
// once, with IDs like "abc.com/pkgname [abc.com/pkgname.test]" to distinguish the version
// that contains and is used by test code.
// For our purposes it is simplest to just ignore the non-test version, since the test
// version seems to be a superset of it.
longestPackageIds := make(map[string]string)
packages.Visit(pkgs, nil, func(pkg *packages.Package) {
if longestIDSoFar, present := longestPackageIds[pkg.PkgPath]; present {
if len(pkg.ID) > len(longestIDSoFar) {
longestPackageIds[pkg.PkgPath] = pkg.ID
}
} else {
longestPackageIds[pkg.PkgPath] = pkg.ID
}
})
// Build a map from package paths to their best IDs
bestPackageIds := selectBestPackages(pkgs)
// Do a post-order traversal and extract the package scope of each package
packages.Visit(pkgs, nil, func(pkg *packages.Package) {
@@ -257,15 +300,15 @@ func ExtractWithFlags(buildFlags []string, patterns []string, extractTests bool,
// extract AST information for all packages
packages.Visit(pkgs, nil, func(pkg *packages.Package) {
// If this is a variant of a package that also occurs with a longer ID, skip it;
// If this is a variant of a package that also occurs with a better ID, skip it;
// otherwise we would extract the same file more than once including extracting the
// body of methods twice, causing database inconsistencies.
//
// We prefer the version with the longest ID because that is (so far as I know) always
// the version that defines more entities -- the only case I'm aware of being a test
// variant of a package, which includes test-only functions in addition to the complete
// contents of the main variant.
if pkg.ID != longestPackageIds[pkg.PkgPath] {
// We prefer the version with the most complete test coverage, prioritizing:
// 1. Exact test packages (e.g., "pkg [pkg.test]") over nested test dependencies
// 2. Packages with more Syntax nodes (more files to extract)
// 3. Longer ID strings as a tiebreaker
if pkg.ID != bestPackageIds[pkg.PkgPath].ID {
return
}

View File

@@ -0,0 +1,343 @@
package extractor
import (
"go/ast"
"testing"
"golang.org/x/tools/go/packages"
)
func TestIsExactTestPackage(t *testing.T) {
tests := []struct {
name string
pkgID string
pkgPath string
expected bool
}{
{
name: "exact test package",
pkgID: "github.com/foo/bar [github.com/foo/bar.test]",
pkgPath: "github.com/foo/bar",
expected: true,
},
{
name: "nested test package",
pkgID: "github.com/foo/bar [github.com/foo/bar/nested.test]",
pkgPath: "github.com/foo/bar",
expected: false,
},
{
name: "deeply nested test package",
pkgID: "github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6/plumbing/format/packfile.test]",
pkgPath: "github.com/go-git/go-git/v6",
expected: false,
},
{
name: "exact test package with version",
pkgID: "github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6.test]",
pkgPath: "github.com/go-git/go-git/v6",
expected: true,
},
{
name: "non-test package",
pkgID: "github.com/foo/bar",
pkgPath: "github.com/foo/bar",
expected: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
pkg := &packages.Package{
ID: tt.pkgID,
PkgPath: tt.pkgPath,
}
result := isExactTestPackage(pkg)
if result != tt.expected {
t.Errorf("isExactTestPackage(%q) = %v, want %v", tt.pkgID, result, tt.expected)
}
})
}
}
func TestIsBetterPackage(t *testing.T) {
// Helper to create a package with specified properties
makePkg := func(id, path string, syntaxCount int) *packages.Package {
syntax := make([]*ast.File, syntaxCount)
return &packages.Package{
ID: id,
PkgPath: path,
Syntax: syntax,
}
}
tests := []struct {
name string
pkg *packages.Package
current *packages.Package
expected bool // true if pkg is better than current
}{
{
name: "exact test package beats nested test package",
pkg: makePkg(
"github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6.test]",
"github.com/go-git/go-git/v6",
39, // 19 production + 20 test files
),
current: makePkg(
"github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6/plumbing/format/packfile.test]",
"github.com/go-git/go-git/v6",
19, // production files only
),
expected: true,
},
{
name: "nested test package loses to exact test package",
pkg: makePkg(
"github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6/plumbing/format/packfile.test]",
"github.com/go-git/go-git/v6",
19,
),
current: makePkg(
"github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6.test]",
"github.com/go-git/go-git/v6",
39,
),
expected: false,
},
{
name: "more syntax nodes wins when both are exact tests",
pkg: makePkg(
"github.com/foo/bar [github.com/foo/bar.test]",
"github.com/foo/bar",
50,
),
current: makePkg(
"github.com/foo/bar [github.com/foo/bar.test]",
"github.com/foo/bar",
30,
),
expected: true,
},
{
name: "fewer syntax nodes loses when both are exact tests",
pkg: makePkg(
"github.com/foo/bar [github.com/foo/bar.test]",
"github.com/foo/bar",
30,
),
current: makePkg(
"github.com/foo/bar [github.com/foo/bar.test]",
"github.com/foo/bar",
50,
),
expected: false,
},
{
name: "more syntax nodes wins when both are nested tests",
pkg: makePkg(
"github.com/foo/bar [github.com/foo/bar/pkg1.test]",
"github.com/foo/bar",
25,
),
current: makePkg(
"github.com/foo/bar [github.com/foo/bar/pkg2.test]",
"github.com/foo/bar",
20,
),
expected: true,
},
{
name: "longer ID wins when same syntax count",
pkg: makePkg(
"github.com/foo/bar [github.com/foo/bar/verylongpackagename.test]",
"github.com/foo/bar",
20,
),
current: makePkg(
"github.com/foo/bar [github.com/foo/bar/short.test]",
"github.com/foo/bar",
20,
),
expected: true,
},
{
name: "test package beats non-test with same syntax count",
pkg: makePkg(
"github.com/foo/bar [github.com/foo/bar.test]",
"github.com/foo/bar",
20,
),
current: makePkg(
"github.com/foo/bar",
"github.com/foo/bar",
20,
),
expected: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := isBetterPackage(tt.pkg, tt.current)
if result != tt.expected {
t.Errorf("isBetterPackage() = %v, want %v\n pkg: %q (%d syntax nodes)\n current: %q (%d syntax nodes)",
result, tt.expected,
tt.pkg.ID, len(tt.pkg.Syntax),
tt.current.ID, len(tt.current.Syntax))
}
})
}
}
// TestSelectBestPackages tests the selectBestPackages function
func TestSelectBestPackages(t *testing.T) {
// Helper to create a package with specified properties
makePkg := func(id, path string, syntaxCount int) *packages.Package {
syntax := make([]*ast.File, syntaxCount)
return &packages.Package{
ID: id,
PkgPath: path,
Syntax: syntax,
}
}
tests := []struct {
name string
pkgs []*packages.Package
expectedPkgIDs map[string]string // pkgPath -> expected selected ID
}{
{
name: "single package",
pkgs: []*packages.Package{
makePkg("example.com/pkg", "example.com/pkg", 10),
},
expectedPkgIDs: map[string]string{
"example.com/pkg": "example.com/pkg",
},
},
{
name: "test package preferred over production",
pkgs: []*packages.Package{
makePkg("example.com/pkg", "example.com/pkg", 10),
makePkg("example.com/pkg [example.com/pkg.test]", "example.com/pkg", 15),
},
expectedPkgIDs: map[string]string{
"example.com/pkg": "example.com/pkg [example.com/pkg.test]",
},
},
{
name: "exact test preferred over nested test",
pkgs: []*packages.Package{
makePkg("example.com/pkg [example.com/pkg.test]", "example.com/pkg", 20),
makePkg("example.com/pkg [example.com/pkg/nested.test]", "example.com/pkg", 15),
},
expectedPkgIDs: map[string]string{
"example.com/pkg": "example.com/pkg [example.com/pkg.test]",
},
},
{
name: "multiple packages with different paths",
pkgs: []*packages.Package{
makePkg("example.com/pkg1", "example.com/pkg1", 10),
makePkg("example.com/pkg1 [example.com/pkg1.test]", "example.com/pkg1", 15),
makePkg("example.com/pkg2", "example.com/pkg2", 8),
makePkg("example.com/pkg2 [example.com/pkg2.test]", "example.com/pkg2", 12),
},
expectedPkgIDs: map[string]string{
"example.com/pkg1": "example.com/pkg1 [example.com/pkg1.test]",
"example.com/pkg2": "example.com/pkg2 [example.com/pkg2.test]",
},
},
{
name: "more syntax nodes wins among nested tests",
pkgs: []*packages.Package{
makePkg("example.com/pkg [example.com/pkg/a.test]", "example.com/pkg", 10),
makePkg("example.com/pkg [example.com/pkg/b.test]", "example.com/pkg", 20),
},
expectedPkgIDs: map[string]string{
"example.com/pkg": "example.com/pkg [example.com/pkg/b.test]",
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := selectBestPackages(tt.pkgs)
// Check that all expected packages are present
for pkgPath, expectedID := range tt.expectedPkgIDs {
selected, found := result[pkgPath]
if !found {
t.Errorf("Expected package path %q not found in result", pkgPath)
continue
}
if selected.ID != expectedID {
t.Errorf("For package path %q: got ID %q, want %q",
pkgPath, selected.ID, expectedID)
}
}
// Check that no unexpected packages are present
if len(result) != len(tt.expectedPkgIDs) {
t.Errorf("Expected %d packages in result, got %d",
len(tt.expectedPkgIDs), len(result))
}
})
}
}
// TestPackageSelectionRealWorld simulates the real-world go-git scenario
func TestPackageSelectionRealWorld(t *testing.T) {
// Simulate the actual packages.Load result for go-git repository
// when EXTRACT_TESTS=true
pkgs := []*packages.Package{
// Production package only
{
ID: "github.com/go-git/go-git/v6",
PkgPath: "github.com/go-git/go-git/v6",
Syntax: make([]*ast.File, 19), // 19 production files
},
// Root test package - this is what we want!
{
ID: "github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6.test]",
PkgPath: "github.com/go-git/go-git/v6",
Syntax: make([]*ast.File, 39), // 19 production + 20 test files
},
// Nested test dependency 1
{
ID: "github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6/plumbing/format/packfile.test]",
PkgPath: "github.com/go-git/go-git/v6",
Syntax: make([]*ast.File, 19), // production files only (dependency)
},
// Nested test dependency 2
{
ID: "github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6/plumbing/object.test]",
PkgPath: "github.com/go-git/go-git/v6",
Syntax: make([]*ast.File, 19), // production files only (dependency)
},
}
// Use the actual selection logic from the extractor
bestPackageIds := selectBestPackages(pkgs)
// Verify the correct package was selected
selected := bestPackageIds["github.com/go-git/go-git/v6"]
expectedID := "github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6.test]"
expectedSyntaxCount := 39
if selected.ID != expectedID {
t.Errorf("Wrong package selected!\n got: %q (%d syntax nodes)\n want: %q (%d syntax nodes)",
selected.ID, len(selected.Syntax),
expectedID, expectedSyntaxCount)
}
if len(selected.Syntax) != expectedSyntaxCount {
t.Errorf("Wrong syntax count: got %d, want %d", len(selected.Syntax), expectedSyntaxCount)
}
// Verify it's recognized as an exact test package
if !isExactTestPackage(selected) {
t.Errorf("Selected package %q should be recognized as exact test package", selected.ID)
}
}

View File

@@ -0,0 +1,3 @@
module example.com/testpkg
go 1.26

View File

@@ -0,0 +1,13 @@
package main
func PublicFunc() int {
return 42
}
func privateFunc() int {
return 24
}
func main() {
PublicFunc()
}

View File

@@ -0,0 +1,16 @@
package main
import "testing"
// Root internal test - tests private functions
func TestPrivateFunc(t *testing.T) {
if privateFunc() != 24 {
t.Error("privateFunc failed")
}
}
func TestPublicFunc(t *testing.T) {
if PublicFunc() != 42 {
t.Error("PublicFunc failed")
}
}

View File

@@ -0,0 +1,5 @@
package nested
func NestedFunc() string {
return "nested"
}

View File

@@ -0,0 +1,9 @@
package nested
import "testing"
func TestNestedFunc(t *testing.T) {
if NestedFunc() != "nested" {
t.Error("NestedFunc failed")
}
}

View File

@@ -0,0 +1,7 @@
#select
| main_test.go |
| nested/nested_test.go |
testFunctions
| TestNestedFunc | nested/nested_test.go |
| TestPrivateFunc | main_test.go |
| TestPublicFunc | main_test.go |

View File

@@ -0,0 +1,5 @@
import os
def test(codeql, go):
# Test that root internal test files are extracted when nested packages have tests
codeql.database.create(source_root="src", extractor_option = ["extract_tests=true"])

View File

@@ -0,0 +1,15 @@
import go
// Verify that root internal test files are extracted
// when nested packages also have tests
from File f
where f.getBaseName().matches("%_test.go")
select f.getRelativePath()
query predicate testFunctions(string name, string file) {
exists(FuncDecl fn |
fn.getName().matches("Test%") and
name = fn.getName() and
file = fn.getFile().getRelativePath()
)
}