diff --git a/go/extractor/extractor.go b/go/extractor/extractor.go index bbcd32c10d2..c9ac7c6088a 100644 --- a/go/extractor/extractor.go +++ b/go/extractor/extractor.go @@ -59,6 +59,44 @@ func init() { } } +// isExactTestPackage checks if a package ID represents an exact test match. +// Returns true for IDs like "github.com/foo/bar [github.com/foo/bar.test]" +// Returns false for IDs like "github.com/foo/bar [github.com/foo/bar/nested.test]" +func isExactTestPackage(pkg *packages.Package) bool { + // Test packages have IDs in the format: "pkgpath [pkgpath.test]" + // or for nested test dependencies: "pkgpath [pkgpath/nested.test]" + if !strings.Contains(pkg.ID, " [") { + return false + } + expectedTestID := pkg.PkgPath + " [" + pkg.PkgPath + ".test]" + return pkg.ID == expectedTestID +} + +// isBetterPackage determines if pkg is a better choice than current for extraction. +// Preferences: +// 1. Exact test package (e.g., "pkg [pkg.test]") over nested test dependencies +// 2. More Syntax nodes (more files to extract) +// 3. Longer ID string as tiebreaker +func isBetterPackage(pkg, current *packages.Package) bool { + pkgIsExact := isExactTestPackage(pkg) + currentIsExact := isExactTestPackage(current) + + // Prefer exact test packages + if pkgIsExact != currentIsExact { + return pkgIsExact + } + + // Prefer packages with more syntax nodes (more files) + pkgSyntaxCount := len(pkg.Syntax) + currentSyntaxCount := len(current.Syntax) + if pkgSyntaxCount != currentSyntaxCount { + return pkgSyntaxCount > currentSyntaxCount + } + + // Fall back to string length + return len(pkg.ID) > len(current.ID) +} + // ExtractWithFlags extracts the packages specified by the given patterns and build flags func ExtractWithFlags(buildFlags []string, patterns []string, extractTests bool, sourceRoot string) error { startTime := time.Now() @@ -153,20 +191,22 @@ func ExtractWithFlags(buildFlags []string, patterns []string, extractTests bool, pkgsNotFound := make([]string, 0, len(pkgs)) - // Build a map from package paths to their longest IDs-- + // Build a map from package paths to their best IDs-- // in the context of a `go test -c` compilation, we will see the same package more than // once, with IDs like "abc.com/pkgname [abc.com/pkgname.test]" to distinguish the version // that contains and is used by test code. - // For our purposes it is simplest to just ignore the non-test version, since the test - // version seems to be a superset of it. - longestPackageIds := make(map[string]string) + // We prefer the version with the most complete test coverage, which is typically: + // 1. The exact test package (e.g., "pkg [pkg.test]") over nested test dependencies + // 2. The package with the most Syntax nodes (most files to extract) + // 3. The longest ID string as a tiebreaker + bestPackageIds := make(map[string]*packages.Package) packages.Visit(pkgs, nil, func(pkg *packages.Package) { - if longestIDSoFar, present := longestPackageIds[pkg.PkgPath]; present { - if len(pkg.ID) > len(longestIDSoFar) { - longestPackageIds[pkg.PkgPath] = pkg.ID + if bestSoFar, present := bestPackageIds[pkg.PkgPath]; present { + if isBetterPackage(pkg, bestSoFar) { + bestPackageIds[pkg.PkgPath] = pkg } } else { - longestPackageIds[pkg.PkgPath] = pkg.ID + bestPackageIds[pkg.PkgPath] = pkg } }) @@ -257,15 +297,15 @@ func ExtractWithFlags(buildFlags []string, patterns []string, extractTests bool, // extract AST information for all packages packages.Visit(pkgs, nil, func(pkg *packages.Package) { - // If this is a variant of a package that also occurs with a longer ID, skip it; + // If this is a variant of a package that also occurs with a better ID, skip it; // otherwise we would extract the same file more than once including extracting the // body of methods twice, causing database inconsistencies. // - // We prefer the version with the longest ID because that is (so far as I know) always - // the version that defines more entities -- the only case I'm aware of being a test - // variant of a package, which includes test-only functions in addition to the complete - // contents of the main variant. - if pkg.ID != longestPackageIds[pkg.PkgPath] { + // We prefer the version with the most complete test coverage, prioritizing: + // 1. Exact test packages (e.g., "pkg [pkg.test]") over nested test dependencies + // 2. Packages with more Syntax nodes (more files to extract) + // 3. Longer ID strings as a tiebreaker + if pkg.ID != bestPackageIds[pkg.PkgPath].ID { return } diff --git a/go/extractor/extractor_test.go b/go/extractor/extractor_test.go new file mode 100644 index 00000000000..54f9dac1c12 --- /dev/null +++ b/go/extractor/extractor_test.go @@ -0,0 +1,255 @@ +package extractor + +import ( + "go/ast" + "testing" + + "golang.org/x/tools/go/packages" +) + +func TestIsExactTestPackage(t *testing.T) { + tests := []struct { + name string + pkgID string + pkgPath string + expected bool + }{ + { + name: "exact test package", + pkgID: "github.com/foo/bar [github.com/foo/bar.test]", + pkgPath: "github.com/foo/bar", + expected: true, + }, + { + name: "nested test package", + pkgID: "github.com/foo/bar [github.com/foo/bar/nested.test]", + pkgPath: "github.com/foo/bar", + expected: false, + }, + { + name: "deeply nested test package", + pkgID: "github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6/plumbing/format/packfile.test]", + pkgPath: "github.com/go-git/go-git/v6", + expected: false, + }, + { + name: "exact test package with version", + pkgID: "github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6.test]", + pkgPath: "github.com/go-git/go-git/v6", + expected: true, + }, + { + name: "non-test package", + pkgID: "github.com/foo/bar", + pkgPath: "github.com/foo/bar", + expected: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + pkg := &packages.Package{ + ID: tt.pkgID, + PkgPath: tt.pkgPath, + } + result := isExactTestPackage(pkg) + if result != tt.expected { + t.Errorf("isExactTestPackage(%q) = %v, want %v", tt.pkgID, result, tt.expected) + } + }) + } +} + +func TestIsBetterPackage(t *testing.T) { + // Helper to create a package with specified properties + makePkg := func(id, path string, syntaxCount int) *packages.Package { + syntax := make([]*ast.File, syntaxCount) + return &packages.Package{ + ID: id, + PkgPath: path, + Syntax: syntax, + } + } + + tests := []struct { + name string + pkg *packages.Package + current *packages.Package + expected bool // true if pkg is better than current + }{ + { + name: "exact test package beats nested test package", + pkg: makePkg( + "github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6.test]", + "github.com/go-git/go-git/v6", + 39, // 19 production + 20 test files + ), + current: makePkg( + "github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6/plumbing/format/packfile.test]", + "github.com/go-git/go-git/v6", + 19, // production files only + ), + expected: true, + }, + { + name: "nested test package loses to exact test package", + pkg: makePkg( + "github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6/plumbing/format/packfile.test]", + "github.com/go-git/go-git/v6", + 19, + ), + current: makePkg( + "github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6.test]", + "github.com/go-git/go-git/v6", + 39, + ), + expected: false, + }, + { + name: "more syntax nodes wins when both are exact tests", + pkg: makePkg( + "github.com/foo/bar [github.com/foo/bar.test]", + "github.com/foo/bar", + 50, + ), + current: makePkg( + "github.com/foo/bar [github.com/foo/bar.test]", + "github.com/foo/bar", + 30, + ), + expected: true, + }, + { + name: "fewer syntax nodes loses when both are exact tests", + pkg: makePkg( + "github.com/foo/bar [github.com/foo/bar.test]", + "github.com/foo/bar", + 30, + ), + current: makePkg( + "github.com/foo/bar [github.com/foo/bar.test]", + "github.com/foo/bar", + 50, + ), + expected: false, + }, + { + name: "more syntax nodes wins when both are nested tests", + pkg: makePkg( + "github.com/foo/bar [github.com/foo/bar/pkg1.test]", + "github.com/foo/bar", + 25, + ), + current: makePkg( + "github.com/foo/bar [github.com/foo/bar/pkg2.test]", + "github.com/foo/bar", + 20, + ), + expected: true, + }, + { + name: "longer ID wins when same syntax count", + pkg: makePkg( + "github.com/foo/bar [github.com/foo/bar/verylongpackagename.test]", + "github.com/foo/bar", + 20, + ), + current: makePkg( + "github.com/foo/bar [github.com/foo/bar/short.test]", + "github.com/foo/bar", + 20, + ), + expected: true, + }, + { + name: "test package beats non-test with same syntax count", + pkg: makePkg( + "github.com/foo/bar [github.com/foo/bar.test]", + "github.com/foo/bar", + 20, + ), + current: makePkg( + "github.com/foo/bar", + "github.com/foo/bar", + 20, + ), + expected: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := isBetterPackage(tt.pkg, tt.current) + if result != tt.expected { + t.Errorf("isBetterPackage() = %v, want %v\n pkg: %q (%d syntax nodes)\n current: %q (%d syntax nodes)", + result, tt.expected, + tt.pkg.ID, len(tt.pkg.Syntax), + tt.current.ID, len(tt.current.Syntax)) + } + }) + } +} + +// TestPackageSelectionRealWorld simulates the real-world go-git scenario +func TestPackageSelectionRealWorld(t *testing.T) { + // Simulate the actual packages.Load result for go-git repository + // when EXTRACT_TESTS=true + pkgs := []*packages.Package{ + // Production package only + { + ID: "github.com/go-git/go-git/v6", + PkgPath: "github.com/go-git/go-git/v6", + Syntax: make([]*ast.File, 19), // 19 production files + }, + // Root test package - this is what we want! + { + ID: "github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6.test]", + PkgPath: "github.com/go-git/go-git/v6", + Syntax: make([]*ast.File, 39), // 19 production + 20 test files + }, + // Nested test dependency 1 + { + ID: "github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6/plumbing/format/packfile.test]", + PkgPath: "github.com/go-git/go-git/v6", + Syntax: make([]*ast.File, 19), // production files only (dependency) + }, + // Nested test dependency 2 + { + ID: "github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6/plumbing/object.test]", + PkgPath: "github.com/go-git/go-git/v6", + Syntax: make([]*ast.File, 19), // production files only (dependency) + }, + } + + // Simulate the bestPackageIds selection logic + bestPackageIds := make(map[string]*packages.Package) + for _, pkg := range pkgs { + if bestSoFar, present := bestPackageIds[pkg.PkgPath]; present { + if isBetterPackage(pkg, bestSoFar) { + bestPackageIds[pkg.PkgPath] = pkg + } + } else { + bestPackageIds[pkg.PkgPath] = pkg + } + } + + // Verify the correct package was selected + selected := bestPackageIds["github.com/go-git/go-git/v6"] + expectedID := "github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6.test]" + expectedSyntaxCount := 39 + + if selected.ID != expectedID { + t.Errorf("Wrong package selected!\n got: %q (%d syntax nodes)\n want: %q (%d syntax nodes)", + selected.ID, len(selected.Syntax), + expectedID, expectedSyntaxCount) + } + + if len(selected.Syntax) != expectedSyntaxCount { + t.Errorf("Wrong syntax count: got %d, want %d", len(selected.Syntax), expectedSyntaxCount) + } + + // Verify it's recognized as an exact test package + if !isExactTestPackage(selected) { + t.Errorf("Selected package %q should be recognized as exact test package", selected.ID) + } +}