Extract packages more intelligently

We now extract packages that have the same module root as the specified packages, as determined by
the `go list` command.
This commit is contained in:
Sauyon Lee
2020-05-22 07:52:29 -07:00
parent 6883a97628
commit f197975c6e
2 changed files with 136 additions and 17 deletions

View File

@@ -52,12 +52,42 @@ func ExtractWithFlags(buildFlags []string, patterns []string) error {
extractUniverseScope()
// a map of package path to package root directory (currently the module root or the source directory)
pkgRoots := make(map[string]string)
// a map of package path to source code directory
pkgDirs := make(map[string]string)
// root directories of packages that we want to extract
wantedRoots := make(map[string]bool)
for _, pkg := range pkgs {
mdir := util.GetModDir(pkg.PkgPath)
pdir := util.GetPkgDir(pkg.PkgPath)
if mdir == "" {
mdir = pdir
}
if mdir == "" {
log.Fatalf("Unable to get a source directory for input package %s.", pkg.PkgPath)
}
pkgRoots[pkg.PkgPath] = mdir
pkgDirs[pkg.PkgPath] = pdir
wantedRoots[mdir] = true
}
// recursively visit all packages in depth-first order;
// on the way down, associate each package scope with its corresponding package,
// and on the way up extract the package's scope
packages.Visit(pkgs, func(pkg *packages.Package) bool {
return true
}, func(pkg *packages.Package) {
if _, ok := pkgRoots[pkg.PkgPath]; !ok {
mdir := util.GetModDir(pkg.PkgPath)
pdir := util.GetPkgDir(pkg.PkgPath)
if mdir == "" {
mdir = pdir
}
pkgRoots[pkg.PkgPath] = mdir
pkgDirs[pkg.PkgPath] = pdir
}
tw, err := trap.NewWriter(pkg.PkgPath, pkg)
if err != nil {
log.Fatal(err)
@@ -101,22 +131,6 @@ func ExtractWithFlags(buildFlags []string, patterns []string) error {
log.Printf("Max goroutines set to %d", maxgoroutines)
}
var wg sync.WaitGroup
// this semaphore is used to limit the number of files that are open at once;
// this is to prevent the extractor from running into issues with caps on the
// number of open files that can be held by one process
fdSem := newSemaphore(100)
// this semaphore is used to limit the number of goroutines spawned, so we
// don't run into memory issues
goroutineSem := newSemaphore(maxgoroutines)
// extract AST information for all packages
for _, pkg := range pkgs {
extractPackage(pkg, &wg, goroutineSem, fdSem)
}
wg.Wait()
cwd, err := os.Getwd()
if err != nil {
log.Printf("Warning: unable to get working directory: %s", err.Error())
@@ -154,6 +168,47 @@ func ExtractWithFlags(buildFlags []string, patterns []string) error {
log.Printf("Done extracting %s (%dms)", path, end.Nanoseconds()/1000000)
}
var wg sync.WaitGroup
// this semaphore is used to limit the number of files that are open at once;
// this is to prevent the extractor from running into issues with caps on the
// number of open files that can be held by one process
fdSem := newSemaphore(100)
// this semaphore is used to limit the number of goroutines spawned, so we
// don't run into memory issues
goroutineSem := newSemaphore(maxgoroutines)
// extract AST information for all packages
packages.Visit(pkgs, func(pkg *packages.Package) bool {
return wantedRoots[pkgRoots[pkg.PkgPath]]
}, func(pkg *packages.Package) {
rootLoop:
for root, _ := range wantedRoots {
relDir, err := filepath.Rel(root, pkgDirs[pkg.PkgPath])
if err != nil {
// if the paths can't be made relative, skip it
continue
}
dirList := strings.Split(relDir, string(filepath.Separator))
if len(dirList) == 0 || dirList[0] != ".." {
// if dirList is empty, root is the same as the source dir
// if dirList starts with `".."`, it is not inside the root dir
for _, dir := range dirList {
if dir == "vendor" {
// if the path relative to the root contains vendor, continue
//
// we may want to extract the package if it's been explicitly included
// (i.e. it has been passed directly), but we shouldn't include it for
// this root
continue rootLoop
}
}
extractPackage(pkg, &wg, goroutineSem, fdSem)
}
}
})
wg.Wait()
return nil
}

View File

@@ -1,6 +1,12 @@
package util
import "os"
import (
"log"
"os"
"os/exec"
"path/filepath"
"strings"
)
// Getenv retrieves the value of the environment variable named by the key.
// If that variable is not present, it iterates over the given aliases until
@@ -18,3 +24,61 @@ func Getenv(key string, aliases ...string) string {
}
return ""
}
// GetModDir returns the absolute path of the root directory of the module containing the package
// with import path `pkgpath`, as reported by `go list`.
//
// It returns the empty string when the package does not belong to a module (for example when
// modules are not in use), when `go list` reports no directory for the module, or when `go list`
// cannot be run or exits with an error. Failures are logged as warnings rather than returned.
func GetModDir(pkgpath string) string {
	// One invocation answers both questions: the `with` action skips its body when .Module is
	// nil, so the output is empty when there is no module and the module directory otherwise.
	modDir, err := exec.Command("go", "list", "-e", "-f", "{{with .Module}}{{.Dir}}{{end}}", pkgpath).Output()
	if err != nil {
		if exitErr, ok := err.(*exec.ExitError); ok {
			log.Printf("Warning: go list command failed, output below:\n%s%s", modDir, exitErr.Stderr)
		} else {
			log.Printf("Warning: Failed to run go list: %s", err.Error())
		}
		return ""
	}

	trimmed := strings.TrimSpace(string(modDir))
	if trimmed == "" {
		// No module, or no known directory for it. This must be handled before calling
		// filepath.Abs, which would turn the empty string into the current working directory.
		return ""
	}

	abs, err := filepath.Abs(trimmed)
	if err != nil {
		log.Printf("Warning: unable to make %s absolute: %s", trimmed, err.Error())
		return ""
	}
	return abs
}
// GetPkgDir returns the absolute path of the directory containing the package with import path
// `pkgpath`, as reported by `go list`.
//
// It returns the empty string when `go list` reports no directory for the package, or when
// `go list` cannot be run or exits with an error. Failures are logged as warnings rather than
// returned.
func GetPkgDir(pkgpath string) string {
	pkgDir, err := exec.Command("go", "list", "-e", "-f", "{{.Dir}}", pkgpath).Output()
	if err != nil {
		if exitErr, ok := err.(*exec.ExitError); ok {
			log.Printf("Warning: go list command failed, output below:\n%s%s", pkgDir, exitErr.Stderr)
		} else {
			log.Printf("Warning: Failed to run go list: %s", err.Error())
		}
		return ""
	}

	trimmed := strings.TrimSpace(string(pkgDir))
	if trimmed == "" {
		// Because of `-e`, go list can succeed while printing an empty directory for a package
		// it could not resolve. This must be handled before calling filepath.Abs, which would
		// turn the empty string into the current working directory.
		return ""
	}

	abs, err := filepath.Abs(trimmed)
	if err != nil {
		log.Printf("Warning: unable to make %s absolute: %s", trimmed, err.Error())
		return ""
	}
	return abs
}