From 9d60489908cc23a25d4f951680e8daacc06fa00e Mon Sep 17 00:00:00 2001
From: Michael Hohn
Date: Tue, 13 Aug 2024 09:22:24 -0700
Subject: [PATCH] wip: Handle varying CodeQL DB formats. This code contains debugging features

This patch fixes the following:

- [X] Wrong db metadata path.
  Fixed via: globRecursively(databasePath, "codeql-database.yml")

  The log output for reference:

      agent | 2024/08/09 21:16:40 DEBUG XX:getDataBaseMetadata databasePath=/tmp/ce523549-a217-4b54-a118-7224ce444870/db "Waiting for SIGUSR1 or SIGUSR2..."=
      agent | 2024/08/09 21:16:40 DEBUG XX:getDataBaseMetadata databasePath=/tmp/bc24fe72-b520-4e72-9634-a98d630cb75e/db "Waiting for SIGUSR1 or SIGUSR2..."=
      agent | 2024/08/09 21:16:40 DEBUG Received signal: %s "user defined signal 1"=
      agent | 2024/08/09 21:16:40 DEBUG XX:getDataBaseMetadata databasePath=/tmp/41fcf5cc-e151-4a11-bccc-481d599aa426/db "Waiting for SIGUSR1 or SIGUSR2..."=

  This output comes from:

      func getDatabaseMetadata(databasePath string) (*DatabaseMetadata, error) {
          data, err := os.ReadFile(filepath.Join(databasePath, "codeql-database.yml"))
          ...}

  And some inspection:

      root@3fa4b8013336:~# find /tmp |grep ql-datab
      /tmp/27f09b9f-254f-4ef5-abf5-9a1a2927906b/db/cpp/codeql-database.yml
      /tmp/d7e14cd4-8789-4176-81bc-2ac1957ed9fd/db/codeql_db/codeql-database.yml
      /tmp/41fcf5cc-e151-4a11-bccc-481d599aa426/db/codeql_db/codeql-database.yml
      /tmp/bc24fe72-b520-4e72-9634-a98d630cb75e/db/codeql_db/codeql-database.yml
      /tmp/ce523549-a217-4b54-a118-7224ce444870/db/codeql_db/codeql-database.yml

- [X] Wrong db path.
  Fixed via: findDBDir(databasePath)

  The log output for reference:

      agent | 2024/08/09 21:51:09 ERROR Failed to run analysis job error="failed to run analysis: failed to run queries: exit status 2\nOutput: A fatal error occurred: /tmp/91c61e0b-dfd9-4dd3-a3ad-cb77dbc1cbfd/db is not a recognized CodeQL database.\n"
      agent | 2024/08/09 21:51:09 INFO Running analysis job job="{Spec:{SessionID:1 NameWithOwner:{Owner:USCiLab Repo:cerealctsj264953}} QueryPackLocation:{Key:1 Bucket:packs} QueryLanguage:cpp}"
      agent | 2024/08/09 21:51:09 ERROR Failed to run analysis job error="failed to run analysis: failed to run queries: exit status 2\nOutput: A fatal error occurred: /tmp/1b8ffeba-8ad1-465e-8ec7-36cda449a5f5/db is not a recognized CodeQL database.\n"
      ...

  This is easily confirmed:

      root@171b5417e05f:~# /opt/codeql/codeql database upgrade /tmp/7ed27578-d7ea-42e0-902a-effbc4df05f2/
      A fatal error occurred: /tmp/7ed27578-d7ea-42e0-902a-effbc4df05f2 is not a recognized CodeQL database.

  Another try:

      root@171b5417e05f:~# /opt/codeql/codeql database upgrade /tmp/7ed27578-d7ea-42e0-902a-effbc4df05f2/database.zip
      A fatal error occurred: Database root /tmp/7ed27578-d7ea-42e0-902a-effbc4df05f2/database.zip is not a directory.

  This one is correct:

      root@171b5417e05f:~# /opt/codeql/codeql database upgrade /tmp/7ed27578-d7ea-42e0-902a-effbc4df05f2/db/codeql_db
      /tmp/7ed27578-d7ea-42e0-902a-effbc4df05f2/db/codeql_db/db-cpp is up to date.

- [X] Wrong database source prefix.
  Also fixed via: findDBDir(databasePath)

  Similar log entries:

      agent | 2024/08/13 15:40:14 ERROR Failed to run analysis job error="failed to run analysis: failed to get source location prefix: failed to resolve database: exit status 2\nOutput: A fatal error occurred: /tmp/da420844-a284-4d82-9470-fa189a5b4ee6/db is not a recognized CodeQL database.\n"
      agent | 2024/08/13 15:40:14 INFO Worker stopping due to reduction in worker count
      agent | 2024/08/13 15:40:18 ERROR Failed to run analysis job error="failed to run analysis: failed to get source location prefix: failed to resolve database: exit status 2\nOutput: A fatal error occurred: /tmp/eebfc52c-3ecf-490d-bbf4-23c305d6ba18/db is not a recognized CodeQL database.\n"

  and

      agent | 2024/08/13 15:49:33 ERROR Failed to resolve database err="exit status 2" output="A fatal error occurred: /tmp/b5c4941a-5692-4640-aa79-9810bcab39f4/db is not a recognized CodeQL database.\n"
      agent | 2024/08/13 15:49:33 DEBUG XX: RunQuery failed to get source location prefixdatabasePath=/tmp/b5c4941a-5692-4640-aa79-9810bcab39f4/db "Waiting for SIGUSR1 or SIGUSR2..."=
      agent | 2024/08/13 15:49:35 INFO Modifying worker count current=3 new=2
      agent | 2024/08/13 15:49:35 ERROR Failed to resolve database err="exit status 2" output="A fatal error occurred: /tmp/eda30582-81a3-4582-8897-65f8904e8501/db is not a recognized CodeQL database.\n"
      agent | 2024/08/13 15:49:35 DEBUG XX: RunQuery failed to get source location prefixdatabasePath=/tmp/eda30582-81a3-4582-8897-65f8904e8501/db "Waiting for SIGUSR1 or SIGUSR2..."=

  And this fails:

      root@51464985499f:~# /opt/codeql/codeql resolve database /tmp/eda30582-81a3-4582-8897-65f8904e8501/db/
      A fatal error occurred: /tmp/eda30582-81a3-4582-8897-65f8904e8501/db is not a recognized CodeQL database.

But this works: root@51464985499f:~# /opt/codeql/codeql resolve database /tmp/eda30582-81a3-4582-8897-65f8904e8501/db/codeql_db/ { "sourceLocationPrefix" : "/home/runner/work/bulk-builder/bulk-builder", "columnKind" : "utf8", "unicodeNewlines" : false, "sourceArchiveZip" : "/tmp/eda30582-81a3-4582-8897-65f8904e8501/db/codeql_db/src.zip", "sourceArchiveRoot" : "/tmp/eda30582-81a3-4582-8897-65f8904e8501/db/codeql_db/src", "datasetFolder" : "/tmp/eda30582-81a3-4582-8897-65f8904e8501/db/codeql_db/db-cpp", "logsFolder" : "/tmp/eda30582-81a3-4582-8897-65f8904e8501/db/codeql_db/log", "languages" : [ "cpp" ], "scratchDir" : "/tmp/eda30582-81a3-4582-8897-65f8904e8501/db/codeql_db/working" } --- pkg/codeql/codeql.go | 136 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 126 insertions(+), 10 deletions(-) diff --git a/pkg/codeql/codeql.go b/pkg/codeql/codeql.go index d4fc100..06f37f3 100644 --- a/pkg/codeql/codeql.go +++ b/pkg/codeql/codeql.go @@ -4,6 +4,7 @@ import ( "archive/zip" "bytes" "encoding/json" + "errors" "fmt" "io" "log" @@ -12,7 +13,10 @@ import ( "mrvacommander/utils" "os" "os/exec" + "os/signal" "path/filepath" + "strings" + "syscall" "gopkg.in/yaml.v3" ) @@ -82,11 +86,9 @@ func addFileToZip(zipWriter *zip.Writer, filePath, zipPath string) error { return nil } -func RunQuery(database string, language queue.QueryLanguage, queryPackPath string, tempDir string) (*RunQueryResult, error) { +func RunQuery(database string, language queue.QueryLanguage, + queryPackPath string, tempDir string) (*RunQueryResult, error) { path, err := getCodeQLCLIPath() - // XX: is nwo a name/owner, or the original callers' queryLanguage? 
- slog.Debug("XX: is nwo a name/owner, or the original callers' queryLanguage?", - "language", language) if err != nil { return nil, fmt.Errorf("failed to get codeql cli path: %v", err) @@ -119,18 +121,26 @@ func RunQuery(database string, language queue.QueryLanguage, queryPackPath strin databaseSHA = *dbMetadata.CreationMetadata.SHA } - cmd := exec.Command(codeql.Path, "database", "run-queries", "--ram=2048", "--additional-packs", queryPackPath, "--", databasePath, queryPackPath) + dbDir, err := findDBDir(databasePath) + if err != nil { + return nil, fmt.Errorf("failed to get database path: %v", err) + } + + cmd := exec.Command(codeql.Path, "database", "run-queries", + "--ram=2048", "--additional-packs", queryPackPath, "--", dbDir, queryPackPath) if output, err := cmd.CombinedOutput(); err != nil { + awaitSignal("XX: RunQuery: ", string(output)) return nil, fmt.Errorf("failed to run queries: %v\nOutput: %s", err, output) } - queryPackRunResults, err := getQueryPackRunResults(codeql, databasePath, queryPackPath) + queryPackRunResults, err := getQueryPackRunResults(codeql, dbDir, queryPackPath) if err != nil { return nil, fmt.Errorf("failed to get query pack run results: %v", err) } - sourceLocationPrefix, err := getSourceLocationPrefix(codeql, databasePath) + sourceLocationPrefix, err := getSourceLocationPrefix(codeql, dbDir) if err != nil { + awaitSignal("XX: RunQuery ", "failed to get source location prefix ", "databasePath=", dbDir) return nil, fmt.Errorf("failed to get source location prefix: %v", err) } @@ -146,7 +156,7 @@ func RunQuery(database string, language queue.QueryLanguage, queryPackPath strin var sarifFilePath string if shouldGenerateSarif { - sarif, err := generateSarif(codeql, language, databasePath, queryPackPath, databaseSHA, resultsDir) + sarif, err := generateSarif(codeql, language, dbDir, queryPackPath, databaseSHA, resultsDir) if err != nil { return nil, fmt.Errorf("failed to generate SARIF: %v", err) } @@ -176,9 +186,114 @@ func 
RunQuery(database string, language queue.QueryLanguage, queryPackPath strin }, nil } -func getDatabaseMetadata(databasePath string) (*DatabaseMetadata, error) { - data, err := os.ReadFile(filepath.Join(databasePath, "codeql-database.yml")) +// awaitSignal() +// +// Debugging support function. Freezes the current goroutine / function and +// waits for an external signal before continuing. +// Use +// +// pkill -SIGUSR1 mrva_agent +// +// in the container to continue +func awaitSignal(strs ...string) { + msg := strings.Join(strs, "") + sigChan := make(chan os.Signal, 1) + + // Notify the channel on SIGUSR1 or SIGUSR2 + signal.Notify(sigChan, syscall.SIGUSR1, syscall.SIGUSR2) + + // Pause the program and wait for a signal + slog.Debug(msg, "Waiting for SIGUSR1 or SIGUSR2...", nil) + sig := <-sigChan + + // Handle the signal + slog.Debug("Received signal: %s", sig.String(), nil) +} + +// findDBDir(rootdir) +// +// Find a subdirectory of `rootdir` that contains the files `codeql-database.yml` +// and `src.zip` and return its absolute path +func findDBDir(rootdir string) (string, error) { + var dbDir string + err := filepath.Walk(rootdir, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + // Check if it's a directory + if info.IsDir() { + codeqlPath := filepath.Join(path, "codeql-database.yml") + srcZipPath := filepath.Join(path, "src.zip") + + // Check if both files exist in this directory + if _, err := os.Stat(codeqlPath); err == nil { + if _, err := os.Stat(srcZipPath); err == nil { + dbDir = path + // Stop walking once we've found the directory + return filepath.SkipDir + } + } + } + return nil + }) + if err != nil { + slog.Error("findDBDir: Problem in traversing directory:", "rootdir", + rootdir, "err", err) + return "", err + } + + if dbDir == "" { + slog.Error("Unable to find CodeQL DB directory in database zip", "rootdir", rootdir) + return "", errors.New("no directory containing both 'codeql-database.yml' and 'src.zip' 
found") + } + + return dbDir, nil +} + +// Recursively search for files matching the glob pattern starting at rootdir +func globRecursively(rootdir string, pattern string) ([]string, error) { + var matches []string + + // Walk the directory tree + err := filepath.Walk(rootdir, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + // Check if the file name matches the glob pattern + match, err := filepath.Match(pattern, info.Name()) + if err != nil { + return err + } + + if !info.IsDir() && match { + matches = append(matches, path) + } + + return nil + }) + + if err != nil { + return nil, err + } + + return matches, nil +} + +func getDatabaseMetadata(databasePath string) (*DatabaseMetadata, error) { + paths, err := globRecursively(databasePath, "codeql-database.yml") + if err != nil { + return nil, fmt.Errorf("Failed to find codeql-database.yml %v", err) + } + if len(paths) != 1 { + return nil, fmt.Errorf("Found wrong number of paths to codeql-database.yml: %s", paths) + } + + data, err := os.ReadFile(paths[0]) + if err != nil { + awaitSignal("XX: getDataBaseMetadata ", "databasePath=", databasePath) return nil, fmt.Errorf("failed to read database metadata: %v", err) } @@ -317,6 +432,7 @@ func getSourceLocationPrefix(codeql CodeqlCli, databasePath string) (string, err cmd := exec.Command(codeql.Path, "resolve", "database", databasePath) output, err := cmd.CombinedOutput() if err != nil { + slog.Error("Failed to resolve database", "err", err, "output", output) return "", fmt.Errorf("failed to resolve database: %v\nOutput: %s", err, output) }