wip: Handle varying CodeQL DB formats. This code contains debugging features

This patch fixes the following issues:

     - [X] Wrong db metadata path.  Fixed via
       : globRecursively(databasePath, "codeql-database.yml")

       The log output for reference:

                 agent          | 2024/08/09 21:16:40 DEBUG XX:getDataBaseMetadata databasePath=/tmp/ce523549-a217-4b54-a118-7224ce444870/db "Waiting for SIGUSR1 or SIGUSR2..."=<nil>
                 agent          | 2024/08/09 21:16:40 DEBUG XX:getDataBaseMetadata databasePath=/tmp/bc24fe72-b520-4e72-9634-a98d630cb75e/db "Waiting for SIGUSR1 or SIGUSR2..."=<nil>
                 agent          | 2024/08/09 21:16:40 DEBUG Received signal: %s "user defined signal 1"=<nil>
                 agent          | 2024/08/09 21:16:40 DEBUG XX:getDataBaseMetadata databasePath=/tmp/41fcf5cc-e151-4a11-bccc-481d599aa426/db "Waiting for SIGUSR1 or SIGUSR2..."=<nil>

            From

                 func getDatabaseMetadata(databasePath string) (*DatabaseMetadata, error) {
                 data, err := os.ReadFile(filepath.Join(databasePath, "codeql-database.yml"))
                 ...}

            And some inspection:

                 root@3fa4b8013336:~# find /tmp |grep ql-datab
                 /tmp/27f09b9f-254f-4ef5-abf5-9a1a2927906b/db/cpp/codeql-database.yml
                 /tmp/d7e14cd4-8789-4176-81bc-2ac1957ed9fd/db/codeql_db/codeql-database.yml
                 /tmp/41fcf5cc-e151-4a11-bccc-481d599aa426/db/codeql_db/codeql-database.yml
                 /tmp/bc24fe72-b520-4e72-9634-a98d630cb75e/db/codeql_db/codeql-database.yml
                 /tmp/ce523549-a217-4b54-a118-7224ce444870/db/codeql_db/codeql-database.yml

     - [X] Wrong db path.  Fixed via
       : findDBDir(databasePath)

       The log output for reference:

                 agent          | 2024/08/09 21:51:09 ERROR Failed to run analysis job error="failed to run analysis: failed to run queries: exit status 2\nOutput: A fatal error occurred: /tmp/91c61e0b-dfd9-4dd3-a3ad-cb77dbc1cbfd/db is not a recognized CodeQL database.\n"
                 agent          | 2024/08/09 21:51:09 INFO Running analysis job job="{Spec:{SessionID:1 NameWithOwner:{Owner:USCiLab Repo:cerealctsj264953}} QueryPackLocation:{Key:1 Bucket:packs} QueryLanguage:cpp}"
                 agent          | 2024/08/09 21:51:09 ERROR Failed to run analysis job error="failed to run analysis: failed to run queries: exit status 2\nOutput: A fatal error occurred: /tmp/1b8ffeba-8ad1-465e-8ec7-36cda449a5f5/db is not a recognized CodeQL database.\n"
                 ...

            This is easily confirmed:

                 root@171b5417e05f:~# /opt/codeql/codeql database upgrade  /tmp/7ed27578-d7ea-42e0-902a-effbc4df05f2/
                 A fatal error occurred: /tmp/7ed27578-d7ea-42e0-902a-effbc4df05f2 is not a recognized CodeQL database.

            Another try:

                 root@171b5417e05f:~# /opt/codeql/codeql database upgrade  /tmp/7ed27578-d7ea-42e0-902a-effbc4df05f2/database.zip
                 A fatal error occurred: Database root /tmp/7ed27578-d7ea-42e0-902a-effbc4df05f2/database.zip is not a directory.

             This one is correct:

                 root@171b5417e05f:~# /opt/codeql/codeql database upgrade /tmp/7ed27578-d7ea-42e0-902a-effbc4df05f2/db/codeql_db
                 /tmp/7ed27578-d7ea-42e0-902a-effbc4df05f2/db/codeql_db/db-cpp is up to date.

     - [X] Wrong database source prefix.  Also fixed via
       : findDBDir(databasePath)

       Similar log entries:

                 agent          | 2024/08/13 15:40:14 ERROR Failed to run analysis job error="failed to run analysis: failed to get source location prefix: failed to resolve database: exit status 2\nOutput: A fatal error occurred: /tmp/da420844-a284-4d82-9470-fa189a5b4ee6/db is not a recognized CodeQL database.\n"
                 agent          | 2024/08/13 15:40:14 INFO Worker stopping due to reduction in worker count
                 agent          | 2024/08/13 15:40:18 ERROR Failed to run analysis job error="failed to run analysis: failed to get source location prefix: failed to resolve database: exit status 2\nOutput: A fatal error occurred: /tmp/eebfc52c-3ecf-490d-bbf4-23c305d6ba18/db is not a recognized CodeQL database.\n"

            and
                 agent          | 2024/08/13 15:49:33 ERROR Failed to resolve database err="exit status 2" output="A fatal error occurred: /tmp/b5c4941a-5692-4640-aa79-9810bcab39f4/db is not a recognized CodeQL database.\n"
                 agent          | 2024/08/13 15:49:33 DEBUG XX: RunQuery failed to get source location prefixdatabasePath=/tmp/b5c4941a-5692-4640-aa79-9810bcab39f4/db "Waiting for SIGUSR1 or SIGUSR2..."=<nil>
                 agent          | 2024/08/13 15:49:35 INFO Modifying worker count current=3 new=2
                 agent          | 2024/08/13 15:49:35 ERROR Failed to resolve database err="exit status 2" output="A fatal error occurred: /tmp/eda30582-81a3-4582-8897-65f8904e8501/db is not a recognized CodeQL database.\n"
                 agent          | 2024/08/13 15:49:35 DEBUG XX: RunQuery failed to get source location prefixdatabasePath=/tmp/eda30582-81a3-4582-8897-65f8904e8501/db "Waiting for SIGUSR1 or SIGUSR2..."=<nil>

            And this fails:

                 root@51464985499f:~# /opt/codeql/codeql resolve database /tmp/eda30582-81a3-4582-8897-65f8904e8501/db/
                 A fatal error occurred: /tmp/eda30582-81a3-4582-8897-65f8904e8501/db is not a recognized CodeQL database.

            But this works:

                 root@51464985499f:~# /opt/codeql/codeql resolve database /tmp/eda30582-81a3-4582-8897-65f8904e8501/db/codeql_db/
                 {
                   "sourceLocationPrefix" : "/home/runner/work/bulk-builder/bulk-builder",
                   "columnKind" : "utf8",
                   "unicodeNewlines" : false,
                   "sourceArchiveZip" : "/tmp/eda30582-81a3-4582-8897-65f8904e8501/db/codeql_db/src.zip",
                   "sourceArchiveRoot" : "/tmp/eda30582-81a3-4582-8897-65f8904e8501/db/codeql_db/src",
                   "datasetFolder" : "/tmp/eda30582-81a3-4582-8897-65f8904e8501/db/codeql_db/db-cpp",
                   "logsFolder" : "/tmp/eda30582-81a3-4582-8897-65f8904e8501/db/codeql_db/log",
                   "languages" : [
                     "cpp"
                   ],
                   "scratchDir" : "/tmp/eda30582-81a3-4582-8897-65f8904e8501/db/codeql_db/working"
                }
This commit is contained in:
Michael Hohn
2024-08-13 09:22:24 -07:00
committed by =Michael Hohn
parent 35100f89a7
commit 9d60489908

View File

@@ -4,6 +4,7 @@ import (
"archive/zip" "archive/zip"
"bytes" "bytes"
"encoding/json" "encoding/json"
"errors"
"fmt" "fmt"
"io" "io"
"log" "log"
@@ -12,7 +13,10 @@ import (
"mrvacommander/utils" "mrvacommander/utils"
"os" "os"
"os/exec" "os/exec"
"os/signal"
"path/filepath" "path/filepath"
"strings"
"syscall"
"gopkg.in/yaml.v3" "gopkg.in/yaml.v3"
) )
@@ -82,11 +86,9 @@ func addFileToZip(zipWriter *zip.Writer, filePath, zipPath string) error {
return nil return nil
} }
func RunQuery(database string, language queue.QueryLanguage, queryPackPath string, tempDir string) (*RunQueryResult, error) { func RunQuery(database string, language queue.QueryLanguage,
queryPackPath string, tempDir string) (*RunQueryResult, error) {
path, err := getCodeQLCLIPath() path, err := getCodeQLCLIPath()
// XX: is nwo a name/owner, or the original callers' queryLanguage?
slog.Debug("XX: is nwo a name/owner, or the original callers' queryLanguage?",
"language", language)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to get codeql cli path: %v", err) return nil, fmt.Errorf("failed to get codeql cli path: %v", err)
@@ -119,18 +121,26 @@ func RunQuery(database string, language queue.QueryLanguage, queryPackPath strin
databaseSHA = *dbMetadata.CreationMetadata.SHA databaseSHA = *dbMetadata.CreationMetadata.SHA
} }
cmd := exec.Command(codeql.Path, "database", "run-queries", "--ram=2048", "--additional-packs", queryPackPath, "--", databasePath, queryPackPath) dbDir, err := findDBDir(databasePath)
if err != nil {
return nil, fmt.Errorf("failed to get database path: %v", err)
}
cmd := exec.Command(codeql.Path, "database", "run-queries",
"--ram=2048", "--additional-packs", queryPackPath, "--", dbDir, queryPackPath)
if output, err := cmd.CombinedOutput(); err != nil { if output, err := cmd.CombinedOutput(); err != nil {
awaitSignal("XX: RunQuery: ", string(output))
return nil, fmt.Errorf("failed to run queries: %v\nOutput: %s", err, output) return nil, fmt.Errorf("failed to run queries: %v\nOutput: %s", err, output)
} }
queryPackRunResults, err := getQueryPackRunResults(codeql, databasePath, queryPackPath) queryPackRunResults, err := getQueryPackRunResults(codeql, dbDir, queryPackPath)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to get query pack run results: %v", err) return nil, fmt.Errorf("failed to get query pack run results: %v", err)
} }
sourceLocationPrefix, err := getSourceLocationPrefix(codeql, databasePath) sourceLocationPrefix, err := getSourceLocationPrefix(codeql, dbDir)
if err != nil { if err != nil {
awaitSignal("XX: RunQuery ", "failed to get source location prefix ", "databasePath=", dbDir)
return nil, fmt.Errorf("failed to get source location prefix: %v", err) return nil, fmt.Errorf("failed to get source location prefix: %v", err)
} }
@@ -146,7 +156,7 @@ func RunQuery(database string, language queue.QueryLanguage, queryPackPath strin
var sarifFilePath string var sarifFilePath string
if shouldGenerateSarif { if shouldGenerateSarif {
sarif, err := generateSarif(codeql, language, databasePath, queryPackPath, databaseSHA, resultsDir) sarif, err := generateSarif(codeql, language, dbDir, queryPackPath, databaseSHA, resultsDir)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to generate SARIF: %v", err) return nil, fmt.Errorf("failed to generate SARIF: %v", err)
} }
@@ -176,9 +186,114 @@ func RunQuery(database string, language queue.QueryLanguage, queryPackPath strin
}, nil }, nil
} }
func getDatabaseMetadata(databasePath string) (*DatabaseMetadata, error) { // awaitSignal()
data, err := os.ReadFile(filepath.Join(databasePath, "codeql-database.yml")) //
// Debugging support function.  Freezes the calling goroutine and waits for
// an external SIGUSR1 or SIGUSR2 before continuing.  The strings in strs are
// concatenated without a separator and used as the log message that
// announces the wait.
//
// Use
//
//	pkill -SIGUSR1 mrva_agent
//
// in the container to continue.
func awaitSignal(strs ...string) {
	msg := strings.Join(strs, "")

	// Capacity 1: a signal that arrives between Notify and the receive below
	// is held in the channel instead of being dropped.
	sigChan := make(chan os.Signal, 1)
	signal.Notify(sigChan, syscall.SIGUSR1, syscall.SIGUSR2)
	// Unregister on return so repeated calls do not accumulate channels in
	// the signal package's notification table.
	defer signal.Stop(sigChan)

	// slog takes alternating key/value arguments after the message; the
	// waiting notice is therefore logged as a proper attribute.
	slog.Debug(msg, "status", "waiting for SIGUSR1 or SIGUSR2")

	// Block until one of the registered signals is delivered.
	sig := <-sigChan

	// slog does not interpret printf verbs, so the signal name is passed as
	// an attribute rather than formatted into the message.
	slog.Debug("Received signal", "signal", sig.String())
}
// findDBDir(rootdir)
//
// Find a directory at or below `rootdir` that directly contains both
// `codeql-database.yml` and `src.zip`, and return its path.  The path is
// returned as walked, i.e. prefixed by `rootdir`; it is absolute only when
// `rootdir` is.  Directories are visited in lexical order and the walk stops
// at the first match.
//
// Returns an error when the traversal itself fails or when no directory
// contains both files.
func findDBDir(rootdir string) (string, error) {
	var dbDir string

	// os.DirEntry is an alias of fs.DirEntry, so this literal satisfies
	// fs.WalkDirFunc without importing io/fs.
	err := filepath.WalkDir(rootdir, func(path string, d os.DirEntry, err error) error {
		if err != nil {
			return err
		}
		if !d.IsDir() {
			return nil
		}

		// Both marker files must be present in this directory; any Stat
		// failure is treated as "not here" and the walk continues.
		for _, name := range []string{"codeql-database.yml", "src.zip"} {
			if _, err := os.Stat(filepath.Join(path, name)); err != nil {
				return nil
			}
		}

		dbDir = path
		// SkipAll ends the whole walk.  SkipDir would only skip this
		// directory's children and let a later sibling overwrite dbDir.
		return filepath.SkipAll
	})
	if err != nil {
		slog.Error("findDBDir: Problem in traversing directory:", "rootdir",
			rootdir, "err", err)
		return "", err
	}

	if dbDir == "" {
		slog.Error("Unable to find CodeQL DB directory in database zip", "rootdir", rootdir)
		return "", errors.New("no directory containing both 'codeql-database.yml' and 'src.zip' found")
	}

	return dbDir, nil
}
// globRecursively walks the tree rooted at rootdir and returns the paths of
// all non-directory entries whose base name matches the glob pattern
// (filepath.Match syntax).  Paths are returned as walked, i.e. prefixed by
// rootdir, in lexical walk order.
//
// A traversal error or a malformed pattern aborts the walk; the error is
// returned and the result is nil.
func globRecursively(rootdir string, pattern string) ([]string, error) {
	var matches []string

	// WalkDir avoids the per-entry lstat that Walk performs; only the name
	// and the is-directory bit are needed here.  os.DirEntry is an alias of
	// fs.DirEntry, so no io/fs import is required.
	err := filepath.WalkDir(rootdir, func(path string, d os.DirEntry, err error) error {
		if err != nil {
			return err
		}

		// Match every entry, directories included, so a malformed pattern
		// is reported even when the tree contains no regular files.
		match, err := filepath.Match(pattern, d.Name())
		if err != nil {
			return err
		}

		if match && !d.IsDir() {
			matches = append(matches, path)
		}
		return nil
	})
	if err != nil {
		return nil, err
	}

	return matches, nil
}
func getDatabaseMetadata(databasePath string) (*DatabaseMetadata, error) {
paths, err := globRecursively(databasePath, "codeql-database.yml")
if err != nil {
return nil, fmt.Errorf("Failed to find codeql-database.yml %v", err)
}
if len(paths) != 1 {
return nil, fmt.Errorf("Found wrong number of paths to codeql-database.yml: %s", paths)
}
data, err := os.ReadFile(paths[0])
if err != nil {
awaitSignal("XX: getDataBaseMetadata ", "databasePath=", databasePath)
return nil, fmt.Errorf("failed to read database metadata: %v", err) return nil, fmt.Errorf("failed to read database metadata: %v", err)
} }
@@ -317,6 +432,7 @@ func getSourceLocationPrefix(codeql CodeqlCli, databasePath string) (string, err
cmd := exec.Command(codeql.Path, "resolve", "database", databasePath) cmd := exec.Command(codeql.Path, "resolve", "database", databasePath)
output, err := cmd.CombinedOutput() output, err := cmd.CombinedOutput()
if err != nil { if err != nil {
slog.Error("Failed to resolve database", "err", err, "output", output)
return "", fmt.Errorf("failed to resolve database: %v\nOutput: %s", err, output) return "", fmt.Errorf("failed to resolve database: %v\nOutput: %s", err, output)
} }