From d956f47db31d3b8bce7c74ede0de43ec872e2099 Mon Sep 17 00:00:00 2001 From: Michael Hohn Date: Fri, 16 Aug 2024 14:27:46 -0700 Subject: [PATCH] Fix: Produce complete SARIF output in agent The problem was missing fields in the SARIF output. After the debugging below, the cause were conversions from JSON to Go to JSON; the Go -> JSON conversion only output the fields defined in the Go struct. Because SARIF has so many optional fields, no attempt is made to enforce a statically-defined structure. Instead, the JSON -> Go conversion is now to a fully dynamic structure; unused fields are simply passed through Debugging: Comparing two SARIF files shows { "$schema" : "https://json.schemastore.org/sarif-2.1.0.json", "version" : "2.1.0", "runs" : [ {... } ] } and { "runs": [... ] } so there are missing fields. The Problem 1. Problem origin // Modify the sarif: start by extracting var sarif Sarif if err := json.Unmarshal(sarifData, &sarif); err != nil { return nil, fmt.Errorf("failed to unmarshal SARIF: %v", err) } ... // now inject version control info ... // and write it back sarifBytes, err := json.Marshal(sarif) if err != nil { return nil, fmt.Errorf("failed to marshal SARIF: %v", err) } 2. But the struct only has one of the needed fields type Sarif struct { Runs []SarifRun `json:"runs"` } 3. From the docs: // To unmarshal JSON into a struct, Unmarshal matches incoming object // keys to the keys used by [Marshal] (either the struct field name or its tag), // preferring an exact match but also accepting a case-insensitive match. By // default, object keys which don't have a corresponding struct field are // ignored (see [Decoder.DisallowUnknownFields] for an alternative). --- pkg/codeql/codeql.go | 61 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 47 insertions(+), 14 deletions(-) diff --git a/pkg/codeql/codeql.go b/pkg/codeql/codeql.go index c5d65bf..d521c6e 100644 --- a/pkg/codeql/codeql.go +++ b/pkg/codeql/codeql.go @@ -476,27 +476,60 @@ func generateSarif(codeql CodeqlCli, language queue.QueryLanguage, databasePath, return nil, fmt.Errorf("failed to read SARIF file: %v", err) } - // Modify the sarif: start by extracting - var sarif Sarif - if err := json.Unmarshal(sarifData, &sarif); err != nil { - return nil, fmt.Errorf("failed to unmarshal SARIF: %v", err) + // Modify the sarif: start by extracting a fully dynamic structure. + // Previous attempts with a fully typechecked SARIF input have failed; + // there are simply too many optional fields that may vanish or appear at + // any time. + + // Read json into a fully dynamic Go structure, check for the presence of an + // array 'Runs' in the top-level object, iterate over 'Runs' and for every + // entry append to the field 'VersionControlProvenance' + + // Decode JSON into a map for a fully dynamic structure + var data map[string]interface{} + if err := json.Unmarshal([]byte(sarifData), &data); err != nil { + slog.Error("Error decoding SARIF", "err", err) + return nil, err } - // now inject version control info - for _, run := range sarif.Runs { - run.VersionControlProvenance = append(run.VersionControlProvenance, map[string]interface{}{ - "repositoryUri": fmt.Sprintf("%s/%s", os.Getenv("GITHUB_SERVER_URL"), language), - "revisionId": databaseSHA, - }) + // Check if the "Runs" array exists + if runs, ok := data["Runs"].([]interface{}); ok { + for _, run := range runs { + if runMap, ok := run.(map[string]interface{}); ok { + // Check and append to the "VersionControlProvenance" field + if vcp, ok := runMap["VersionControlProvenance"].([]interface{}); ok { + // Array exists, add an entry + runMap["VersionControlProvenance"] = append(vcp, + map[string]interface{}{ + "repositoryUri": fmt.Sprintf("%s/%s", + os.Getenv("GITHUB_SERVER_URL"), language), + "revisionId": databaseSHA, + }) + } else { + // No array, provide one and the entry + runMap["VersionControlProvenance"] = []map[string]interface{}{ + { + "repositoryUri": fmt.Sprintf("%s/%s", + os.Getenv("GITHUB_SERVER_URL"), language), + "revisionId": databaseSHA, + }, + } + } + } + } } - // and write it back - sarifBytes, err := json.Marshal(sarif) + // Encode the modified structure back to JSON + modifiedJSON, err := json.MarshalIndent(data, "", " ") if err != nil { - return nil, fmt.Errorf("failed to marshal SARIF: %v", err) + slog.Error("Unable to re-encode SARIF to JSON", "err", err) + return nil, err } - return sarifBytes, nil + slog.Debug("XX: sarif original", "sarif=", sarifData) + slog.Debug("XX: sarif modified", "modifiedJSON=", modifiedJSON) + + return modifiedJSON, nil } // XX: inlined this function