Fix: Produce complete SARIF output in agent

The problem was missing fields in the SARIF output.  After the debugging
below, the cause were conversions from JSON to Go to JSON; the Go ->
JSON conversion only output the fields defined in the Go struct.

Because SARIF has so many optional fields, no attempt is made to enforce
a statically-defined structure.  Instead, the JSON -> Go conversion is
now to a fully dynamic structure; unused fields are simply passed through

Debugging:

       Comparing two SARIF files shows

           {
             "$schema" : "https://json.schemastore.org/sarif-2.1.0.json",
             "version" : "2.1.0",
             "runs" : [ {...
             } ]
           }

       and

           {
             "runs": [...
             ]
           }

       so there are missing fields.

    The Problem
     1. Problem origin

          // Modify the sarif: start by extracting
          var sarif Sarif
          if err := json.Unmarshal(sarifData, &sarif); err != nil {
              return nil, fmt.Errorf("failed to unmarshal SARIF: %v", err)
          }
          ...
          // now inject version control info
          ...
          // and write it back
          sarifBytes, err := json.Marshal(sarif)
          if err != nil {
              return nil, fmt.Errorf("failed to marshal SARIF: %v", err)
          }

     2. But the struct only has one of the needed fields

          type Sarif struct {
              Runs []SarifRun `json:"runs"`
          }

     3. From the docs:

          // To unmarshal JSON into a struct, Unmarshal matches incoming object
          // keys to the keys used by [Marshal] (either the struct field name or its tag),
          // preferring an exact match but also accepting a case-insensitive match. By
          // default, object keys which don't have a corresponding struct field are
          // ignored (see [Decoder.DisallowUnknownFields] for an alternative).
This commit is contained in:
Michael Hohn
2024-08-16 14:27:46 -07:00
committed by =Michael Hohn
parent 0a52b729cd
commit d956f47db3

View File

@@ -476,27 +476,60 @@ func generateSarif(codeql CodeqlCli, language queue.QueryLanguage, databasePath,
return nil, fmt.Errorf("failed to read SARIF file: %v", err)
}
// Modify the sarif: start by extracting
var sarif Sarif
if err := json.Unmarshal(sarifData, &sarif); err != nil {
return nil, fmt.Errorf("failed to unmarshal SARIF: %v", err)
// Modify the sarif: start by extracting a fully dynamic structure.
// Previous attempts with a fully typechecked SARIF input have failed;
// there are simply too many optional fields that may vanish or appear at
// any time.
// Read json into a fully dynamic Go structure, check for the presence of an
// array 'Runs' in the top-level object, iterate over 'Runs' and for every
// entry append to the field 'VersionControlProvenance'
// Decode JSON into a map for a fully dynamic structure
var data map[string]interface{}
if err := json.Unmarshal([]byte(sarifData), &data); err != nil {
slog.Error("Error decoding SARIF", "err", err)
return nil, err
}
// now inject version control info
for _, run := range sarif.Runs {
run.VersionControlProvenance = append(run.VersionControlProvenance, map[string]interface{}{
"repositoryUri": fmt.Sprintf("%s/%s", os.Getenv("GITHUB_SERVER_URL"), language),
// Check if the "Runs" array exists
if runs, ok := data["Runs"].([]interface{}); ok {
for _, run := range runs {
if runMap, ok := run.(map[string]interface{}); ok {
// Check and append to the "VersionControlProvenance" field
if vcp, ok := runMap["VersionControlProvenance"].([]interface{}); ok {
// Array exists, add an entry
runMap["VersionControlProvenance"] = append(vcp,
map[string]interface{}{
"repositoryUri": fmt.Sprintf("%s/%s",
os.Getenv("GITHUB_SERVER_URL"), language),
"revisionId": databaseSHA,
})
} else {
// No array, provide one and the entry
runMap["VersionControlProvenance"] = []map[string]interface{}{
{
"repositoryUri": fmt.Sprintf("%s/%s",
os.Getenv("GITHUB_SERVER_URL"), language),
"revisionId": databaseSHA,
},
}
}
}
}
}
// and write it back
sarifBytes, err := json.Marshal(sarif)
// Encode the modified structure back to JSON
modifiedJSON, err := json.MarshalIndent(data, "", " ")
if err != nil {
return nil, fmt.Errorf("failed to marshal SARIF: %v", err)
slog.Error("Unable to re-encode SARIF to JSON", "err", err)
return nil, err
}
return sarifBytes, nil
slog.Debug("XX: sarif original", "sarif=", sarifData)
slog.Debug("XX: sarif modified", "modifiedJSON=", modifiedJSON)
return modifiedJSON, nil
}
// XX: inlined this function