Add nested zip file handling to agent

This commit is contained in:
Michael Hohn
2025-04-10 10:47:24 -07:00
committed by =Michael Hohn
parent 14d6057248
commit 58f4fe1ca7

View File

@@ -1,6 +1,9 @@
package qldbstore package qldbstore
import ( import (
"archive/tar"
"bytes"
"compress/gzip"
"encoding/json" "encoding/json"
"fmt" "fmt"
"io" "io"
@@ -223,13 +226,45 @@ func (h *HepcStore) FindAvailableDBs(analysisReposRequested []common.NameWithOwn
return notFoundRepos, foundRepos return notFoundRepos, foundRepos
} }
func extractDatabaseFromTar(tarStream io.Reader) ([]byte, bool, error) {
gzReader, err := gzip.NewReader(tarStream)
if err != nil {
slog.Error("failed to open gzip stream", "error", err)
return nil, false, fmt.Errorf("failed to open gzip stream: %w", err)
}
defer gzReader.Close()
tarReader := tar.NewReader(gzReader)
for {
hdr, err := tarReader.Next()
if err == io.EOF {
break
}
if err != nil {
slog.Error("failed to read tar entry", "error", err)
return nil, false, fmt.Errorf("failed to read tar entry: %w", err)
}
if hdr.Name == "artifacts/codeql_database.zip" {
var buf bytes.Buffer
if _, err := io.Copy(&buf, tarReader); err != nil {
slog.Error("failed to extract zip from tar", "error", err)
return nil, false, fmt.Errorf("failed to extract zip from tar: %w", err)
}
return buf.Bytes(), true, nil
}
}
return nil, false, nil // not found
}
func (h *HepcStore) GetDatabase(location common.NameWithOwner) ([]byte, error) { func (h *HepcStore) GetDatabase(location common.NameWithOwner) ([]byte, error) {
// Ensure metadata is up-to-date by using the cache
h.cacheMutex.Lock() h.cacheMutex.Lock()
if time.Since(h.cacheLastUpdated) > h.cacheDuration { if time.Since(h.cacheLastUpdated) > h.cacheDuration {
// Refresh the metadata cache if it is stale
results, err := h.fetchMetadata() results, err := h.fetchMetadata()
if err != nil { if err != nil {
slog.Error("error refreshing metadata cache", "error", err)
h.cacheMutex.Unlock() h.cacheMutex.Unlock()
return nil, fmt.Errorf("error refreshing metadata cache: %w", err) return nil, fmt.Errorf("error refreshing metadata cache: %w", err)
} }
@@ -239,10 +274,8 @@ func (h *HepcStore) GetDatabase(location common.NameWithOwner) ([]byte, error) {
cachedResults := h.metadataCache cachedResults := h.metadataCache
h.cacheMutex.Unlock() h.cacheMutex.Unlock()
// Construct the key for the requested database
key := fmt.Sprintf("%s/%s", location.Owner, location.Repo) key := fmt.Sprintf("%s/%s", location.Owner, location.Repo)
// Locate the result URL in the cached metadata
var resultURL string var resultURL string
for _, result := range cachedResults { for _, result := range cachedResults {
if result.Projname == key { if result.Projname == key {
@@ -252,29 +285,45 @@ func (h *HepcStore) GetDatabase(location common.NameWithOwner) ([]byte, error) {
} }
if resultURL == "" { if resultURL == "" {
slog.Error("database not found in metadata", "repo", key)
return nil, fmt.Errorf("database not found for repository: %s", key) return nil, fmt.Errorf("database not found for repository: %s", key)
} }
// Fetch the database content
resp, err := http.Get(replaceHepcURL(resultURL)) resp, err := http.Get(replaceHepcURL(resultURL))
if err != nil { if err != nil {
slog.Error("failed to fetch database", "url", resultURL, "error", err)
return nil, fmt.Errorf("error fetching database: %w", err) return nil, fmt.Errorf("error fetching database: %w", err)
} }
defer resp.Body.Close() defer resp.Body.Close()
if resp.StatusCode != http.StatusOK { if resp.StatusCode != http.StatusOK {
slog.Error("non-OK HTTP status", "status", resp.Status, "url", resultURL)
return nil, fmt.Errorf("non-OK HTTP status for database fetch: %s", resp.Status) return nil, fmt.Errorf("non-OK HTTP status for database fetch: %s", resp.Status)
} }
// Read and return the database data // Buffer the full gzip tar stream into RAM
data, err := io.ReadAll(resp.Body) fullBody, err := io.ReadAll(resp.Body)
if err != nil { if err != nil {
slog.Error("error reading full database stream into memory", "error", err)
return nil, fmt.Errorf("error reading database content: %w", err) return nil, fmt.Errorf("error reading database content: %w", err)
} }
// Create a fresh reader from RAM buffer for extraction
data, found, err := extractDatabaseFromTar(bytes.NewReader(fullBody))
if err != nil {
slog.Error("error extracting from tar stream", "error", err)
return nil, err
}
if found {
slog.Info("found nested zip", "path", "artifacts/codeql_database.zip")
return data, nil return data, nil
} }
slog.Info("nested zip not found, returning full original stream from buffer")
return fullBody, nil
}
// replaceHepcURL replaces the fixed "http://hepc" with the value from // replaceHepcURL replaces the fixed "http://hepc" with the value from
// MRVA_HEPC_ENDPOINT // MRVA_HEPC_ENDPOINT
func replaceHepcURL(originalURL string) string { func replaceHepcURL(originalURL string) string {