Fix CodeQL database extraction logic when examining a possibly nested archive

This commit is contained in:
Michael Hohn
2025-05-12 12:45:22 -07:00
committed by =Michael Hohn
parent c32ff755ef
commit 75e57dc0a8

View File

@@ -307,26 +307,35 @@ func (h *HepcStore) GetDatabase(location common.NameWithOwner) ([]byte, error) {
return nil, fmt.Errorf("non-OK HTTP status for database fetch: %s", resp.Status) return nil, fmt.Errorf("non-OK HTTP status for database fetch: %s", resp.Status)
} }
// Buffer the full gzip tar stream into RAM // Buffer the full stream into RAM
fullBody, err := io.ReadAll(resp.Body) fullBody, err := io.ReadAll(resp.Body)
if err != nil { if err != nil {
slog.Error("error reading full database stream into memory", "error", err) slog.Error("error reading full database stream into memory", "error", err)
return nil, fmt.Errorf("error reading database content: %w", err) return nil, fmt.Errorf("error reading database content: %w", err)
} }
// Create a fresh reader from RAM buffer for extraction // The input could be the codeql db as zip, or a tar stream containing the zip;
// If gzip header is found, treat the input as a tar+gz archive
// Check for gzip magic number (0x1F 0x8B)
isGzip := len(fullBody) >= 2 && fullBody[0] == 0x1F && fullBody[1] == 0x8B
if isGzip {
// Extract zip data from tar+gz archive
data, found, err := extractDatabaseFromTar(bytes.NewReader(fullBody)) data, found, err := extractDatabaseFromTar(bytes.NewReader(fullBody))
if err != nil { if err != nil {
slog.Error("error extracting from tar stream", "error", err) slog.Error("error extracting from tar stream", "error", err)
return nil, err return nil, err
} }
if !found {
if found { slog.Warn("tar archive read succeeded, but zip entry not found")
slog.Info("found nested zip", "path", "artifacts/codeql_database.zip") return nil, fmt.Errorf("zip file not found in tar archive")
} else {
return data, nil return data, nil
} }
}
slog.Info("nested zip not found, returning full original stream from buffer") // Treat input as raw zip file content
slog.Info("no gzip header found; assuming raw zip content")
return fullBody, nil return fullBody, nil
} }