From 198453ee905a9a8f205a21d0c27a52ccca81ab99 Mon Sep 17 00:00:00 2001
From: Michael Hohn
Date: Tue, 14 May 2024 18:57:30 -0700
Subject: [PATCH] wip: port most of MirvaRequest from ghes-mirva-server

---
Review notes (text between the "---" line and the first "diff --git" line is
not part of the commit message):

- This patch text was re-flowed from a whitespace-collapsed copy. Blank
  context lines and the indentation/alignment of unchanged context lines were
  reconstructed from the hunk line counts; verify them against the base files
  (or apply with `git apply --ignore-whitespace`).
- The "index" blob ids below are those of the original commit; the post-image
  ids no longer match because added lines were amended as listed here.
- collectRequestInfo: dropped the local `var w http.ResponseWriter` that
  shadowed the real writer with a nil interface (http.Error would panic).
- collectRequestInfo: a malformed "owner/repo" entry used `err.Error()` while
  err was nil and then fell through to `t[1]`; it now builds an error, answers
  400 and returns.
- collectRequestInfo: the missing-body error is sent with 400 instead of 204.
- extract_tgz: slog.Error now passes the error as a key/value pair.
- SaveQueryPack: returns the os.Getwd error instead of panicking; log message
  corrected.
- FindAvailableDBs: results are initialized before any return; when the
  working directory is unavailable every requested repository is reported as
  not found instead of returning a nil map pointer.
- Storage interface: the result named `error` is renamed `err`.

 interfaces/mci/storage.go         |  12 ++++
 lib/commander/lcmem/lcmem.go      | 112 +++++++++++++++++++++++--------
 lib/storage/lsmem/localdisk.go    |  77 +++++++++++++++++++++++
 types/storage.go                  |   0
 types/{mct => tcmdr}/commander.go |  25 +++++++-
 types/tsto/storage.go             |   7 ++
 6 files changed, 206 insertions(+), 27 deletions(-)
 delete mode 100644 types/storage.go
 rename types/{mct => tcmdr}/commander.go (94%)
 create mode 100644 types/tsto/storage.go

diff --git a/interfaces/mci/storage.go b/interfaces/mci/storage.go
index 2af51fe..4d858b8 100644
--- a/interfaces/mci/storage.go
+++ b/interfaces/mci/storage.go
@@ -1,5 +1,17 @@
 package mci
 
+import (
+	"github.com/advanced-security/mrvacommander/types/tsto"
+	co "github.com/hohn/ghes-mirva-server/common"
+)
+
 type Storage interface {
 	NextID() int
+	// SaveQueryPack stores the query pack archive for the given session and
+	// returns the location it was saved to.
+	SaveQueryPack(tgz []byte, sessionID int) (storagePath string, err error)
+	// FindAvailableDBs splits the requested repositories into those without a
+	// database and those with one (mapped to the database location).
+	FindAvailableDBs(analysisReposRequested []co.OwnerRepo) (not_found_repos []co.OwnerRepo,
+		analysisRepos *map[co.OwnerRepo]tsto.DBLocation)
 }
diff --git a/lib/commander/lcmem/lcmem.go b/lib/commander/lcmem/lcmem.go
index 4d0352c..5f73a8a 100644
--- a/lib/commander/lcmem/lcmem.go
+++ b/lib/commander/lcmem/lcmem.go
@@ -15,7 +15,7 @@ import (
 	"strings"
 
 	"github.com/advanced-security/mrvacommander/interfaces/mci"
-	"github.com/advanced-security/mrvacommander/types/mct"
+	"github.com/advanced-security/mrvacommander/types/tcmdr"
 	"github.com/gorilla/mux"
 	"github.com/hohn/ghes-mirva-server/analyze"
 	"github.com/hohn/ghes-mirva-server/api"
@@ -203,85 +203,123 @@ func (c *Commander) MirvaRequestID(w http.ResponseWriter, r *http.Request) {
 func (c *Commander) MirvaRequest(w http.ResponseWriter, r *http.Request) {
 	vars := mux.Vars(r)
 	slog.Info("New mrva run ", "owner", vars["owner"], "repo", vars["repo"])
-	// TODO Change this to functional style?
 	// session := new(MirvaSession)
 	session_id := c.st.Storage.NextID()
 	session_owner := vars["owner"]
 	session_controller_repo := vars["repo"]
 	slog.Info("new run", "id: ", fmt.Sprint(session_id), session_owner, session_controller_repo)
-	c.collectRequestInfo(w, r)
+	session_language, session_repositories, session_tgz_ref, err := c.collectRequestInfo(w, r, session_id)
 
-	// session_find_available_DBs()
+	if err != nil {
+		return
+	}
+	not_found_repos, analysisRepos := c.st.Storage.FindAvailableDBs(session_repositories)
+
+	// TODO into Queue
 	// session_start_analyses()
-	// session_submit_response(w)
+
+	// TODO into Commander (here)
+	si := tcmdr.SessionInfo{
+		ID:             session_id,
+		Owner:          session_owner,
+		ControllerRepo: session_controller_repo,
+
+		QueryPack:    session_tgz_ref,
+		Language:     session_language,
+		Repositories: session_repositories,
+
+		AccessMismatchRepos: nil, /* FIXME */
+		NotFoundRepos:       not_found_repos,
+		NoCodeqlDBRepos:     nil, /* FIXME */
+		OverLimitRepos:      nil, /* FIXME */
+
+		AnalysisRepos: analysisRepos,
+	}
+
+	c.submit_response(si)
+
+	// TODO into Storage
 	// session_save()
+
+}
+
+// submit_response is a placeholder; it does nothing yet.
+func (c *Commander) submit_response(s tcmdr.SessionInfo) {
+	// TODO
 }
 
-func (c *Commander) collectRequestInfo(w http.ResponseWriter, r *http.Request) {
+// collectRequestInfo reads and validates the submission body, stores the
+// query pack via extract_tgz, and returns the language, the requested
+// repositories and the query pack location, in that order. On failure it
+// writes the HTTP error response itself and returns a non-nil error.
+func (c *Commander) collectRequestInfo(w http.ResponseWriter, r *http.Request, sessionId int) (string, []co.OwnerRepo, string, error) {
 	slog.Debug("Collecting session info")
 
 	if r.Body == nil {
-		err := "Missing request body"
+		err := errors.New("Missing request body")
 		log.Println(err)
-		http.Error(w, err, http.StatusNoContent)
-		return
+		http.Error(w, err.Error(), http.StatusBadRequest)
+		return "", []co.OwnerRepo{}, "", err
 	}
 	buf, err := io.ReadAll(r.Body)
 	if err != nil {
-		var w http.ResponseWriter
 		slog.Error("Error reading MRVA submission body", "error", err.Error())
 		http.Error(w, err.Error(), http.StatusBadRequest)
-		return
+		return "", []co.OwnerRepo{}, "", err
 	}
 	msg, err := TrySubmitMsg(buf)
 	if err != nil {
 		// Unknown message
 		slog.Error("Unknown MRVA submission body format")
 		http.Error(w, err.Error(), http.StatusBadRequest)
-		return
+		return "", []co.OwnerRepo{}, "", err
 	}
 
-	// Decompose the SubmitMsg and keep information in the MirvaSession
+	// Decompose the SubmitMsg and keep information
 
-	// 1. Save the query pack and keep the location
+	// Save the query pack and keep the location
 	if !isBase64Gzip([]byte(msg.QueryPack)) {
 		slog.Error("MRVA submission body querypack has invalid format")
 		err := errors.New("MRVA submission body querypack has invalid format")
 		http.Error(w, err.Error(), http.StatusBadRequest)
-		return
+		return "", []co.OwnerRepo{}, "", err
 	}
 
-	err = sn.extract_tgz(msg.QueryPack)
+	session_tgz_ref, err := c.extract_tgz(msg.QueryPack, sessionId)
 	if err != nil {
 		http.Error(w, err.Error(), http.StatusBadRequest)
-		return
+		return "", []co.OwnerRepo{}, "", err
 	}
 
 	// 2. Save the language
-	sn.language = msg.Language
+	session_language := msg.Language
 
 	// 3. Save the repositories
+	var session_repositories []co.OwnerRepo
+
 	for _, v := range msg.Repositories {
 		t := strings.Split(v, "/")
 		if len(t) != 2 {
 			slog.Error("Invalid owner / repository entry", "entry", t)
-			http.Error(w, err.Error(), http.StatusBadRequest)
+			// The outer err is nil at this point, so build the error for this
+			// entry here and stop before t[1] is indexed.
+			err := fmt.Errorf("invalid owner/repository entry %q", v)
+			http.Error(w, err.Error(), http.StatusBadRequest)
+			return "", []co.OwnerRepo{}, "", err
 		}
-		sn.repositories = append(sn.repositories,
-			co.OwnerRepo{t[0], t[1]})
+		session_repositories = append(session_repositories,
+			co.OwnerRepo{Owner: t[0], Repo: t[1]})
 	}
-
-	sn.save()
-
+	return session_language, session_repositories, session_tgz_ref, nil
 }
 
 // Try to extract a SubmitMsg from a json-encoded buffer
-func TrySubmitMsg(buf []byte) (mct.SubmitMsg, error) {
+func TrySubmitMsg(buf []byte) (tcmdr.SubmitMsg, error) {
 	buf1 := make([]byte, len(buf))
 	copy(buf1, buf)
 	dec := json.NewDecoder(bytes.NewReader(buf1))
 	dec.DisallowUnknownFields()
-	var m mct.SubmitMsg
+	var m tcmdr.SubmitMsg
 	err := dec.Decode(&m)
 	return m, err
 }
@@ -310,3 +348,25 @@ func isBase64Gzip(val []byte) bool {
 		return false
 	}
 }
+
+// extract_tgz base64-decodes the query pack and hands the resulting archive
+// bytes to storage; it returns the path reported by storage.
+func (c *Commander) extract_tgz(qp string, sessionID int) (string, error) {
+	// These are decoded manually via
+	//    base64 -d < foo1 | gunzip | tar t | head -20
+	// base64 decode the body
+	slog.Debug("Extracting query pack")
+
+	tgz, err := base64.StdEncoding.DecodeString(qp)
+	if err != nil {
+		slog.Error("querypack body decoding error", "error", err)
+		return "", err
+	}
+
+	session_query_pack_tgz_filepath, err := c.st.Storage.SaveQueryPack(tgz, sessionID)
+	if err != nil {
+		return "", err
+	}
+
+	return session_query_pack_tgz_filepath, err
+}
diff --git a/lib/storage/lsmem/localdisk.go b/lib/storage/lsmem/localdisk.go
index db36d9f..08188bd 100644
--- a/lib/storage/lsmem/localdisk.go
+++ b/lib/storage/lsmem/localdisk.go
@@ -1,5 +1,18 @@
 package lsmem
 
+import (
+	"errors"
+	"fmt"
+	"io/fs"
+	"log/slog"
+	"os"
+	"path"
+	"path/filepath"
+
+	"github.com/advanced-security/mrvacommander/types/tsto"
+	co "github.com/hohn/ghes-mirva-server/common"
+)
+
 type Storage struct {
 	CurrentID int
 }
@@ -8,3 +21,67 @@ func (s *Storage) NextID() int {
 	s.CurrentID += 1
 	return s.CurrentID
 }
+
+// SaveQueryPack writes the query pack archive to
+// <cwd>/var/codeql/querypacks/qp-<sessionId>.tgz and returns that path.
+func (s *Storage) SaveQueryPack(tgz []byte, sessionId int) (string, error) {
+	// Save the tar.gz body
+	cwd, err := os.Getwd()
+	if err != nil {
+		slog.Error("No working directory")
+		// Report the failure to the caller instead of crashing the server.
+		return "", err
+	}
+
+	dirpath := path.Join(cwd, "var", "codeql", "querypacks")
+	if err := os.MkdirAll(dirpath, 0755); err != nil {
+		slog.Error("Unable to create query pack output directory",
+			"dir", dirpath)
+		return "", err
+	}
+
+	fpath := path.Join(dirpath, fmt.Sprintf("qp-%d.tgz", sessionId))
+	err = os.WriteFile(fpath, tgz, 0644)
+	if err != nil {
+		slog.Error("Unable to save query pack", "path", fpath, "error", err)
+		return "", err
+	}
+	slog.Info("Query pack saved to ", "path", fpath)
+
+	return fpath, nil
+}
+
+// Determine for which repositories codeql databases are available.
+//
+// Those will be the analysis_repos. The rest will be skipped.
+func (s *Storage) FindAvailableDBs(analysisReposRequested []co.OwnerRepo) (not_found_repos []co.OwnerRepo,
+	analysisRepos *map[co.OwnerRepo]tsto.DBLocation) {
+	slog.Debug("Looking for available CodeQL databases")
+
+	// Initialize the results first so that every return path hands back a
+	// non-nil map pointer.
+	analysisRepos = &map[co.OwnerRepo]tsto.DBLocation{}
+	not_found_repos = []co.OwnerRepo{}
+
+	cwd, err := os.Getwd()
+	if err != nil {
+		slog.Error("No working directory", "error", err)
+		return append(not_found_repos, analysisReposRequested...), analysisRepos
+	}
+
+	for _, rep := range analysisReposRequested {
+		dbPrefix := filepath.Join(cwd, "codeql", "dbs", rep.Owner, rep.Repo)
+		dbName := fmt.Sprintf("%s_%s_db.zip", rep.Owner, rep.Repo)
+		dbPath := filepath.Join(dbPrefix, dbName)
+
+		if _, err := os.Stat(dbPath); errors.Is(err, fs.ErrNotExist) {
+			slog.Info("Database does not exist for repository ", "owner/repo", rep,
+				"path", dbPath)
+			not_found_repos = append(not_found_repos, rep)
+		} else {
+			slog.Info("Found database for ", "owner/repo", rep, "path", dbPath)
+			(*analysisRepos)[rep] = tsto.DBLocation{Prefix: dbPrefix, File: dbName}
+		}
+	}
+	return not_found_repos, analysisRepos
+}
diff --git a/types/storage.go b/types/storage.go
deleted file mode 100644
index e69de29..0000000
diff --git a/types/mct/commander.go b/types/tcmdr/commander.go
similarity index 94%
rename from types/mct/commander.go
rename to types/tcmdr/commander.go
index a53e1fe..e13ab86 100644
--- a/types/mct/commander.go
+++ b/types/tcmdr/commander.go
@@ -1,4 +1,9 @@
-package mct
+package tcmdr
+
+import (
+	"github.com/advanced-security/mrvacommander/types/tsto"
+	co "github.com/hohn/ghes-mirva-server/common"
+)
 
 type DownloadResponse struct {
 	Repository DownloadRepo `json:"repository"`
@@ -217,3 +222,21 @@ type SubmitMsg struct {
 	QueryPack     string   `json:"query_pack"`
 	Repositories  []string `json:"repositories"`
 }
+
+// SessionInfo collects what is known about one analysis request.
+type SessionInfo struct {
+	ID             int
+	Owner          string
+	ControllerRepo string
+
+	QueryPack    string
+	Language     string
+	Repositories []co.OwnerRepo
+
+	AccessMismatchRepos []co.OwnerRepo
+	NotFoundRepos       []co.OwnerRepo
+	NoCodeqlDBRepos     []co.OwnerRepo
+	OverLimitRepos      []co.OwnerRepo
+
+	AnalysisRepos *map[co.OwnerRepo]tsto.DBLocation
+}
diff --git a/types/tsto/storage.go b/types/tsto/storage.go
new file mode 100644
index 0000000..84c32b9
--- /dev/null
+++ b/types/tsto/storage.go
@@ -0,0 +1,7 @@
+package tsto
+
+// DBLocation names a database archive by its directory (Prefix) and file name (File).
+type DBLocation struct {
+	Prefix string
+	File   string
+}