diff --git a/README.org b/README.org index 049f23f..0ac4279 100644 --- a/README.org +++ b/README.org @@ -2,9 +2,10 @@ ** Usage Sample #+BEGIN_SRC sh # Collect DBs from filesystem - cd ~/work-gh/mrva/mrvahepc + cd ~/work-gh/mrva/mrvahepc && rm -fR db-collection.tmp/ ./bin/mc-hepc-init --db_collection_dir db-collection.tmp \ - --starting_path ~/work-gh/mrva/mrva-open-source-download + --starting_path ~/work-gh/mrva/mrva-open-source-download \ + --max_dbs 17 # Serve collected DBs plus metadata ./bin/mc-hepc-serve --codeql-db-dir db-collection.tmp @@ -17,11 +18,11 @@ url=$(curl 127.0.0.1:8070/api/v1/latest_results/codeql-all \ -o - 2>/dev/null | head -1 | jq -r .result_url) - # http://hepc/db-collection.tmp/aircrack-ng-aircrack-ng-ctsj-41ebbe.zip + echo $url + # http://hepc/db/db-collection.tmp/aircrack-ng-aircrack-ng-ctsj-41ebbe.zip wget $(echo $url|sed 's|http://hepc|http://127.0.0.1:8070|g;') - #+END_SRC ** Installation @@ -34,8 +35,6 @@ # From requirements.txt pip install -r requirements.txt - # Or explicitly - pip install ipython #+end_example - Local development @@ -47,7 +46,6 @@ The `--editable` *should* use symlinks for all scripts; use `./bin/*` to be sure. - Full installation - #+begin_example pip install mrvahepc #+end_example diff --git a/bin/mc-hepc-init b/bin/mc-hepc-init index 24a5d0b..e34ee6c 100755 --- a/bin/mc-hepc-init +++ b/bin/mc-hepc-init @@ -54,7 +54,7 @@ def process_db_file(zip_path, db_collection_dir): owner = source_location_prefix.parent.name cid = generate_cid(cli_version, creation_time, primary_language, sha) new_db_fname = f"{owner}-{repo}-ctsj-{cid}.zip" - result_url = f"http://hepc/{db_collection_dir}/{new_db_fname}" + result_url = f"http://hepc/db/{db_collection_dir}/{new_db_fname}" metadata = { "git_branch" : "HEAD", @@ -83,19 +83,24 @@ def process_db_file(zip_path, db_collection_dir): rm("-rf", temp_dir) # Main application class -class DBProcessor(cli.Application): +class HEPC(cli.Application): """ - DBProcessor processes db.zip files found in a starting directory, + HEPC processes db.zip files found in a starting directory, copies updated names in a collection directory, and adds a metadata information file "metadata.json" to the directory. """ db_collection_dir = cli.SwitchAttr( - "--db_collection_dir", str, mandatory=True, help="Specify the database collection directory" + "--db_collection_dir", str, mandatory=True, + help="Specify the database collection directory" ) starting_path = cli.SwitchAttr( "--starting_path", str, mandatory=True, help="Specify the starting path" ) + max_dbs = cli.SwitchAttr( + "--max_dbs", int, mandatory=False, default=100, + help="Specify the maximum number of databases to ingest" + ) def main(self): db_collection_dir = expand_path(self.db_collection_dir) @@ -111,10 +116,10 @@ class DBProcessor(cli.Application): log("WARN", "No db.zip files found in the specified starting path.") return - for zip_path in db_files: + for zip_path in db_files[0:self.max_dbs]: process_db_file(zip_path, db_collection_dir) log("INFO", "Processing completed.") if __name__ == "__main__": - DBProcessor.run() + HEPC.run() diff --git a/bin/mc-hepc-serve b/bin/mc-hepc-serve index e7037f9..a47d286 100755 --- a/bin/mc-hepc-serve +++ b/bin/mc-hepc-serve @@ -18,7 +18,7 @@ logger = logging.getLogger(__name__) app = FastAPI() db_dir = None # This will be set by the CLI application -@app.get("/{file_path:path}") +@app.get("/db/{file_path:path}") def serve_file(file_path: str): """ Serve files from the database directory, such as .zip files or metadata.json. @@ -59,7 +59,7 @@ class MRVAHepc(cli.Application): 2. Metadata for those zip files, contained in metadata.json in the same directory. The HTTP endpoints are: - 1. /{filename} + 1. /db/{filename} 2. /index 3. /api/v1/latest_results/codeql-all """ diff --git a/requirements.txt b/requirements.txt index a9b5f81..cf33764 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,8 +10,6 @@ idna==3.10 ipython==8.30.0 jedi==0.19.2 matplotlib-inline==0.1.7 -# Editable Git install with no remote (mrvahepc==0.1.0) --e /Users/hohn/work-gh/mrva/mrvahepc parso==0.8.4 pexpect==4.9.0 plumbum==1.9.0