diff --git a/README.md b/README.md index 6c7850e..3916397 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,9 @@ # CLI tools for SARIF processing Each of these tools present a high-level command-line interface to extract a - specific subset of information from a SARIF file. The main tools are: `sarif-extract-scans-runner`,`sarif-aggregate-scans`,`sarif-create-aggregate-report` + specific subset of information from a SARIF file. The main tools are: `sarif-extract-scans-runner`,`sarif-aggregate-scans`,`sarif-create-aggregate-report`. + + Each tool can print its options and description like: `sarif-extract-scans-runner --help`. The tool was implemented using Python 3.9. diff --git a/bin/sarif-extract-scans b/bin/sarif-extract-scans index d891f71..a171e8b 100755 --- a/bin/sarif-extract-scans +++ b/bin/sarif-extract-scans @@ -130,17 +130,14 @@ scantabs = ScanTables() @dataclass class ExternalInfo: - project_id : int + project_id: pd.UInt64Dtype() scan_id : pd.UInt64Dtype() sarif_file_name : str - ql_query_id : str external_info = ExternalInfo( - scan_spec["project_id"], + pd.NA, scan_spec["scan_id"], - scan_spec["sarif_file_name"], - # TODO: Take ql_query_id from where? (git commit id of the ql query set) - 'deadbeef00', + scan_spec["sarif_file_name"] ) # diff --git a/bin/sarif-extract-scans-runner b/bin/sarif-extract-scans-runner index a069493..b323bea 100755 --- a/bin/sarif-extract-scans-runner +++ b/bin/sarif-extract-scans-runner @@ -88,9 +88,9 @@ parser = argparse.ArgumentParser(description='Run sarif-extract-scans over a dir parser.add_argument('sarif_files', metavar='sarif-files', type=str, help='File containing list of sarif files, use - for stdin') -parser.add_argument('-f','--input-signature', metavar='input-signature', type=str, default="LGTM", - help='Signature of the sarif, as in, where it was generated it may affect the signature.' 
- 'Options: LGTM, CLI' +parser.add_argument('-f','--input-signature', metavar='input-signature', type=str, default="CLI", + help='Signature of the sarif, as in, where it was generated it may affect the signature.\n' + 'Options: LGTM, CLI.\n' 'If current represented signatures are not sufficient, view signature_single.py for how to support further signatures.' ' Default: "%(default)s"') @@ -161,7 +161,6 @@ for path in paths: # Paths and components # path = path.rstrip() - project, component = path.split('/') # # Scan specification # @@ -171,30 +170,25 @@ for path in paths: scan_id = hash.hash_unique(data) scan_spec = { - # assuming sarif file names are like / - # however this will be replaced down the line with the repoURI if possible - # still, leaving here in case later versions of this tool do not rely on that property being there - # in that case this will be the best guess - "project_id": hash.hash_unique((project+"-"+component).encode()), # pd.UInt64Dtype() "scan_id": scan_id, # pd.Int64Dtype() "sarif_file_name": path, # pd.StringDtype() } # # If using outermost output directory, create project directory: - # (like //*.scantables) + # (like //*.scantables) # - try: os.mkdir(outer_dir+ project, mode=0o755) + try: os.mkdir(outer_dir+ path, mode=0o755) except FileExistsError: pass - scan_spec_file = os.path.join(outer_dir+ project, component + ".scanspec") + scan_spec_file = os.path.join(outer_dir+ path + ".scanspec") with open(scan_spec_file, 'w') as fp: json.dump(scan_spec, fp) # # Table output directory # - output_dir = os.path.join(outer_dir+ project, component + ".scantables") + output_dir = os.path.join(outer_dir+ path + ".scantables") try: os.mkdir(output_dir, mode=0o755) except FileExistsError: pass # @@ -215,8 +209,8 @@ for path in paths: with open(args.successful_runs, 'wb') as outfile: pickle.dump(successful_runs, outfile) - scan_log_file = os.path.join(outer_dir+ project, component + ".scanlog") - csv_outfile = os.path.join(outer_dir+ project, 
component) + scan_log_file = os.path.join(outer_dir+ path + ".scanlog") + csv_outfile = os.path.join(outer_dir+ path) runstats = subprocess.run(['sarif-extract-scans', scan_spec_file, output_dir, csv_outfile, "-f", args.input_signature], capture_output=True, text=True) if runstats.returncode == 0: diff --git a/sarif_cli/hash.py b/sarif_cli/hash.py index 9c107ba..f900897 100644 --- a/sarif_cli/hash.py +++ b/sarif_cli/hash.py @@ -4,4 +4,4 @@ from hashlib import blake2b def hash_unique(item_to_hash): h = blake2b(digest_size = 8) h.update(item_to_hash) - return abs(int.from_bytes(h.digest(), byteorder='big')) + return int.from_bytes(h.digest(), byteorder='big') diff --git a/sarif_cli/scan_tables.py b/sarif_cli/scan_tables.py index 716d940..0f5a980 100644 --- a/sarif_cli/scan_tables.py +++ b/sarif_cli/scan_tables.py @@ -79,43 +79,20 @@ def joins_for_projects(basetables, external_info): """ b = basetables; e = external_info - # if the sarif does not have versionControlProvenance, semmle.sourceLanguage ect - # there is no reliable way to know the project name - # and will still need to use a guess about the project id + # if the sarif does have versionControlProvenance if "repositoryUri" in b.project: - repo_url = b.project.repositoryUri[0] - # For a repository url of the form - # (git|https)://*/org/project.* - # use the org/project part as the project_name. 
- # - url_parts = re.match(r'(git|https)://[^/]+/([^/]+)/(.*).git', repo_url) - if url_parts: - project_name = f"{url_parts.group(2)}-{url_parts.group(3)}" - project, component = e.sarif_file_name.rstrip().split('/') - # if the runners guess from the filename was bad, replace with real info - # and continue to use that scanspec to pass that around - if project_name != project+"-"+component: - e.project_id = hash.hash_unique(project_name.encode()) - else: - project_name = pd.NA + repoUri = b.project.repositoryUri[0] + e.project_id = hash.hash_unique(repoUri.encode()) else: - repo_url = "unknown" - project_name = pd.NA + repoUri = "unknown" - if 'semmle.sourceLanguage' in b.project: - srcLang = b.project['semmle.sourceLanguage'][0] - allLang = ",".join(list(b.project['semmle.sourceLanguage'])) - else: - srcLang = "unknown" - allLang = "unknown" - res = pd.DataFrame(data={ "id" : e.project_id, - "project_name" : project_name, + "project_name" : repoUri, "creation_date" : pd.Timestamp(0.0, unit='s'), # TODO: external info - "repo_url" : repo_url, - "primary_language" : srcLang, # TODO: external info if CLI sarif - "languages_analyzed" : allLang # TODO: external info if CLI sarif + "repo_url" : repoUri, + "primary_language" : b.project['semmle.sourceLanguage'][0], + "languages_analyzed" : ",".join(list(b.project['semmle.sourceLanguage'])) }, index=[0]) # Force all column types to ensure appropriate formatting diff --git a/sarif_cli/signature.py b/sarif_cli/signature.py index 941feb2..62b29f1 100644 --- a/sarif_cli/signature.py +++ b/sarif_cli/signature.py @@ -239,6 +239,8 @@ dummy_relatedLocations_entry = [ dummy_message_entry = {'text': 'scli-dyys dummy value'} +dummy_sourceLanguage = 'unknown' + def fillsig_dict(args, elem, context): """ Fill in the missing fields in dictionary signatures. 
""" @@ -290,6 +292,10 @@ def fillsig_dict(args, elem, context): if 'level' in elem.keys(): full_elem['enabled'] = elem.get('enabled', True) + if 'semmle.formatSpecifier' in elem.keys(): + # Ensure semmle.sourceLanguage is present at least in dummy form + full_elem['semmle.sourceLanguage'] = elem.get('semmle.sourceLanguage', dummy_sourceLanguage) + if 'versionControlProvenance' in elem.keys(): # Ensure newlineSequences is present when versionControlProvenance is full_elem['newlineSequences'] = elem.get('newlineSequences', dummy_newlineSequences) diff --git a/sarif_cli/signature_single_CLI.py b/sarif_cli/signature_single_CLI.py index d773cf2..1b6b747 100644 --- a/sarif_cli/signature_single_CLI.py +++ b/sarif_cli/signature_single_CLI.py @@ -28,7 +28,7 @@ struct_graph_CLI = ( ('Struct3497', ('struct', ('index', 'Int'), ('uri', 'String'))), ('Struct9567', ('struct', ('location', 'Struct3497'))), ('Array6920', ('array', (0, 'Struct5277'), (1, 'Struct9567'))), - ('Struct1509', ('struct', ('semmle.formatSpecifier', 'String'))), + ('Struct1509', ('struct', ('semmle.formatSpecifier', 'String'), ('semmle.sourceLanguage', 'String'))), ('Struct2774', ('struct', ('text', 'String'))), ( 'Struct6299', ( 'struct', diff --git a/sarif_cli/typegraph.py b/sarif_cli/typegraph.py index 3769fc6..4dce356 100644 --- a/sarif_cli/typegraph.py +++ b/sarif_cli/typegraph.py @@ -196,9 +196,14 @@ def _destructure_dict(typegraph: Typegraph, node, tree): ) else: - status_writer.unknown_sarif_parsing_shape["extra_info"] = "type fields {} do not match tree fields {}.".format(type_fields, tree_fields) - status_writer.csv_write(status_writer.unknown_sarif_parsing_shape) - raise Exception("typegraph: unhandled case reached: cannot match type " + # possibly looks like: (Struct9699)type_fields: [codeflows...] 
vs tree_fields: [...extra_properties] + # in that case, also try the Struct4055 signature here + if "codeFlows" in type_fields: + _destructure_dict(typegraph, "Struct4055", tree) + else: + status_writer.unknown_sarif_parsing_shape["extra_info"] = "type fields {} do not match tree fields {}.".format(type_fields, tree_fields) + status_writer.csv_write(status_writer.unknown_sarif_parsing_shape) + raise Exception("typegraph: unhandled case reached: cannot match type " "fields {} to tree fields {}. Data is invalid." .format(type_fields, tree_fields))