diff --git a/bin/sarif-extract-scans b/bin/sarif-extract-scans
new file mode 100755
index 0000000..6da1f44
--- /dev/null
+++ b/bin/sarif-extract-scans
@@ -0,0 +1,174 @@
+#!/usr/bin/env python
+""" Extract scan data from multiple sarif files in table form.
+"""
+from dataclasses import dataclass
+from sarif_cli import signature, signature_single
+from sarif_cli import typegraph
+from sarif_cli import snowflake_id
+import argparse
+import dataclasses as dc
+import json
+import logging
+import pandas as pd
+import pathlib
+import sarif_cli.table_joins as tj
+import sarif_cli.derived_joins as derived
+import sys
+
+#
+# Configure logger
+#
+logging.basicConfig(format='%(asctime)s %(message)s')
+
+#
+# Start processing
+#
+parser = argparse.ArgumentParser(description='Read a collection of sarif files and produce tabular output.')
+parser.add_argument('file', metavar='scan-spec.json', type=str,
+                    help="json file containing required external scan information.")
+parser.add_argument('outdir', metavar='output-dir', type=str, help='output directory')
+args = parser.parse_args()
+
+# Load meta info
+def load(fname):
+    with open(fname, 'rb') if fname != '-' else sys.stdin as fp:
+        try:
+            content = json.load(fp)
+        except json.decoder.JSONDecodeError as err:
+            logging.error('Error reading from {}: {}: line {}, column {}'
+                          .format(fname, err.msg, err.lineno, err.colno))
+            sys.exit(1)
+    return content
+
+scan_spec = load(args.file)
+sarif_struct = load(scan_spec['sarif_file_name'])
+
+#
+# Preprocess raw SARIF to get smaller signature
+#
+context = signature.Context(
+    {
+        "string" : "String",
+        "int" : "Int",
+        "bool" : "Bool"
+    }
+)
+sarif_struct = signature.fillsig(args, sarif_struct, context)
+
+#
+# Use reference type graph (signature) to traverse sarif and attach values to tables
+#
+tgraph = typegraph.Typegraph(signature_single.struct_graph_2022_02_01)
+typegraph.destructure(tgraph, signature_single.start_node_2022_02_01, sarif_struct)
+#
+# Form output tables
+#
+typegraph.attach_tables(tgraph)
+
+#
+# Dataframe / table collection
+#
+@dataclass
+class BaseTables:
+    artifacts : pd.DataFrame
+    codeflows : pd.DataFrame
+    kind_pathproblem : pd.DataFrame
+    kind_problem : pd.DataFrame
+    project : pd.DataFrame
+    relatedLocations : pd.DataFrame
+    rules : pd.DataFrame
+    def __init__(self): pass
+bt = BaseTables()
+
+@dataclass
+class ScanTables:
+    # project: External table with project information
+    scans : pd.DataFrame
+    results : pd.DataFrame
+    def __init__(self): pass
+scantabs = ScanTables()
+
+#
+# Add dataframes for base tables
+#
+sf_2683 = tj.joins_for_sf_2683(tgraph)
+af_0350_location = tj.joins_for_af_0350_location(tgraph)
+bt.artifacts = tj.joins_for_artifacts(tgraph)
+bt.codeflows = tj.joins_for_codeflows(tgraph, sf_2683)
+bt.kind_pathproblem = tj.joins_for_path_problem(tgraph, af_0350_location)
+bt.kind_problem = tj.joins_for_problem(tgraph, af_0350_location)
+bt.project = tj.joins_for_project_single(tgraph)
+bt.relatedLocations = tj.joins_for_relatedLocations(tgraph, sf_2683)
+bt.rules = tj.joins_for_rules(tgraph)
+
+#
+# Form derived query tables
+#
+# XX
+# scantabs.project = derived.joins_for_project(bt)
+# scantabs.scans = derived.joins_for_scans(bt)
+# scantabs.results = derived.joins_for_results(bt)
+
+
+#
+# Replace the remaining internal ids with snowflake ids
+#
+flakegen = snowflake_id.Snowflake(0)
+
+columns_to_reindex = {
+    # template from {field.name : [''] for field in dc.fields(bt)}
+    'artifacts': ['artifacts_id'],
+    'codeflows': ['codeflow_id'],
+    'kind_pathproblem': ['results_array_id', 'codeFlows_id'],
+    'kind_problem': ['results_array_id'],
+    'project': ['artifacts', 'results', 'rules'],
+    'relatedLocations': ['struct_id'],
+    'rules': ['rules_array_id']}
+
+_id_to_flake = {}
+def _get_flake(id):
+    flake = _id_to_flake.get(id, -1)
+    if flake == -1:
+        flake = flakegen.next()
+        _id_to_flake[id] = flake
+    return flake
+
+#
+# Cleaner, but makes far too many copies; keep the loop below
+#
+# def _reindex(table, colname):
+#     newtable = table.astype({ colname : 'uint64'}).reset_index(drop=True)
+#     for i in range(0, len(newtable)):
+#         newtable.loc[i, colname] = _get_flake(newtable.loc[i, colname])
+#     return newtable
+#
+# for field in dc.fields(bt):
+#     table_name = field.name
+#     for colname in columns_to_reindex[table_name]:
+#         setattr(bt, field.name, _reindex(getattr(bt, field.name), colname))
+#
+for field in dc.fields(bt):
+    table_name = field.name
+    table = getattr(bt, field.name)
+    # Turn all snowflake columns into uint64 and reset indexing to 0..len(table)
+    newtable = table.astype(
+        { colname : 'uint64'
+          for colname in columns_to_reindex[table_name]}
+    ).reset_index(drop=True)
+    # Swap ids for flakes
+    for colname in columns_to_reindex[table_name]:
+        for i in range(0, len(newtable)):
+            newtable.loc[i, colname] = _get_flake(newtable.loc[i, colname])
+    # Replace the table
+    setattr(bt, field.name, newtable)
+#
+# Write output
+#
+p = pathlib.Path(args.outdir)
+p.mkdir(exist_ok=True)
+def write(path, frame):
+    with p.joinpath(path + ".csv").open(mode='wb') as fh:
+        frame.to_csv(fh, index=False)
+for field in dc.fields(bt):
+    table = getattr(bt, field.name)
+    write(field.name, table)
diff --git a/data/treeio/scan-spec-0.json b/data/treeio/scan-spec-0.json
new file mode 100644
index 0000000..5672109
--- /dev/null
+++ b/data/treeio/scan-spec-0.json
@@ -0,0 +1,5 @@
+{
+    "project_id": 13243,
+    "scan_id": 123457,
+    "sarif_file_name": "2022-02-25/results.sarif"
+}
diff --git a/data/treeio/scan-spec-1.json b/data/treeio/scan-spec-1.json
new file mode 100644
index 0000000..8818266
--- /dev/null
+++ b/data/treeio/scan-spec-1.json
@@ -0,0 +1,5 @@
+{
+    "project_id": 13243,
+    "scan_id": 123456,
+    "sarif_file_name": "2021-12-09/results.sarif"
+}
diff --git a/notes/tables.org b/notes/tables.org
index 534976a..9cdff3c 100644
--- a/notes/tables.org
+++ b/notes/tables.org
@@ -181,7 +181,8 @@
 * Tables or entries to be removed
   The top of the [Mar-23-2022] =projects.csv= table, enumerated below, is ad-hoc
   and included in the other tables below; the information for its fields is not
-  yet collected to it can be discarded.
+  yet collected so it can be discarded.
+
   #+BEGIN_SRC text
   ==> project-meta.csv <==
   creation_date
@@ -196,6 +197,17 @@
   tool_version
   #+END_SRC
 
+  This information was used to expand the sarif tree (see Struct3452 and Array7481
+  in typegraph-multi-with-tables.pdf and the code).  In retrospect, that was a
+  poor choice.  All additional information needed can be represented by one or
+  more tables, so the sarif-extract* tools do this after commit 30e3dd3a3.
+
+  The minimal information required to drive the sarif-to-table conversion is
+  | project_id      | 13243                      |   |
+  | scan_id         | 123456                     |   |
+  | sarif_file_name | "2021-12-09/results.sarif" |   |
+
+
 * New tables to be exported
   This section enumerates new tables intended for reporting infrastructure.
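Editorial note, not part of the patch: the id-to-flake swap in bin/sarif-extract-scans above rewrites every row through per-cell .loc assignments. A minimal sketch of a vectorized alternative is shown below; reindex_with_flakes is a name invented here, and it assumes the script's memoizing _get_flake and its columns_to_reindex mapping are in scope.

#+BEGIN_SRC python
# Sketch only: vectorized variant of the per-row reindexing loop in
# bin/sarif-extract-scans.  Assumes _get_flake (memoizing id -> snowflake)
# and columns_to_reindex from the script above.
def reindex_with_flakes(table, snowflake_columns):
    # Cast all snowflake columns at once and renumber rows 0..len(table)-1,
    # mirroring the loop in the script.
    newtable = table.astype({c: 'uint64' for c in snowflake_columns}).reset_index(drop=True)
    for c in snowflake_columns:
        # Series.map applies _get_flake element-wise; repeated ids reuse
        # the same flake because _get_flake memoizes.
        newtable[c] = newtable[c].map(_get_flake)
    return newtable

# Usage mirroring the script's loop:
# for field in dc.fields(bt):
#     setattr(bt, field.name,
#             reindex_with_flakes(getattr(bt, field.name),
#                                 columns_to_reindex[field.name]))
#+END_SRC

Whether this is preferable depends on the copy behaviour noted in the script's own comment; the sketch keeps the single astype per table, so it should copy no more than the loop it replaces.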
diff --git a/sarif_cli/table_joins.py b/sarif_cli/table_joins.py
index a133c60..d15c4c2 100644
--- a/sarif_cli/table_joins.py
+++ b/sarif_cli/table_joins.py
@@ -305,7 +305,7 @@ def joins_for_relatedLocations(tgraph, sf_2683):
 
 def joins_for_project(tgraph):
     """
-    Return table providing the `project` information.
+    Return table providing the `project` information for sarif-extract-multi.
     """
     # Access convenience functions
     sf = lambda num: tgraph.dataframes['Struct' + str(num)]
@@ -368,6 +368,64 @@ def joins_for_project(tgraph):
     )
     return project_df_1
 
+def joins_for_project_single(tgraph):
+    """
+    Return table providing the `project` information for sarif-extract-scans
+    """
+    # Access convenience functions
+    sf = lambda num: tgraph.dataframes['Struct' + str(num)]
+    af = lambda num: tgraph.dataframes['Array' + str(num)]
+    #
+    project_df = (
+        sf(6787)
+        .rename(columns={"version": "version_6787", "struct_id": "struct_id_6787"})
+        #
+        .merge(af('0177'), how="left", left_on='runs', right_on='array_id',
+               validate="1:m")
+        .drop(columns=['runs', 'array_id', 'type_at_index'])
+        .rename(columns={"value_index": "value_index_0177"})
+        #
+        .merge(sf(3388), how="left", left_on='id_or_value_at_index', right_on='struct_id', validate="1:m")
+        .drop(columns=['id_or_value_at_index', 'struct_id'])
+        #
+        # .merge(af(7069), how="left", left_on='newlineSequences', right_on='array_id',
+        #        validate="1:m")
+        # .drop(columns=['newlineSequences', 'array_id', 'type_at_index'])
+        .drop(columns=['newlineSequences'])
+        #
+        .merge(sf(9543), how="left", left_on='properties', right_on='struct_id', validate="1:m")
+        .drop(columns=['properties', 'struct_id'])
+        #
+        # tool - driver - rules - defaultConfiguration - ( properties - tags )
+        #
+        .merge(sf(8972), how="left", left_on='tool', right_on='struct_id', validate="1:m")
+        .drop(columns=['tool', 'struct_id'])
+        #
+        .merge(sf(7820), how="left", left_on='driver', right_on='struct_id', validate="1:m")
+        .drop(columns=['driver', 'struct_id'])
+        .rename(columns={"version": "driver_version_7820", "name": "driver_name_7820"})
+        #
+        .merge(af(5511), how="left", left_on='versionControlProvenance', right_on='array_id')
+        .drop(columns=['versionControlProvenance', 'array_id', 'type_at_index'])
+        .rename(columns={"value_index": "versionControl_value_index_5511"})
+        #
+        .merge(sf(3081), how="left", left_on='id_or_value_at_index', right_on='struct_id')
+        .drop(columns=['id_or_value_at_index', 'struct_id'])
+        #
+    )
+    # Keep columns of interest
+    project_df_1 = (
+        project_df
+        .drop(columns=['struct_id_6787', 'versionControl_value_index_5511'])
+        .rename({
+            'version_6787': 'sarif_version',
+            'value_index_0177': 'run_index',
+            'driver_name_7820': 'driver_name',
+            'driver_version_7820': 'driver_version',
+        }, axis='columns')
+    )
+    return project_df_1
+
 def joins_for_rules(tgraph):
     """
     Return table providing the `rules` information.
diff --git a/sarif_cli/typegraph.py b/sarif_cli/typegraph.py
index 06bf62d..348013b 100644
--- a/sarif_cli/typegraph.py
+++ b/sarif_cli/typegraph.py
@@ -8,6 +8,7 @@ This file also contains some type graph reference values; these may be moved
 out separate files at some point.
 """
 from dataclasses import dataclass
+import logging
 from typing import Any, Dict, List, Tuple, Union
 
 import pandas as pd
@@ -160,13 +161,19 @@ def _destructure_dict(typegraph: Typegraph, node, tree):
     elif set(tree_fields).issuperset(set(type_fields)):
         # Log a warning
         # log.warning("XX: Tree has unrecognized fields")
+        logging.warning('Input tree has unrecognized fields, collecting only '
+                        'known entries: {}'.format(tree))
+        logging.warning('tree fields: {} type fields: {}'
+                        .format(tree_fields, type_fields))
         _destructure_dict_1(typegraph, node, tree)
 
     elif set(tree_fields).issubset(set(type_fields)):
         raise MissingFieldException("XX: (Sub)tree is missing fields required by typedef")
 
     else:
-        raise Exception("typegraph: unhandled case reached. Internal error")
+        raise Exception("typegraph: unhandled case reached: cannot match type "
+                        "fields {} to tree fields {}. Data is invalid."
+                        .format(type_fields, tree_fields))
 
 
 def _destructure_list(typegraph, node: str, tree: List):