From ef08825b43c0178366b71885cbb8dc80b3976856 Mon Sep 17 00:00:00 2001
From: Michael Hohn <hohn@github.com>
Date: Wed, 22 Dec 2021 18:03:34 -0800
Subject: [PATCH] Processing in stages: Move the initial sarif_cli code to
 sarif_cli/traverse

---
 bin/sarif-digest          |   2 +-
 bin/sarif-labeled         |   2 +-
 bin/sarif-list-files      |   2 +-
 bin/sarif-results-summary |   2 +-
 sarif_cli/__init__.py     | 178 --------------------------------------
 sarif_cli/traverse.py     | 178 ++++++++++++++++++++++++++++++++++++++
 6 files changed, 182 insertions(+), 182 deletions(-)
 create mode 100644 sarif_cli/traverse.py
diff --git a/bin/sarif-digest b/bin/sarif-digest
index 75470ab..94d2eec 100755
--- a/bin/sarif-digest
+++ b/bin/sarif-digest
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 import json
-import sarif_cli as S
+import sarif_cli.traverse as S
 import sys
 
 # TODO command-line: sarif-digest [<file>]
diff --git a/bin/sarif-labeled b/bin/sarif-labeled
index 4e06158..9868a3d 100755
--- a/bin/sarif-labeled
+++ b/bin/sarif-labeled
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 import argparse
 import json
-import sarif_cli as S
+import sarif_cli.traverse as S
 import sys
 import collections
 
diff --git a/bin/sarif-list-files b/bin/sarif-list-files
index 2767208..b009687 100755
--- a/bin/sarif-list-files
+++ b/bin/sarif-list-files
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 import argparse
 import json
-import sarif_cli as S
+import sarif_cli.traverse as S
 import sys
 import collections
 
diff --git a/bin/sarif-results-summary b/bin/sarif-results-summary
index 407438f..941db7a 100755
--- a/bin/sarif-results-summary
+++ b/bin/sarif-results-summary
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 import argparse
 import json
-import sarif_cli as S
+import sarif_cli.traverse as S
 import re
 import sys
 import collections
diff --git a/sarif_cli/__init__.py b/sarif_cli/__init__.py
index 9abdda7..e69de29 100644
--- a/sarif_cli/__init__.py
+++ b/sarif_cli/__init__.py
@@ -1,178 +0,0 @@
-import sys
-import os
-import re
-import codecs
-import csv
-
-MIN_PYTHON = (3, 7)
-if sys.version_info < MIN_PYTHON:
-    sys.exit("Python %s.%s or later is required.\n" % MIN_PYTHON)
-
-class WholeFile:
-    """ Special case handling: use this class for non-existent regions where the
-    whole file is to be used.
-    """
-
-class NoFile:
-    """ Special case handling: use this class when no file is available.
-    """
-
-def is_sarif_struct(struct):
-    """A quick check to verify that `struct` is in fact a SARIF tree.
-    """
-    return type(struct) == dict and "$schema" in struct and \
-        "sarif" in struct["$schema"] and "version" in struct
-
-def get_csv_writer():
-    """ Set up and return the default csv writer on stdout.
-    """
-    return csv.writer(sys.stdout, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL)
-
-def write_csv(writer, *columns):
-    """ Print via `writer`, with some additional processing """
-    writer.writerow(columns)
-
-def get_relatedlocation_message_info(related_location):
-    """ Given a relatedLocation, extract message information.
-
-    The relatedLocation typically starts from 
-    get(sarif_struct, 'runs', [int], 'results', [int], 'relatedLocations', [int])
-
-    When used for a threadFlow, extract message information for a location contained in it.
-
-    In this case, the location typically starts from 
-    get(sarif_struct, 'runs', _i, 'results', _i, 'codeFlows', _i, 'threadFlows', _i, 'locations', _i) 
-
-    Returns:  (message, artifact, region) by default
-        For an empty 'physicalLocation' key, returns (message, sarif_cli.NoFile, sarif_cli.NoFile)
-        For an empty 'region' key, returns (message, artifact, sarif_cli.WholeFile)
-    """
-    message = get(related_location, 'message', 'text')
-    if 'physicalLocation' in related_location: 
-        ploc = get(related_location, 'physicalLocation')
-        artifact = ploc.get('artifactLocation')
-        region = ploc.get('region', WholeFile)
-    else:
-        artifact, region = NoFile, NoFile
-    return message, artifact, region
-
-def get_location_message_info(result):
-    """ Given one of the results, extract message information.
-
-    The `result` typically starts from get(sarif_struct, 'runs', run_index, 'results', res_index)
-
-    Returns: (message, artifact, region)
-        For an empty 'region' key, returns (message, artifact, sarif_cli.WholeFile)
-
-    """
-    message = get(result, 'message', 'text')
-    artifact = get(result, 'locations', 0, 'physicalLocation', 'artifactLocation')
-    # If there is no 'region' key, use the whole file
-    region = get(result,  'locations', 0, 'physicalLocation').get('region', WholeFile)
-    return (message, artifact, region)
-
-def display_underlined(l1, c1, l2, c2, line, line_num):
-    """ Display the given line followed by a second line with underscores at the locations.
-    
-    l1, c1, l2, c2: the line/column range
-    line: the line of text
-    line_num: the line number for the text, used with the line/column range
-    """
-    # Display the line
-    msg("%s" % (line))
-    msg("\n")
-    # Print the underline
-    underline = underline_for_result(l1, c1, l2, c2, line, line_num)
-    msg(underline)
-    # Next result
-    msg("\n")
-
-def underline_for_result(first_line, first_column, last_line, last_column, line, line_num):
-    """Provide the underline for a result line.
-
-    first_line, first_column, last_line, last_column : 
-        the region from lineinfo(region)
-    line: 
-        the line of source
-    line_num:  
-        the index of line, must satisfy first_line <= line_num <= last_line
-    """
-    # Underline the affected region
-    # col_* use the [start, end) indexing
-    #   From the first non-whitespace char
-    match = re.search("([^\s])+", line)
-    if match:
-        col_from = match.span()[0]
-    else:
-        col_from = 0
-    #   To the last non-whitespace char
-    match = re.search("(\s)+$", line)
-    if match:
-        col_to = match.span()[0]
-    else:
-        col_to = len(line)
-    #   Use 1-indexing
-    col_from += 1 ; col_to += 1
-    #   Adjust first line
-    if line_num == first_line:
-        col_from = max(col_from, first_column)
-    #   Adjust last line
-    if line_num == last_line:
-        col_to = min(col_to, last_column)
-    #   Use 0-indexing
-    col_from -= 1 ; col_to -= 1
-    # Return the underline
-    return " " * col_from + "^" * (col_to - col_from)
-    
-
-def load_lines(root, path, line_from, line_to):
-    """Load the line range [line_from, line_to], including both, 
-    from the file at root/path.  
-    Lines are counted from 1.  
-    Use 1 space for each tab.  This seems to be the codeql handling for beginning of line.
-    Newlines are dropped.
-    """
-    fname = os.path.join(root, path)
-    if not os.path.exists(fname):
-        dbg("Missing file: %s" % fname)
-        return []
-    with codecs.open(fname, 'r', encoding="latin-1") as file:
-        lines = file.readlines()
-        return [line.rstrip("\n\r").replace("\t", " ")
-                for line in lines[line_from-1 : line_to-1+1]]
-
-def lineinfo(region):
-    """ Return sensible values for start/end line/columns for the possibly empty
-    entries in the sarif 'region' structure.
-    """
-    startLine, startColumn, endLine, endColumn = map(
-        lambda e: region.get(e, -1), ['startLine', 'startColumn', 'endLine', 'endColumn'])
-    # Full information is startLine / startColumn / endLine / endcolumn
-    # - only have startLine / startColumn / _ / endcolumn
-    if endLine == -1: endLine = startLine 
-
-    # - only have startLine / _ / _ / endcolumn
-    if startColumn == -1: startColumn = 1 
-
-    return startLine, startColumn, endLine, endColumn
-
-def indices(sarif_struct, *path):
-    """ Return a range for the indices of PATH """
-    return range(0, len(get(sarif_struct, *path)))
-
-def get(sarif_struct, *path):
-    """ Get the sarif entry at PATH """
-    res = sarif_struct
-    for p in path:
-        res = res[p]
-    return res
-
-def msg(message):
-    """ Print message to stdout """
-    sys.stdout.write(message)
-
-def dbg(message):
-    """ Print message to stderr """
-    sys.stdout.flush()
-    sys.stderr.write("warning: %s\n" % message)
-    sys.stderr.flush()
diff --git a/sarif_cli/traverse.py b/sarif_cli/traverse.py
new file mode 100644
index 0000000..9abdda7
--- /dev/null
+++ b/sarif_cli/traverse.py
@@ -0,0 +1,178 @@
+import sys
+import os
+import re
+import codecs
+import csv
+
+MIN_PYTHON = (3, 7)
+if sys.version_info < MIN_PYTHON:
+    sys.exit("Python %s.%s or later is required.\n" % MIN_PYTHON)
+
+class WholeFile:
+    """ Special case handling: use this class for non-existent regions where the
+    whole file is to be used.
+    """
+
+class NoFile:
+    """ Special case handling: use this class when no file is available.
+    """
+
+def is_sarif_struct(struct):
+    """A quick check to verify that `struct` is in fact a SARIF tree.
+    """
+    return type(struct) == dict and "$schema" in struct and \
+        "sarif" in struct["$schema"] and "version" in struct
+
+def get_csv_writer():
+    """ Set up and return the default csv writer on stdout.
+    """
+    return csv.writer(sys.stdout, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL)
+
+def write_csv(writer, *columns):
+    """ Print via `writer`, with some additional processing """
+    writer.writerow(columns)
+
+def get_relatedlocation_message_info(related_location):
+    """ Given a relatedLocation, extract message information.
+
+    The relatedLocation typically starts from 
+    get(sarif_struct, 'runs', [int], 'results', [int], 'relatedLocations', [int])
+
+    When used for a threadFlow, extract message information for a location contained in it.
+
+    In this case, the location typically starts from 
+    get(sarif_struct, 'runs', _i, 'results', _i, 'codeFlows', _i, 'threadFlows', _i, 'locations', _i) 
+
+    Returns:  (message, artifact, region) by default
+        For an empty 'physicalLocation' key, returns (message, sarif_cli.NoFile, sarif_cli.NoFile)
+        For an empty 'region' key, returns (message, artifact, sarif_cli.WholeFile)
+    """
+    message = get(related_location, 'message', 'text')
+    if 'physicalLocation' in related_location: 
+        ploc = get(related_location, 'physicalLocation')
+        artifact = ploc.get('artifactLocation')
+        region = ploc.get('region', WholeFile)
+    else:
+        artifact, region = NoFile, NoFile
+    return message, artifact, region
+
+def get_location_message_info(result):
+    """ Given one of the results, extract message information.
+
+    The `result` typically starts from get(sarif_struct, 'runs', run_index, 'results', res_index)
+
+    Returns: (message, artifact, region)
+        For an empty 'region' key, returns (message, artifact, sarif_cli.WholeFile)
+
+    """
+    message = get(result, 'message', 'text')
+    artifact = get(result, 'locations', 0, 'physicalLocation', 'artifactLocation')
+    # If there is no 'region' key, use the whole file
+    region = get(result,  'locations', 0, 'physicalLocation').get('region', WholeFile)
+    return (message, artifact, region)
+
+def display_underlined(l1, c1, l2, c2, line, line_num):
+    """ Display the given line followed by a second line with underscores at the locations.
+    
+    l1, c1, l2, c2: the line/column range
+    line: the line of text
+    line_num: the line number for the text, used with the line/column range
+    """
+    # Display the line
+    msg("%s" % (line))
+    msg("\n")
+    # Print the underline
+    underline = underline_for_result(l1, c1, l2, c2, line, line_num)
+    msg(underline)
+    # Next result
+    msg("\n")
+
+def underline_for_result(first_line, first_column, last_line, last_column, line, line_num):
+    """Provide the underline for a result line.
+
+    first_line, first_column, last_line, last_column : 
+        the region from lineinfo(region)
+    line: 
+        the line of source
+    line_num:  
+        the index of line, must satisfy first_line <= line_num <= last_line
+    """
+    # Underline the affected region
+    # col_* use the [start, end) indexing
+    #   From the first non-whitespace char
+    match = re.search("([^\s])+", line)
+    if match:
+        col_from = match.span()[0]
+    else:
+        col_from = 0
+    #   To the last non-whitespace char
+    match = re.search("(\s)+$", line)
+    if match:
+        col_to = match.span()[0]
+    else:
+        col_to = len(line)
+    #   Use 1-indexing
+    col_from += 1 ; col_to += 1
+    #   Adjust first line
+    if line_num == first_line:
+        col_from = max(col_from, first_column)
+    #   Adjust last line
+    if line_num == last_line:
+        col_to = min(col_to, last_column)
+    #   Use 0-indexing
+    col_from -= 1 ; col_to -= 1
+    # Return the underline
+    return " " * col_from + "^" * (col_to - col_from)
+    
+
+def load_lines(root, path, line_from, line_to):
+    """Load the line range [line_from, line_to], including both, 
+    from the file at root/path.  
+    Lines are counted from 1.  
+    Use 1 space for each tab.  This seems to be the codeql handling for beginning of line.
+    Newlines are dropped.
+    """
+    fname = os.path.join(root, path)
+    if not os.path.exists(fname):
+        dbg("Missing file: %s" % fname)
+        return []
+    with codecs.open(fname, 'r', encoding="latin-1") as file:
+        lines = file.readlines()
+        return [line.rstrip("\n\r").replace("\t", " ")
+                for line in lines[line_from-1 : line_to-1+1]]
+
+def lineinfo(region):
+    """ Return sensible values for start/end line/columns for the possibly empty
+    entries in the sarif 'region' structure.
+    """
+    startLine, startColumn, endLine, endColumn = map(
+        lambda e: region.get(e, -1), ['startLine', 'startColumn', 'endLine', 'endColumn'])
+    # Full information is startLine / startColumn / endLine / endcolumn
+    # - only have startLine / startColumn / _ / endcolumn
+    if endLine == -1: endLine = startLine 
+
+    # - only have startLine / _ / _ / endcolumn
+    if startColumn == -1: startColumn = 1 
+
+    return startLine, startColumn, endLine, endColumn
+
+def indices(sarif_struct, *path):
+    """ Return a range for the indices of PATH """
+    return range(0, len(get(sarif_struct, *path)))
+
+def get(sarif_struct, *path):
+    """ Get the sarif entry at PATH """
+    res = sarif_struct
+    for p in path:
+        res = res[p]
+    return res
+
+def msg(message):
+    """ Print message to stdout """
+    sys.stdout.write(message)
+
+def dbg(message):
+    """ Print message to stderr """
+    sys.stdout.flush()
+    sys.stderr.write("warning: %s\n" % message)
+    sys.stderr.flush()