From ef08825b43c0178366b71885cbb8dc80b3976856 Mon Sep 17 00:00:00 2001 From: Michael Hohn Date: Wed, 22 Dec 2021 18:03:34 -0800 Subject: [PATCH] Processing in stages: Move the initial sarif_cli code to sarif_cli/traverse --- bin/sarif-digest | 2 +- bin/sarif-labeled | 2 +- bin/sarif-list-files | 2 +- bin/sarif-results-summary | 2 +- sarif_cli/__init__.py | 178 -------------------------------------- sarif_cli/traverse.py | 178 ++++++++++++++++++++++++++++++++++++++ 6 files changed, 182 insertions(+), 182 deletions(-) create mode 100644 sarif_cli/traverse.py diff --git a/bin/sarif-digest b/bin/sarif-digest index 75470ab..94d2eec 100755 --- a/bin/sarif-digest +++ b/bin/sarif-digest @@ -1,6 +1,6 @@ #!/usr/bin/env python import json -import sarif_cli as S +import sarif_cli.traverse as S import sys # TODO command-line: sarif-digest [] diff --git a/bin/sarif-labeled b/bin/sarif-labeled index 4e06158..9868a3d 100755 --- a/bin/sarif-labeled +++ b/bin/sarif-labeled @@ -1,7 +1,7 @@ #!/usr/bin/env python import argparse import json -import sarif_cli as S +import sarif_cli.traverse as S import sys import collections diff --git a/bin/sarif-list-files b/bin/sarif-list-files index 2767208..b009687 100755 --- a/bin/sarif-list-files +++ b/bin/sarif-list-files @@ -1,7 +1,7 @@ #!/usr/bin/env python import argparse import json -import sarif_cli as S +import sarif_cli.traverse as S import sys import collections diff --git a/bin/sarif-results-summary b/bin/sarif-results-summary index 407438f..941db7a 100755 --- a/bin/sarif-results-summary +++ b/bin/sarif-results-summary @@ -1,7 +1,7 @@ #!/usr/bin/env python import argparse import json -import sarif_cli as S +import sarif_cli.traverse as S import re import sys import collections diff --git a/sarif_cli/__init__.py b/sarif_cli/__init__.py index 9abdda7..e69de29 100644 --- a/sarif_cli/__init__.py +++ b/sarif_cli/__init__.py @@ -1,178 +0,0 @@ -import sys -import os -import re -import codecs -import csv - -MIN_PYTHON = (3, 7) -if sys.version_info < MIN_PYTHON: - sys.exit("Python %s.%s or later is required.\n" % MIN_PYTHON) - -class WholeFile: - """ Special case handling: use this class for non-existent regions where the - whole file is to be used. - """ - -class NoFile: - """ Special case handling: use this class when no file is available. - """ - -def is_sarif_struct(struct): - """A quick check to verify that `struct` is in fact a SARIF tree. - """ - return type(struct) == dict and "$schema" in struct and \ - "sarif" in struct["$schema"] and "version" in struct - -def get_csv_writer(): - """ Set up and return the default csv writer on stdout. - """ - return csv.writer(sys.stdout, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL) - -def write_csv(writer, *columns): - """ Print via `writer`, with some additional processing """ - writer.writerow(columns) - -def get_relatedlocation_message_info(related_location): - """ Given a relatedLocation, extract message information. - - The relatedLocation typically starts from - get(sarif_struct, 'runs', [int], 'results', [int], 'relatedLocations', [int]) - - When used for a threadFlow, extract message information for a location contained in it. - - In this case, the location typically starts from - get(sarif_struct, 'runs', _i, 'results', _i, 'codeFlows', _i, 'threadFlows', _i, 'locations', _i) - - Returns: (message, artifact, region) by default - For an empty 'physicalLocation' key, returns (message, sarif_cli.NoFile, sarif_cli.NoFile) - For an empty 'region' key, returns (message, artifact, sarif_cli.WholeFile) - """ - message = get(related_location, 'message', 'text') - if 'physicalLocation' in related_location: - ploc = get(related_location, 'physicalLocation') - artifact = ploc.get('artifactLocation') - region = ploc.get('region', WholeFile) - else: - artifact, region = NoFile, NoFile - return message, artifact, region - -def get_location_message_info(result): - """ Given one of the results, extract message information. - - The `result` typically starts from get(sarif_struct, 'runs', run_index, 'results', res_index) - - Returns: (message, artifact, region) - For an empty 'region' key, returns (message, artifact, sarif_cli.WholeFile) - - """ - message = get(result, 'message', 'text') - artifact = get(result, 'locations', 0, 'physicalLocation', 'artifactLocation') - # If there is no 'region' key, use the whole file - region = get(result, 'locations', 0, 'physicalLocation').get('region', WholeFile) - return (message, artifact, region) - -def display_underlined(l1, c1, l2, c2, line, line_num): - """ Display the given line followed by a second line with underscores at the locations. - - l1, c1, l2, c2: the line/column range - line: the line of text - line_num: the line number for the text, used with the line/column range - """ - # Display the line - msg("%s" % (line)) - msg("\n") - # Print the underline - underline = underline_for_result(l1, c1, l2, c2, line, line_num) - msg(underline) - # Next result - msg("\n") - -def underline_for_result(first_line, first_column, last_line, last_column, line, line_num): - """Provide the underline for a result line. - - first_line, first_column, last_line, last_column : - the region from lineinfo(region) - line: - the line of source - line_num: - the index of line, must satisfy first_line <= line_num <= last_line - """ - # Underline the affected region - # col_* use the [start, end) indexing - # From the first non-whitespace char - match = re.search("([^\s])+", line) - if match: - col_from = match.span()[0] - else: - col_from = 0 - # To the last non-whitespace char - match = re.search("(\s)+$", line) - if match: - col_to = match.span()[0] - else: - col_to = len(line) - # Use 1-indexing - col_from += 1 ; col_to += 1 - # Adjust first line - if line_num == first_line: - col_from = max(col_from, first_column) - # Adjust last line - if line_num == last_line: - col_to = min(col_to, last_column) - # Use 0-indexing - col_from -= 1 ; col_to -= 1 - # Return the underline - return " " * col_from + "^" * (col_to - col_from) - - -def load_lines(root, path, line_from, line_to): - """Load the line range [line_from, line_to], including both, - from the file at root/path. - Lines are counted from 1. - Use 1 space for each tab. This seems to be the codeql handling for beginning of line. - Newlines are dropped. - """ - fname = os.path.join(root, path) - if not os.path.exists(fname): - dbg("Missing file: %s" % fname) - return [] - with codecs.open(fname, 'r', encoding="latin-1") as file: - lines = file.readlines() - return [line.rstrip("\n\r").replace("\t", " ") - for line in lines[line_from-1 : line_to-1+1]] - -def lineinfo(region): - """ Return sensible values for start/end line/columns for the possibly empty - entries in the sarif 'region' structure. - """ - startLine, startColumn, endLine, endColumn = map( - lambda e: region.get(e, -1), ['startLine', 'startColumn', 'endLine', 'endColumn']) - # Full information is startLine / startColumn / endLine / endcolumn - # - only have startLine / startColumn / _ / endcolumn - if endLine == -1: endLine = startLine - - # - only have startLine / _ / _ / endcolumn - if startColumn == -1: startColumn = 1 - - return startLine, startColumn, endLine, endColumn - -def indices(sarif_struct, *path): - """ Return a range for the indices of PATH """ - return range(0, len(get(sarif_struct, *path))) - -def get(sarif_struct, *path): - """ Get the sarif entry at PATH """ - res = sarif_struct - for p in path: - res = res[p] - return res - -def msg(message): - """ Print message to stdout """ - sys.stdout.write(message) - -def dbg(message): - """ Print message to stderr """ - sys.stdout.flush() - sys.stderr.write("warning: %s\n" % message) - sys.stderr.flush() diff --git a/sarif_cli/traverse.py b/sarif_cli/traverse.py new file mode 100644 index 0000000..9abdda7 --- /dev/null +++ b/sarif_cli/traverse.py @@ -0,0 +1,178 @@ +import sys +import os +import re +import codecs +import csv + +MIN_PYTHON = (3, 7) +if sys.version_info < MIN_PYTHON: + sys.exit("Python %s.%s or later is required.\n" % MIN_PYTHON) + +class WholeFile: + """ Special case handling: use this class for non-existent regions where the + whole file is to be used. + """ + +class NoFile: + """ Special case handling: use this class when no file is available. + """ + +def is_sarif_struct(struct): + """A quick check to verify that `struct` is in fact a SARIF tree. + """ + return type(struct) == dict and "$schema" in struct and \ + "sarif" in struct["$schema"] and "version" in struct + +def get_csv_writer(): + """ Set up and return the default csv writer on stdout. + """ + return csv.writer(sys.stdout, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL) + +def write_csv(writer, *columns): + """ Print via `writer`, with some additional processing """ + writer.writerow(columns) + +def get_relatedlocation_message_info(related_location): + """ Given a relatedLocation, extract message information. + + The relatedLocation typically starts from + get(sarif_struct, 'runs', [int], 'results', [int], 'relatedLocations', [int]) + + When used for a threadFlow, extract message information for a location contained in it. + + In this case, the location typically starts from + get(sarif_struct, 'runs', _i, 'results', _i, 'codeFlows', _i, 'threadFlows', _i, 'locations', _i) + + Returns: (message, artifact, region) by default + For an empty 'physicalLocation' key, returns (message, sarif_cli.NoFile, sarif_cli.NoFile) + For an empty 'region' key, returns (message, artifact, sarif_cli.WholeFile) + """ + message = get(related_location, 'message', 'text') + if 'physicalLocation' in related_location: + ploc = get(related_location, 'physicalLocation') + artifact = ploc.get('artifactLocation') + region = ploc.get('region', WholeFile) + else: + artifact, region = NoFile, NoFile + return message, artifact, region + +def get_location_message_info(result): + """ Given one of the results, extract message information. + + The `result` typically starts from get(sarif_struct, 'runs', run_index, 'results', res_index) + + Returns: (message, artifact, region) + For an empty 'region' key, returns (message, artifact, sarif_cli.WholeFile) + + """ + message = get(result, 'message', 'text') + artifact = get(result, 'locations', 0, 'physicalLocation', 'artifactLocation') + # If there is no 'region' key, use the whole file + region = get(result, 'locations', 0, 'physicalLocation').get('region', WholeFile) + return (message, artifact, region) + +def display_underlined(l1, c1, l2, c2, line, line_num): + """ Display the given line followed by a second line with underscores at the locations. + + l1, c1, l2, c2: the line/column range + line: the line of text + line_num: the line number for the text, used with the line/column range + """ + # Display the line + msg("%s" % (line)) + msg("\n") + # Print the underline + underline = underline_for_result(l1, c1, l2, c2, line, line_num) + msg(underline) + # Next result + msg("\n") + +def underline_for_result(first_line, first_column, last_line, last_column, line, line_num): + """Provide the underline for a result line. + + first_line, first_column, last_line, last_column : + the region from lineinfo(region) + line: + the line of source + line_num: + the index of line, must satisfy first_line <= line_num <= last_line + """ + # Underline the affected region + # col_* use the [start, end) indexing + # From the first non-whitespace char + match = re.search("([^\s])+", line) + if match: + col_from = match.span()[0] + else: + col_from = 0 + # To the last non-whitespace char + match = re.search("(\s)+$", line) + if match: + col_to = match.span()[0] + else: + col_to = len(line) + # Use 1-indexing + col_from += 1 ; col_to += 1 + # Adjust first line + if line_num == first_line: + col_from = max(col_from, first_column) + # Adjust last line + if line_num == last_line: + col_to = min(col_to, last_column) + # Use 0-indexing + col_from -= 1 ; col_to -= 1 + # Return the underline + return " " * col_from + "^" * (col_to - col_from) + + +def load_lines(root, path, line_from, line_to): + """Load the line range [line_from, line_to], including both, + from the file at root/path. + Lines are counted from 1. + Use 1 space for each tab. This seems to be the codeql handling for beginning of line. + Newlines are dropped. + """ + fname = os.path.join(root, path) + if not os.path.exists(fname): + dbg("Missing file: %s" % fname) + return [] + with codecs.open(fname, 'r', encoding="latin-1") as file: + lines = file.readlines() + return [line.rstrip("\n\r").replace("\t", " ") + for line in lines[line_from-1 : line_to-1+1]] + +def lineinfo(region): + """ Return sensible values for start/end line/columns for the possibly empty + entries in the sarif 'region' structure. + """ + startLine, startColumn, endLine, endColumn = map( + lambda e: region.get(e, -1), ['startLine', 'startColumn', 'endLine', 'endColumn']) + # Full information is startLine / startColumn / endLine / endcolumn + # - only have startLine / startColumn / _ / endcolumn + if endLine == -1: endLine = startLine + + # - only have startLine / _ / _ / endcolumn + if startColumn == -1: startColumn = 1 + + return startLine, startColumn, endLine, endColumn + +def indices(sarif_struct, *path): + """ Return a range for the indices of PATH """ + return range(0, len(get(sarif_struct, *path))) + +def get(sarif_struct, *path): + """ Get the sarif entry at PATH """ + res = sarif_struct + for p in path: + res = res[p] + return res + +def msg(message): + """ Print message to stdout """ + sys.stdout.write(message) + +def dbg(message): + """ Print message to stderr """ + sys.stdout.flush() + sys.stderr.write("warning: %s\n" % message) + sys.stderr.flush()