mirror of
https://github.com/hohn/sarif-cli.git
synced 2025-12-16 17:23:03 +01:00
Processing in stages: Move the initial sarif_cli code to sarif_cli/traverse
This commit is contained in:
committed by
=Michael Hohn
parent
7d49c3bd08
commit
ef08825b43
@@ -1,178 +0,0 @@
|
||||
import sys
|
||||
import os
|
||||
import re
|
||||
import codecs
|
||||
import csv
|
||||
|
||||
# Oldest supported interpreter version, as a (major, minor) tuple.
MIN_PYTHON = (3, 7)

# Stop immediately on an older interpreter; the message names the
# required version taken from MIN_PYTHON.
if MIN_PYTHON > sys.version_info:
    sys.exit("Python %s.%s or later is required.\n" % MIN_PYTHON)
|
||||
|
||||
class WholeFile:
    """Sentinel standing in for a missing 'region'.

    Used as the region value when a location names a file but carries no
    'region' entry, meaning the whole file is to be used.  The class
    itself is the marker; it has no members.
    """
|
||||
|
||||
class NoFile:
    """Sentinel used when no file is available for a location.

    The class itself is the marker; it has no members.
    """
|
||||
|
||||
def is_sarif_struct(struct):
    """A quick check to verify that `struct` is in fact a SARIF tree.

    Returns True only when `struct` is a dict (or dict subclass) that has
    a string "$schema" entry containing "sarif" and also has a "version"
    key; returns False for anything else.
    """
    # isinstance() also accepts dict subclasses such as OrderedDict.
    if not isinstance(struct, dict):
        return False
    schema = struct.get("$schema")
    # A non-string "$schema" (e.g. None) cannot name a SARIF schema; report
    # False instead of raising from the substring test.
    if not isinstance(schema, str):
        return False
    return "sarif" in schema and "version" in struct
|
||||
|
||||
def get_csv_writer():
    """Create the default csv writer, which writes to stdout.

    Fields are comma-separated and every field is wrapped in double quotes.
    """
    options = dict(delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL)
    return csv.writer(sys.stdout, **options)
|
||||
|
||||
def write_csv(writer, *columns):
    """Emit `columns` as a single row through `writer`.

    writer: an object with a writerow() method, e.g. a csv writer
    columns: the values of the row, passed on as one tuple
    """
    row = columns
    writer.writerow(row)
|
||||
|
||||
def get_relatedlocation_message_info(related_location):
    """Extract the message information of a relatedLocation.

    The relatedLocation typically starts from
        get(sarif_struct, 'runs', [int], 'results', [int], 'relatedLocations', [int])

    The same extraction is used for a location inside a threadFlow; that
    location typically starts from
        get(sarif_struct, 'runs', _i, 'results', _i, 'codeFlows', _i, 'threadFlows', _i, 'locations', _i)

    Returns: (message, artifact, region) by default
    Without a 'physicalLocation' key, returns (message, sarif_cli.NoFile, sarif_cli.NoFile)
    Without a 'region' key, returns (message, artifact, sarif_cli.WholeFile)
    """
    text = get(related_location, 'message', 'text')

    # No physical location at all: there is neither a file nor a region.
    if 'physicalLocation' not in related_location:
        return text, NoFile, NoFile

    physical = get(related_location, 'physicalLocation')
    # A missing 'region' means the whole file is meant.
    return text, physical.get('artifactLocation'), physical.get('region', WholeFile)
|
||||
|
||||
def get_location_message_info(result):
    """Extract the message information of one result.

    The `result` typically starts from
        get(sarif_struct, 'runs', run_index, 'results', res_index)

    Only the first entry of 'locations' is examined.

    Returns: (message, artifact, region)
    Without a 'region' key, returns (message, artifact, sarif_cli.WholeFile)
    """
    text = get(result, 'message', 'text')
    physical = get(result, 'locations', 0, 'physicalLocation')
    artifact = physical['artifactLocation']
    # Fall back to the whole file when the location carries no 'region'.
    region = physical.get('region', WholeFile)
    return (text, artifact, region)
|
||||
|
||||
def display_underlined(l1, c1, l2, c2, line, line_num):
    """Print `line`, then a second line marking the region within it.

    l1, c1, l2, c2: the line/column range
    line: the line of text
    line_num: the line number for the text, used with the line/column range
    """
    # The source text, on its own output line.
    msg("%s" % line)
    msg("\n")
    # The marker line for this result, also newline-terminated.
    msg(underline_for_result(l1, c1, l2, c2, line, line_num))
    msg("\n")
|
||||
|
||||
def underline_for_result(first_line, first_column, last_line, last_column, line, line_num):
    """Provide the underline for a result line.

    first_line, first_column, last_line, last_column :
        the region from lineinfo(region); lines and columns are 1-indexed
    line:
        the line of source
    line_num:
        the index of line, must satisfy first_line <= line_num <= last_line

    Returns a string of spaces followed by '^' characters.  By default the
    carets run from the first non-whitespace character of `line` up to the
    start of its trailing whitespace; on the first line of the region they
    start no earlier than first_column, and on the last line they stop
    before last_column.
    """
    # col_from / col_to are 0-indexed and use the [start, end) convention.
    # Raw-string patterns keep `\S` / `\s` from being read as (invalid)
    # string escapes.

    # From the first non-whitespace char
    leading = re.search(r"\S", line)
    col_from = leading.start() if leading else 0

    # To the last non-whitespace char
    trailing = re.search(r"\s+$", line)
    col_to = trailing.start() if trailing else len(line)

    # The region columns are 1-indexed; shift them by one to compare against
    # the 0-indexed span.
    if line_num == first_line:
        col_from = max(col_from, first_column - 1)
    if line_num == last_line:
        col_to = min(col_to, last_column - 1)

    # A non-positive width yields no carets.
    return " " * col_from + "^" * (col_to - col_from)
|
||||
|
||||
|
||||
def load_lines(root, path, line_from, line_to):
    """Return lines line_from through line_to (both included) of root/path.

    Lines are counted from 1.
    The file is decoded as latin-1.
    Each tab becomes 1 space.  This seems to be the codeql handling for
    beginning of line.
    Trailing newline / carriage-return characters are dropped.

    When the file does not exist, a warning is issued via dbg() and an
    empty list is returned.
    """
    source_path = os.path.join(root, path)
    if not os.path.exists(source_path):
        dbg("Missing file: %s" % source_path)
        return []

    with codecs.open(source_path, 'r', encoding="latin-1") as handle:
        all_lines = handle.readlines()

    # The 1-based inclusive range [line_from, line_to] is the 0-based
    # slice [line_from - 1, line_to).
    wanted = all_lines[line_from - 1:line_to]
    return [text.rstrip("\n\r").replace("\t", " ") for text in wanted]
|
||||
|
||||
def lineinfo(region):
    """Return start/end line and column values for a sarif 'region',
    filling in entries that may be absent.

    Returns (startLine, startColumn, endLine, endColumn).  An absent entry
    is first read as -1; then
    - an endLine of -1 is replaced by startLine
    - a startColumn of -1 is replaced by 1
    startLine and endColumn are returned as read.
    """
    start_line = region.get('startLine', -1)
    start_column = region.get('startColumn', -1)
    end_line = region.get('endLine', -1)
    end_column = region.get('endColumn', -1)

    # No endLine: the region ends on the line it starts on.
    if end_line == -1:
        end_line = start_line

    # No startColumn: the region starts at the first column.
    if start_column == -1:
        start_column = 1

    return start_line, start_column, end_line, end_column
|
||||
|
||||
def indices(sarif_struct, *path):
    """Return a range over the valid indices of the entry at PATH.

    The entry is looked up with get(sarif_struct, *path) and must
    support len().
    """
    entry = get(sarif_struct, *path)
    return range(len(entry))
|
||||
|
||||
def get(sarif_struct, *path):
    """Return the sarif entry reached by following PATH from `sarif_struct`.

    Each element of PATH is used to index the current node in turn, so it
    may be a dict key or a list index.  With an empty PATH, `sarif_struct`
    itself is returned.
    """
    node = sarif_struct
    for step in path:
        node = node[step]
    return node
|
||||
|
||||
def msg(message):
    """Write `message` to stdout as-is; no newline is appended."""
    out = sys.stdout
    out.write(message)
|
||||
|
||||
def dbg(message):
    """Write `message` to stderr as a line prefixed with "warning: ".

    stdout is flushed first and stderr is flushed afterwards.
    """
    sys.stdout.flush()
    err = sys.stderr
    err.write("warning: %s\n" % message)
    err.flush()
|
||||
|
||||
Reference in New Issue
Block a user