Move sarif parsing code to a location that can be shared.
This commit is contained in:
124
extensions/ql-vscode/src/sarif-utils.ts
Normal file
124
extensions/ql-vscode/src/sarif-utils.ts
Normal file
@@ -0,0 +1,124 @@
|
||||
import * as Sarif from "sarif"
|
||||
import * as path from "path"
|
||||
import { LocationStyle, ResolvableLocationValue } from "semmle-bqrs";
|
||||
|
||||
export interface SarifLink {
|
||||
dest: number
|
||||
text: string
|
||||
}
|
||||
|
||||
|
||||
type ParsedSarifLocation =
|
||||
| ResolvableLocationValue
|
||||
// Resolvable locations have a `file` field, but it will sometimes include
|
||||
// a source location prefix, which contains build-specific information the user
|
||||
// doesn't really need to see. We ensure that `userVisibleFile` will not contain
|
||||
// that, and is appropriate for display in the UI.
|
||||
& { userVisibleFile: string }
|
||||
| { t: 'NoLocation', hint: string };
|
||||
|
||||
export type SarifMessageComponent = string | SarifLink
|
||||
|
||||
/**
|
||||
* Unescape "[", "]" and "\\" like in sarif plain text messages
|
||||
*/
|
||||
export function unescapeSarifText(message: string): string {
|
||||
return message.replace(/\\\[/g, "[").replace(/\\\]/g, "]").replace(/\\\\/, "\\");
|
||||
}
|
||||
|
||||
export function parseSarifPlainTextMessage(message: string): SarifMessageComponent[] {
|
||||
let results: SarifMessageComponent[] = [];
|
||||
|
||||
// We want something like "[linkText](4)", except that "[" and "]" may be escaped. The lookbehind asserts
|
||||
// that the initial [ is not escaped. Then we parse a link text with "[" and "]" escaped. Then we parse the numerical target.
|
||||
// Technically we could have any uri in the target but we don't output that yet.
|
||||
// The possibility of escaping outside the link is not mentioned in the sarif spec but we always output sartif this way.
|
||||
const linkRegex = /(?<=(?<!\\)(\\\\)*)\[(?<linkText>([^\\\]\[]|\\\\|\\\]|\\\[)*)\]\((?<linkTarget>[0-9]+)\)/g;
|
||||
let result: RegExpExecArray | null;
|
||||
let curIndex = 0;
|
||||
while ((result = linkRegex.exec(message)) !== null) {
|
||||
results.push(unescapeSarifText(message.substring(curIndex, result.index)));
|
||||
const linkText = result.groups!["linkText"];
|
||||
const linkTarget = +result.groups!["linkTarget"];
|
||||
results.push({ dest: linkTarget, text: unescapeSarifText(linkText) });
|
||||
curIndex = result.index + result[0].length;
|
||||
}
|
||||
results.push(unescapeSarifText(message.substring(curIndex, message.length)));
|
||||
return results;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Computes a path normalized to reflect conventional normalization
|
||||
* of windows paths into zip archive paths.
|
||||
* @param sourceLocationPrefix The source location prefix of a database. May be
|
||||
* unix style `/foo/bar/baz` or windows-style `C:\foo\bar\baz`.
|
||||
* @param sarifRelativeUri A uri relative to sourceLocationPrefix.
|
||||
* @returns A string that is valid for the `.file` field of a `FivePartLocation`:
|
||||
* directory separators are normalized, but drive letters `C:` may appear.
|
||||
*/
|
||||
export function getPathRelativeToSourceLocationPrefix(sourceLocationPrefix: string, sarifRelativeUui: string) {
|
||||
const normalizedSourceLocationPrefix = sourceLocationPrefix.replace(/\\/g, '/');
|
||||
return path.join(normalizedSourceLocationPrefix, decodeURIComponent(sarifRelativeUui));
|
||||
}
|
||||
|
||||
export function parseSarifLocation(loc: Sarif.Location, sourceLocationPrefix: string): ParsedSarifLocation {
|
||||
const physicalLocation = loc.physicalLocation;
|
||||
if (physicalLocation === undefined)
|
||||
return { t: 'NoLocation', hint: 'no physical location' };
|
||||
if (physicalLocation.artifactLocation === undefined)
|
||||
return { t: 'NoLocation', hint: 'no artifact location' };
|
||||
if (physicalLocation.artifactLocation.uri === undefined)
|
||||
return { t: 'NoLocation', hint: 'artifact location has no uri' };
|
||||
|
||||
// This is not necessarily really an absolute uri; it could either be a
|
||||
// file uri or a relative uri.
|
||||
const uri = physicalLocation.artifactLocation.uri;
|
||||
|
||||
const fileUriRegex = /^file:/;
|
||||
const effectiveLocation = uri.match(fileUriRegex) ?
|
||||
decodeURIComponent(uri.replace(fileUriRegex, '')) :
|
||||
getPathRelativeToSourceLocationPrefix(sourceLocationPrefix, uri);
|
||||
const userVisibleFile = uri.match(fileUriRegex) ?
|
||||
decodeURIComponent(uri.replace(fileUriRegex, '')) :
|
||||
uri;
|
||||
|
||||
if (physicalLocation.region === undefined) {
|
||||
// If the region property is absent, the physicalLocation object refers to the entire file.
|
||||
// Source: https://docs.oasis-open.org/sarif/sarif/v2.1.0/cs01/sarif-v2.1.0-cs01.html#_Toc16012638.
|
||||
// TODO: Do we get here if we provide a non-filesystem URL?
|
||||
return {
|
||||
t: LocationStyle.WholeFile,
|
||||
file: effectiveLocation,
|
||||
userVisibleFile,
|
||||
};
|
||||
} else {
|
||||
const region = physicalLocation.region;
|
||||
// We assume that the SARIF we're given always has startLine
|
||||
// This is not mandated by the SARIF spec, but should be true of
|
||||
// SARIF output by our own tools.
|
||||
const lineStart = region.startLine!;
|
||||
|
||||
// These defaults are from SARIF 2.1.0 spec, section 3.30.2, "Text Regions"
|
||||
// https://docs.oasis-open.org/sarif/sarif/v2.1.0/cs01/sarif-v2.1.0-cs01.html#_Ref493492556
|
||||
const lineEnd = region.endLine === undefined ? lineStart : region.endLine;
|
||||
const colStart = region.startColumn === undefined ? 1 : region.startColumn;
|
||||
|
||||
// We also assume that our tools will always supply `endColumn` field, which is
|
||||
// fortunate, since the SARIF spec says that it defaults to the end of the line, whose
|
||||
// length we don't know at this point in the code.
|
||||
//
|
||||
// It is off by one with respect to the way vscode counts columns in selections.
|
||||
const colEnd = region.endColumn! - 1;
|
||||
|
||||
return {
|
||||
t: LocationStyle.FivePart,
|
||||
file: effectiveLocation,
|
||||
userVisibleFile,
|
||||
lineStart,
|
||||
colStart,
|
||||
lineEnd,
|
||||
colEnd,
|
||||
};
|
||||
}
|
||||
}
|
||||
@@ -2,10 +2,11 @@ import * as path from 'path';
|
||||
import * as React from 'react';
|
||||
import * as Sarif from 'sarif';
|
||||
import * as Keys from '../result-keys';
|
||||
import { LocationStyle, ResolvableLocationValue } from 'semmle-bqrs';
|
||||
import { LocationStyle } from 'semmle-bqrs';
|
||||
import * as octicons from './octicons';
|
||||
import { className, renderLocation, ResultTableProps, zebraStripe, selectableZebraStripe, jumpToLocation } from './result-table-utils';
|
||||
import { PathTableResultSet, onNavigation, NavigationEvent } from './results';
|
||||
import { parseSarifPlainTextMessage, parseSarifLocation } from '../sarif-utils';
|
||||
|
||||
export type PathTableProps = ResultTableProps & { resultSet: PathTableResultSet };
|
||||
export interface PathTableState {
|
||||
@@ -13,64 +14,6 @@ export interface PathTableState {
|
||||
selectedPathNode: undefined | Keys.PathNode;
|
||||
}
|
||||
|
||||
interface SarifLink {
|
||||
dest: number
|
||||
text: string
|
||||
}
|
||||
|
||||
type ParsedSarifLocation =
|
||||
| ResolvableLocationValue
|
||||
// Resolvable locations have a `file` field, but it will sometimes include
|
||||
// a source location prefix, which contains build-specific information the user
|
||||
// doesn't really need to see. We ensure that `userVisibleFile` will not contain
|
||||
// that, and is appropriate for display in the UI.
|
||||
& { userVisibleFile: string }
|
||||
| { t: 'NoLocation', hint: string };
|
||||
|
||||
type SarifMessageComponent = string | SarifLink
|
||||
|
||||
/**
|
||||
* Unescape "[", "]" and "\\" like in sarif plain text messages
|
||||
*/
|
||||
function unescapeSarifText(message: string): string {
|
||||
return message.replace(/\\\[/g, "[").replace(/\\\]/g, "]").replace(/\\\\/, "\\");
|
||||
}
|
||||
|
||||
function parseSarifPlainTextMessage(message: string): SarifMessageComponent[] {
|
||||
let results: SarifMessageComponent[] = [];
|
||||
|
||||
// We want something like "[linkText](4)", except that "[" and "]" may be escaped. The lookbehind asserts
|
||||
// that the initial [ is not escaped. Then we parse a link text with "[" and "]" escaped. Then we parse the numerical target.
|
||||
// Technically we could have any uri in the target but we don't output that yet.
|
||||
// The possibility of escaping outside the link is not mentioned in the sarif spec but we always output sartif this way.
|
||||
const linkRegex = /(?<=(?<!\\)(\\\\)*)\[(?<linkText>([^\\\]\[]|\\\\|\\\]|\\\[)*)\]\((?<linkTarget>[0-9]+)\)/g;
|
||||
let result: RegExpExecArray | null;
|
||||
let curIndex = 0;
|
||||
while ((result = linkRegex.exec(message)) !== null) {
|
||||
results.push(unescapeSarifText(message.substring(curIndex, result.index)));
|
||||
const linkText = result.groups!["linkText"];
|
||||
const linkTarget = +result.groups!["linkTarget"];
|
||||
results.push({ dest: linkTarget, text: unescapeSarifText(linkText) });
|
||||
curIndex = result.index + result[0].length;
|
||||
}
|
||||
results.push(unescapeSarifText(message.substring(curIndex, message.length)));
|
||||
return results;
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes a path normalized to reflect conventional normalization
|
||||
* of windows paths into zip archive paths.
|
||||
* @param sourceLocationPrefix The source location prefix of a database. May be
|
||||
* unix style `/foo/bar/baz` or windows-style `C:\foo\bar\baz`.
|
||||
* @param sarifRelativeUri A uri relative to sourceLocationPrefix.
|
||||
* @returns A string that is valid for the `.file` field of a `FivePartLocation`:
|
||||
* directory separators are normalized, but drive letters `C:` may appear.
|
||||
*/
|
||||
export function getPathRelativeToSourceLocationPrefix(sourceLocationPrefix: string, sarifRelativeUui: string) {
|
||||
const normalizedSourceLocationPrefix = sourceLocationPrefix.replace(/\\/g, '/');
|
||||
return path.join(normalizedSourceLocationPrefix, decodeURIComponent(sarifRelativeUui));
|
||||
}
|
||||
|
||||
export class PathTable extends React.Component<PathTableProps, PathTableState> {
|
||||
constructor(props: PathTableProps) {
|
||||
super(props);
|
||||
@@ -323,64 +266,3 @@ export class PathTable extends React.Component<PathTableProps, PathTableState> {
|
||||
onNavigation.removeListener(this.handleNavigationEvent);
|
||||
}
|
||||
}
|
||||
|
||||
function parseSarifLocation(loc: Sarif.Location, sourceLocationPrefix: string): ParsedSarifLocation {
|
||||
const physicalLocation = loc.physicalLocation;
|
||||
if (physicalLocation === undefined)
|
||||
return { t: 'NoLocation', hint: 'no physical location' };
|
||||
if (physicalLocation.artifactLocation === undefined)
|
||||
return { t: 'NoLocation', hint: 'no artifact location' };
|
||||
if (physicalLocation.artifactLocation.uri === undefined)
|
||||
return { t: 'NoLocation', hint: 'artifact location has no uri' };
|
||||
|
||||
// This is not necessarily really an absolute uri; it could either be a
|
||||
// file uri or a relative uri.
|
||||
const uri = physicalLocation.artifactLocation.uri;
|
||||
|
||||
const fileUriRegex = /^file:/;
|
||||
const effectiveLocation = uri.match(fileUriRegex) ?
|
||||
decodeURIComponent(uri.replace(fileUriRegex, '')) :
|
||||
getPathRelativeToSourceLocationPrefix(sourceLocationPrefix, uri);
|
||||
const userVisibleFile = uri.match(fileUriRegex) ?
|
||||
decodeURIComponent(uri.replace(fileUriRegex, '')) :
|
||||
uri;
|
||||
|
||||
if (physicalLocation.region === undefined) {
|
||||
// If the region property is absent, the physicalLocation object refers to the entire file.
|
||||
// Source: https://docs.oasis-open.org/sarif/sarif/v2.1.0/cs01/sarif-v2.1.0-cs01.html#_Toc16012638.
|
||||
// TODO: Do we get here if we provide a non-filesystem URL?
|
||||
return {
|
||||
t: LocationStyle.WholeFile,
|
||||
file: effectiveLocation,
|
||||
userVisibleFile,
|
||||
};
|
||||
} else {
|
||||
const region = physicalLocation.region;
|
||||
// We assume that the SARIF we're given always has startLine
|
||||
// This is not mandated by the SARIF spec, but should be true of
|
||||
// SARIF output by our own tools.
|
||||
const lineStart = region.startLine!;
|
||||
|
||||
// These defaults are from SARIF 2.1.0 spec, section 3.30.2, "Text Regions"
|
||||
// https://docs.oasis-open.org/sarif/sarif/v2.1.0/cs01/sarif-v2.1.0-cs01.html#_Ref493492556
|
||||
const lineEnd = region.endLine === undefined ? lineStart : region.endLine;
|
||||
const colStart = region.startColumn === undefined ? 1 : region.startColumn;
|
||||
|
||||
// We also assume that our tools will always supply `endColumn` field, which is
|
||||
// fortunate, since the SARIF spec says that it defaults to the end of the line, whose
|
||||
// length we don't know at this point in the code.
|
||||
//
|
||||
// It is off by one with respect to the way vscode counts columns in selections.
|
||||
const colEnd = region.endColumn! - 1;
|
||||
|
||||
return {
|
||||
t: LocationStyle.FivePart,
|
||||
file: effectiveLocation,
|
||||
userVisibleFile,
|
||||
lineStart,
|
||||
colStart,
|
||||
lineEnd,
|
||||
colEnd,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user