Ensure backslashes are properly escaped in sarif messages

The problem was that we were not globally replacing `\\` with `\`.

Also, this PR adds some new tests to sarif-utils.ts. In doing so, we
have fixed a small bug in getPathRelativeToSourceLocationPrefix.

Previously, we were URI-decoding the sarifRelativeUri. However, this is
not correct because the result is itself a URI, so it should
remain encoded if it originally was.

Resolves #585
This commit is contained in:
Andrew Eisenberg
2020-09-29 12:28:31 -07:00
parent 7ca456d6a0
commit 06b6a4705a
3 changed files with 137 additions and 49 deletions

View File

@@ -15,7 +15,10 @@ interface NoLocation {
}
type ParsedSarifLocation =
| (ResolvableLocationValue & { userVisibleFile: string })
| (ResolvableLocationValue & {
userVisibleFile: string;
})
// Resolvable locations have a `uri` field, but it will sometimes include
// a source location prefix, which contains build-specific information the user
// doesn't really need to see. We ensure that `userVisibleFile` will not contain
@@ -28,7 +31,10 @@ export type SarifMessageComponent = string | SarifLink
* Unescape "[", "]" and "\\" like in sarif plain text messages
*/
export function unescapeSarifText(message: string): string {
return message.replace(/\\\[/g, '[').replace(/\\\]/g, ']').replace(/\\\\/, '\\');
return message
.replace(/\\\[/g, '[')
.replace(/\\\]/g, ']')
.replace(/\\\\/g, '\\');
}
export function parseSarifPlainTextMessage(message: string): SarifMessageComponent[] {
@@ -59,17 +65,18 @@ export function parseSarifPlainTextMessage(message: string): SarifMessageCompone
* @param sourceLocationPrefix The source location prefix of a database. May be
* unix style `/foo/bar/baz` or windows-style `C:\foo\bar\baz`.
* @param sarifRelativeUri A uri relative to sourceLocationPrefix.
* @returns A string that is valid for the `.file` field of a `FivePartLocation`:
*
* @returns A URI string that is valid for the `.file` field of a `FivePartLocation`:
* directory separators are normalized, but drive letters `C:` may appear.
*/
export function getPathRelativeToSourceLocationPrefix(
sourceLocationPrefix: string,
sarifRelativeUui: string
sarifRelativeUri: string
) {
const normalizedSourceLocationPrefix = sourceLocationPrefix.replace(/\\/g, '/');
return `file:${
path.join(normalizedSourceLocationPrefix, decodeURIComponent(sarifRelativeUui))
}`;
path.join(normalizedSourceLocationPrefix, sarifRelativeUri)
}`;
}
export function parseSarifLocation(
@@ -89,12 +96,13 @@ export function parseSarifLocation(
const uri = physicalLocation.artifactLocation.uri;
const fileUriRegex = /^file:/;
const effectiveLocation = uri.match(fileUriRegex)
const hasFilePrefix = uri.match(fileUriRegex);
const effectiveLocation = hasFilePrefix
? uri
: getPathRelativeToSourceLocationPrefix(sourceLocationPrefix, uri);
const userVisibleFile = uri.match(fileUriRegex)
? decodeURIComponent(uri.replace(fileUriRegex, ''))
: uri;
const userVisibleFile = decodeURIComponent(hasFilePrefix
? uri.replace(fileUriRegex, '')
: uri);
if (physicalLocation.region === undefined) {
// If the region property is absent, the physicalLocation object refers to the entire file.

View File

@@ -1,39 +0,0 @@
import 'mocha';
import { expect } from 'chai';
import { parseSarifPlainTextMessage } from '../../sarif-utils';
// Tests for parseSarifPlainTextMessage: each case feeds a plain-text SARIF
// message and checks the resulting list of literal strings and
// `{ dest, text }` link components.
describe('parsing sarif', () => {
// A single `[text](id)` link is split out from the surrounding plain text.
it('should be able to parse a simple message from the spec', async function() {
const message = 'Tainted data was used. The data came from [here](3).';
const results = parseSarifPlainTextMessage(message);
expect(results).to.deep.equal([
'Tainted data was used. The data came from ',
{ dest: 3, text: 'here' }, '.'
]);
});
// Escaped `[`, `]` and `\` inside the link text are expected to come back
// unescaped in the link's `text`.
it('should be able to parse a complex message from the spec', async function() {
const message = 'Prohibited term used in [para\\[0\\]\\\\spans\\[2\\]](1).';
const results = parseSarifPlainTextMessage(message);
expect(results).to.deep.equal([
'Prohibited term used in ',
{ dest: 1, text: 'para[0]\\spans[2]' }, '.'
]);
});
// Here the only `]` before `(1)` is escaped, so there is no complete link:
// the expected result is one unescaped string and no link component.
it('should be able to parse a broken complex message from the spec', async function() {
const message = 'Prohibited term used in [para\\[0\\]\\\\spans\\[2\\](1).';
const results = parseSarifPlainTextMessage(message);
expect(results).to.deep.equal([
'Prohibited term used in [para[0]\\spans[2](1).'
]);
});
// The opening bracket is escaped, so `[here](3)` is expected to remain
// plain text rather than being parsed as a link.
it('should be able to parse a message with extra escaping the spec', async function() {
const message = 'Tainted data was used. The data came from \\[here](3).';
const results = parseSarifPlainTextMessage(message);
expect(results).to.deep.equal([
'Tainted data was used. The data came from [here](3).'
]);
});
});

View File

@@ -0,0 +1,119 @@
import 'mocha';
import { expect } from 'chai';
import * as Sarif from 'sarif';
import { getPathRelativeToSourceLocationPrefix, parseSarifLocation, parseSarifPlainTextMessage, unescapeSarifText } from '../../src/sarif-utils';
// Tests for the sarif-utils helpers imported above: message parsing,
// text unescaping, source-location path handling and location parsing.
describe('parsing sarif', () => {
// A single `[text](id)` link is split out from the surrounding plain text.
it('should be able to parse a simple message from the spec', async function() {
const message = 'Tainted data was used. The data came from [here](3).';
const results = parseSarifPlainTextMessage(message);
expect(results).to.deep.equal([
'Tainted data was used. The data came from ',
{ dest: 3, text: 'here' }, '.'
]);
});
// Escaped `[`, `]` and `\` inside the link text are expected to come back
// unescaped in the link's `text`.
it('should be able to parse a complex message from the spec', async function() {
const message = 'Prohibited term used in [para\\[0\\]\\\\spans\\[2\\]](1).';
const results = parseSarifPlainTextMessage(message);
expect(results).to.deep.equal([
'Prohibited term used in ',
{ dest: 1, text: 'para[0]\\spans[2]' }, '.'
]);
});
// Here the only `]` before `(1)` is escaped, so there is no complete link:
// the expected result is one unescaped string and no link component.
it('should be able to parse a broken complex message from the spec', async function() {
const message = 'Prohibited term used in [para\\[0\\]\\\\spans\\[2\\](1).';
const results = parseSarifPlainTextMessage(message);
expect(results).to.deep.equal([
'Prohibited term used in [para[0]\\spans[2](1).'
]);
});
// The opening bracket is escaped, so `[here](3)` is expected to remain
// plain text rather than being parsed as a link.
it('should be able to parse a message with extra escaping the spec', async function() {
const message = 'Tainted data was used. The data came from \\[here](3).';
const results = parseSarifPlainTextMessage(message);
expect(results).to.deep.equal([
'Tainted data was used. The data came from [here](3).'
]);
});
// Each escape sequence appears twice in the input so that the assertion
// only passes when every occurrence is replaced, not just the first one.
it('should unescape sarif text', () => {
expect(unescapeSarifText('\\\\ \\\\ \\[ \\[ \\] \\]')).to.eq('\\ \\ [ [ ] ]');
// Special characters that are not escaped are expected to pass through unchanged.
// NOTE(review): the original author questioned whether this is the desired
// behavior; confirm before relying on it.
expect(unescapeSarifText('\\ \\ [ [ ] ]')).to.eq('\\ \\ [ [ ] ]');
});
// The Windows-style prefix is expected to come back with forward slashes and
// a `file:` scheme. The relative URI is expected to be appended unchanged:
// an unencoded `?x=test` stays unencoded and an encoded `%3Fx%3Dtest` stays
// encoded.
it('should normalize source locations', () => {
expect(getPathRelativeToSourceLocationPrefix('C:\\a\\b', '?x=test'))
.to.eq('file:C:/a/b/?x=test');
expect(getPathRelativeToSourceLocationPrefix('C:\\a\\b', '%3Fx%3Dtest'))
.to.eq('file:C:/a/b/%3Fx%3Dtest');
});
describe('parseSarifLocation', () => {
// Each missing level (physical location, artifact location, uri) is
// expected to produce its own distinct `hint` instead of a location.
it('should parse a sarif location with "no location"', () => {
expect(parseSarifLocation({ }, '')).to.deep.equal({
hint: 'no physical location'
});
expect(parseSarifLocation({ physicalLocation: {} }, '')).to.deep.equal({
hint: 'no artifact location'
});
expect(parseSarifLocation({ physicalLocation: { artifactLocation: { } } }, '')).to.deep.equal({
hint: 'artifact location has no uri'
});
});
// A uri without the `file:` scheme is expected to be resolved against the
// source location prefix, while `userVisibleFile` keeps the relative form.
it('should parse a sarif location with no region and no file protocol', () => {
const location: Sarif.Location = {
physicalLocation: {
artifactLocation: {
uri: 'abc?x=test'
}
}
};
expect(parseSarifLocation(location, 'prefix')).to.deep.equal({
uri: 'file:prefix/abc?x=test',
userVisibleFile: 'abc?x=test'
});
});
// A uri that already has the `file:` scheme is expected to be kept as-is
// (still percent-encoded); `userVisibleFile` is the decoded path without
// the scheme.
it('should parse a sarif location with no region and file protocol', () => {
const location: Sarif.Location = {
physicalLocation: {
artifactLocation: {
uri: 'file:abc%3Fx%3Dtest'
}
}
};
expect(parseSarifLocation(location, 'prefix')).to.deep.equal({
uri: 'file:abc%3Fx%3Dtest',
userVisibleFile: 'abc?x=test'
});
});
// With a region present, the line/column fields are expected in the result.
// Note that the expected `endColumn` (3) is one less than the region's
// `endColumn` (4); presumably parseSarifLocation adjusts SARIF's end column
// to the convention its callers use (TODO confirm against the implementation).
it('should parse a sarif location with a region and file protocol', () => {
const location: Sarif.Location = {
physicalLocation: {
artifactLocation: {
uri: 'file:abc%3Fx%3Dtest'
},
region: {
startLine: 1,
startColumn: 2,
endLine: 3,
endColumn: 4
}
}
};
expect(parseSarifLocation(location, 'prefix')).to.deep.equal({
uri: 'file:abc%3Fx%3Dtest',
userVisibleFile: 'abc?x=test',
startLine: 1,
startColumn: 2,
endLine: 3,
endColumn: 3
});
});
});
});