Merge pull request #3772 from github/koesie10/only-compare-source-sink

Only compare source and sink in SARIF comparison
This commit is contained in:
Koen Vlaswinkel
2024-10-28 13:53:36 +01:00
committed by GitHub
4 changed files with 37373 additions and 39 deletions

View File

@@ -1,4 +1,4 @@
import type { Location, Result } from "sarif"; import type { Location, Result, ThreadFlowLocation } from "sarif";
function toCanonicalLocation(location: Location): Location { function toCanonicalLocation(location: Location): Location {
if (location.physicalLocation?.artifactLocation?.index !== undefined) { if (location.physicalLocation?.artifactLocation?.index !== undefined) {
@@ -25,6 +25,19 @@ function toCanonicalLocation(location: Location): Location {
return location; return location;
} }
function toCanonicalThreadFlowLocation(
threadFlowLocation: ThreadFlowLocation,
): ThreadFlowLocation {
if (threadFlowLocation.location) {
return {
...threadFlowLocation,
location: toCanonicalLocation(threadFlowLocation.location),
};
}
return threadFlowLocation;
}
function toCanonicalResult(result: Result): Result { function toCanonicalResult(result: Result): Result {
const canonicalResult = { const canonicalResult = {
...result, ...result,
@@ -40,37 +53,30 @@ function toCanonicalResult(result: Result): Result {
canonicalResult.relatedLocations.map(toCanonicalLocation); canonicalResult.relatedLocations.map(toCanonicalLocation);
} }
if (canonicalResult.codeFlows) { if (canonicalResult.codeFlows && canonicalResult.codeFlows.length > 0) {
canonicalResult.codeFlows = canonicalResult.codeFlows.map((codeFlow) => { // If there are codeFlows, we don't want to compare the full codeFlows. Instead, we just want to compare the
if (codeFlow.threadFlows) { // source and the sink (i.e. the first and last item). CodeQL should guarantee that the first and last threadFlow
return { // of every codeFlow is the same (i.e. every codeFlow has the same source and sink). Therefore, we just compare the
...codeFlow, // first codeFlow and ignore the other codeFlows completely.
threadFlows: codeFlow.threadFlows.map((threadFlow) => { // If the codeFlow has a length of 1, this doesn't change the result.
if (threadFlow.locations) {
return {
...threadFlow,
locations: threadFlow.locations.map((threadFlowLocation) => {
if (threadFlowLocation.location) {
return {
...threadFlowLocation,
location: toCanonicalLocation(
threadFlowLocation.location,
),
};
}
return threadFlowLocation; const source = {
}), ...canonicalResult.codeFlows[0].threadFlows[0],
}; };
} const sink = {
...canonicalResult.codeFlows[0].threadFlows[
canonicalResult.codeFlows[0].threadFlows.length - 1
],
};
source.locations = source.locations.map(toCanonicalThreadFlowLocation);
sink.locations = sink.locations.map(toCanonicalThreadFlowLocation);
return threadFlow; canonicalResult.codeFlows = [
}), {
}; ...canonicalResult.codeFlows[0],
} threadFlows: [source, sink],
},
return codeFlow; ];
});
} }
return canonicalResult; return canonicalResult;
@@ -79,11 +85,9 @@ function toCanonicalResult(result: Result): Result {
/** /**
* Compare the alerts of two queries. Use deep equality to determine if * Compare the alerts of two queries. Use deep equality to determine if
* results have been added or removed across two invocations of a query. * results have been added or removed across two invocations of a query.
* * It first canonicalizes the results to ensure that when small changes
* Assumptions: * to the query are made, the results are still considered the same. This
* * includes the removal of all paths except for the source and sink.
* 1. Queries have the same sort order
* 2. Results are not changed or re-ordered, they are only added or removed
* *
* @param fromResults the source query * @param fromResults the source query
* @param toResults the target query * @param toResults the target query
@@ -104,19 +108,30 @@ export function sarifDiff(fromResults: Result[], toResults: Result[]) {
const canonicalFromResults = fromResults.map(toCanonicalResult); const canonicalFromResults = fromResults.map(toCanonicalResult);
const canonicalToResults = toResults.map(toCanonicalResult); const canonicalToResults = toResults.map(toCanonicalResult);
const results = { const diffResults = {
from: arrayDiff(canonicalFromResults, canonicalToResults), from: arrayDiff(canonicalFromResults, canonicalToResults),
to: arrayDiff(canonicalToResults, canonicalFromResults), to: arrayDiff(canonicalToResults, canonicalFromResults),
}; };
if ( if (
fromResults.length === results.from.length && fromResults.length === diffResults.from.length &&
toResults.length === results.to.length toResults.length === diffResults.to.length
) { ) {
throw new Error("CodeQL Compare: No overlap between the selected queries."); throw new Error("CodeQL Compare: No overlap between the selected queries.");
} }
return results; // We don't want to return the canonical results, we want to return the original results.
// We can retrieve this by finding the index of the canonical result in the canonical results
// and then using that index to find the original result. This is possible because we know that
// we did a 1-to-1 map between the canonical results and the original results.
return {
from: diffResults.from.map(
(result) => fromResults[canonicalFromResults.indexOf(result)],
),
to: diffResults.to.map(
(result) => toResults[canonicalToResults.indexOf(result)],
),
};
} }
function arrayDiff<T>(source: readonly T[], toRemove: readonly T[]): T[] { function arrayDiff<T>(source: readonly T[], toRemove: readonly T[]): T[] {

File diff suppressed because it is too large Load Diff

View File

@@ -1,5 +1,7 @@
import type { Result } from "sarif"; import type { Result } from "sarif";
import { sarifDiff } from "../../../src/compare/sarif-diff"; import { sarifDiff } from "../../../src/compare/sarif-diff";
import { readJson } from "fs-extra";
import { resolve } from "path";
describe("sarifDiff", () => { describe("sarifDiff", () => {
const result1: Result = { const result1: Result = {
@@ -496,6 +498,28 @@ describe("sarifDiff", () => {
}); });
}); });
it("only compares the source and sink of a result", async () => {
const { result1, result2 } = (await readJson(
resolve(__dirname, "differentPathsSameSourceSink.json"),
)) as { result1: Result; result2: Result };
expect(sarifDiff([result1], [result2])).toEqual({
from: [],
to: [],
});
});
it("gives a diff when the source and sink of a result differ", async () => {
const { result1, result2 } = (await readJson(
resolve(__dirname, "differentPathsDifferentSourceSink.json"),
)) as { result1: Result; result2: Result };
expect(sarifDiff([result1, result2], [result2])).toEqual({
from: [result1],
to: [],
});
});
it("does not modify the input", () => { it("does not modify the input", () => {
const result1: Result = { const result1: Result = {
message: { message: {