Limit SARIF code snippet size

This adds new filtering of very large SARIF code snippets (defined as
more than 8192 characters). If less than 1% of such a snippet is
highlighted, the code snippet is not included in the analysed results,
and it will thus not be shown in the UI.

This is to avoid very large SARIF files that can cause the extension
host to crash when the analysis results are sent to the UI. I don't
think any of these snippets would ever be useful to show, so it should
be fine to just not include them.
This commit is contained in:
Koen Vlaswinkel
2022-12-02 15:33:44 +01:00
parent c9d6bfd32e
commit c0187a5650
2 changed files with 40 additions and 6 deletions

View File

@@ -1,5 +1,5 @@
import * as Sarif from "sarif";
import { HighlightedRegion } from "../remote-queries/shared/analysis-result";
import type { HighlightedRegion } from "../remote-queries/shared/analysis-result";
import { ResolvableLocationValue } from "./bqrs-cli-types";
export interface SarifLink {

View File

@@ -1,5 +1,6 @@
import * as sarif from "sarif";
import {
parseHighlightedLine,
parseSarifPlainTextMessage,
parseSarifRegion,
} from "../pure/sarif-utils";
@@ -15,6 +16,11 @@ import {
HighlightedRegion,
} from "./shared/analysis-result";
// Snippets whose text is longer than this many characters (8k) are treated as
// large; text that big is probably generated or bundled code.
const CODE_SNIPPET_LARGE_LINE_SIZE_LIMIT = 8192;
// Minimum fraction (0.01 = 1%) of a large snippet's characters that must be
// highlighted; below this, the snippet is dropped rather than shown.
const CODE_SNIPPET_HIGHLIGHTED_REGION_MINIMUM_PERCENTAGE = 0.01;
const defaultSeverity = "Warning";
export function extractAnalysisAlerts(
@@ -163,17 +169,45 @@ export function tryGetRule(
}
function getCodeSnippet(
contextRegion?: sarif.Region,
region?: sarif.Region,
alternateRegion?: sarif.Region,
): CodeSnippet | undefined {
region = region ?? alternateRegion;
const actualRegion = contextRegion ?? region;
if (!region) {
if (!actualRegion) {
return undefined;
}
const text = region.snippet?.text || "";
const { startLine, endLine } = parseSarifRegion(region);
const text = actualRegion.snippet?.text || "";
const { startLine, endLine } = parseSarifRegion(actualRegion);
if (
contextRegion &&
region &&
text.length > CODE_SNIPPET_LARGE_LINE_SIZE_LIMIT
) {
const code = text.split("\n");
const highlightedRegion = parseSarifRegion(region);
const highlightedLines = code.map((line, index) => {
return parseHighlightedLine(line, startLine + index, highlightedRegion);
});
const highlightedCharactersCount = highlightedLines
.map((line) => line.highlightedSection.length)
.reduce((a, b) => a + b, 0);
const highlightedPercentage = highlightedCharactersCount / text.length;
if (
highlightedPercentage < CODE_SNIPPET_HIGHLIGHTED_REGION_MINIMUM_PERCENTAGE
) {
// If not enough is highlighted and the snippet is large, it's probably generated or bundled code and
// we don't want to show it.
return undefined;
}
}
return {
startLine,