Use streaming when creating log symbols file.

This commit is contained in:
Dave Bartolomeo
2023-09-25 14:24:10 -04:00
parent e0509f684b
commit c972a5c0de
3 changed files with 166 additions and 158 deletions

View File

@@ -6,7 +6,6 @@ import { dirname, join, delimiter } from "path";
import * as sarif from "sarif";
import { SemVer } from "semver";
import { Readable } from "stream";
import { StringDecoder } from "string_decoder";
import tk from "tree-kill";
import { promisify } from "util";
import { CancellationToken, Disposable, Uri } from "vscode";
@@ -31,6 +30,7 @@ import { CompilationMessage } from "../query-server/legacy-messages";
import { sarifParser } from "../common/sarif-parser";
import { App } from "../common/app";
import { QueryLanguage } from "../common/query-language";
import { LINE_ENDINGS, splitStreamAtSeparators } from "../common/split-stream";
/**
* The version of the SARIF format that we are using.
@@ -1649,120 +1649,13 @@ export async function runCodeQlCliCommand(
}
}
/**
 * Buffer to hold state used when splitting a text stream into lines.
 *
 * Incoming chunks are decoded as UTF-8 and accumulated. `getNextLine` hands back
 * one line at a time as soon as a separator has been seen, and, once `end` has
 * been called, also returns whatever text follows the last separator.
 */
class SplitBuffer {
  private readonly decoder = new StringDecoder("utf8");
  private readonly maxSeparatorLength: number;
  private buffer = "";
  private searchIndex = 0;
  /** Set once `end` has been called; no further input is expected after that. */
  private ended = false;
  /** Set once the text following the last separator has been handed out. */
  private finalLineReturned = false;

  /**
   * @param separators The strings that terminate a line. At each position they are
   * tried in array order and the first match wins.
   */
  constructor(private readonly separators: readonly string[]) {
    this.maxSeparatorLength = separators.reduce(
      (longest, s) => Math.max(longest, s.length),
      0,
    );
  }

  /**
   * Append new text data to the buffer.
   * @param chunk The chunk of data to append.
   */
  public addChunk(chunk: Buffer): void {
    this.buffer += this.decoder.write(chunk);
  }

  /**
   * Signal that the end of the input stream has been reached.
   *
   * Any bytes still held by the decoder are flushed into the buffer. The remaining
   * text is then returned by `getNextLine` as the final line, regardless of which
   * separators were configured or in what order.
   */
  public end(): void {
    this.buffer += this.decoder.end();
    this.ended = true;
  }

  /**
   * A version of startsWith that isn't overridden by a broken version of ms-python.
   *
   * The definition comes from
   * https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/startsWith
   * which is CC0/public domain
   *
   * See https://github.com/github/vscode-codeql/issues/802 for more context as to why we need it.
   */
  private static startsWith(
    s: string,
    searchString: string,
    position: number,
  ): boolean {
    const pos = position > 0 ? position | 0 : 0;
    return s.substring(pos, pos + searchString.length) === searchString;
  }

  /**
   * Extract the next full line from the buffer, if one is available.
   * @returns The text of the next available full line (without the separator), or `undefined` if no
   * line is available.
   */
  public getNextLine(): string | undefined {
    // While more input may arrive, only examine positions that have room for the
    // longest separator, so a separator split across two chunks is not mistaken
    // for a shorter one. After the end of the stream every position is examined.
    const lookahead = this.ended ? 1 : this.maxSeparatorLength;
    while (this.searchIndex <= this.buffer.length - lookahead) {
      for (const separator of this.separators) {
        if (SplitBuffer.startsWith(this.buffer, separator, this.searchIndex)) {
          const line = this.buffer.slice(0, this.searchIndex);
          this.buffer = this.buffer.slice(this.searchIndex + separator.length);
          this.searchIndex = 0;
          return line;
        }
      }
      this.searchIndex++;
    }

    // At the end of the stream, the text after the last separator (possibly empty)
    // is the final line. It is returned exactly once.
    if (this.ended && !this.finalLineReturned) {
      this.finalLineReturned = true;
      const lastLine = this.buffer;
      this.buffer = "";
      this.searchIndex = 0;
      return lastLine;
    }
    return undefined;
  }
}
/**
 * Breaks a text stream into lines, treating each of the given strings as a line separator.
 * @param stream The text stream to read. It is consumed completely.
 * @param separators The strings that act as line separators.
 * @returns An async sequence of lines, with the separators stripped.
 */
async function* splitStreamAtSeparators(
  stream: Readable,
  separators: string[],
): AsyncGenerator<string, void, unknown> {
  const splitter = new SplitBuffer(separators);

  for await (const chunk of stream) {
    splitter.addChunk(chunk);
    // Emit every line that this chunk made available.
    for (
      let next = splitter.getNextLine();
      next !== undefined;
      next = splitter.getNextLine()
    ) {
      yield next;
    }
  }

  // Input is exhausted: tell the splitter, then emit whatever it still returns.
  splitter.end();
  for (
    let next = splitter.getNextLine();
    next !== undefined;
    next = splitter.getNextLine()
  ) {
    yield next;
  }
}
/**
 * Standard line endings for splitting human-readable text.
 *
 * "\r\n" is listed ahead of "\r" and "\n" so that a CRLF pair is consumed as a
 * single separator: `SplitBuffer.getNextLine` tries the separators in array order
 * at each position and takes the first one that matches.
 */
const lineEndings = ["\r\n", "\r", "\n"];
/**
* Log a text stream to a `Logger` interface.
* @param stream The stream to log.
* @param logger The logger that will consume the stream output.
*/
async function logStream(stream: Readable, logger: BaseLogger): Promise<void> {
for await (const line of splitStreamAtSeparators(stream, lineEndings)) {
for await (const line of splitStreamAtSeparators(stream, LINE_ENDINGS)) {
// Await the result of log here in order to ensure the logs are written in the correct order.
await logger.log(line);
}

View File

@@ -0,0 +1,109 @@
import { Readable } from "stream";
import { StringDecoder } from "string_decoder";
/**
 * Buffer to hold state used when splitting a text stream into lines.
 *
 * Incoming chunks are decoded as UTF-8 and accumulated. `getNextLine` hands back
 * one line at a time as soon as a separator has been seen, and, once `end` has
 * been called, also returns whatever text follows the last separator.
 */
class SplitBuffer {
  private readonly decoder = new StringDecoder("utf8");
  private readonly maxSeparatorLength: number;
  private buffer = "";
  private searchIndex = 0;
  /** Set once `end` has been called; no further input is expected after that. */
  private ended = false;
  /** Set once the text following the last separator has been handed out. */
  private finalLineReturned = false;

  /**
   * @param separators The strings that terminate a line. At each position they are
   * tried in array order and the first match wins.
   */
  constructor(private readonly separators: readonly string[]) {
    this.maxSeparatorLength = separators.reduce(
      (longest, s) => Math.max(longest, s.length),
      0,
    );
  }

  /**
   * Append new text data to the buffer.
   * @param chunk The chunk of data to append.
   */
  public addChunk(chunk: Buffer): void {
    this.buffer += this.decoder.write(chunk);
  }

  /**
   * Signal that the end of the input stream has been reached.
   *
   * Any bytes still held by the decoder are flushed into the buffer. The remaining
   * text is then returned by `getNextLine` as the final line, regardless of which
   * separators were configured or in what order.
   */
  public end(): void {
    this.buffer += this.decoder.end();
    this.ended = true;
  }

  /**
   * A version of startsWith that isn't overridden by a broken version of ms-python.
   *
   * The definition comes from
   * https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/startsWith
   * which is CC0/public domain
   *
   * See https://github.com/github/vscode-codeql/issues/802 for more context as to why we need it.
   */
  private static startsWith(
    s: string,
    searchString: string,
    position: number,
  ): boolean {
    const pos = position > 0 ? position | 0 : 0;
    return s.substring(pos, pos + searchString.length) === searchString;
  }

  /**
   * Extract the next full line from the buffer, if one is available.
   * @returns The text of the next available full line (without the separator), or `undefined` if no
   * line is available.
   */
  public getNextLine(): string | undefined {
    // While more input may arrive, only examine positions that have room for the
    // longest separator, so a separator split across two chunks is not mistaken
    // for a shorter one. After the end of the stream every position is examined.
    const lookahead = this.ended ? 1 : this.maxSeparatorLength;
    while (this.searchIndex <= this.buffer.length - lookahead) {
      for (const separator of this.separators) {
        if (SplitBuffer.startsWith(this.buffer, separator, this.searchIndex)) {
          const line = this.buffer.slice(0, this.searchIndex);
          this.buffer = this.buffer.slice(this.searchIndex + separator.length);
          this.searchIndex = 0;
          return line;
        }
      }
      this.searchIndex++;
    }

    // At the end of the stream, the text after the last separator (possibly empty)
    // is the final line. It is returned exactly once.
    if (this.ended && !this.finalLineReturned) {
      this.finalLineReturned = true;
      const lastLine = this.buffer;
      this.buffer = "";
      this.searchIndex = 0;
      return lastLine;
    }
    return undefined;
  }
}
/**
 * Breaks a text stream into lines, treating each of the given strings as a line separator.
 * @param stream The text stream to read. It is consumed completely.
 * @param separators The strings that act as line separators.
 * @returns An async sequence of lines, with the separators stripped.
 */
export async function* splitStreamAtSeparators(
  stream: Readable,
  separators: string[],
): AsyncGenerator<string, void, unknown> {
  const splitter = new SplitBuffer(separators);

  for await (const chunk of stream) {
    splitter.addChunk(chunk);
    // Emit every line that this chunk made available.
    for (
      let next = splitter.getNextLine();
      next !== undefined;
      next = splitter.getNextLine()
    ) {
      yield next;
    }
  }

  // Input is exhausted: tell the splitter, then emit whatever it still returns.
  splitter.end();
  for (
    let next = splitter.getNextLine();
    next !== undefined;
    next = splitter.getNextLine()
  ) {
    yield next;
  }
}
/**
 * Standard line endings for splitting human-readable text.
 *
 * "\r\n" is listed ahead of "\r" and "\n" so that a CRLF pair is consumed as a
 * single separator: `SplitBuffer.getNextLine` tries the separators in array order
 * at each position and takes the first one that matches.
 */
export const LINE_ENDINGS = ["\r\n", "\r", "\n"];

View File

@@ -1,4 +1,5 @@
import { writeFile, promises } from "fs-extra";
import { createReadStream, writeFile } from "fs-extra";
import { LINE_ENDINGS, splitStreamAtSeparators } from "../common/split-stream";
/**
* Location information for a single pipeline invocation in the RA.
@@ -64,59 +65,64 @@ export async function generateSummarySymbolsFile(
async function generateSummarySymbols(
summaryPath: string,
): Promise<SummarySymbols> {
const summary = await promises.readFile(summaryPath, {
const stream = createReadStream(summaryPath, {
encoding: "utf-8",
});
const symbols: SummarySymbols = {
predicates: {},
};
try {
const lines = splitStreamAtSeparators(stream, LINE_ENDINGS);
const lines = summary.split(/\r?\n/);
let lineNumber = 0;
while (lineNumber < lines.length) {
const startLineNumber = lineNumber;
lineNumber++;
const startLine = lines[startLineNumber];
const nonRecursiveMatch = startLine.match(NON_RECURSIVE_TUPLE_COUNT_REGEXP);
let predicateName: string | undefined = undefined;
const symbols: SummarySymbols = {
predicates: {},
};
let lineNumber = 0;
let raStartLine = 0;
let iteration = 0;
if (nonRecursiveMatch) {
predicateName = nonRecursiveMatch.groups!.predicateName;
} else {
const recursiveMatch = startLine.match(RECURSIVE_TUPLE_COUNT_REGEXP);
if (recursiveMatch?.groups) {
predicateName = recursiveMatch.groups.predicateName;
iteration = parseInt(recursiveMatch.groups.iteration);
}
}
if (predicateName !== undefined) {
const raStartLine = lineNumber;
let raEndLine: number | undefined = undefined;
while (lineNumber < lines.length && raEndLine === undefined) {
const raLine = lines[lineNumber];
const returnMatch = raLine.match(RETURN_REGEXP);
let predicateName: string | undefined = undefined;
let startLine = 0;
for await (const line of lines) {
if (predicateName === undefined) {
// Looking for the start of the predicate.
const nonRecursiveMatch = line.match(NON_RECURSIVE_TUPLE_COUNT_REGEXP);
if (nonRecursiveMatch) {
iteration = 0;
predicateName = nonRecursiveMatch.groups!.predicateName;
} else {
const recursiveMatch = line.match(RECURSIVE_TUPLE_COUNT_REGEXP);
if (recursiveMatch?.groups) {
predicateName = recursiveMatch.groups.predicateName;
iteration = parseInt(recursiveMatch.groups.iteration);
}
}
if (predicateName !== undefined) {
startLine = lineNumber;
raStartLine = lineNumber + 1;
}
} else {
const returnMatch = line.match(RETURN_REGEXP);
if (returnMatch) {
raEndLine = lineNumber;
}
lineNumber++;
}
if (raEndLine !== undefined) {
let symbol = symbols.predicates[predicateName];
if (symbol === undefined) {
symbol = {
iterations: {},
let symbol = symbols.predicates[predicateName];
if (symbol === undefined) {
symbol = {
iterations: {},
};
symbols.predicates[predicateName] = symbol;
}
symbol.iterations[iteration] = {
startLine,
raStartLine,
raEndLine: lineNumber,
};
symbols.predicates[predicateName] = symbol;
}
symbol.iterations[iteration] = {
startLine: lineNumber,
raStartLine,
raEndLine,
};
}
}
}
return symbols;
predicateName = undefined;
}
}
lineNumber++;
}
return symbols;
} finally {
stream.close();
}
}