Merge pull request #2858 from github/dbartol/long-strings
Use streaming when creating log symbols file.
This commit is contained in:
@@ -6,7 +6,6 @@ import { dirname, join, delimiter } from "path";
|
||||
import * as sarif from "sarif";
|
||||
import { SemVer } from "semver";
|
||||
import { Readable } from "stream";
|
||||
import { StringDecoder } from "string_decoder";
|
||||
import tk from "tree-kill";
|
||||
import { promisify } from "util";
|
||||
import { CancellationToken, Disposable, Uri } from "vscode";
|
||||
@@ -31,6 +30,7 @@ import { CompilationMessage } from "../query-server/legacy-messages";
|
||||
import { sarifParser } from "../common/sarif-parser";
|
||||
import { App } from "../common/app";
|
||||
import { QueryLanguage } from "../common/query-language";
|
||||
import { LINE_ENDINGS, splitStreamAtSeparators } from "../common/split-stream";
|
||||
|
||||
/**
|
||||
* The version of the SARIF format that we are using.
|
||||
@@ -1649,120 +1649,13 @@ export async function runCodeQlCliCommand(
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Buffer to hold state used when splitting a text stream into lines.
|
||||
*/
|
||||
class SplitBuffer {
|
||||
private readonly decoder = new StringDecoder("utf8");
|
||||
private readonly maxSeparatorLength: number;
|
||||
private buffer = "";
|
||||
private searchIndex = 0;
|
||||
|
||||
constructor(private readonly separators: readonly string[]) {
|
||||
this.maxSeparatorLength = separators
|
||||
.map((s) => s.length)
|
||||
.reduce((a, b) => Math.max(a, b), 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Append new text data to the buffer.
|
||||
* @param chunk The chunk of data to append.
|
||||
*/
|
||||
public addChunk(chunk: Buffer): void {
|
||||
this.buffer += this.decoder.write(chunk);
|
||||
}
|
||||
|
||||
/**
|
||||
* Signal that the end of the input stream has been reached.
|
||||
*/
|
||||
public end(): void {
|
||||
this.buffer += this.decoder.end();
|
||||
this.buffer += this.separators[0]; // Append a separator to the end to ensure the last line is returned.
|
||||
}
|
||||
|
||||
/**
|
||||
* A version of startsWith that isn't overriden by a broken version of ms-python.
|
||||
*
|
||||
* The definition comes from
|
||||
* https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/startsWith
|
||||
* which is CC0/public domain
|
||||
*
|
||||
* See https://github.com/github/vscode-codeql/issues/802 for more context as to why we need it.
|
||||
*/
|
||||
private static startsWith(
|
||||
s: string,
|
||||
searchString: string,
|
||||
position: number,
|
||||
): boolean {
|
||||
const pos = position > 0 ? position | 0 : 0;
|
||||
return s.substring(pos, pos + searchString.length) === searchString;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract the next full line from the buffer, if one is available.
|
||||
* @returns The text of the next available full line (without the separator), or `undefined` if no
|
||||
* line is available.
|
||||
*/
|
||||
public getNextLine(): string | undefined {
|
||||
while (this.searchIndex <= this.buffer.length - this.maxSeparatorLength) {
|
||||
for (const separator of this.separators) {
|
||||
if (SplitBuffer.startsWith(this.buffer, separator, this.searchIndex)) {
|
||||
const line = this.buffer.slice(0, this.searchIndex);
|
||||
this.buffer = this.buffer.slice(this.searchIndex + separator.length);
|
||||
this.searchIndex = 0;
|
||||
return line;
|
||||
}
|
||||
}
|
||||
this.searchIndex++;
|
||||
}
|
||||
|
||||
return undefined;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Splits a text stream into lines based on a list of valid line separators.
|
||||
* @param stream The text stream to split. This stream will be fully consumed.
|
||||
* @param separators The list of strings that act as line separators.
|
||||
* @returns A sequence of lines (not including separators).
|
||||
*/
|
||||
async function* splitStreamAtSeparators(
|
||||
stream: Readable,
|
||||
separators: string[],
|
||||
): AsyncGenerator<string, void, unknown> {
|
||||
const buffer = new SplitBuffer(separators);
|
||||
for await (const chunk of stream) {
|
||||
buffer.addChunk(chunk);
|
||||
let line: string | undefined;
|
||||
do {
|
||||
line = buffer.getNextLine();
|
||||
if (line !== undefined) {
|
||||
yield line;
|
||||
}
|
||||
} while (line !== undefined);
|
||||
}
|
||||
buffer.end();
|
||||
let line: string | undefined;
|
||||
do {
|
||||
line = buffer.getNextLine();
|
||||
if (line !== undefined) {
|
||||
yield line;
|
||||
}
|
||||
} while (line !== undefined);
|
||||
}
|
||||
|
||||
/**
 * Standard line endings for splitting human-readable text.
 *
 * "\r\n" appears first so that, where separator order matters, a full CRLF is
 * consumed as one separator before the lone "\r" or "\n" alternatives are tried.
 */
const lineEndings = ["\r\n", "\r", "\n"];
|
||||
|
||||
/**
|
||||
* Log a text stream to a `Logger` interface.
|
||||
* @param stream The stream to log.
|
||||
* @param logger The logger that will consume the stream output.
|
||||
*/
|
||||
async function logStream(stream: Readable, logger: BaseLogger): Promise<void> {
|
||||
for await (const line of splitStreamAtSeparators(stream, lineEndings)) {
|
||||
for await (const line of splitStreamAtSeparators(stream, LINE_ENDINGS)) {
|
||||
// Await the result of log here in order to ensure the logs are written in the correct order.
|
||||
await logger.log(line);
|
||||
}
|
||||
|
||||
125 extensions/ql-vscode/src/common/split-stream.ts (new file)
@@ -0,0 +1,125 @@
|
||||
import { Readable } from "stream";
|
||||
import { StringDecoder } from "string_decoder";
|
||||
|
||||
/**
|
||||
* Buffer to hold state used when splitting a text stream into lines.
|
||||
*/
|
||||
export class SplitBuffer {
|
||||
private readonly decoder = new StringDecoder("utf8");
|
||||
private readonly maxSeparatorLength: number;
|
||||
private buffer = "";
|
||||
private searchIndex = 0;
|
||||
private ended = false;
|
||||
|
||||
constructor(private readonly separators: readonly string[]) {
|
||||
this.maxSeparatorLength = separators
|
||||
.map((s) => s.length)
|
||||
.reduce((a, b) => Math.max(a, b), 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Append new text data to the buffer.
|
||||
* @param chunk The chunk of data to append.
|
||||
*/
|
||||
public addChunk(chunk: Buffer): void {
|
||||
this.buffer += this.decoder.write(chunk);
|
||||
}
|
||||
|
||||
/**
|
||||
* Signal that the end of the input stream has been reached.
|
||||
*/
|
||||
public end(): void {
|
||||
this.buffer += this.decoder.end();
|
||||
this.ended = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* A version of startsWith that isn't overriden by a broken version of ms-python.
|
||||
*
|
||||
* The definition comes from
|
||||
* https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/startsWith
|
||||
* which is CC0/public domain
|
||||
*
|
||||
* See https://github.com/github/vscode-codeql/issues/802 for more context as to why we need it.
|
||||
*/
|
||||
private static startsWith(
|
||||
s: string,
|
||||
searchString: string,
|
||||
position: number,
|
||||
): boolean {
|
||||
const pos = position > 0 ? position | 0 : 0;
|
||||
return s.substring(pos, pos + searchString.length) === searchString;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract the next full line from the buffer, if one is available.
|
||||
* @returns The text of the next available full line (without the separator), or `undefined` if no
|
||||
* line is available.
|
||||
*/
|
||||
public getNextLine(): string | undefined {
|
||||
// If we haven't received all of the input yet, don't search too close to the end of the buffer,
|
||||
// or we could match a separator that's split across two chunks. For example, we could see "\r"
|
||||
// at the end of the buffer and match that, even though we were about to receive a "\n" right
|
||||
// after it.
|
||||
const maxSearchIndex = this.ended
|
||||
? this.buffer.length - 1
|
||||
: this.buffer.length - this.maxSeparatorLength;
|
||||
while (this.searchIndex <= maxSearchIndex) {
|
||||
for (const separator of this.separators) {
|
||||
if (SplitBuffer.startsWith(this.buffer, separator, this.searchIndex)) {
|
||||
const line = this.buffer.slice(0, this.searchIndex);
|
||||
this.buffer = this.buffer.slice(this.searchIndex + separator.length);
|
||||
this.searchIndex = 0;
|
||||
return line;
|
||||
}
|
||||
}
|
||||
this.searchIndex++;
|
||||
}
|
||||
|
||||
if (this.ended && this.buffer.length > 0) {
|
||||
// If we still have some text left in the buffer, return it as the last line.
|
||||
const line = this.buffer;
|
||||
this.buffer = "";
|
||||
this.searchIndex = 0;
|
||||
return line;
|
||||
} else {
|
||||
return undefined;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Splits a text stream into lines based on a list of valid line separators.
|
||||
* @param stream The text stream to split. This stream will be fully consumed.
|
||||
* @param separators The list of strings that act as line separators.
|
||||
* @returns A sequence of lines (not including separators).
|
||||
*/
|
||||
export async function* splitStreamAtSeparators(
|
||||
stream: Readable,
|
||||
separators: string[],
|
||||
): AsyncGenerator<string, void, unknown> {
|
||||
const buffer = new SplitBuffer(separators);
|
||||
for await (const chunk of stream) {
|
||||
buffer.addChunk(chunk);
|
||||
let line: string | undefined;
|
||||
do {
|
||||
line = buffer.getNextLine();
|
||||
if (line !== undefined) {
|
||||
yield line;
|
||||
}
|
||||
} while (line !== undefined);
|
||||
}
|
||||
buffer.end();
|
||||
let line: string | undefined;
|
||||
do {
|
||||
line = buffer.getNextLine();
|
||||
if (line !== undefined) {
|
||||
yield line;
|
||||
}
|
||||
} while (line !== undefined);
|
||||
}
|
||||
|
||||
/**
 * Standard line endings for splitting human-readable text.
 *
 * "\r\n" is listed first so that, where separator order matters, a full CRLF
 * is consumed as a single separator before the lone "\r" or "\n" alternatives
 * are tried.
 */
export const LINE_ENDINGS = ["\r\n", "\r", "\n"];
|
||||
@@ -1,4 +1,5 @@
|
||||
import { writeFile, promises } from "fs-extra";
|
||||
import { createReadStream, writeFile } from "fs-extra";
|
||||
import { LINE_ENDINGS, splitStreamAtSeparators } from "../common/split-stream";
|
||||
|
||||
/**
|
||||
* Location information for a single pipeline invocation in the RA.
|
||||
@@ -64,59 +65,64 @@ export async function generateSummarySymbolsFile(
|
||||
async function generateSummarySymbols(
|
||||
summaryPath: string,
|
||||
): Promise<SummarySymbols> {
|
||||
const summary = await promises.readFile(summaryPath, {
|
||||
const stream = createReadStream(summaryPath, {
|
||||
encoding: "utf-8",
|
||||
});
|
||||
const symbols: SummarySymbols = {
|
||||
predicates: {},
|
||||
};
|
||||
try {
|
||||
const lines = splitStreamAtSeparators(stream, LINE_ENDINGS);
|
||||
|
||||
const lines = summary.split(/\r?\n/);
|
||||
let lineNumber = 0;
|
||||
while (lineNumber < lines.length) {
|
||||
const startLineNumber = lineNumber;
|
||||
lineNumber++;
|
||||
const startLine = lines[startLineNumber];
|
||||
const nonRecursiveMatch = startLine.match(NON_RECURSIVE_TUPLE_COUNT_REGEXP);
|
||||
let predicateName: string | undefined = undefined;
|
||||
const symbols: SummarySymbols = {
|
||||
predicates: {},
|
||||
};
|
||||
|
||||
let lineNumber = 0;
|
||||
let raStartLine = 0;
|
||||
let iteration = 0;
|
||||
if (nonRecursiveMatch) {
|
||||
predicateName = nonRecursiveMatch.groups!.predicateName;
|
||||
} else {
|
||||
const recursiveMatch = startLine.match(RECURSIVE_TUPLE_COUNT_REGEXP);
|
||||
if (recursiveMatch?.groups) {
|
||||
predicateName = recursiveMatch.groups.predicateName;
|
||||
iteration = parseInt(recursiveMatch.groups.iteration);
|
||||
}
|
||||
}
|
||||
|
||||
if (predicateName !== undefined) {
|
||||
const raStartLine = lineNumber;
|
||||
let raEndLine: number | undefined = undefined;
|
||||
while (lineNumber < lines.length && raEndLine === undefined) {
|
||||
const raLine = lines[lineNumber];
|
||||
const returnMatch = raLine.match(RETURN_REGEXP);
|
||||
let predicateName: string | undefined = undefined;
|
||||
let startLine = 0;
|
||||
for await (const line of lines) {
|
||||
if (predicateName === undefined) {
|
||||
// Looking for the start of the predicate.
|
||||
const nonRecursiveMatch = line.match(NON_RECURSIVE_TUPLE_COUNT_REGEXP);
|
||||
if (nonRecursiveMatch) {
|
||||
iteration = 0;
|
||||
predicateName = nonRecursiveMatch.groups!.predicateName;
|
||||
} else {
|
||||
const recursiveMatch = line.match(RECURSIVE_TUPLE_COUNT_REGEXP);
|
||||
if (recursiveMatch?.groups) {
|
||||
predicateName = recursiveMatch.groups.predicateName;
|
||||
iteration = parseInt(recursiveMatch.groups.iteration);
|
||||
}
|
||||
}
|
||||
if (predicateName !== undefined) {
|
||||
startLine = lineNumber;
|
||||
raStartLine = lineNumber + 1;
|
||||
}
|
||||
} else {
|
||||
const returnMatch = line.match(RETURN_REGEXP);
|
||||
if (returnMatch) {
|
||||
raEndLine = lineNumber;
|
||||
}
|
||||
lineNumber++;
|
||||
}
|
||||
if (raEndLine !== undefined) {
|
||||
let symbol = symbols.predicates[predicateName];
|
||||
if (symbol === undefined) {
|
||||
symbol = {
|
||||
iterations: {},
|
||||
let symbol = symbols.predicates[predicateName];
|
||||
if (symbol === undefined) {
|
||||
symbol = {
|
||||
iterations: {},
|
||||
};
|
||||
symbols.predicates[predicateName] = symbol;
|
||||
}
|
||||
symbol.iterations[iteration] = {
|
||||
startLine,
|
||||
raStartLine,
|
||||
raEndLine: lineNumber,
|
||||
};
|
||||
symbols.predicates[predicateName] = symbol;
|
||||
}
|
||||
symbol.iterations[iteration] = {
|
||||
startLine: lineNumber,
|
||||
raStartLine,
|
||||
raEndLine,
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return symbols;
|
||||
predicateName = undefined;
|
||||
}
|
||||
}
|
||||
|
||||
lineNumber++;
|
||||
}
|
||||
|
||||
return symbols;
|
||||
} finally {
|
||||
stream.close();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,84 @@
|
||||
import { LINE_ENDINGS, SplitBuffer } from "../../../src/common/split-stream";
|
||||
|
||||
/** One step of a SplitBuffer test: a chunk of input plus the expected output. */
interface Chunk {
  /** Raw text appended to the buffer as a single chunk. */
  chunk: string;
  /** Lines expected to be returned immediately after this chunk is added. */
  lines: string[];
}
|
||||
|
||||
function checkLines(
|
||||
buffer: SplitBuffer,
|
||||
expectedLinesForChunk: string[],
|
||||
chunkIndex: number | "end",
|
||||
): void {
|
||||
expectedLinesForChunk.forEach((expectedLine, lineIndex) => {
|
||||
const line = buffer.getNextLine();
|
||||
const location = `[chunk ${chunkIndex}, line ${lineIndex}]: `;
|
||||
expect(location + line).toEqual(location + expectedLine);
|
||||
});
|
||||
expect(buffer.getNextLine()).toBeUndefined();
|
||||
}
|
||||
|
||||
function testSplitBuffer(chunks: Chunk[], endLines: string[]): void {
|
||||
const buffer = new SplitBuffer(LINE_ENDINGS);
|
||||
chunks.forEach((chunk, chunkIndex) => {
|
||||
buffer.addChunk(Buffer.from(chunk.chunk, "utf-8"));
|
||||
checkLines(buffer, chunk.lines, chunkIndex);
|
||||
});
|
||||
buffer.end();
|
||||
checkLines(buffer, endLines, "end");
|
||||
}
|
||||
|
||||
describe("split buffer", () => {
  it("should handle a one-chunk string with no terminator", async () => {
    // Won't return the line until we call `end()`.
    testSplitBuffer([{ chunk: "some text", lines: [] }], ["some text"]);
  });

  it("should handle a one-chunk string with a one-byte terminator", async () => {
    // Won't return the line until we call `end()` because the actual terminator is shorter than the
    // longest terminator.
    testSplitBuffer([{ chunk: "some text\n", lines: [] }], ["some text"]);
  });

  it("should handle a one-chunk string with a two-byte terminator", async () => {
    // "\r\n" is as long as the longest terminator, so the line is available immediately.
    testSplitBuffer([{ chunk: "some text\r\n", lines: ["some text"] }], []);
  });

  it("should handle a multi-chunk string with terminators at the end of each chunk", async () => {
    testSplitBuffer(
      [
        { chunk: "first line\n", lines: [] }, // Waiting for second potential terminator byte
        { chunk: "second line\r", lines: ["first line"] }, // Waiting for second potential terminator byte
        { chunk: "third line\r\n", lines: ["second line", "third line"] }, // No wait, because we're at the end
      ],
      [],
    );
  });

  it("should handle a multi-chunk string with terminators at random offsets", async () => {
    testSplitBuffer(
      [
        { chunk: "first line\nsecond", lines: ["first line"] },
        {
          chunk: " line\rthird line",
          lines: ["second line"],
        },
        { chunk: "\r\n", lines: ["third line"] },
      ],
      [],
    );
  });

  it("should handle a terminator split between chunks", async () => {
    // The trailing "\r" of the first chunk must not be treated as a complete
    // separator until the next chunk shows whether a "\n" follows it.
    testSplitBuffer(
      [
        { chunk: "first line\r", lines: [] },
        {
          chunk: "\nsecond line",
          lines: ["first line"],
        },
      ],
      ["second line"],
    );
  });
});
|
||||
Reference in New Issue
Block a user