Use streaming when creating log symbols file.

2023-09-25 14:24:10 -04:00
parent e0509f684b
commit c972a5c0de
3 changed files with 166 additions and 158 deletions
--- a/extensions/ql-vscode/src/codeql-cli/cli.ts
+++ b/extensions/ql-vscode/src/codeql-cli/cli.ts
@@ -6,7 +6,6 @@ import { dirname, join, delimiter } from "path";
 import * as sarif from "sarif";
 import { SemVer } from "semver";
 import { Readable } from "stream";
-import { StringDecoder } from "string_decoder";
 import tk from "tree-kill";
 import { promisify } from "util";
 import { CancellationToken, Disposable, Uri } from "vscode";
@@ -31,6 +30,7 @@ import { CompilationMessage } from "../query-server/legacy-messages";
 import { sarifParser } from "../common/sarif-parser";
 import { App } from "../common/app";
 import { QueryLanguage } from "../common/query-language";
+import { LINE_ENDINGS, splitStreamAtSeparators } from "../common/split-stream";

 /**
 * The version of the SARIF format that we are using.
@@ -1649,120 +1649,13 @@ export async function runCodeQlCliCommand(
  }
 }

-/**
- * Buffer to hold state used when splitting a text stream into lines.
- */
-class SplitBuffer {
-  private readonly decoder = new StringDecoder("utf8");
-  private readonly maxSeparatorLength: number;
-  private buffer = "";
-  private searchIndex = 0;
-
-  constructor(private readonly separators: readonly string[]) {
-    this.maxSeparatorLength = separators
-      .map((s) => s.length)
-      .reduce((a, b) => Math.max(a, b), 0);
-  }
-
-  /**
-   * Append new text data to the buffer.
-   * @param chunk The chunk of data to append.
-   */
-  public addChunk(chunk: Buffer): void {
-    this.buffer += this.decoder.write(chunk);
-  }
-
-  /**
-   * Signal that the end of the input stream has been reached.
-   */
-  public end(): void {
-    this.buffer += this.decoder.end();
-    this.buffer += this.separators[0]; // Append a separator to the end to ensure the last line is returned.
-  }
-
-  /**
-   * A version of startsWith that isn't overriden by a broken version of ms-python.
-   *
-   * The definition comes from
-   * https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/startsWith
-   * which is CC0/public domain
-   *
-   * See https://github.com/github/vscode-codeql/issues/802 for more context as to why we need it.
-   */
-  private static startsWith(
-    s: string,
-    searchString: string,
-    position: number,
-  ): boolean {
-    const pos = position > 0 ? position | 0 : 0;
-    return s.substring(pos, pos + searchString.length) === searchString;
-  }
-
-  /**
-   * Extract the next full line from the buffer, if one is available.
-   * @returns The text of the next available full line (without the separator), or `undefined` if no
-   * line is available.
-   */
-  public getNextLine(): string | undefined {
-    while (this.searchIndex <= this.buffer.length - this.maxSeparatorLength) {
-      for (const separator of this.separators) {
-        if (SplitBuffer.startsWith(this.buffer, separator, this.searchIndex)) {
-          const line = this.buffer.slice(0, this.searchIndex);
-          this.buffer = this.buffer.slice(this.searchIndex + separator.length);
-          this.searchIndex = 0;
-          return line;
-        }
-      }
-      this.searchIndex++;
-    }
-
-    return undefined;
-  }
-}
-
-/**
- * Splits a text stream into lines based on a list of valid line separators.
- * @param stream The text stream to split. This stream will be fully consumed.
- * @param separators The list of strings that act as line separators.
- * @returns A sequence of lines (not including separators).
- */
-async function* splitStreamAtSeparators(
-  stream: Readable,
-  separators: string[],
-): AsyncGenerator<string, void, unknown> {
-  const buffer = new SplitBuffer(separators);
-  for await (const chunk of stream) {
-    buffer.addChunk(chunk);
-    let line: string | undefined;
-    do {
-      line = buffer.getNextLine();
-      if (line !== undefined) {
-        yield line;
-      }
-    } while (line !== undefined);
-  }
-  buffer.end();
-  let line: string | undefined;
-  do {
-    line = buffer.getNextLine();
-    if (line !== undefined) {
-      yield line;
-    }
-  } while (line !== undefined);
-}
-
-/**
- *  Standard line endings for splitting human-readable text.
- */
-const lineEndings = ["\r\n", "\r", "\n"];
-
 /**
 * Log a text stream to a `Logger` interface.
 * @param stream The stream to log.
 * @param logger The logger that will consume the stream output.
 */
 async function logStream(stream: Readable, logger: BaseLogger): Promise<void> {
-  for await (const line of splitStreamAtSeparators(stream, lineEndings)) {
+  for await (const line of splitStreamAtSeparators(stream, LINE_ENDINGS)) {
    // Await the result of log here in order to ensure the logs are written in the correct order.
    await logger.log(line);
  }
--- a/extensions/ql-vscode/src/common/split-stream.ts
+++ b/extensions/ql-vscode/src/common/split-stream.ts
@@ -0,0 +1,109 @@
+import { Readable } from "stream";
+import { StringDecoder } from "string_decoder";
+
+/**
+ * Buffer to hold state used when splitting a text stream into lines.
+ */
+class SplitBuffer {
+  private readonly decoder = new StringDecoder("utf8");
+  private readonly maxSeparatorLength: number;
+  private buffer = "";
+  private searchIndex = 0;
+
+  constructor(private readonly separators: readonly string[]) {
+    this.maxSeparatorLength = separators
+      .map((s) => s.length)
+      .reduce((a, b) => Math.max(a, b), 0);
+  }
+
+  /**
+   * Append new text data to the buffer.
+   * @param chunk The chunk of data to append.
+   */
+  public addChunk(chunk: Buffer): void {
+    this.buffer += this.decoder.write(chunk);
+  }
+
+  /**
+   * Signal that the end of the input stream has been reached.
+   */
+  public end(): void {
+    this.buffer += this.decoder.end();
+    this.buffer += this.separators[0]; // Append a separator to the end to ensure the last line is returned.
+  }
+
+  /**
+   * A version of startsWith that isn't overridden by a broken version of ms-python.
+   *
+   * The definition comes from
+   * https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/startsWith
+   * which is CC0/public domain
+   *
+   * See https://github.com/github/vscode-codeql/issues/802 for more context as to why we need it.
+   */
+  private static startsWith(
+    s: string,
+    searchString: string,
+    position: number,
+  ): boolean {
+    const pos = position > 0 ? position | 0 : 0;
+    return s.substring(pos, pos + searchString.length) === searchString;
+  }
+
+  /**
+   * Extract the next full line from the buffer, if one is available.
+   * @returns The text of the next available full line (without the separator), or `undefined` if no
+   * line is available.
+   */
+  public getNextLine(): string | undefined {
+    while (this.searchIndex <= this.buffer.length - this.maxSeparatorLength) {
+      for (const separator of this.separators) {
+        if (SplitBuffer.startsWith(this.buffer, separator, this.searchIndex)) {
+          const line = this.buffer.slice(0, this.searchIndex);
+          this.buffer = this.buffer.slice(this.searchIndex + separator.length);
+          this.searchIndex = 0;
+          return line;
+        }
+      }
+      this.searchIndex++;
+    }
+
+    return undefined;
+  }
+}
+
+/**
+ * Splits a text stream into lines based on a list of valid line separators.
+ * @param stream The text stream to split. This stream will be fully consumed.
+ * @param separators The list of strings that act as line separators.
+ * @returns A sequence of lines (not including separators).
+ */
+export async function* splitStreamAtSeparators(
+  stream: Readable,
+  separators: string[],
+): AsyncGenerator<string, void, unknown> {
+  const buffer = new SplitBuffer(separators);
+  for await (const chunk of stream) {
+    buffer.addChunk(chunk);
+    let line: string | undefined;
+    do {
+      line = buffer.getNextLine();
+      if (line !== undefined) {
+        yield line;
+      }
+    } while (line !== undefined);
+  }
+  buffer.end();
+  let line: string | undefined;
+  do {
+    line = buffer.getNextLine();
+    if (line !== undefined) {
+      yield line;
+    }
+  } while (line !== undefined);
+}
+
+/**
+ *  Standard line endings for splitting human-readable text.
+ */
+export const LINE_ENDINGS = ["\r\n", "\r", "\n"];
--- a/extensions/ql-vscode/src/log-insights/summary-parser.ts
+++ b/extensions/ql-vscode/src/log-insights/summary-parser.ts
@@ -1,4 +1,5 @@
-import { writeFile, promises } from "fs-extra";
+import { createReadStream, writeFile } from "fs-extra";
+import { LINE_ENDINGS, splitStreamAtSeparators } from "../common/split-stream";

 /**
 * Location information for a single pipeline invocation in the RA.
@@ -64,59 +65,64 @@ export async function generateSummarySymbolsFile(
 async function generateSummarySymbols(
  summaryPath: string,
 ): Promise<SummarySymbols> {
-  const summary = await promises.readFile(summaryPath, {
+  const stream = createReadStream(summaryPath, {
    encoding: "utf-8",
  });
-  const symbols: SummarySymbols = {
-    predicates: {},
-  };
+  try {
+    const lines = splitStreamAtSeparators(stream, LINE_ENDINGS);

-  const lines = summary.split(/\r?\n/);
-  let lineNumber = 0;
-  while (lineNumber < lines.length) {
-    const startLineNumber = lineNumber;
-    lineNumber++;
-    const startLine = lines[startLineNumber];
-    const nonRecursiveMatch = startLine.match(NON_RECURSIVE_TUPLE_COUNT_REGEXP);
-    let predicateName: string | undefined = undefined;
+    const symbols: SummarySymbols = {
+      predicates: {},
+    };
+
+    let lineNumber = 0;
+    let raStartLine = 0;
    let iteration = 0;
-    if (nonRecursiveMatch) {
-      predicateName = nonRecursiveMatch.groups!.predicateName;
-    } else {
-      const recursiveMatch = startLine.match(RECURSIVE_TUPLE_COUNT_REGEXP);
-      if (recursiveMatch?.groups) {
-        predicateName = recursiveMatch.groups.predicateName;
-        iteration = parseInt(recursiveMatch.groups.iteration);
-      }
-    }
-
-    if (predicateName !== undefined) {
-      const raStartLine = lineNumber;
-      let raEndLine: number | undefined = undefined;
-      while (lineNumber < lines.length && raEndLine === undefined) {
-        const raLine = lines[lineNumber];
-        const returnMatch = raLine.match(RETURN_REGEXP);
+    let predicateName: string | undefined = undefined;
+    let startLine = 0;
+    for await (const line of lines) {
+      if (predicateName === undefined) {
+        // Looking for the start of the predicate.
+        const nonRecursiveMatch = line.match(NON_RECURSIVE_TUPLE_COUNT_REGEXP);
+        if (nonRecursiveMatch) {
+          iteration = 0;
+          predicateName = nonRecursiveMatch.groups!.predicateName;
+        } else {
+          const recursiveMatch = line.match(RECURSIVE_TUPLE_COUNT_REGEXP);
+          if (recursiveMatch?.groups) {
+            predicateName = recursiveMatch.groups.predicateName;
+            iteration = parseInt(recursiveMatch.groups.iteration);
+          }
+        }
+        if (predicateName !== undefined) {
+          startLine = lineNumber;
+          raStartLine = lineNumber + 1;
+        }
+      } else {
+        const returnMatch = line.match(RETURN_REGEXP);
        if (returnMatch) {
-          raEndLine = lineNumber;
-        }
-        lineNumber++;
-      }
-      if (raEndLine !== undefined) {
-        let symbol = symbols.predicates[predicateName];
-        if (symbol === undefined) {
-          symbol = {
-            iterations: {},
+          let symbol = symbols.predicates[predicateName];
+          if (symbol === undefined) {
+            symbol = {
+              iterations: {},
+            };
+            symbols.predicates[predicateName] = symbol;
+          }
+          symbol.iterations[iteration] = {
+            startLine,
+            raStartLine,
+            raEndLine: lineNumber,
          };
-          symbols.predicates[predicateName] = symbol;
-        }
-        symbol.iterations[iteration] = {
-          startLine: lineNumber,
-          raStartLine,
-          raEndLine,
-        };
-      }
-    }
-  }

-  return symbols;
+          predicateName = undefined;
+        }
+      }
+
+      lineNumber++;
+    }
+
+    return symbols;
+  } finally {
+    stream.close();
+  }
 }