Initial implementation of join order metric scanning

Dave Bartolomeo
2022-05-03 13:20:30 -04:00
parent 1d195cb347
commit 1089a052ec
12 changed files with 883 additions and 7 deletions

View File

@@ -18,6 +18,7 @@
"d3-graphviz": "^2.6.1",
"fs-extra": "^10.0.1",
"glob-promise": "^3.4.0",
"immutable": "^4.0.0",
"js-yaml": "^3.14.0",
"minimist": "~1.2.6",
"nanoid": "^3.2.0",
@@ -6910,6 +6911,11 @@
"node": ">= 4"
}
},
"node_modules/immutable": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/immutable/-/immutable-4.0.0.tgz",
"integrity": "sha512-zIE9hX70qew5qTUjSS7wi1iwj/l7+m54KWU247nhM3v806UdGj1yDndXj+IOYxxtW9zyLI+xqFNZjTuDaLUqFw=="
},
"node_modules/import-fresh": {
"version": "3.2.1",
"resolved": "https://registry.npmjs.org/import-fresh/-/import-fresh-3.2.1.tgz",
@@ -19573,6 +19579,11 @@
"integrity": "sha512-cyFDKrqc/YdcWFniJhzI42+AzS+gNwmUzOSFcRCQYwySuBBBy/KjuxWLZ/FHEH6Moq1NizMOBWyTcv8O4OZIMg==",
"dev": true
},
"immutable": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/immutable/-/immutable-4.0.0.tgz",
"integrity": "sha512-zIE9hX70qew5qTUjSS7wi1iwj/l7+m54KWU247nhM3v806UdGj1yDndXj+IOYxxtW9zyLI+xqFNZjTuDaLUqFw=="
},
"import-fresh": {
"version": "3.2.1",
"resolved": "https://registry.npmjs.org/import-fresh/-/import-fresh-3.2.1.tgz",

View File

@@ -746,11 +746,6 @@
"group": "9_qlCommands",
"when": "codeql.supportsEvalLog && viewItem == rawResultsItem || codeql.supportsEvalLog && viewItem == interpretedResultsItem || codeql.supportsEvalLog && viewItem == cancelledResultsItem"
},
{
"command": "codeQLQueryHistory.scanEvalLog",
"group": "9_qlCommands",
"when": "codeql.supportsEvalLog && (viewItem == rawResultsItem || viewItem == interpretedResultsItem || viewItem == cancelledResultsItem)"
},
{
"command": "codeQLQueryHistory.showQueryText",
"group": "9_qlCommands",

View File

@@ -685,6 +685,23 @@ export class CodeQLCliServer implements Disposable {
return await this.runCodeQlCliCommand(['generate', 'log-summary'], subcommandArgs, 'Generating log summary');
}
/**
* Generate a JSON summary of an evaluation log.
* @param inputPath The path of an evaluation event log.
* @param outputPath The path to write a JSON summary of it to.
*/
async generateJsonLogSummary(
inputPath: string,
outputPath: string
): Promise<string> {
const subcommandArgs = [
'--minify-output',
inputPath,
outputPath
];
return await this.runCodeQlCliCommand(['generate', 'log-summary'], subcommandArgs, 'Generating JSON log summary');
}
/**
* Gets the results from a bqrs.
* @param bqrsPath The path to the bqrs.

View File

@@ -97,6 +97,7 @@ import { URLSearchParams } from 'url';
import { handleDownloadPacks, handleInstallPackDependencies } from './packaging';
import { RemoteQueryHistoryItem } from './remote-queries/remote-query-history-item';
import { HistoryItemLabelProvider } from './history-item-label-provider';
import { JoinOrderScannerProvider } from './log-insights/join-order';
/**
* extension.ts
@@ -470,6 +471,9 @@ async function activateWithInstalledDistribution(
ctx.subscriptions.push(qhm);
void logger.log('Initializing evaluation log scanners.');
ctx.subscriptions.push(qhm.registerLogScannerProvider(new JoinOrderScannerProvider()));
void logger.log('Initializing results panel interface.');
const intm = new InterfaceManager(ctx, dbm, cliServer, queryServerLogger, labelProvider);
ctx.subscriptions.push(intm);
@@ -504,6 +508,8 @@ async function activateWithInstalledDistribution(
forceReveal: WebviewReveal
): Promise<void> {
await intm.showResults(query, forceReveal, false);
// Always update the log warnings so they stay in sync with the results.
await qhm.scanEvalLog(query);
}
async function compileAndRunQuery(

View File

@@ -0,0 +1,458 @@
import * as I from 'immutable';
import { EvaluationLogProblemReporter, EvaluationLogScanner, EvaluationLogScannerProvider } from './log-scanner';
import { InLayer, ComputeRecursive, SummaryEvent, PipelineRun, ComputeSimple } from './log-summary';
const DEFAULT_WARNING_THRESHOLD = 50;
/**
* Like `max`, but returns 0 if no meaningful maximum can be computed.
*/
function safeMax(it: Iterable<number>) {
const m = Math.max(...it);
return Number.isFinite(m) ? m : 0;
}
/**
* Compute a key for the maps that are sent to report generation.
* Should only be used on events that are known to define queryCausingWork.
*/
function makeKey(
queryCausingWork: string | undefined,
predicate: string,
suffix = ''
): string {
if (queryCausingWork === undefined) {
throw new Error(
'queryCausingWork was not defined on an event we expected it to be defined for!'
);
}
return (
queryCausingWork +
':' +
predicate +
(suffix ? ' ' + suffix : '')
);
}
function getDependentPredicates(operations: string[]): I.List<string> {
const regexps = [
// SCAN id
String.raw`SCAN\s+([0-9a-zA-Z:#_]+)\s`,
// JOIN id WITH id
String.raw`JOIN\s+([0-9a-zA-Z:#_]+)\s+WITH\s+([0-9a-zA-Z:#_]+)\s`,
// AGGREGATE id, id
String.raw`AGGREGATE\s+([0-9a-zA-Z:#_]+)\s*,\s+([0-9a-zA-Z:#_]+)`,
// id AND NOT id
String.raw`([0-9a-zA-Z:#_]+)\s+AND\s+NOT\s+([0-9a-zA-Z:#_]+)`,
// INVOKE HIGHER-ORDER RELATION rel ON <id, ..., id>
String.raw`INVOKE\s+HIGHER-ORDER\s+RELATION\s[^\s]+\sON\s+<([0-9a-zA-Z:#_<>]+)((?:,[0-9a-zA-Z:#_<>]+)*)>`,
// SELECT id
String.raw`SELECT\s+([0-9a-zA-Z:#_]+)`
];
const r = new RegExp(
`${String.raw`\{[0-9]+\}\s+[0-9a-zA-Z]+\s=\s(?:` + regexps.join('|')})`
);
return I.List(operations).flatMap(operation => {
const matches = r.exec(operation.trim());
return I.List(matches!)
.rest() // Skip the first group as it's just the entire string
.filter(x => !!x && !x.match('r[0-9]+|PRIMITIVE')) // Only keep the references to predicates.
.flatMap(x => x.split(',')) // Group 2 in the INVOKE HIGHER-ORDER RELATION case is a comma-separated list of identifiers.
.filter(x => !!x); // Remove empty strings
});
}
function getMainHash(event: InLayer | ComputeRecursive): string {
switch (event.evaluationStrategy) {
case 'IN_LAYER':
return event.mainHash;
case 'COMPUTE_RECURSIVE':
return event.raHash;
}
}
/**
* Sum arrays a and b element-wise, and pad with 0s if the arrays are not the same length.
*/
function pointwiseSum(a: Int32Array, b: Int32Array): Int32Array {
function reportIfInconsistent(ai: number, bi: number) {
if (ai === -1 && bi !== -1) {
console.warn(
`Operation was not evaluated in the first pipeline, but it was evaluated in the accumulated pipeline (with tuple count ${bi}).`
);
}
if (ai !== -1 && bi === -1) {
console.warn(
`Operation was evaluated in the first pipeline (with tuple count ${ai}), but it was not evaluated in the accumulated pipeline.`
);
}
}
const length = Math.max(a.length, b.length);
const result = new Int32Array(length);
for (let i = 0; i < length; i++) {
const ai = a[i] || 0;
const bi = b[i] || 0;
// -1 is used to represent the absence of a tuple count for a line in the pretty-printed RA (e.g. an empty line), so we ignore those.
if (i < a.length && i < b.length && (ai === -1 || bi === -1)) {
result[i] = -1;
reportIfInconsistent(ai, bi);
} else {
result[i] = ai + bi;
}
}
return result;
}
function pushValue<K, V>(m: Map<K, V[]>, k: K, v: V) {
if (!m.has(k)) {
m.set(k, []);
}
m.get(k)!.push(v);
return m;
}
function computeJoinOrderBadness(
maxTupleCount: number,
maxDependentPredicateSize: number,
resultSize: number
): number {
return maxTupleCount / Math.max(maxDependentPredicateSize, resultSize);
}
/**
* A bucket contains the pointwise sums of the tuple counts, result sizes, and dependent predicate sizes.
* One bucket is computed for each (predicate, ordering) pair in an SCC.
*/
interface Bucket {
tupleCounts: Int32Array;
resultSize: number;
dependentPredicateSizes: I.Map<string, number>;
}
class JoinOrderScanner implements EvaluationLogScanner {
// Map a predicate hash to its result size
private readonly predicateSizes = new Map<string, number>();
private readonly layerEvents = new Map<string, (ComputeRecursive | InLayer)[]>();
// Map a key of the form 'query-with-demand : predicate name' to its badness input.
private readonly maxTupleCountMap = new Map<string, number[]>();
private readonly resultSizeMap = new Map<string, number[]>();
private readonly maxDependentPredicateSizeMap = new Map<string, number[]>();
private readonly joinOrderMetricMap = new Map<string, number>();
constructor(
private readonly problemReporter: EvaluationLogProblemReporter,
private readonly warningThreshold: number) {
}
public onEvent(event: SummaryEvent): void {
if (
event.completionType !== undefined &&
event.completionType !== 'SUCCESS'
) {
return; // Skip any evaluation that wasn't successful
}
this.recordPredicateSizes(event);
this.computeBadnessMetric(event);
}
public onDone(): void {
void this;
}
private recordPredicateSizes(event: SummaryEvent): void {
switch (event.evaluationStrategy) {
case 'EXTENSIONAL':
case 'COMPUTED_EXTENSIONAL':
case 'COMPUTE_SIMPLE':
case 'CACHACA':
case 'CACHE_HIT': {
this.predicateSizes.set(event.raHash, event.resultSize);
break;
}
case 'SENTINEL_EMPTY': {
this.predicateSizes.set(event.raHash, 0);
break;
}
case 'COMPUTE_RECURSIVE':
case 'IN_LAYER': {
this.predicateSizes.set(event.raHash, event.resultSize);
// layerEvents are indexed by the mainHash.
const hash = getMainHash(event);
if (!this.layerEvents.has(hash)) {
this.layerEvents.set(hash, []);
}
this.layerEvents.get(hash)!.push(event);
break;
}
}
}
private reportProblemIfNecessary(event: SummaryEvent, iteration: number, metric: number): void {
if (metric >= this.warningThreshold) {
this.problemReporter.reportProblem(event.predicateName, event.raHash, iteration,
`Relation '${event.predicateName}' has an inefficient join order. Its join order metric is ${metric}, which is larger than the threshold of ${this.warningThreshold}.`);
}
}
private computeBadnessMetric(event: SummaryEvent): void {
if (
event.completionType !== undefined &&
event.completionType !== 'SUCCESS'
) {
return; // Skip any evaluation that wasn't successful
}
switch (event.evaluationStrategy) {
case 'COMPUTE_SIMPLE':
if (!event.pipelineRuns) {
// skip if the optional pipelineRuns field is not present.
break;
}
// Compute the badness metric for a non-recursive predicate. The metric in this case is defined as:
// badness = (max tuple count in the pipeline) / max(size of the largest dependent predicate, result size)
const key = makeKey(event.queryCausingWork, event.predicateName);
const resultSize = event.resultSize;
// There is only one entry in `pipelineRuns` if it's a non-recursive predicate.
const { maxTupleCount, maxDependentPredicateSize } =
this.badnessInputsForNonRecursiveDelta(event.pipelineRuns[0], event);
if (maxDependentPredicateSize > 0) {
pushValue(this.maxTupleCountMap, key, maxTupleCount);
pushValue(this.resultSizeMap, key, resultSize);
pushValue(
this.maxDependentPredicateSizeMap,
key,
maxDependentPredicateSize
);
const metric = computeJoinOrderBadness(maxTupleCount, maxDependentPredicateSize, resultSize!);
this.joinOrderMetricMap.set(key, metric);
this.reportProblemIfNecessary(event, 0, metric);
}
break;
case 'COMPUTE_RECURSIVE':
// Compute the badness metric for a recursive predicate for each ordering.
// See https://github.com/github/codeql-coreql-team/issues/1289#issuecomment-1007237055 for
// the definition.
const sccMetricInput = this.badnessInputsForRecursiveDelta(event);
// Loop through each predicate in the SCC
sccMetricInput.forEach((buckets, predicate) => {
// Loop through each ordering of the predicate
buckets.forEach((bucket, raReference) => {
// Format the key as demanding-query:name (ordering)
const key = makeKey(
event.queryCausingWork,
predicate,
'(' + raReference + ')'
);
const maxTupleCount = Math.max(...bucket.tupleCounts);
const resultSize = bucket.resultSize;
const maxDependentPredicateSize = Math.max(
...bucket.dependentPredicateSizes.values()
);
if (maxDependentPredicateSize > 0) {
pushValue(this.maxTupleCountMap, key, maxTupleCount);
pushValue(this.resultSizeMap, key, resultSize);
pushValue(
this.maxDependentPredicateSizeMap,
key,
maxDependentPredicateSize
);
const metric = computeJoinOrderBadness(maxTupleCount, maxDependentPredicateSize, resultSize);
const oldMetric = this.joinOrderMetricMap.get(key);
if ((oldMetric === undefined) || (metric > oldMetric)) {
this.joinOrderMetricMap.set(key, metric);
}
}
});
});
break;
}
}
/**
* Iterate through an SCC with main node `event`.
*/
private iterateSCC(
event: ComputeRecursive,
func: (
inLayerEvent: ComputeRecursive | InLayer,
run: PipelineRun,
iteration: number
) => void
): void {
const sccEvents = this.layerEvents.get(event.raHash)!;
const nextPipeline: number[] = new Array(sccEvents.length).fill(0);
const maxIteration = Math.max(
...sccEvents.map(e => e.predicateIterationMillis.length)
);
for (let iteration = 0; iteration < maxIteration; ++iteration) {
// Loop through each predicate in this iteration
for (let predicate = 0; predicate < sccEvents.length; ++predicate) {
const inLayerEvent = sccEvents[predicate];
const iterationTime =
inLayerEvent.predicateIterationMillis.length <= iteration
? -1
: inLayerEvent.predicateIterationMillis[iteration];
if (iterationTime !== -1) {
const run: PipelineRun =
inLayerEvent.pipelineRuns[nextPipeline[predicate]++];
func(inLayerEvent, run, iteration);
}
}
}
}
/**
* Compute the maximum tuple count and maximum dependent predicate size for a non-recursive pipeline
*/
private badnessInputsForNonRecursiveDelta(
pipelineRun: PipelineRun,
event: ComputeSimple
): { maxTupleCount: number; maxDependentPredicateSize: number } {
const dependentPredicateSizes = Object.values(event.dependencies).map(hash =>
this.predicateSizes.get(hash)! // REVIEW: '!'
);
const maxDependentPredicateSize = safeMax(dependentPredicateSizes);
return {
maxTupleCount: safeMax(pipelineRun.counts),
maxDependentPredicateSize: maxDependentPredicateSize
};
}
private prevDeltaSizes(event: ComputeRecursive, predicate: string, i: number) {
// If an iteration isn't present in the map it means it was skipped because the optimizer
// inferred that it was empty. So its size is 0.
return this.curDeltaSizes(event, predicate, i - 1);
}
private curDeltaSizes(event: ComputeRecursive, predicate: string, i: number) {
// If an iteration isn't present in the map it means it was skipped because the optimizer
// inferred that it was empty. So its size is 0.
return (
this.layerEvents.get(event.raHash)!.find(x => x.predicateName === predicate)?.deltaSizes[i] || 0
);
}
/**
* Compute the dependent predicate sizes and the result size used by the metric for a predicate in an SCC.
*/
private badnessInputsForLayer(
event: ComputeRecursive,
inLayerEvent: InLayer | ComputeRecursive,
raReference: string,
iteration: number
) {
const dependentPredicates = getDependentPredicates(
inLayerEvent.ra[raReference]
);
let dependentPredicateSizes: I.Map<string, number>;
// We treat the base case as a non-recursive pipeline. In that case, the dependent predicates are
// the dependencies of the base case and the cur_deltas.
if (raReference === 'base') {
dependentPredicateSizes = I.Map(
dependentPredicates.map((pred): [string, number] => {
// A base case cannot contain a `prev_delta`, but it can contain a `cur_delta`.
let size = 0;
if (pred.endsWith('#cur_delta')) {
size = this.curDeltaSizes(
event,
pred.slice(0, -'#cur_delta'.length),
iteration
);
} else {
const hash = event.dependencies[pred];
size = this.predicateSizes.get(hash)!;
}
return [pred, size];
})
);
} else {
// It's a non-base case in a recursive pipeline. In that case, the dependent predicates are
// only the prev_deltas.
dependentPredicateSizes = I.Map(
dependentPredicates
.flatMap(pred => {
// If it's actually a prev_delta
if (pred.endsWith('#prev_delta')) {
// Return the predicate without the #prev_delta suffix.
return [pred.slice(0, -'#prev_delta'.length)];
} else {
// Not a recursive delta. Skip it.
return [];
}
})
.map((prev): [string, number] => {
const size = this.prevDeltaSizes(event, prev, iteration);
return [prev, size];
})
);
}
const resultSizes = inLayerEvent.resultSize;
return { dependentPredicateSizes, resultSizes };
}
/**
* Compute the metric input for all the events in an SCC whose main node is `event`.
*/
private badnessInputsForRecursiveDelta(event: ComputeRecursive) {
// nameToOrderToBucket : predicate name -> ordering (e.g. standard, order_500000) -> bucket
const nameToOrderToBucket = new Map<string, Map<string, Bucket>>();
// Iterate through the SCC and compute the metric inputs
this.iterateSCC(event, (inLayerEvent, run, iteration) => {
const raReference = run.raReference;
const predicateName = inLayerEvent.predicateName;
if (!nameToOrderToBucket.has(predicateName)) {
nameToOrderToBucket.set(predicateName, new Map());
}
const orderToBucket = nameToOrderToBucket.get(predicateName)!;
if (!orderToBucket.has(raReference)) {
orderToBucket.set(raReference, {
tupleCounts: new Int32Array(0),
resultSize: 0,
dependentPredicateSizes: I.Map()
});
}
const { resultSizes, dependentPredicateSizes } = this.badnessInputsForLayer(
event,
inLayerEvent,
raReference,
iteration
);
const bucket = orderToBucket.get(raReference)!;
// Pointwise sum the tuple counts
const newTupleCounts = pointwiseSum(
bucket.tupleCounts,
new Int32Array(run.counts)
);
const newResultSizes = bucket.resultSize + resultSizes!;
// Pointwise sum the deltas.
const newDependentPredicateSizes = bucket.dependentPredicateSizes.mergeWith(
(oldSize, newSize) => oldSize + newSize,
dependentPredicateSizes
);
orderToBucket.set(raReference, {
tupleCounts: newTupleCounts,
resultSize: newResultSizes,
dependentPredicateSizes: newDependentPredicateSizes
});
});
return nameToOrderToBucket;
}
}
export class JoinOrderScannerProvider implements EvaluationLogScannerProvider {
constructor() {
}
public createScanner(problemReporter: EvaluationLogProblemReporter): EvaluationLogScanner {
return new JoinOrderScanner(problemReporter, DEFAULT_WARNING_THRESHOLD);
}
}
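To make the metric concrete, here is a small illustrative sketch. It is not part of the change above; the RA operation string, predicate names, and sizes are all hypothetical, and the arithmetic simply mirrors computeJoinOrderBadness and DEFAULT_WARNING_THRESHOLD.

// A hypothetical RA operation line; for this input, getDependentPredicates([operation])
// should yield the dependent predicates 'Expr::foo#123' and 'Stmt::bar#456'.
const operation = '{2} r5 = JOIN Expr::foo#123 WITH Stmt::bar#456 ON FIRST 1 OUTPUT Lhs.1, Rhs.2';

// Worked example of the metric: the largest intermediate tuple count in the pipeline,
// divided by the larger of the biggest dependent predicate size and the result size.
const maxTupleCount = 1_000_000;
const maxDependentPredicateSize = 10_000;
const resultSize = 5_000;
const metric = maxTupleCount / Math.max(maxDependentPredicateSize, resultSize); // 100
// 100 >= DEFAULT_WARNING_THRESHOLD (50), so JoinOrderScanner would report this join order.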

View File

@@ -0,0 +1,49 @@
import { SummaryEvent } from './log-summary';
/**
* Callback interface used to report diagnostics from a log scanner.
*/
export interface EvaluationLogProblemReporter {
/**
* Report a potential problem detected in the evaluation log.
*
* @param predicateName The mangled name of the predicate with the problem.
* @param raHash The RA hash of the predicate with the problem.
* @param iteration The iteration number with the problem. For a non-recursive predicate, this
* must be zero.
* @param message The problem message.
*/
reportProblem(predicateName: string, raHash: string, iteration: number, message: string): void;
}
/**
* Interface implemented by a log scanner. Instances are created via
* `EvaluationLogScannerProvider.createScanner()`.
*/
export interface EvaluationLogScanner {
/**
* Called for each event in the log summary, in order. The implementation can report problems via
* the `EvaluationLogProblemReporter` interface that was supplied to `createScanner()`.
* @param event The log summary event.
*/
onEvent(event: SummaryEvent): void;
/**
* Called after all events in the log summary have been processed. The implementation can report
* problems via the `EvaluationLogProblemReporter` interface that was supplied to
* `createScanner()`.
*/
onDone(): void;
}
/**
* A factory for log scanners. When a log is to be scanned, all registered
* `EvaluationLogScannerProviders` will be asked to create a new instance of `EvaluationLogScanner`
* to do the scanning.
*/
export interface EvaluationLogScannerProvider {
/**
* Create a new instance of `EvaluationLogScanner` to scan a single summary log.
* @param problemReporter Callback interface for reporting any problems discovered.
*/
createScanner(problemReporter: EvaluationLogProblemReporter): EvaluationLogScanner;
}
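As an illustration of how these interfaces fit together, here is a minimal hypothetical scanner and provider. It is not part of this commit; the result-size threshold and the message are invented.

import { SummaryEvent } from './log-summary';
import { EvaluationLogProblemReporter, EvaluationLogScanner, EvaluationLogScannerProvider } from './log-scanner';

// Hypothetical scanner that flags non-recursive predicates with very large result sizes.
class LargeResultScanner implements EvaluationLogScanner {
  constructor(private readonly problemReporter: EvaluationLogProblemReporter) {
  }

  public onEvent(event: SummaryEvent): void {
    if (event.evaluationStrategy === 'COMPUTE_SIMPLE' && event.resultSize > 10_000_000) {
      this.problemReporter.reportProblem(event.predicateName, event.raHash, 0,
        `Relation '${event.predicateName}' has ${event.resultSize} rows.`);
    }
  }

  public onDone(): void {
    // Nothing to flush; problems were reported eagerly in onEvent.
  }
}

export class LargeResultScannerProvider implements EvaluationLogScannerProvider {
  public createScanner(problemReporter: EvaluationLogProblemReporter): EvaluationLogScanner {
    return new LargeResultScanner(problemReporter);
  }
}

Such a provider would be registered the same way as JoinOrderScannerProvider is in extension.ts, via QueryHistoryManager.registerLogScannerProvider, and unregistered by disposing the returned Disposable.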

View File

@@ -0,0 +1,93 @@
export interface PipelineRun {
raReference: string;
counts: number[];
duplicationPercentages: number[];
}
export interface Ra {
[key: string]: string[];
}
export type EvaluationStrategy =
'COMPUTE_SIMPLE' |
'COMPUTE_RECURSIVE' |
'IN_LAYER' |
'COMPUTED_EXTENSIONAL' |
'EXTENSIONAL' |
'SENTINEL_EMPTY' |
'CACHACA' |
'CACHE_HIT';
interface SummaryEventBase {
evaluationStrategy: EvaluationStrategy;
predicateName: string;
raHash: string;
appearsAs: { [key: string]: { [key: string]: number[] } };
completionType?: string;
}
interface ResultEventBase extends SummaryEventBase {
resultSize: number;
}
export interface ComputeSimple extends ResultEventBase {
evaluationStrategy: 'COMPUTE_SIMPLE';
ra: Ra;
pipelineRuns?: [PipelineRun];
queryCausingWork?: string;
dependencies: { [key: string]: string };
}
export interface ComputeRecursive extends ResultEventBase {
evaluationStrategy: 'COMPUTE_RECURSIVE';
deltaSizes: number[];
ra: Ra;
pipelineRuns: PipelineRun[];
queryCausingWork?: string;
dependencies: { [key: string]: string };
predicateIterationMillis: number[];
}
export interface InLayer extends ResultEventBase {
evaluationStrategy: 'IN_LAYER';
deltaSizes: number[];
ra: Ra;
pipelineRuns: PipelineRun[];
queryCausingWork?: string;
mainHash: string;
predicateIterationMillis: number[];
}
export interface ComputedExtensional extends ResultEventBase {
evaluationStrategy: 'COMPUTED_EXTENSIONAL';
queryCausingWork?: string;
}
export interface NonComputedExtensional extends ResultEventBase {
evaluationStrategy: 'EXTENSIONAL';
queryCausingWork?: string;
}
export interface SentinelEmpty extends SummaryEventBase {
evaluationStrategy: 'SENTINEL_EMPTY';
sentinelRaHash: string;
}
export interface Cachaca extends ResultEventBase {
evaluationStrategy: 'CACHACA';
}
export interface CacheHit extends ResultEventBase {
evaluationStrategy: 'CACHE_HIT';
}
export type Extensional = ComputedExtensional | NonComputedExtensional;
export type SummaryEvent =
| ComputeSimple
| ComputeRecursive
| InLayer
| Extensional
| SentinelEmpty
| Cachaca
| CacheHit;
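For reference, one line of the minified JSONL summary would deserialize into an object shaped like the following. The hashes, names, and sizes here are invented; only the structure follows the interfaces above.

import { SummaryEvent } from './log-summary';

const exampleEvent: SummaryEvent = {
  evaluationStrategy: 'COMPUTE_SIMPLE',
  predicateName: 'Expr::Expr::getParent#dispred#f0820431#ff',
  raHash: '76d6745oa1b2c3d4',
  appearsAs: { 'Expr::Expr::getParent#dispred#f0820431#ff': { 'example.ql': [1] } },
  completionType: 'SUCCESS',
  resultSize: 12345,
  ra: { pipeline: ['{1} r1 = SCAN Expr::Expr#abc OUTPUT In.0, In.1'] },
  pipelineRuns: [{ raReference: 'pipeline', counts: [12345], duplicationPercentages: [0] }],
  queryCausingWork: 'example.ql',
  dependencies: { 'Expr::Expr#abc': 'a1b2c3d4e5f60789' }
};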

View File

@@ -0,0 +1,102 @@
import * as fs from 'fs-extra';
/**
* Location information for a single pipeline invocation in the RA.
*/
export interface PipelineInfo {
startLine: number;
raStartLine: number;
raEndLine: number;
}
/**
* Location information for a single predicate in the RA.
*/
export interface PredicateSymbol {
/**
* `PipelineInfo` for each iteration. A non-recursive predicate will have a single iteration `0`.
*/
iterations: Record<number, PipelineInfo>;
}
/**
* Location information for the RA from an evaluation log. Line numbers point into the
* human-readable log summary.
*/
export interface SummarySymbols {
predicates: Record<string, PredicateSymbol>;
}
// Tuple counts for Expr::Expr::getParent#dispred#f0820431#ff@76d6745o:
const NON_RECURSIVE_TUPLE_COUNT_REGEXP = /^Tuple counts for (?<predicateName>\S+):$/;
// Tuple counts for Expr::Expr::getEnclosingStmt#f0820431#bf@923ddwj9 on iteration 0 running pipeline base:
const RECURSIVE_TUPLE_COUNT_REGEXP = /^Tuple counts for (?<predicateName>\S+) on iteration (?<iteration>\d+) /;
const RETURN_REGEXP = /^\s*return /;
/**
* Parse a human-readable evaluation log summary to find the location of the RA for each pipeline
* run.
*
* TODO: Once we're more certain about the symbol format, we should have the CLI generate this as it
* generates the human-readable summary to avoid having to rely on regular expression matching of the
* human-readable text.
*
* @param fileLocation The path to the summary file.
* @returns Symbol information for the summary file.
*/
export async function generateSummarySymbols(fileLocation: string): Promise<SummarySymbols> {
const summary = await fs.promises.readFile(fileLocation, { encoding: 'utf-8' });
const symbols: SummarySymbols = {
predicates: {}
};
const lines = summary.split(/\r?\n/);
let lineNumber = 0;
while (lineNumber < lines.length) {
const startLineNumber = lineNumber;
lineNumber++;
const startLine = lines[startLineNumber];
const nonRecursiveMatch = startLine.match(NON_RECURSIVE_TUPLE_COUNT_REGEXP);
let predicateName: string | undefined = undefined;
let iteration = 0;
if (nonRecursiveMatch) {
predicateName = nonRecursiveMatch.groups!.predicateName;
} else {
const recursiveMatch = startLine.match(RECURSIVE_TUPLE_COUNT_REGEXP);
if (recursiveMatch) {
predicateName = recursiveMatch.groups!.predicateName;
iteration = parseInt(recursiveMatch.groups!.iteration);
}
}
if (predicateName !== undefined) {
const raStartLine = lineNumber;
let raEndLine: number | undefined = undefined;
while ((lineNumber < lines.length) && (raEndLine === undefined)) {
const raLine = lines[lineNumber];
const returnMatch = raLine.match(RETURN_REGEXP);
if (returnMatch) {
raEndLine = lineNumber;
}
lineNumber++;
}
if (raEndLine === undefined) {
raEndLine = lineNumber - 1;
}
let symbol = symbols.predicates[predicateName];
if (symbol === undefined) {
symbol = {
iterations: {}
};
symbols.predicates[predicateName] = symbol;
}
symbol.iterations[iteration] = {
startLine: startLineNumber,
raStartLine: raStartLine,
raEndLine: raEndLine
};
}
}
return symbols;
}
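A short usage sketch follows (the summary path and predicate name are placeholders): once the symbols have been generated, finding the RA lines for a given iteration is just a pair of lookups.

import { generateSummarySymbols } from './summary-parser';

async function printRaLocation(summaryPath: string, predicateName: string): Promise<void> {
  // Parse the human-readable summary into per-predicate, per-iteration line ranges.
  const symbols = await generateSummarySymbols(summaryPath);
  // Iteration 0 is the only iteration recorded for a non-recursive predicate.
  const info = symbols.predicates[predicateName]?.iterations[0];
  if (info !== undefined) {
    console.log(`RA for ${predicateName} spans summary lines ${info.raStartLine}-${info.raEndLine}`);
  }
}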

View File

@@ -1,11 +1,13 @@
import * as path from 'path';
import {
commands,
Diagnostic,
Disposable,
env,
Event,
EventEmitter,
ExtensionContext,
languages,
ProviderResult,
Range,
ThemeIcon,
@@ -16,6 +18,7 @@ import {
window,
workspace,
} from 'vscode';
import * as JsonlParser from 'stream-json/jsonl/Parser';
import { QueryHistoryConfig } from './config';
import {
showAndLogErrorMessage,
@@ -39,6 +42,9 @@ import { CliVersionConstraint } from './cli';
import { HistoryItemLabelProvider } from './history-item-label-provider';
import { Credentials } from './authentication';
import { cancelRemoteQuery } from './remote-queries/gh-actions-api-client';
import { PipelineInfo, SummarySymbols } from './log-insights/summary-parser';
import { DiagnosticSeverity } from 'vscode-languageclient';
import { EvaluationLogProblemReporter, EvaluationLogScannerProvider } from './log-insights/log-scanner';
/**
* query-history.ts
@@ -297,6 +303,41 @@ export class HistoryTreeDataProvider extends DisposableObject {
}
}
/**
* Compute the key used to find a predicate in the summary symbols.
* @param name The name of the predicate.
* @param raHash The RA hash of the predicate.
* @returns The key of the predicate, consisting of `name@shortHash`, where `shortHash` is the first
* eight characters of `raHash`.
*/
function predicateSymbolKey(name: string, raHash: string): string {
return `${name}@${raHash.substring(0, 8)}`;
}
/**
* Implementation of `EvaluationLogProblemReporter` that generates `Diagnostic` objects to display
* in the VS Code "Problems" view.
*/
class ProblemReporter implements EvaluationLogProblemReporter {
public readonly diagnostics: Diagnostic[] = [];
constructor(private readonly symbols: SummarySymbols | undefined) {
}
public reportProblem(predicateName: string, raHash: string, iteration: number, message: string): void {
const nameWithHash = predicateSymbolKey(predicateName, raHash);
const predicateSymbol = this.symbols?.predicates[nameWithHash];
let predicateInfo: PipelineInfo | undefined = undefined;
if (predicateSymbol !== undefined) {
predicateInfo = predicateSymbol.iterations[iteration];
}
if (predicateInfo !== undefined) {
const range = new Range(predicateInfo.raStartLine, 0, predicateInfo.raEndLine + 1, 0);
this.diagnostics.push(new Diagnostic(range, message, DiagnosticSeverity.Error));
}
}
}
export class QueryHistoryManager extends DisposableObject {
treeDataProvider: HistoryTreeDataProvider;
@@ -304,6 +345,7 @@ export class QueryHistoryManager extends DisposableObject {
lastItemClick: { time: Date; item: QueryHistoryInfo } | undefined;
compareWithItem: LocalQueryInfo | undefined;
queryHistoryScrubber: Disposable | undefined;
private readonly diagnosticCollection = this.push(languages.createDiagnosticCollection('ql-eval-log'));
private queryMetadataStorageLocation;
private readonly _onDidAddQueryItem = super.push(new EventEmitter<QueryHistoryInfo>());
@@ -318,6 +360,9 @@ export class QueryHistoryManager extends DisposableObject {
readonly onWillOpenQueryItem: Event<QueryHistoryInfo> = this
._onWillOpenQueryItem.event;
private readonly scannerProviders = new Map<number, EvaluationLogScannerProvider>();
private nextScannerProviderId = 0;
constructor(
private readonly qs: QueryServerClient,
private readonly dbm: DatabaseManager,
@@ -834,6 +879,24 @@ export class QueryHistoryManager extends DisposableObject {
}
}
/**
* Scan the evaluation log for a query, and report any diagnostics.
*
* @param query The query whose log is to be scanned.
*/
public async scanEvalLog(
query: LocalQueryInfo
): Promise<void> {
this.diagnosticCollection.clear();
if (query.evalLogJsonSummaryLocation) {
const diagnostics = await this.scanLog(query.evalLogJsonSummaryLocation, query.evalLogSummarySymbolsLocation);
const uri = Uri.file(query.evalLogSummaryLocation!);
this.diagnosticCollection.set(uri, diagnostics);
} else {
this.warnNoEvalLog();
}
}
async handleCancel(
singleItem: QueryHistoryInfo,
multiSelect: QueryHistoryInfo[]
@@ -993,6 +1056,59 @@ export class QueryHistoryManager extends DisposableObject {
this._onDidAddQueryItem.fire(item);
}
/**
* Register a provider that can create instances of `EvaluationLogScanner` to scan evaluation logs
* for problems.
* @param provider The provider.
* @returns A `Disposable` that, when disposed, will unregister the provider.
*/
registerLogScannerProvider(provider: EvaluationLogScannerProvider): Disposable {
const id = this.nextScannerProviderId;
this.nextScannerProviderId++;
this.scannerProviders.set(id, provider);
const manager = this;
return {
dispose(): void {
manager.scannerProviders.delete(id);
}
};
}
/**
* Scan the evaluator summary log for problems, using the scanners for all registered providers.
* @param jsonSummaryLocation The file path of the JSON summary log.
* @param symbolsLocation The file path of the symbols file for the human-readable log summary.
* @returns An array of `Diagnostic`s representing the problems found by scanners.
*/
private async scanLog(jsonSummaryLocation: string, symbolsLocation: string | undefined): Promise<Diagnostic[]> {
let symbols: SummarySymbols | undefined = undefined;
if (symbolsLocation !== undefined) {
symbols = JSON.parse(await fs.readFile(symbolsLocation, { encoding: 'utf-8' }));
}
const problemReporter = new ProblemReporter(symbols);
const scanners = [...this.scannerProviders.values()].map(p => p.createScanner(problemReporter));
const stream = fs.createReadStream(jsonSummaryLocation)
.pipe(JsonlParser.parser())
.on('data', ({ value }) => {
scanners.forEach(scanner => {
scanner.onEvent(value);
});
});
await new Promise(function(resolve, reject) {
stream.on('end', resolve);
stream.on('error', reject);
});
scanners.forEach(scanner => scanner.onDone());
return problemReporter.diagnostics;
}
/**
* Update the tree view selection if the tree view is visible.
*

View File

@@ -217,6 +217,8 @@ export class LocalQueryInfo {
public completedQuery: CompletedQueryInfo | undefined;
public evalLogLocation: string | undefined;
public evalLogSummaryLocation: string | undefined;
public evalLogSummarySymbolsLocation: string | undefined;
public evalLogJsonSummaryLocation: string | undefined;
/**
* Note that in the {@link slurpQueryHistory} method, we create a FullQueryInfo instance

View File

@@ -267,6 +267,14 @@ export function findQueryEvalLogSummaryFile(resultPath: string): string {
return path.join(resultPath, 'evaluator-log.summary');
}
export function findQueryEvalLogSummarySymbolsFile(resultPath: string): string {
return path.join(resultPath, 'evaluator-log.summary.symbols.json');
}
export function findQueryEvalLogEndSummaryFile(resultPath: string): string {
return path.join(resultPath, 'evaluator-log-end.summary');
}
export function findQueryEvalJsonLogSummaryFile(resultPath: string): string {
return path.join(resultPath, 'evaluator-log.summary.jsonl');
}

View File

@@ -37,6 +37,7 @@ import { ensureMetadataIsComplete } from './query-results';
import { SELECT_QUERY_NAME } from './contextual/locationFinder';
import { DecodedBqrsChunk } from './pure/bqrs-cli-types';
import { getErrorMessage } from './pure/helpers-pure';
import { generateSummarySymbols } from './log-insights/summary-parser';
/**
* run-queries.ts
@@ -103,10 +104,18 @@ export class QueryEvaluationInfo {
return qsClient.findQueryEvalLogSummaryFile(this.querySaveDir);
}
get evalLogSummarySymbolsPath() {
return qsClient.findQueryEvalLogSummarySymbolsFile(this.querySaveDir);
}
get evalLogEndSummaryPath() {
return qsClient.findQueryEvalLogEndSummaryFile(this.querySaveDir);
}
get evalLogJsonSummaryPath() {
return qsClient.findQueryEvalJsonLogSummaryFile(this.querySaveDir);
}
get resultsPaths() {
return {
resultsPath: path.join(this.querySaveDir, 'results.bqrs'),
@@ -174,7 +183,7 @@ export class QueryEvaluationInfo {
db: dataset,
logPath: this.evalLogPath,
});
}
const params: messages.EvaluateQueriesParams = {
db: dataset,
@@ -208,6 +217,16 @@ export class QueryEvaluationInfo {
void qs.logger.log(' --- Evaluator Log Summary --- ');
void qs.logger.log(buffer.toString());
});
// Create the symbol table for the summary file, so we know where each predicate and iteration
// is located. We use this info for jumping to the RA for a specific predicate and iteration.
// TODO: Move this into the CLI once we're more sure of the format.
const symbols = await generateSummarySymbols(this.evalLogSummaryPath);
await fs.writeFile(this.evalLogSummarySymbolsPath, JSON.stringify(symbols));
queryInfo.evalLogSummarySymbolsLocation = this.evalLogSummarySymbolsPath;
await qs.cliServer.generateJsonLogSummary(this.evalLogPath, this.evalLogJsonSummaryPath);
queryInfo.evalLogJsonSummaryLocation = this.evalLogJsonSummaryPath;
} else {
void showAndLogWarningMessage(`Failed to write structured evaluator log to ${this.evalLogPath}.`);
}