Initial implementation of join order metric scanning

Dave Bartolomeo
2022-05-03 13:20:30 -04:00
parent 1d195cb347
commit 1089a052ec
12 changed files with 883 additions and 7 deletions

View File

@@ -18,6 +18,7 @@
"d3-graphviz": "^2.6.1",
"fs-extra": "^10.0.1",
"glob-promise": "^3.4.0",
"immutable": "^4.0.0",
"js-yaml": "^3.14.0",
"minimist": "~1.2.6",
"nanoid": "^3.2.0",
@@ -6910,6 +6911,11 @@
"node": ">= 4"
}
},
"node_modules/immutable": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/immutable/-/immutable-4.0.0.tgz",
"integrity": "sha512-zIE9hX70qew5qTUjSS7wi1iwj/l7+m54KWU247nhM3v806UdGj1yDndXj+IOYxxtW9zyLI+xqFNZjTuDaLUqFw=="
},
"node_modules/import-fresh": {
"version": "3.2.1",
"resolved": "https://registry.npmjs.org/import-fresh/-/import-fresh-3.2.1.tgz",
@@ -19573,6 +19579,11 @@
"integrity": "sha512-cyFDKrqc/YdcWFniJhzI42+AzS+gNwmUzOSFcRCQYwySuBBBy/KjuxWLZ/FHEH6Moq1NizMOBWyTcv8O4OZIMg==",
"dev": true
},
"immutable": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/immutable/-/immutable-4.0.0.tgz",
"integrity": "sha512-zIE9hX70qew5qTUjSS7wi1iwj/l7+m54KWU247nhM3v806UdGj1yDndXj+IOYxxtW9zyLI+xqFNZjTuDaLUqFw=="
},
"import-fresh": {
"version": "3.2.1",
"resolved": "https://registry.npmjs.org/import-fresh/-/import-fresh-3.2.1.tgz",

View File

@@ -746,11 +746,6 @@
"group": "9_qlCommands",
"when": "codeql.supportsEvalLog && viewItem == rawResultsItem || codeql.supportsEvalLog && viewItem == interpretedResultsItem || codeql.supportsEvalLog && viewItem == cancelledResultsItem"
},
{
"command": "codeQLQueryHistory.scanEvalLog",
"group": "9_qlCommands",
"when": "codeql.supportsEvalLog && (viewItem == rawResultsItem || viewItem == interpretedResultsItem || viewItem == cancelledResultsItem)"
},
{
"command": "codeQLQueryHistory.showQueryText",
"group": "9_qlCommands",

View File

@@ -685,6 +685,23 @@ export class CodeQLCliServer implements Disposable {
return await this.runCodeQlCliCommand(['generate', 'log-summary'], subcommandArgs, 'Generating log summary');
}
/**
* Generate a JSON summary of an evaluation log.
* @param inputPath The path of an evaluation event log.
* @param outputPath The path to write a JSON summary of it to.
*/
async generateJsonLogSummary(
inputPath: string,
outputPath: string
): Promise<string> {
const subcommandArgs = [
'--minify-output',
inputPath,
outputPath
];
return await this.runCodeQlCliCommand(['generate', 'log-summary'], subcommandArgs, 'Generating JSON log summary');
}
/**
* Gets the results from a bqrs.
* @param bqrsPath The path to the bqrs.

View File

@@ -97,6 +97,7 @@ import { URLSearchParams } from 'url';
import { handleDownloadPacks, handleInstallPackDependencies } from './packaging';
import { RemoteQueryHistoryItem } from './remote-queries/remote-query-history-item';
import { HistoryItemLabelProvider } from './history-item-label-provider';
import { JoinOrderScannerProvider } from './log-insights/join-order';
/**
* extension.ts
@@ -470,6 +471,9 @@ async function activateWithInstalledDistribution(
ctx.subscriptions.push(qhm);
void logger.log('Initializing evaluation log scanners.');
ctx.subscriptions.push(qhm.registerLogScannerProvider(new JoinOrderScannerProvider()));
void logger.log('Initializing results panel interface.');
const intm = new InterfaceManager(ctx, dbm, cliServer, queryServerLogger, labelProvider);
ctx.subscriptions.push(intm);
@@ -504,6 +508,8 @@ async function activateWithInstalledDistribution(
forceReveal: WebviewReveal
): Promise<void> {
await intm.showResults(query, forceReveal, false);
// Always update the log warnings so they stay in sync with the results.
await qhm.scanEvalLog(query);
}
async function compileAndRunQuery(

View File

@@ -0,0 +1,458 @@
import * as I from 'immutable';
import { EvaluationLogProblemReporter, EvaluationLogScanner, EvaluationLogScannerProvider } from './log-scanner';
import { InLayer, ComputeRecursive, SummaryEvent, PipelineRun, ComputeSimple } from './log-summary';
const DEFAULT_WARNING_THRESHOLD = 50;
/**
* Like `max`, but returns 0 if no meaningful maximum can be computed.
*/
function safeMax(it: Iterable<number>) {
const m = Math.max(...it);
return Number.isFinite(m) ? m : 0;
}
/**
* Compute a key for the maps that are sent to report generation.
* Should only be used on events that are known to define queryCausingWork.
*/
function makeKey(
queryCausingWork: string | undefined,
predicate: string,
suffix = ''
): string {
if (queryCausingWork === undefined) {
throw new Error(
'queryCausingWork was not defined on an event we expected it to be defined for!'
);
}
return (
queryCausingWork +
':' +
predicate +
(suffix ? ' ' + suffix : '')
);
}
function getDependentPredicates(operations: string[]): I.List<string> {
const regexps = [
// SCAN id
String.raw`SCAN\s+([0-9a-zA-Z:#_]+)\s`,
// JOIN id WITH id
String.raw`JOIN\s+([0-9a-zA-Z:#_]+)\s+WITH\s+([0-9a-zA-Z:#_]+)\s`,
// AGGREGATE id, id
String.raw`AGGREGATE\s+([0-9a-zA-Z:#_]+)\s*,\s+([0-9a-zA-Z:#_]+)`,
// id AND NOT id
String.raw`([0-9a-zA-Z:#_]+)\s+AND\s+NOT\s+([0-9a-zA-Z:#_]+)`,
// INVOKE HIGHER-ORDER RELATION rel ON <id, ..., id>
String.raw`INVOKE\s+HIGHER-ORDER\s+RELATION\s[^\s]+\sON\s+<([0-9a-zA-Z:#_<>]+)((?:,[0-9a-zA-Z:#_<>]+)*)>`,
// SELECT id
String.raw`SELECT\s+([0-9a-zA-Z:#_]+)`
];
const r = new RegExp(
`${String.raw`\{[0-9]+\}\s+[0-9a-zA-Z]+\s=\s(?:` + regexps.join('|')})`
);
return I.List(operations).flatMap(operation => {
const matches = r.exec(operation.trim());
return I.List(matches!)
.rest() // Skip the first group as it's just the entire string
.filter(x => !!x && !x.match('r[0-9]+|PRIMITIVE')) // Only keep the references to predicates.
.flatMap(x => x.split(',')) // Group 2 in the INVOKE HIGHER-ORDER RELATION case is a comma-separated list of identifiers.
.filter(x => !!x); // Remove empty strings
});
}
function getMainHash(event: InLayer | ComputeRecursive): string {
switch (event.evaluationStrategy) {
case 'IN_LAYER':
return event.mainHash;
case 'COMPUTE_RECURSIVE':
return event.raHash;
}
}
/**
* Sum arrays a and b element-wise, and pad with 0s if the arrays are not the same length.
*/
function pointwiseSum(a: Int32Array, b: Int32Array): Int32Array {
function reportIfInconsistent(ai: number, bi: number) {
if (ai === -1 && bi !== -1) {
console.warn(
`Operation was not evaluated in the first pipeline, but it was evaluated in the accumulated pipeline (with tuple count ${bi}).`
);
}
if (ai !== -1 && bi === -1) {
console.warn(
`Operation was evaluated in the first pipeline (with tuple count ${ai}), but it was not evaluated in the accumulated pipeline.`
);
}
}
const length = Math.max(a.length, b.length);
const result = new Int32Array(length);
for (let i = 0; i < length; i++) {
const ai = a[i] || 0;
const bi = b[i] || 0;
// -1 is used to represent the absence of a tuple count for a line in the pretty-printed RA (e.g. an empty line), so we ignore those.
if (i < a.length && i < b.length && (ai === -1 || bi === -1)) {
result[i] = -1;
reportIfInconsistent(ai, bi);
} else {
result[i] = ai + bi;
}
}
return result;
}
function pushValue<K, V>(m: Map<K, V[]>, k: K, v: V) {
if (!m.has(k)) {
m.set(k, []);
}
m.get(k)!.push(v);
return m;
}
function computeJoinOrderBadness(
maxTupleCount: number,
maxDependentPredicateSize: number,
resultSize: number
): number {
return maxTupleCount / Math.max(maxDependentPredicateSize, resultSize);
}
/**
* A bucket contains the pointwise sums of the tuple counts, result sizes, and dependent predicate sizes.
* One bucket is computed for each (predicate, ordering) pair in an SCC.
*/
interface Bucket {
tupleCounts: Int32Array;
resultSize: number;
dependentPredicateSizes: I.Map<string, number>;
}
class JoinOrderScanner implements EvaluationLogScanner {
// Map a predicate hash to its result size
private readonly predicateSizes = new Map<string, number>();
private readonly layerEvents = new Map<string, (ComputeRecursive | InLayer)[]>();
// Map a key of the form 'query-with-demand : predicate name' to its badness input.
private readonly maxTupleCountMap = new Map<string, number[]>();
private readonly resultSizeMap = new Map<string, number[]>();
private readonly maxDependentPredicateSizeMap = new Map<string, number[]>();
private readonly joinOrderMetricMap = new Map<string, number>();
constructor(
private readonly problemReporter: EvaluationLogProblemReporter,
private readonly warningThreshold: number) {
}
public onEvent(event: SummaryEvent): void {
if (
event.completionType !== undefined &&
event.completionType !== 'SUCCESS'
) {
return; // Skip any evaluation that wasn't successful
}
this.recordPredicateSizes(event);
this.computeBadnessMetric(event);
}
public onDone(): void {
void this;
}
private recordPredicateSizes(event: SummaryEvent): void {
switch (event.evaluationStrategy) {
case 'EXTENSIONAL':
case 'COMPUTED_EXTENSIONAL':
case 'COMPUTE_SIMPLE':
case 'CACHACA':
case 'CACHE_HIT': {
this.predicateSizes.set(event.raHash, event.resultSize);
break;
}
case 'SENTINEL_EMPTY': {
this.predicateSizes.set(event.raHash, 0);
break;
}
case 'COMPUTE_RECURSIVE':
case 'IN_LAYER': {
this.predicateSizes.set(event.raHash, event.resultSize);
// layerEvents are indexed by the mainHash.
const hash = getMainHash(event);
if (!this.layerEvents.has(hash)) {
this.layerEvents.set(hash, []);
}
this.layerEvents.get(hash)!.push(event);
break;
}
}
}
private reportProblemIfNecessary(event: SummaryEvent, iteration: number, metric: number): void {
if (metric >= this.warningThreshold) {
this.problemReporter.reportProblem(event.predicateName, event.raHash, iteration,
`Relation '${event.predicateName}' has an inefficient join order. Its join order metric is ${metric}, which is larger than the threshold of ${this.warningThreshold}.`);
}
}
private computeBadnessMetric(event: SummaryEvent): void {
if (
event.completionType !== undefined &&
event.completionType !== 'SUCCESS'
) {
return; // Skip any evaluation that wasn't successful
}
switch (event.evaluationStrategy) {
case 'COMPUTE_SIMPLE':
if (!event.pipelineRuns) {
// skip if the optional pipelineRuns field is not present.
break;
}
// Compute the badness metric for a non-recursive predicate. The metric in this case is defined as:
// badness = (max tuple count in the pipeline) / max(size of the largest dependent predicate, result size)
const key = makeKey(event.queryCausingWork, event.predicateName);
const resultSize = event.resultSize;
// There is only one entry in `pipelineRuns` if it's a non-recursive predicate.
const { maxTupleCount, maxDependentPredicateSize } =
this.badnessInputsForNonRecursiveDelta(event.pipelineRuns[0], event);
if (maxDependentPredicateSize > 0) {
pushValue(this.maxTupleCountMap, key, maxTupleCount);
pushValue(this.resultSizeMap, key, resultSize);
pushValue(
this.maxDependentPredicateSizeMap,
key,
maxDependentPredicateSize
);
const metric = computeJoinOrderBadness(maxTupleCount, maxDependentPredicateSize, resultSize!);
this.joinOrderMetricMap.set(key, metric);
this.reportProblemIfNecessary(event, 0, metric);
}
break;
case 'COMPUTE_RECURSIVE':
// Compute the badness metric for a recursive predicate for each ordering.
// See https://github.com/github/codeql-coreql-team/issues/1289#issuecomment-1007237055 for
// the definition.
const sccMetricInput = this.badnessInputsForRecursiveDelta(event);
// Loop through each predicate in the SCC
sccMetricInput.forEach((buckets, predicate) => {
// Loop through each ordering of the predicate
buckets.forEach((bucket, raReference) => {
// Format the key as demanding-query:name (ordering)
const key = makeKey(
event.queryCausingWork,
predicate,
'(' + raReference + ')'
);
const maxTupleCount = Math.max(...bucket.tupleCounts);
const resultSize = bucket.resultSize;
const maxDependentPredicateSize = Math.max(
...bucket.dependentPredicateSizes.values()
);
if (maxDependentPredicateSize > 0) {
pushValue(this.maxTupleCountMap, key, maxTupleCount);
pushValue(this.resultSizeMap, key, resultSize);
pushValue(
this.maxDependentPredicateSizeMap,
key,
maxDependentPredicateSize
);
const metric = computeJoinOrderBadness(maxTupleCount, maxDependentPredicateSize, resultSize);
const oldMetric = this.joinOrderMetricMap.get(key);
if ((oldMetric === undefined) || (metric > oldMetric)) {
this.joinOrderMetricMap.set(key, metric);
}
}
});
});
break;
}
}
/**
* Iterate through an SCC with main node `event`.
*/
private iterateSCC(
event: ComputeRecursive,
func: (
inLayerEvent: ComputeRecursive | InLayer,
run: PipelineRun,
iteration: number
) => void
): void {
const sccEvents = this.layerEvents.get(event.raHash)!;
const nextPipeline: number[] = new Array(sccEvents.length).fill(0);
const maxIteration = Math.max(
...sccEvents.map(e => e.predicateIterationMillis.length)
);
for (let iteration = 0; iteration < maxIteration; ++iteration) {
// Loop through each predicate in this iteration
for (let predicate = 0; predicate < sccEvents.length; ++predicate) {
const inLayerEvent = sccEvents[predicate];
const iterationTime =
inLayerEvent.predicateIterationMillis.length <= iteration
? -1
: inLayerEvent.predicateIterationMillis[iteration];
if (iterationTime !== -1) {
const run: PipelineRun =
inLayerEvent.pipelineRuns[nextPipeline[predicate]++];
func(inLayerEvent, run, iteration);
}
}
}
}
/**
* Compute the maximum tuple count and maximum dependent predicate size for a non-recursive pipeline
*/
private badnessInputsForNonRecursiveDelta(
pipelineRun: PipelineRun,
event: ComputeSimple
): { maxTupleCount: number; maxDependentPredicateSize: number } {
const dependentPredicateSizes = Object.values(event.dependencies).map(hash =>
this.predicateSizes.get(hash)! // REVIEW: '!'
);
const maxDependentPredicateSize = safeMax(dependentPredicateSizes);
return {
maxTupleCount: safeMax(pipelineRun.counts),
maxDependentPredicateSize: maxDependentPredicateSize
};
}
private prevDeltaSizes(event: ComputeRecursive, predicate: string, i: number) {
// If an iteration isn't present in the map it means it was skipped because the optimizer
// inferred that it was empty. So its size is 0.
return this.curDeltaSizes(event, predicate, i - 1);
}
private curDeltaSizes(event: ComputeRecursive, predicate: string, i: number) {
// If an iteration isn't present in the map it means it was skipped because the optimizer
// inferred that it was empty. So its size is 0.
return (
this.layerEvents.get(event.raHash)!.find(x => x.predicateName === predicate)?.deltaSizes[i] || 0
);
}
/**
* Compute the dependent predicate sizes and the result size used by the metric for a predicate in an SCC.
*/
private badnessInputsForLayer(
event: ComputeRecursive,
inLayerEvent: InLayer | ComputeRecursive,
raReference: string,
iteration: number
) {
const dependentPredicates = getDependentPredicates(
inLayerEvent.ra[raReference]
);
let dependentPredicateSizes: I.Map<string, number>;
// We treat the base case as a non-recursive pipeline. In that case, the dependent predicates are
// the dependencies of the base case and the cur_deltas.
if (raReference === 'base') {
dependentPredicateSizes = I.Map(
dependentPredicates.map((pred): [string, number] => {
// A base case cannot contain a `prev_delta`, but it can contain a `cur_delta`.
let size = 0;
if (pred.endsWith('#cur_delta')) {
size = this.curDeltaSizes(
event,
pred.slice(0, -'#cur_delta'.length),
iteration
);
} else {
const hash = event.dependencies[pred];
size = this.predicateSizes.get(hash)!;
}
return [pred, size];
})
);
} else {
// It's a non-base case in a recursive pipeline. In that case, the dependent predicates are
// only the prev_deltas.
dependentPredicateSizes = I.Map(
dependentPredicates
.flatMap(pred => {
// If it's actually a prev_delta
if (pred.endsWith('#prev_delta')) {
// Return the predicate without the #prev_delta suffix.
return [pred.slice(0, -'#prev_delta'.length)];
} else {
// Not a recursive delta. Skip it.
return [];
}
})
.map((prev): [string, number] => {
const size = this.prevDeltaSizes(event, prev, iteration);
return [prev, size];
})
);
}
const resultSizes = inLayerEvent.resultSize;
return { dependentPredicateSizes, resultSizes };
}
/**
* Compute the metric input for all the events in an SCC whose main node is `event`.
*/
private badnessInputsForRecursiveDelta(event: ComputeRecursive) {
// nameToOrderToBucket : predicate name -> ordering (e.g. standard, order_500000) -> bucket
const nameToOrderToBucket = new Map<string, Map<string, Bucket>>();
// Iterate through the SCC and compute the metric inputs
this.iterateSCC(event, (inLayerEvent, run, iteration) => {
const raReference = run.raReference;
const predicateName = inLayerEvent.predicateName;
if (!nameToOrderToBucket.has(predicateName)) {
nameToOrderToBucket.set(predicateName, new Map());
}
const orderToBucket = nameToOrderToBucket.get(predicateName)!;
if (!orderToBucket.has(raReference)) {
orderToBucket.set(raReference, {
tupleCounts: new Int32Array(0),
resultSize: 0,
dependentPredicateSizes: I.Map()
});
}
const { resultSizes, dependentPredicateSizes } = this.badnessInputsForLayer(
event,
inLayerEvent,
raReference,
iteration
);
const bucket = orderToBucket.get(raReference)!;
// Pointwise sum the tuple counts
const newTupleCounts = pointwiseSum(
bucket.tupleCounts,
new Int32Array(run.counts)
);
const newResultSizes = bucket.resultSize + resultSizes!;
// Pointwise sum the deltas.
const newDependentPredicateSizes = bucket.dependentPredicateSizes.mergeWith(
(oldSize, newSize) => oldSize + newSize,
dependentPredicateSizes
);
orderToBucket.set(raReference, {
tupleCounts: newTupleCounts,
resultSize: newResultSizes,
dependentPredicateSizes: newDependentPredicateSizes
});
});
return nameToOrderToBucket;
}
}
export class JoinOrderScannerProvider implements EvaluationLogScannerProvider {
constructor() {
}
public createScanner(problemReporter: EvaluationLogProblemReporter): EvaluationLogScanner {
return new JoinOrderScanner(problemReporter, DEFAULT_WARNING_THRESHOLD);
}
}
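To make the metric concrete, here is a small illustrative sketch. It is not part of the change above; the RA operation string, predicate names, and sizes are all hypothetical, and the arithmetic simply mirrors computeJoinOrderBadness and DEFAULT_WARNING_THRESHOLD.

// A hypothetical RA operation line; for this input, getDependentPredicates([operation])
// should yield the dependent predicates 'Expr::foo#123' and 'Stmt::bar#456'.
const operation = '{2} r5 = JOIN Expr::foo#123 WITH Stmt::bar#456 ON FIRST 1 OUTPUT Lhs.1, Rhs.2';

// Worked example of the metric: the largest intermediate tuple count in the pipeline,
// divided by the larger of the biggest dependent predicate size and the result size.
const maxTupleCount = 1_000_000;
const maxDependentPredicateSize = 10_000;
const resultSize = 5_000;
const metric = maxTupleCount / Math.max(maxDependentPredicateSize, resultSize); // 100
// 100 >= DEFAULT_WARNING_THRESHOLD (50), so JoinOrderScanner would report this join order.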

View File

@@ -0,0 +1,49 @@
import { SummaryEvent } from './log-summary';
/**
* Callback interface used to report diagnostics from a log scanner.
*/
export interface EvaluationLogProblemReporter {
/**
* Report a potential problem detected in the evaluation log.
*
* @param predicateName The mangled name of the predicate with the problem.
* @param raHash The RA hash of the predicate with the problem.
* @param iteration The iteration number with the problem. For a non-recursive predicate, this
* must be zero.
* @param message The problem message.
*/
reportProblem(predicateName: string, raHash: string, iteration: number, message: string): void;
}
/**
* Interface implemented by a log scanner. Instances are created via
* `EvaluationLogScannerProvider.createScanner()`.
*/
export interface EvaluationLogScanner {
/**
* Called for each event in the log summary, in order. The implementation can report problems via
* the `EvaluationLogProblemReporter` interface that was supplied to `createScanner()`.
* @param event The log summary event.
*/
onEvent(event: SummaryEvent): void;
/**
* Called after all events in the log summary have been processed. The implementation can report
* problems via the `EvaluationLogProblemReporter` interface that was supplied to
* `createScanner()`.
*/
onDone(): void;
}
/**
* A factory for log scanners. When a log is to be scanned, all registered
* `EvaluationLogScannerProviders` will be asked to create a new instance of `EvaluationLogScanner`
* to do the scanning.
*/
export interface EvaluationLogScannerProvider {
/**
* Create a new instance of `EvaluationLogScanner` to scan a single summary log.
* @param problemReporter Callback interface for reporting any problems discovered.
*/
createScanner(problemReporter: EvaluationLogProblemReporter): EvaluationLogScanner;
}
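As an illustration of how these interfaces fit together, here is a minimal hypothetical scanner and provider. It is not part of this commit; the result-size threshold and the message are invented.

import { SummaryEvent } from './log-summary';
import { EvaluationLogProblemReporter, EvaluationLogScanner, EvaluationLogScannerProvider } from './log-scanner';

// Hypothetical scanner that flags non-recursive predicates with very large result sizes.
class LargeResultScanner implements EvaluationLogScanner {
  constructor(private readonly problemReporter: EvaluationLogProblemReporter) {
  }

  public onEvent(event: SummaryEvent): void {
    if (event.evaluationStrategy === 'COMPUTE_SIMPLE' && event.resultSize > 10_000_000) {
      this.problemReporter.reportProblem(event.predicateName, event.raHash, 0,
        `Relation '${event.predicateName}' has ${event.resultSize} rows.`);
    }
  }

  public onDone(): void {
    // Nothing to flush; problems were reported eagerly in onEvent.
  }
}

export class LargeResultScannerProvider implements EvaluationLogScannerProvider {
  public createScanner(problemReporter: EvaluationLogProblemReporter): EvaluationLogScanner {
    return new LargeResultScanner(problemReporter);
  }
}

Such a provider would be registered the same way as JoinOrderScannerProvider is in extension.ts, via QueryHistoryManager.registerLogScannerProvider, and unregistered by disposing the returned Disposable.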

View File

@@ -0,0 +1,93 @@
export interface PipelineRun {
raReference: string;
counts: number[];
duplicationPercentages: number[];
}
export interface Ra {
[key: string]: string[];
}
export type EvaluationStrategy =
'COMPUTE_SIMPLE' |
'COMPUTE_RECURSIVE' |
'IN_LAYER' |
'COMPUTED_EXTENSIONAL' |
'EXTENSIONAL' |
'SENTINEL_EMPTY' |
'CACHACA' |
'CACHE_HIT';
interface SummaryEventBase {
evaluationStrategy: EvaluationStrategy;
predicateName: string;
raHash: string;
appearsAs: { [key: string]: { [key: string]: number[] } };
completionType?: string;
}
interface ResultEventBase extends SummaryEventBase {
resultSize: number;
}
export interface ComputeSimple extends ResultEventBase {
evaluationStrategy: 'COMPUTE_SIMPLE';
ra: Ra;
pipelineRuns?: [PipelineRun];
queryCausingWork?: string;
dependencies: { [key: string]: string };
}
export interface ComputeRecursive extends ResultEventBase {
evaluationStrategy: 'COMPUTE_RECURSIVE';
deltaSizes: number[];
ra: Ra;
pipelineRuns: PipelineRun[];
queryCausingWork?: string;
dependencies: { [key: string]: string };
predicateIterationMillis: number[];
}
export interface InLayer extends ResultEventBase {
evaluationStrategy: 'IN_LAYER';
deltaSizes: number[];
ra: Ra;
pipelineRuns: PipelineRun[];
queryCausingWork?: string;
mainHash: string;
predicateIterationMillis: number[];
}
export interface ComputedExtensional extends ResultEventBase {
evaluationStrategy: 'COMPUTED_EXTENSIONAL';
queryCausingWork?: string;
}
export interface NonComputedExtensional extends ResultEventBase {
evaluationStrategy: 'EXTENSIONAL';
queryCausingWork?: string;
}
export interface SentinelEmpty extends SummaryEventBase {
evaluationStrategy: 'SENTINEL_EMPTY';
sentinelRaHash: string;
}
export interface Cachaca extends ResultEventBase {
evaluationStrategy: 'CACHACA';
}
export interface CacheHit extends ResultEventBase {
evaluationStrategy: 'CACHE_HIT';
}
export type Extensional = ComputedExtensional | NonComputedExtensional;
export type SummaryEvent =
| ComputeSimple
| ComputeRecursive
| InLayer
| Extensional
| SentinelEmpty
| Cachaca
| CacheHit;
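For reference, one line of the minified JSONL summary would deserialize into an object shaped like the following. The hashes, names, and sizes here are invented; only the structure follows the interfaces above.

import { SummaryEvent } from './log-summary';

const exampleEvent: SummaryEvent = {
  evaluationStrategy: 'COMPUTE_SIMPLE',
  predicateName: 'Expr::Expr::getParent#dispred#f0820431#ff',
  raHash: '76d6745oa1b2c3d4',
  appearsAs: { 'Expr::Expr::getParent#dispred#f0820431#ff': { 'example.ql': [1] } },
  completionType: 'SUCCESS',
  resultSize: 12345,
  ra: { pipeline: ['{1} r1 = SCAN Expr::Expr#abc OUTPUT In.0, In.1'] },
  pipelineRuns: [{ raReference: 'pipeline', counts: [12345], duplicationPercentages: [0] }],
  queryCausingWork: 'example.ql',
  dependencies: { 'Expr::Expr#abc': 'a1b2c3d4e5f60789' }
};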

View File

@@ -0,0 +1,102 @@
import * as fs from 'fs-extra';
/**
* Location information for a single pipeline invocation in the RA.
*/
export interface PipelineInfo {
startLine: number;
raStartLine: number;
raEndLine: number;
}
/**
* Location information for a single predicate in the RA.
*/
export interface PredicateSymbol {
/**
* `PipelineInfo` for each iteration. A non-recursive predicate will have a single iteration `0`.
*/
iterations: Record<number, PipelineInfo>;
}
/**
* Location information for the RA from an evaluation log. Line numbers point into the
* human-readable log summary.
*/
export interface SummarySymbols {
predicates: Record<string, PredicateSymbol>;
}
// Tuple counts for Expr::Expr::getParent#dispred#f0820431#ff@76d6745o:
const NON_RECURSIVE_TUPLE_COUNT_REGEXP = /^Tuple counts for (?<predicateName>\S+):$/;
// Tuple counts for Expr::Expr::getEnclosingStmt#f0820431#bf@923ddwj9 on iteration 0 running pipeline base:
const RECURSIVE_TUPLE_COUNT_REGEXP = /^Tuple counts for (?<predicateName>\S+) on iteration (?<iteration>\d+) /;
const RETURN_REGEXP = /^\s*return /;
/**
* Parse a human-readable evaluation log summary to find the location of the RA for each pipeline
* run.
*
* TODO: Once we're more certain about the symbol format, we should have the CLI generate this as it
* generates the human-readable summary to avoid having to rely on regular expression matching of the
* human-readable text.
*
* @param fileLocation The path to the summary file.
* @returns Symbol information for the summary file.
*/
export async function generateSummarySymbols(fileLocation: string): Promise<SummarySymbols> {
const summary = await fs.promises.readFile(fileLocation, { encoding: 'utf-8' });
const symbols: SummarySymbols = {
predicates: {}
};
const lines = summary.split(/\r?\n/);
let lineNumber = 0;
while (lineNumber < lines.length) {
const startLineNumber = lineNumber;
lineNumber++;
const startLine = lines[startLineNumber];
const nonRecursiveMatch = startLine.match(NON_RECURSIVE_TUPLE_COUNT_REGEXP);
let predicateName: string | undefined = undefined;
let iteration = 0;
if (nonRecursiveMatch) {
predicateName = nonRecursiveMatch.groups!.predicateName;
} else {
const recursiveMatch = startLine.match(RECURSIVE_TUPLE_COUNT_REGEXP);
if (recursiveMatch) {
predicateName = recursiveMatch.groups!.predicateName;
iteration = parseInt(recursiveMatch.groups!.iteration);
}
}
if (predicateName !== undefined) {
const raStartLine = lineNumber;
let raEndLine: number | undefined = undefined;
while ((lineNumber < lines.length) && (raEndLine === undefined)) {
const raLine = lines[lineNumber];
const returnMatch = raLine.match(RETURN_REGEXP);
if (returnMatch) {
raEndLine = lineNumber;
}
lineNumber++;
}
if (raEndLine === undefined) {
raEndLine = lineNumber - 1;
}
let symbol = symbols.predicates[predicateName];
if (symbol === undefined) {
symbol = {
iterations: {}
};
symbols.predicates[predicateName] = symbol;
}
symbol.iterations[iteration] = {
startLine: startLineNumber,
raStartLine: raStartLine,
raEndLine: raEndLine
};
}
}
return symbols;
}
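A short usage sketch follows (the summary path and predicate name are placeholders): once the symbols have been generated, finding the RA lines for a given iteration is just a pair of lookups.

import { generateSummarySymbols } from './summary-parser';

async function printRaLocation(summaryPath: string, predicateName: string): Promise<void> {
  // Parse the human-readable summary into per-predicate, per-iteration line ranges.
  const symbols = await generateSummarySymbols(summaryPath);
  // Iteration 0 is the only iteration recorded for a non-recursive predicate.
  const info = symbols.predicates[predicateName]?.iterations[0];
  if (info !== undefined) {
    console.log(`RA for ${predicateName} spans summary lines ${info.raStartLine}-${info.raEndLine}`);
  }
}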

View File

@@ -1,11 +1,13 @@
import * as path from 'path';
import {
commands,
Diagnostic,
Disposable,
env,
Event,
EventEmitter,
ExtensionContext,
languages,
ProviderResult,
Range,
ThemeIcon,
@@ -16,6 +18,7 @@ import {
window,
workspace,
} from 'vscode';
import * as JsonlParser from 'stream-json/jsonl/Parser';
import { QueryHistoryConfig } from './config';
import {
showAndLogErrorMessage,
@@ -39,6 +42,9 @@ import { CliVersionConstraint } from './cli';
import { HistoryItemLabelProvider } from './history-item-label-provider';
import { Credentials } from './authentication';
import { cancelRemoteQuery } from './remote-queries/gh-actions-api-client';
import { PipelineInfo, SummarySymbols } from './log-insights/summary-parser';
import { DiagnosticSeverity } from 'vscode-languageclient';
import { EvaluationLogProblemReporter, EvaluationLogScannerProvider } from './log-insights/log-scanner';
/**
* query-history.ts
@@ -297,6 +303,41 @@ export class HistoryTreeDataProvider extends DisposableObject {
}
}
/**
* Compute the key used to find a predicate in the summary symbols.
* @param name The name of the predicate.
* @param raHash The RA hash of the predicate.
* @returns The key of the predicate, consisting of `name@shortHash`, where `shortHash` is the first
* eight characters of `raHash`.
*/
function predicateSymbolKey(name: string, raHash: string): string {
return `${name}@${raHash.substring(0, 8)}`;
}
/**
* Implementation of `EvaluationLogProblemReporter` that generates `Diagnostic` objects to display
* in the VS Code "Problems" view.
*/
class ProblemReporter implements EvaluationLogProblemReporter {
public readonly diagnostics: Diagnostic[] = [];
constructor(private readonly symbols: SummarySymbols | undefined) {
}
public reportProblem(predicateName: string, raHash: string, iteration: number, message: string): void {
const nameWithHash = predicateSymbolKey(predicateName, raHash);
const predicateSymbol = this.symbols?.predicates[nameWithHash];
let predicateInfo: PipelineInfo | undefined = undefined;
if (predicateSymbol !== undefined) {
predicateInfo = predicateSymbol.iterations[iteration];
}
if (predicateInfo !== undefined) {
const range = new Range(predicateInfo.raStartLine, 0, predicateInfo.raEndLine + 1, 0);
this.diagnostics.push(new Diagnostic(range, message, DiagnosticSeverity.Error));
}
}
}
export class QueryHistoryManager extends DisposableObject {
treeDataProvider: HistoryTreeDataProvider;
@@ -304,6 +345,7 @@ export class QueryHistoryManager extends DisposableObject {
lastItemClick: { time: Date; item: QueryHistoryInfo } | undefined;
compareWithItem: LocalQueryInfo | undefined;
queryHistoryScrubber: Disposable | undefined;
private readonly diagnosticCollection = this.push(languages.createDiagnosticCollection('ql-eval-log'));
private queryMetadataStorageLocation;
private readonly _onDidAddQueryItem = super.push(new EventEmitter<QueryHistoryInfo>());
@@ -318,6 +360,9 @@ export class QueryHistoryManager extends DisposableObject {
readonly onWillOpenQueryItem: Event<QueryHistoryInfo> = this
._onWillOpenQueryItem.event;
private readonly scannerProviders = new Map<number, EvaluationLogScannerProvider>();
private nextScannerProviderId = 0;
constructor(
private readonly qs: QueryServerClient,
private readonly dbm: DatabaseManager,
@@ -834,6 +879,24 @@ export class QueryHistoryManager extends DisposableObject {
}
}
/**
* Scan the evaluation log for a query, and report any diagnostics.
*
* @param query The query whose log is to be scanned.
*/
public async scanEvalLog(
query: LocalQueryInfo
): Promise<void> {
this.diagnosticCollection.clear();
if (query.evalLogJsonSummaryLocation) {
const diagnostics = await this.scanLog(query.evalLogJsonSummaryLocation, query.evalLogSummarySymbolsLocation);
const uri = Uri.file(query.evalLogSummaryLocation!);
this.diagnosticCollection.set(uri, diagnostics);
} else {
this.warnNoEvalLog();
}
}
async handleCancel(
singleItem: QueryHistoryInfo,
multiSelect: QueryHistoryInfo[]
@@ -993,6 +1056,59 @@ export class QueryHistoryManager extends DisposableObject {
this._onDidAddQueryItem.fire(item);
}
/**
* Register a provider that can create instances of `EvaluationLogScanner` to scan evaluation logs
* for problems.
* @param provider The provider.
* @returns A `Disposable` that, when disposed, will unregister the provider.
*/
registerLogScannerProvider(provider: EvaluationLogScannerProvider): Disposable {
const id = this.nextScannerProviderId;
this.nextScannerProviderId++;
this.scannerProviders.set(id, provider);
const manager = this;
return {
dispose(): void {
manager.scannerProviders.delete(id);
}
};
}
/**
* Scan the evaluator summary log for problems, using the scanners for all registered providers.
* @param jsonSummaryLocation The file path of the JSON summary log.
* @param symbolsLocation The file path of the symbols file for the human-readable log summary.
* @returns An array of `Diagnostic`s representing the problems found by scanners.
*/
private async scanLog(jsonSummaryLocation: string, symbolsLocation: string | undefined): Promise<Diagnostic[]> {
let symbols: SummarySymbols | undefined = undefined;
if (symbolsLocation !== undefined) {
symbols = JSON.parse(await fs.readFile(symbolsLocation, { encoding: 'utf-8' }));
}
const problemReporter = new ProblemReporter(symbols);
const scanners = [...this.scannerProviders.values()].map(p => p.createScanner(problemReporter));
const stream = fs.createReadStream(jsonSummaryLocation)
.pipe(JsonlParser.parser())
.on('data', ({ value }) => {
scanners.forEach(scanner => {
scanner.onEvent(value);
});
});
await new Promise(function(resolve, reject) {
stream.on('end', resolve);
stream.on('error', reject);
});
scanners.forEach(scanner => scanner.onDone());
return problemReporter.diagnostics;
}
/**
* Update the tree view selection if the tree view is visible.
*

View File

@@ -217,6 +217,8 @@ export class LocalQueryInfo {
public completedQuery: CompletedQueryInfo | undefined;
public evalLogLocation: string | undefined;
public evalLogSummaryLocation: string | undefined;
public evalLogSummarySymbolsLocation: string | undefined;
public evalLogJsonSummaryLocation: string | undefined;
/**
* Note that in the {@link slurpQueryHistory} method, we create a FullQueryInfo instance

View File

@@ -267,6 +267,14 @@ export function findQueryEvalLogSummaryFile(resultPath: string): string {
return path.join(resultPath, 'evaluator-log.summary');
}
export function findQueryEvalLogSummarySymbolsFile(resultPath: string): string {
return path.join(resultPath, 'evaluator-log.summary.symbols.json');
}
export function findQueryEvalLogEndSummaryFile(resultPath: string): string {
return path.join(resultPath, 'evaluator-log-end.summary');
}
export function findQueryEvalJsonLogSummaryFile(resultPath: string): string {
return path.join(resultPath, 'evaluator-log.summary.jsonl');
}

View File

@@ -37,6 +37,7 @@ import { ensureMetadataIsComplete } from './query-results';
import { SELECT_QUERY_NAME } from './contextual/locationFinder';
import { DecodedBqrsChunk } from './pure/bqrs-cli-types';
import { getErrorMessage } from './pure/helpers-pure';
import { generateSummarySymbols } from './log-insights/summary-parser';
/**
* run-queries.ts
@@ -103,10 +104,18 @@ export class QueryEvaluationInfo {
return qsClient.findQueryEvalLogSummaryFile(this.querySaveDir);
}
get evalLogSummarySymbolsPath() {
return qsClient.findQueryEvalLogSummarySymbolsFile(this.querySaveDir);
}
get evalLogEndSummaryPath() {
return qsClient.findQueryEvalLogEndSummaryFile(this.querySaveDir);
}
get evalLogJsonSummaryPath() {
return qsClient.findQueryEvalJsonLogSummaryFile(this.querySaveDir);
}
get resultsPaths() {
return {
resultsPath: path.join(this.querySaveDir, 'results.bqrs'),
@@ -174,7 +183,7 @@ export class QueryEvaluationInfo {
db: dataset,
logPath: this.evalLogPath,
});
}
const params: messages.EvaluateQueriesParams = {
db: dataset,
@@ -208,6 +217,16 @@ export class QueryEvaluationInfo {
void qs.logger.log(' --- Evaluator Log Summary --- ');
void qs.logger.log(buffer.toString());
});
// Create the symbol table for the summary file, so we know where each predicate and iteration
// is located. We use this info for jumping to the RA for a specific predicate and iteration.
// TODO: Move this into the CLI once we're more sure of the format.
const symbols = await generateSummarySymbols(this.evalLogSummaryPath);
await fs.writeFile(this.evalLogSummarySymbolsPath, JSON.stringify(symbols));
queryInfo.evalLogSummarySymbolsLocation = this.evalLogSummarySymbolsPath;
await qs.cliServer.generateJsonLogSummary(this.evalLogPath, this.evalLogJsonSummaryPath);
queryInfo.evalLogJsonSummaryLocation = this.evalLogJsonSummaryPath;
} else {
void showAndLogWarningMessage(`Failed to write structured evaluator log to ${this.evalLogPath}.`);
}