Use sarif parser for reopened results (#1457)

2022-10-24 12:31:35 -07:00
parent e891169ca3
commit 63a5021e5e
3 changed files with 232 additions and 68 deletions
--- a/extensions/ql-vscode/src/query-results.ts
+++ b/extensions/ql-vscode/src/query-results.ts
@@ -18,6 +18,7 @@ import { DatabaseInfo } from './pure/interface-types';
 import { QueryStatus } from './query-status';
 import { QueryEvaluationInfo, QueryWithResults } from './run-queries-shared';
 import { formatLegacyMessage } from './legacy-query-server/run-queries';
+import { sarifParser } from './sarif-parser';

 /**
 * query-results.ts
@@ -158,10 +159,12 @@ export async function interpretResultsSarif(
  sourceInfo?: cli.SourceInfo
 ): Promise<SarifInterpretationData> {
  const { resultsPath, interpretedResultsPath } = resultsPaths;
+  let res;
  if (await fs.pathExists(interpretedResultsPath)) {
-    return { ...JSON.parse(await fs.readFile(interpretedResultsPath, 'utf8')), t: 'SarifInterpretationData' };
+    res = await sarifParser(interpretedResultsPath);
+  } else {
+    res = await cli.interpretBqrsSarif(ensureMetadataIsComplete(metadata), resultsPath, interpretedResultsPath, sourceInfo);
  }
-  const res = await cli.interpretBqrsSarif(ensureMetadataIsComplete(metadata), resultsPath, interpretedResultsPath, sourceInfo);
  return { ...res, t: 'SarifInterpretationData' };
 }

--- a/extensions/ql-vscode/src/sarif-parser.ts
+++ b/extensions/ql-vscode/src/sarif-parser.ts
@@ -1,33 +1,34 @@
 import * as Sarif from 'sarif';
 import * as fs from 'fs-extra';
-import { parser } from 'stream-json';
-import { pick } from 'stream-json/filters/Pick';
-import Assembler = require('stream-json/Assembler');
-import { chain } from 'stream-chain';
+import { connectTo } from 'stream-json/Assembler';
 import { getErrorMessage } from './pure/helpers-pure';
+import { withParser } from 'stream-json/filters/Pick';

 const DUMMY_TOOL: Sarif.Tool = { driver: { name: '' } };

 export async function sarifParser(interpretedResultsPath: string): Promise<Sarif.Log> {
  try {
    // Parse the SARIF file into token streams, filtering out only the results array.
-    const p = parser();
-    const pipeline = chain([
-      fs.createReadStream(interpretedResultsPath),
-      p,
-      pick({ filter: 'runs.0.results' })
-    ]);
+    const pipeline = fs.createReadStream(interpretedResultsPath).pipe(withParser({ filter: 'runs.0.results' }));

    // Creates JavaScript objects from the token stream
-    const asm = Assembler.connectTo(pipeline);
+    const asm = connectTo(pipeline);

-    // Returns a constructed Log object with the results or an empty array if no results were found.
+    // Returns a constructed Log object with the results or an empty array if no results were found.
    // If the parser fails for any reason, it will reject the promise.
    return await new Promise((resolve, reject) => {
+      let alreadyDone = false;
      pipeline.on('error', (error) => {
        reject(error);
      });

+      // If the parser pipeline ends before the assembler is done, we reached the end of the file without finding a `runs[0].results` array.
+      pipeline.on('end', () => {
+        if (!alreadyDone) {
+          reject(new Error('Invalid SARIF file: expecting at least one run with result.'));
+        }
+      });
+
      asm.on('done', (asm) => {

        const log: Sarif.Log = {
@@ -41,6 +42,7 @@ export async function sarifParser(interpretedResultsPath: string): Promise<Sarif
        };

        resolve(log);
+        alreadyDone = true;
      });
    });
  } catch (e) {
--- a/extensions/ql-vscode/src/vscode-tests/no-workspace/query-results.test.ts
+++ b/extensions/ql-vscode/src/vscode-tests/no-workspace/query-results.test.ts
@@ -1,6 +1,7 @@
 import { expect } from 'chai';
 import * as path from 'path';
 import * as fs from 'fs-extra';
+import * as os from 'os';
 import * as sinon from 'sinon';
 import { LocalQueryInfo, InitialQueryInfo, interpretResultsSarif } from '../../query-results';
 import { QueryWithResults } from '../../run-queries-shared';
@@ -11,6 +12,7 @@ import { tmpDir } from '../../helpers';
 import { slurpQueryHistory, splatQueryHistory } from '../../query-serialization';
 import { formatLegacyMessage, QueryInProgress } from '../../legacy-query-server/run-queries';
 import { EvaluationResult, QueryResultType } from '../../pure/legacy-messages';
+import Sinon = require('sinon');

 describe('query-results', () => {
  let disposeSpy: sinon.SinonSpy;
@@ -155,68 +157,213 @@ describe('query-results', () => {
    });
  });

-  it('should interpretResultsSarif', async () => {
-    const spy = sandbox.mock();
-    spy.returns({ a: '1234' });
-    const mockServer = {
-      interpretBqrsSarif: spy
-    } as unknown as CodeQLCliServer;
-
-    const interpretedResultsPath = path.join(tmpDir.name, 'interpreted.json');
-    const resultsPath = '123';
-    const sourceInfo = {};
+  describe('interpretResultsSarif', () => {
+    let mockServer: CodeQLCliServer;
+    let spy: Sinon.SinonExpectation;
    const metadata = {
      kind: 'my-kind',
      id: 'my-id' as string | undefined,
      scored: undefined
    };
-    const results1 = await interpretResultsSarif(
-      mockServer,
-      metadata,
-      {
-        resultsPath, interpretedResultsPath
-      },
-      sourceInfo as SourceInfo
-    );
+    const resultsPath = '123';
+    const interpretedResultsPath = path.join(tmpDir.name, 'interpreted.json');
+    const sourceInfo = {};

-    expect(results1).to.deep.eq({ a: '1234', t: 'SarifInterpretationData' });
-    expect(spy).to.have.been.calledWith(
-      metadata,
-      resultsPath, interpretedResultsPath, sourceInfo
-    );
+    beforeEach(() => {
+      spy = sandbox.mock();
+      spy.returns({ a: '1234' });

-    // Try again, but with no id
-    spy.reset();
-    spy.returns({ a: '1234' });
-    delete metadata.id;
-    const results2 = await interpretResultsSarif(
-      mockServer,
-      metadata,
-      {
-        resultsPath, interpretedResultsPath
-      },
-      sourceInfo as SourceInfo
-    );
-    expect(results2).to.deep.eq({ a: '1234', t: 'SarifInterpretationData' });
-    expect(spy).to.have.been.calledWith(
-      { kind: 'my-kind', id: 'dummy-id', scored: undefined },
-      resultsPath, interpretedResultsPath, sourceInfo
-    );
+      mockServer = {
+        interpretBqrsSarif: spy
+      } as unknown as CodeQLCliServer;
+    });

-    // try a third time, but this time we get from file
-    spy.reset();
-    fs.writeFileSync(interpretedResultsPath, JSON.stringify({
-      a: 6
-    }), 'utf8');
-    const results3 = await interpretResultsSarif(
-      mockServer,
-      metadata,
-      {
-        resultsPath, interpretedResultsPath
-      },
-      sourceInfo as SourceInfo
-    );
-    expect(results3).to.deep.eq({ a: 6, t: 'SarifInterpretationData' });
+    afterEach(async () => {
+      sandbox.restore();
+      safeDel(interpretedResultsPath);
+    });
+
+    it('should interpretResultsSarif', async function() {
+      // up to 2 minutes per test
+      this.timeout(2 * 60 * 1000);
+
+      const results = await interpretResultsSarif(
+        mockServer,
+        metadata,
+        {
+          resultsPath, interpretedResultsPath
+        },
+        sourceInfo as SourceInfo
+      );
+
+      expect(results).to.deep.eq({ a: '1234', t: 'SarifInterpretationData' });
+      expect(spy).to.have.been.calledWith(
+        metadata,
+        resultsPath, interpretedResultsPath, sourceInfo
+      );
+    });
+
+    it('should interpretBqrsSarif without ID', async function() {
+      // up to 2 minutes per test
+      this.timeout(2 * 60 * 1000);
+
+      delete metadata.id;
+      const results = await interpretResultsSarif(
+        mockServer,
+        metadata,
+        {
+          resultsPath, interpretedResultsPath
+        },
+        sourceInfo as SourceInfo
+      );
+      expect(results).to.deep.eq({ a: '1234', t: 'SarifInterpretationData' });
+      expect(spy).to.have.been.calledWith(
+        { kind: 'my-kind', id: 'dummy-id', scored: undefined },
+        resultsPath, interpretedResultsPath, sourceInfo
+      );
+    });
+
+    it('should use sarifParser on a valid small SARIF file', async function() {
+      // up to 2 minutes per test
+      this.timeout(2 * 60 * 1000);
+
+      fs.writeFileSync(interpretedResultsPath, JSON.stringify({
+        runs: [{ results: [] }] // A run needs results to succeed.
+      }), 'utf8');
+      const results = await interpretResultsSarif(
+        mockServer,
+        metadata,
+        {
+          resultsPath, interpretedResultsPath
+        },
+        sourceInfo as SourceInfo
+      );
+      // We do not re-interpret if we are reading from a SARIF file. 
+      expect(spy).to.not.have.been.called;
+
+      expect(results).to.have.property('t', 'SarifInterpretationData');
+      expect(results).to.have.nested.property('runs[0].results');
+    });
+
+    it('should throw an error on an invalid small SARIF file', async function() {
+      // up to 2 minutes per test
+      this.timeout(2 * 60 * 1000);
+
+      fs.writeFileSync(interpretedResultsPath, JSON.stringify({
+        a: '6' // Invalid: no runs or results
+      }), 'utf8');
+
+      await expect(
+        interpretResultsSarif(
+          mockServer,
+          metadata,
+          {
+            resultsPath, interpretedResultsPath
+          },
+          sourceInfo as SourceInfo)
+      ).to.be.rejectedWith('Parsing output of interpretation failed: Invalid SARIF file: expecting at least one run with result.');
+
+      // We do not attempt to re-interpret if we are reading from a SARIF file. 
+      expect(spy).to.not.have.been.called;
+    });
+
+    it('should use sarifParser on a valid large SARIF file', async function() {
+      // up to 2 minutes per test
+      this.timeout(2 * 60 * 1000);
+
+      const validSarifStream = fs.createWriteStream(interpretedResultsPath, { flags: 'w' });
+
+      const finished = new Promise((res, rej) => {
+        validSarifStream.addListener('close', res);
+        validSarifStream.addListener('error', rej);
+      });
+
+      validSarifStream.write(JSON.stringify({
+        runs: [{ results: [] }] // A run needs results to succeed.
+      }), 'utf8');
+
+      validSarifStream.write('[', 'utf8');
+      const iterations = 1_000_000;
+      for (let i = 0; i < iterations; i++) {
+        validSarifStream.write(JSON.stringify({
+          a: '6'
+        }), 'utf8');
+        if (i < iterations - 1) {
+          validSarifStream.write(',');
+        }
+      }
+      validSarifStream.write(']', 'utf8');
+      validSarifStream.end();
+      await finished;
+
+      // On Windows, sleep to give Microsoft Defender time to scan the file
+      // so that it can be read by our test.
+      if (os.platform() === 'win32') {
+        await sleep(10_000);
+      }
+
+      const results = await interpretResultsSarif(
+        mockServer,
+        metadata,
+        {
+          resultsPath, interpretedResultsPath
+        },
+        sourceInfo as SourceInfo
+      );
+      // We do not re-interpret if we are reading from a SARIF file. 
+      expect(spy).to.not.have.been.called;
+
+      expect(results).to.have.property('t', 'SarifInterpretationData');
+      expect(results).to.have.nested.property('runs[0].results');
+    });
+
+    it('should throw an error on an invalid large SARIF file', async function() {
+      // up to 2 minutes per test
+      this.timeout(2 * 60 * 1000);
+
+      // On Windows, the file at the prior path sometimes cannot be deleted or
+      // written to, so this last test uses a different file name.
+      const interpretedResultsPath = path.join(tmpDir.name, 'interpreted-invalid.json');
+      const invalidSarifStream = fs.createWriteStream(interpretedResultsPath, { flags: 'w' });
+
+      const finished = new Promise((res, rej) => {
+        invalidSarifStream.addListener('close', res);
+        invalidSarifStream.addListener('error', rej);
+      });
+
+      invalidSarifStream.write('[', 'utf8');
+      const iterations = 1_000_000;
+      for (let i = 0; i < iterations; i++) {
+        invalidSarifStream.write(JSON.stringify({
+          a: '6'
+        }), 'utf8');
+        if (i < iterations - 1) {
+          invalidSarifStream.write(',');
+        }
+      }
+      invalidSarifStream.write(']', 'utf8');
+      invalidSarifStream.end();
+      await finished;
+
+      // On Windows, sleep to give Microsoft Defender time to scan the file
+      // so that it can be read by our test.
+      if (os.platform() === 'win32') {
+        await sleep(10_000);
+      }
+
+      await expect(
+        interpretResultsSarif(
+          mockServer,
+          metadata,
+          {
+            resultsPath, interpretedResultsPath
+          },
+          sourceInfo as SourceInfo)
+      ).to.be.rejectedWith('Parsing output of interpretation failed: Invalid SARIF file: expecting at least one run with result.');
+
+      // We do not attempt to re-interpret if we are reading from a SARIF file. 
+      expect(spy).to.not.have.been.called;
+    });
  });

  describe('splat and slurp', () => {
@@ -300,6 +447,18 @@ describe('query-results', () => {
    });
  });

+  function safeDel(file: string) {
+    try {
+      fs.unlinkSync(file);
+    } catch (e) {
+      // Best-effort cleanup: ignore failures (e.g. the file was never created).
+    }
+  }
+
+  async function sleep(ms: number) {
+    return new Promise(resolve => setTimeout(resolve, ms));
+  }
+
  function createMockQueryWithResults(
    queryPath: string,
    didRunSuccessfully = true,