Merge pull request #2457 from github/koesie10/auto-model-usages-sarif

Retrieve external API usage snippets using SARIF
This commit is contained in:
Koen Vlaswinkel
2023-06-01 11:28:42 +02:00
committed by GitHub
14 changed files with 325 additions and 72 deletions

View File

@@ -1073,6 +1073,7 @@ export class CodeQLCliServer implements Disposable {
resultsPath: string,
interpretedResultsPath: string,
sourceInfo?: SourceInfo,
args?: string[],
): Promise<sarif.Log> {
const additionalArgs = [
// TODO: This flag means that we don't group interpreted results
@@ -1080,6 +1081,7 @@ export class CodeQLCliServer implements Disposable {
// interpretation with and without this flag, or do some
// grouping client-side.
"--no-group-results",
...(args ?? []),
];
await this.runInterpretCommand(

View File

@@ -0,0 +1,136 @@
import { CancellationTokenSource } from "vscode";
import { join } from "path";
import { runQuery } from "./external-api-usage-query";
import { CodeQLCliServer } from "../codeql-cli/cli";
import { QueryRunner } from "../query-server";
import { DatabaseItem } from "../databases/local-databases";
import { interpretResultsSarif } from "../query-results";
import { ProgressCallback } from "../common/vscode/progress";
/** Inputs required by `getAutoModelUsages`. */
type Options = {
/** CLI server handed to the usage query, the source-location-prefix lookup and the SARIF interpretation. */
cliServer: CodeQLCliServer;
/** Query runner forwarded to `runQuery`. */
queryRunner: QueryRunner;
/** Database the query is run against; its source archive and source location prefix are used for SARIF interpretation. */
databaseItem: DatabaseItem;
/** Directory forwarded to `runQuery`; the `interpreted-results.sarif` path is also placed inside it. */
queryStorageDir: string;
/** Callback that receives `step` / `maxStep` / `message` progress updates. */
progress: ProgressCallback;
};
/**
 * Source code snippets of external API usages, keyed by the API signature
 * (the message text of the corresponding SARIF result).
 */
export type UsageSnippetsBySignature = Record<string, string[]>;
/**
 * Runs the external API usage query, interprets its results as SARIF with
 * snippets enabled, and groups the resulting snippets by API signature.
 *
 * Results that lack a message text or a context-region snippet are skipped.
 *
 * @param options The CLI server, query runner, database, storage directory and
 * progress callback to use. Progress is reported against a `maxStep` of 1500.
 * @returns A record mapping each signature to the snippets of its usages.
 * @throws Error with message "Query failed" when `runQuery` yields no result.
 * @throws Error with message "No results" when the first SARIF run has no
 * `results` array.
 */
export async function getAutoModelUsages({
  cliServer,
  queryRunner,
  databaseItem,
  queryStorageDir,
  progress,
}: Options): Promise<UsageSnippetsBySignature> {
  const maxStep = 1500;

  const cancellationTokenSource = new CancellationTokenSource();

  try {
    // This will re-run the query that was already run when opening the data extensions editor. This
    // might be unnecessary, but this makes it really easy to get the path to the BQRS file which we
    // need to interpret the results.
    const queryResult = await runQuery({
      cliServer,
      queryRunner,
      queryStorageDir,
      databaseItem,
      // Re-scale the query's own progress updates onto this function's step range.
      progress: (update) =>
        progress({
          maxStep,
          step: update.step,
          message: update.message,
        }),
      token: cancellationTokenSource.token,
    });
    if (!queryResult) {
      throw new Error("Query failed");
    }

    progress({
      maxStep,
      step: 1100,
      message: "Retrieving source location prefix",
    });

    // CodeQL needs to have access to the database to be able to retrieve the
    // snippets from it. The source location prefix is used to determine the
    // base path of the database.
    const sourceLocationPrefix = await databaseItem.getSourceLocationPrefix(
      cliServer,
    );
    const sourceArchiveUri = databaseItem.sourceArchive;
    const sourceInfo =
      sourceArchiveUri === undefined
        ? undefined
        : {
            sourceArchive: sourceArchiveUri.fsPath,
            sourceLocationPrefix,
          };

    progress({
      maxStep,
      step: 1200,
      message: "Interpreting results",
    });

    // Convert the results to SARIF so that CodeQL will retrieve the snippets
    // from the database. This means we don't need to do that in the extension
    // and everything is handled by the CodeQL CLI.
    const sarif = await interpretResultsSarif(
      cliServer,
      {
        // To interpret the results we need to provide metadata about the query. We could do this using
        // `resolveMetadata` but that would be an extra call to the CodeQL CLI server and would require
        // us to know the path to the query on the filesystem. Since we know what the metadata should
        // look like and the only metadata that the CodeQL CLI requires is an ID and the kind, we can
        // simply use constants here.
        kind: "problem",
        id: "usage",
      },
      {
        resultsPath: queryResult.outputDir.bqrsPath,
        interpretedResultsPath: join(
          queryStorageDir,
          "interpreted-results.sarif",
        ),
      },
      sourceInfo,
      ["--sarif-add-snippets"],
    );

    progress({
      maxStep,
      step: 1400,
      message: "Parsing results",
    });

    const snippets: UsageSnippetsBySignature = {};

    const results = sarif.runs[0]?.results;
    if (!results) {
      throw new Error("No results");
    }

    // This will group the snippets by the method signature.
    for (const result of results) {
      const signature = result.message.text;

      const snippet =
        result.locations?.[0]?.physicalLocation?.contextRegion?.snippet?.text;

      if (!signature || !snippet) {
        continue;
      }

      // Use an own-property check rather than `in`, so that a signature which
      // happens to equal an inherited key (e.g. "constructor") is still
      // initialised to an array instead of resolving to the prototype member.
      if (!Object.prototype.hasOwnProperty.call(snippets, signature)) {
        snippets[signature] = [];
      }

      snippets[signature].push(snippet);
    }

    return snippets;
  } finally {
    // The token source is created locally, so release it on every exit path.
    cancellationTokenSource.dispose();
  }
}

View File

@@ -6,11 +6,13 @@ import {
Method,
ModelRequest,
} from "./auto-model-api";
import type { UsageSnippetsBySignature } from "./auto-model-usages-query";
export function createAutoModelRequest(
language: string,
externalApiUsages: ExternalApiUsage[],
modeledMethods: Record<string, ModeledMethod>,
usages: UsageSnippetsBySignature,
): ModelRequest {
const request: ModelRequest = {
language,
@@ -29,6 +31,10 @@ export function createAutoModelRequest(
type: "none",
};
const usagesForMethod =
usages[externalApiUsage.signature] ??
externalApiUsage.usages.map((usage) => usage.label);
const numberOfArguments =
externalApiUsage.methodParameters === "()"
? 0
@@ -48,9 +54,7 @@ export function createAutoModelRequest(
modeledMethod.type === "none"
? undefined
: toMethodClassification(modeledMethod),
usages: externalApiUsage.usages
.slice(0, 10)
.map((usage) => usage.label),
usages: usagesForMethod.slice(0, 10),
input: `Argument[${argumentIndex}]`,
};

View File

@@ -7,9 +7,9 @@ export function decodeBqrsToExternalApiUsages(
const methodsByApiName = new Map<string, ExternalApiUsage>();
chunk?.tuples.forEach((tuple) => {
const signature = tuple[0] as string;
const supported = tuple[1] as boolean;
const usage = tuple[2] as Call;
const usage = tuple[0] as Call;
const signature = tuple[1] as string;
const supported = (tuple[2] as string) === "true";
const [packageWithType, methodDeclaration] = signature.split("#");

View File

@@ -45,6 +45,7 @@ import {
parsePredictedClassifications,
} from "./auto-model";
import { showLlmGeneration } from "../config";
import { getAutoModelUsages } from "./auto-model-usages-query";
function getQlSubmoduleFolder(): WorkspaceFolder | undefined {
const workspaceFolder = workspace.workspaceFolders?.find(
@@ -385,23 +386,66 @@ export class DataExtensionsEditorView extends AbstractWebview<
externalApiUsages: ExternalApiUsage[],
modeledMethods: Record<string, ModeledMethod>,
): Promise<void> {
const maxStep = 3000;
await this.showProgress({
step: 0,
maxStep,
message: "Retrieving usages",
});
const usages = await getAutoModelUsages({
cliServer: this.cliServer,
queryRunner: this.queryRunner,
queryStorageDir: this.queryStorageDir,
databaseItem: this.databaseItem,
progress: (update) => this.showProgress(update, maxStep),
});
await this.showProgress({
step: 1800,
maxStep,
message: "Creating request",
});
const request = createAutoModelRequest(
this.databaseItem.language,
externalApiUsages,
modeledMethods,
usages,
);
await this.showProgress({
step: 2000,
maxStep,
message: "Sending request",
});
const response = await autoModel(this.app.credentials, request);
await this.showProgress({
step: 2500,
maxStep,
message: "Parsing response",
});
const predictedModeledMethods = parsePredictedClassifications(
response.predicted,
);
await this.showProgress({
step: 2800,
maxStep,
message: "Applying results",
});
await this.postMessage({
t: "addModeledMethods",
modeledMethods: predictedModeledMethods,
overrideNone: true,
});
await this.clearProgress();
}
/*

View File

@@ -5,30 +5,28 @@ export const fetchExternalApisQuery: Query = {
* @name Usage of APIs coming from external libraries
* @description A list of 3rd party APIs used in the codebase.
* @tags telemetry
* @kind problem
* @id cs/telemetry/fetch-external-apis
*/
import csharp
import semmle.code.csharp.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl
import ExternalApi
private Call aUsage(ExternalApi api) {
result.getTarget().getUnboundDeclaration() = api
}
private boolean isSupported(ExternalApi api) {
api.isSupported() and result = true
or
not api.isSupported() and
result = false
}
from ExternalApi api, string apiName, boolean supported, Call usage
where
apiName = api.getApiName() and
supported = isSupported(api) and
usage = aUsage(api)
select apiName, supported, usage
import csharp
import ExternalApi
private Call aUsage(ExternalApi api) { result.getTarget().getUnboundDeclaration() = api }
private boolean isSupported(ExternalApi api) {
api.isSupported() and result = true
or
not api.isSupported() and
result = false
}
from ExternalApi api, string apiName, boolean supported, Call usage
where
apiName = api.getApiName() and
supported = isSupported(api) and
usage = aUsage(api)
select usage, apiName, supported.toString(), "supported"
`,
dependencies: {
"ExternalApi.qll": `/** Provides classes and predicates related to handling APIs from external libraries. */

View File

@@ -5,11 +5,11 @@ export const fetchExternalApisQuery: Query = {
* @name Usage of APIs coming from external libraries
* @description A list of 3rd party APIs used in the codebase. Excludes test and generated code.
* @tags telemetry
* @kind problem
* @id java/telemetry/fetch-external-apis
*/
import java
import semmle.code.java.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl
import ExternalApi
private Call aUsage(ExternalApi api) {
@@ -28,7 +28,7 @@ where
apiName = api.getApiName() and
supported = isSupported(api) and
usage = aUsage(api)
select apiName, supported, usage
select usage, apiName, supported.toString(), "supported"
`,
dependencies: {
"ExternalApi.qll": `/** Provides classes and predicates related to handling APIs from external libraries. */

View File

@@ -1,4 +1,13 @@
export type Query = {
/**
* The main query.
*
* It should select all usages of external APIs, and return the following result pattern:
* - usage: the usage of the external API. This is an entity.
* - apiName: the name of the external API. This is a string.
* - supported: whether the external API is supported by the extension. This should be a string representation of a boolean to satisfy the result pattern for a problem query.
* - "supported": a string literal. This is required to make the query a valid problem query.
*/
mainQuery: string;
dependencies?: {
[filename: string]: string;

View File

@@ -115,7 +115,7 @@ export type BqrsKind =
| "Entity";
interface BqrsColumn {
name: string;
name?: string;
kind: BqrsKind;
}
export interface DecodedBqrsChunk {

View File

@@ -139,6 +139,7 @@ export async function interpretResultsSarif(
metadata: QueryMetadata | undefined,
resultsPaths: ResultsPaths,
sourceInfo?: cli.SourceInfo,
args?: string[],
): Promise<SarifInterpretationData> {
const { resultsPath, interpretedResultsPath } = resultsPaths;
let res;
@@ -150,6 +151,7 @@ export async function interpretResultsSarif(
resultsPath,
interpretedResultsPath,
sourceInfo,
args,
);
}
return { ...res, t: "SarifInterpretationData" };

View File

@@ -200,9 +200,36 @@ describe("createAutoModelRequest", () => {
},
};
const usages: Record<string, string[]> = {
"org.springframework.boot.SpringApplication#run(Class,String[])": [
"public class Sql2oExampleApplication {\n public static void main(String[] args) {\n SpringApplication.run(Sql2oExampleApplication.class, args);\n }\n}",
],
"org.sql2o.Connection#createQuery(String)": [
' public String index(@RequestParam("id") String id) {\n try (var con = sql2o.open()) {\n con.createQuery("select 1 where id = " + id).executeScalar(Integer.class);\n }\n\n',
'\n try (var con = sql2o.open()) {\n con.createQuery("select 1").executeScalar(Integer.class);\n }\n\n',
],
"org.sql2o.Query#executeScalar(Class)": [
' public String index(@RequestParam("id") String id) {\n try (var con = sql2o.open()) {\n con.createQuery("select 1 where id = " + id).executeScalar(Integer.class);\n }\n\n',
'\n try (var con = sql2o.open()) {\n con.createQuery("select 1").executeScalar(Integer.class);\n }\n\n',
],
"org.sql2o.Sql2o#open()": [
' @GetMapping("/")\n public String index(@RequestParam("id") String id) {\n try (var con = sql2o.open()) {\n con.createQuery("select 1 where id = " + id).executeScalar(Integer.class);\n }\n',
' Sql2o sql2o = new Sql2o(url);\n\n try (var con = sql2o.open()) {\n con.createQuery("select 1").executeScalar(Integer.class);\n }\n',
],
"java.io.PrintStream#println(String)": [
' }\n\n System.out.println("Connected to " + url);\n\n return "Greetings from Spring Boot!";\n',
],
"org.sql2o.Sql2o#Sql2o(String,String,String)": [
'@RestController\npublic class HelloController {\n private final Sql2o sql2o = new Sql2o("jdbc:h2:mem:test;DB_CLOSE_DELAY=-1","sa", "");\n\n @GetMapping("/")\n',
],
"org.sql2o.Sql2o#Sql2o(String)": [
' @GetMapping("/connect")\n public String connect(@RequestParam("url") String url) {\n Sql2o sql2o = new Sql2o(url);\n\n try (var con = sql2o.open()) {\n',
],
};
it("creates a matching request", () => {
expect(
createAutoModelRequest("java", externalApiUsages, modeledMethods),
createAutoModelRequest("java", externalApiUsages, modeledMethods, usages),
).toEqual({
language: "java",
samples: [
@@ -216,7 +243,7 @@ describe("createAutoModelRequest", () => {
kind: "jndi-injection",
explanation: "",
},
usages: ["new Sql2o(...)"],
usages: usages["org.sql2o.Sql2o#Sql2o(String)"],
input: "Argument[0]",
},
],
@@ -226,64 +253,78 @@ describe("createAutoModelRequest", () => {
type: "Connection",
name: "createQuery",
signature: "(String)",
usages: ["createQuery(...)", "createQuery(...)"],
usages: usages["org.sql2o.Connection#createQuery(String)"],
input: "Argument[0]",
classification: undefined,
},
{
package: "org.sql2o",
type: "Query",
name: "executeScalar",
signature: "(Class)",
usages: ["executeScalar(...)", "executeScalar(...)"],
usages: usages["org.sql2o.Query#executeScalar(Class)"],
input: "Argument[0]",
classification: undefined,
},
{
package: "org.springframework.boot",
type: "SpringApplication",
name: "run",
signature: "(Class,String[])",
usages: ["run(...)"],
usages:
usages[
"org.springframework.boot.SpringApplication#run(Class,String[])"
],
input: "Argument[0]",
classification: undefined,
},
{
package: "org.springframework.boot",
type: "SpringApplication",
name: "run",
signature: "(Class,String[])",
usages: ["run(...)"],
usages:
usages[
"org.springframework.boot.SpringApplication#run(Class,String[])"
],
input: "Argument[1]",
classification: undefined,
},
{
package: "java.io",
type: "PrintStream",
name: "println",
signature: "(String)",
usages: ["println(...)"],
usages: usages["java.io.PrintStream#println(String)"],
input: "Argument[0]",
classification: undefined,
},
{
package: "org.sql2o",
type: "Sql2o",
name: "Sql2o",
signature: "(String,String,String)",
usages: ["new Sql2o(...)"],
usages: usages["org.sql2o.Sql2o#Sql2o(String,String,String)"],
input: "Argument[0]",
classification: undefined,
},
{
package: "org.sql2o",
type: "Sql2o",
name: "Sql2o",
signature: "(String,String,String)",
usages: ["new Sql2o(...)"],
usages: usages["org.sql2o.Sql2o#Sql2o(String,String,String)"],
input: "Argument[1]",
classification: undefined,
},
{
package: "org.sql2o",
type: "Sql2o",
name: "Sql2o",
signature: "(String,String,String)",
usages: ["new Sql2o(...)"],
usages: usages["org.sql2o.Sql2o#Sql2o(String,String,String)"],
input: "Argument[2]",
classification: undefined,
},
],
});

View File

@@ -4,14 +4,13 @@ import { DecodedBqrsChunk } from "../../../src/pure/bqrs-cli-types";
describe("decodeBqrsToExternalApiUsages", () => {
const chunk: DecodedBqrsChunk = {
columns: [
{ name: "apiName", kind: "String" },
{ name: "supported", kind: "Boolean" },
{ name: "usage", kind: "Entity" },
{ name: "apiName", kind: "String" },
{ kind: "String" },
{ kind: "String" },
],
tuples: [
[
"java.io.PrintStream#println(String)",
true,
{
label: "println(...)",
url: {
@@ -22,10 +21,11 @@ describe("decodeBqrsToExternalApiUsages", () => {
endColumn: 49,
},
},
"java.io.PrintStream#println(String)",
"true",
"supported",
],
[
"org.springframework.boot.SpringApplication#run(Class,String[])",
false,
{
label: "run(...)",
url: {
@@ -36,10 +36,11 @@ describe("decodeBqrsToExternalApiUsages", () => {
endColumn: 66,
},
},
"org.springframework.boot.SpringApplication#run(Class,String[])",
"false",
"supported",
],
[
"org.sql2o.Connection#createQuery(String)",
true,
{
label: "createQuery(...)",
url: {
@@ -50,10 +51,11 @@ describe("decodeBqrsToExternalApiUsages", () => {
endColumn: 56,
},
},
"org.sql2o.Connection#createQuery(String)",
"true",
"supported",
],
[
"org.sql2o.Connection#createQuery(String)",
true,
{
label: "createQuery(...)",
url: {
@@ -64,10 +66,11 @@ describe("decodeBqrsToExternalApiUsages", () => {
endColumn: 39,
},
},
"org.sql2o.Connection#createQuery(String)",
"true",
"supported",
],
[
"org.sql2o.Query#executeScalar(Class)",
true,
{
label: "executeScalar(...)",
url: {
@@ -78,10 +81,11 @@ describe("decodeBqrsToExternalApiUsages", () => {
endColumn: 85,
},
},
"org.sql2o.Query#executeScalar(Class)",
"true",
"supported",
],
[
"org.sql2o.Query#executeScalar(Class)",
true,
{
label: "executeScalar(...)",
url: {
@@ -92,10 +96,11 @@ describe("decodeBqrsToExternalApiUsages", () => {
endColumn: 68,
},
},
"org.sql2o.Query#executeScalar(Class)",
"true",
"supported",
],
[
"org.sql2o.Sql2o#open()",
true,
{
label: "open(...)",
url: {
@@ -106,10 +111,11 @@ describe("decodeBqrsToExternalApiUsages", () => {
endColumn: 35,
},
},
"org.sql2o.Sql2o#open()",
"true",
"supported",
],
[
"org.sql2o.Sql2o#open()",
true,
{
label: "open(...)",
url: {
@@ -120,10 +126,11 @@ describe("decodeBqrsToExternalApiUsages", () => {
endColumn: 35,
},
},
"org.sql2o.Sql2o#open()",
"true",
"supported",
],
[
"org.sql2o.Sql2o#Sql2o(String,String,String)",
true,
{
label: "new Sql2o(...)",
url: {
@@ -134,10 +141,11 @@ describe("decodeBqrsToExternalApiUsages", () => {
endColumn: 88,
},
},
"org.sql2o.Sql2o#Sql2o(String,String,String)",
"true",
"supported",
],
[
"org.sql2o.Sql2o#Sql2o(String)",
true,
{
label: "new Sql2o(...)",
url: {
@@ -148,6 +156,9 @@ describe("decodeBqrsToExternalApiUsages", () => {
endColumn: 36,
},
},
"org.sql2o.Sql2o#Sql2o(String)",
"true",
"supported",
],
],
};

View File

@@ -10,7 +10,7 @@ import { QueryResultType } from "../../../../src/pure/new-messages";
import { readdir, readFile } from "fs-extra";
import { load } from "js-yaml";
import { dirname, join } from "path";
import { fetchExternalApiQueries } from "../../../../src/data-extensions-editor/queries/index";
import { fetchExternalApiQueries } from "../../../../src/data-extensions-editor/queries";
import * as helpers from "../../../../src/helpers";
import { RedactableError } from "../../../../src/pure/errors";
@@ -162,18 +162,20 @@ describe("readQueryResults", () => {
name: "#select",
rows: 10,
columns: [
{ name: "apiName", kind: "s" },
{ name: "supported", kind: "b" },
{ name: "usage", kind: "e" },
{ name: "apiName", kind: "s" },
{ kind: "s" },
{ kind: "s" },
],
},
{
name: "#select2",
rows: 10,
columns: [
{ name: "apiName", kind: "s" },
{ name: "supported", kind: "b" },
{ name: "usage", kind: "e" },
{ name: "apiName", kind: "s" },
{ kind: "s" },
{ kind: "s" },
],
},
],
@@ -192,9 +194,10 @@ describe("readQueryResults", () => {
name: "#select",
rows: 10,
columns: [
{ name: "apiName", kind: "s" },
{ name: "supported", kind: "b" },
{ name: "usage", kind: "e" },
{ name: "apiName", kind: "s" },
{ kind: "s" },
{ kind: "s" },
],
},
],
@@ -202,9 +205,10 @@ describe("readQueryResults", () => {
});
const decodedResultSet = {
columns: [
{ name: "apiName", kind: "String" },
{ name: "supported", kind: "Boolean" },
{ name: "usage", kind: "Entity" },
{ name: "usage", kind: "e" },
{ name: "apiName", kind: "s" },
{ kind: "s" },
{ kind: "s" },
],
tuples: [
[

View File

@@ -225,6 +225,7 @@ describe("query-results", () => {
resultsPath,
interpretedResultsPath,
sourceInfo,
undefined,
);
},
2 * 60 * 1000, // up to 2 minutes per test
@@ -249,6 +250,7 @@ describe("query-results", () => {
resultsPath,
interpretedResultsPath,
sourceInfo,
undefined,
);
},
2 * 60 * 1000, // up to 2 minutes per test