Retrieve external API usage snippets using SARIF

This commit is contained in:
Koen Vlaswinkel
2023-05-30 12:28:44 +02:00
parent 5a66d6ff2d
commit 5c81671e67
15 changed files with 333 additions and 58 deletions

View File

@@ -1050,6 +1050,7 @@ export class CodeQLCliServer implements Disposable {
resultsPath: string,
interpretedResultsPath: string,
sourceInfo?: SourceInfo,
args?: string[],
): Promise<sarif.Log> {
const additionalArgs = [
// TODO: This flag means that we don't group interpreted results
@@ -1057,6 +1058,7 @@ export class CodeQLCliServer implements Disposable {
// interpretation with and without this flag, or do some
// grouping client-side.
"--no-group-results",
...(args ?? []),
];
await this.runInterpretCommand(

View File

@@ -0,0 +1,129 @@
import { CancellationTokenSource } from "vscode";
import { join } from "path";
import { runQuery } from "./external-api-usage-query";
import { CodeQLCliServer } from "../codeql-cli/cli";
import { QueryRunner } from "../query-server";
import { DatabaseItem } from "../databases/local-databases";
import { interpretResultsSarif } from "../query-results";
import { ProgressCallback } from "../common/vscode/progress";
/**
 * Collaborators and settings consumed by `getAutoModelUsages`.
 *
 * The collaborator types are narrowed with `Pick` to the members listed
 * below, so callers (and tests) only have to supply those members.
 */
type Options = {
  /** Passed to the query run and joined with the interpreted SARIF file name. */
  queryStorageDir: string;
  /** Invoked with `{ maxStep, step, message }` updates while the work proceeds. */
  progress: ProgressCallback;
  /** Database the usages query runs against and whose source archive is read. */
  databaseItem: Pick<
    DatabaseItem,
    | "getSourceLocationPrefix"
    | "sourceArchive"
    | "language"
    | "databaseUri"
    | "contents"
  >;
  /** Runner handed to `runQuery` to execute the usages query. */
  queryRunner: Pick<QueryRunner, "logger" | "createQueryRun">;
  /** CLI server used to run the query, resolve the database and interpret results. */
  cliServer: Pick<
    CodeQLCliServer,
    "interpretBqrsSarif" | "resolveQlpacks" | "resolveDatabase"
  >;
};
/**
 * Runs the `usagesQuery` against the database, interprets the results as SARIF
 * with `--sarif-add-snippets`, and groups the context-region snippets of the
 * results by their message text (used here as the API signature).
 *
 * @param options CLI server, query runner, database item, query storage
 *   directory and progress callback to use.
 * @returns A record mapping each signature to the snippets of its results, in
 *   the order the SARIF results list them. Results without a signature or
 *   without a snippet are left out.
 * @throws Error "Query failed" if `runQuery` yields no result, and
 *   "No results" if the first SARIF run is missing or has no `results` array.
 */
export async function getAutoModelUsages({
  cliServer,
  queryRunner,
  databaseItem,
  queryStorageDir,
  progress,
}: Options): Promise<Record<string, string[]>> {
  const maxStep = 1500;

  const cancellationTokenSource = new CancellationTokenSource();

  let queryResult: Awaited<ReturnType<typeof runQuery>>;
  try {
    queryResult = await runQuery("usagesQuery", {
      cliServer,
      queryRunner,
      queryStorageDir,
      databaseItem,
      // Re-scale the query's own progress updates onto this function's range.
      progress: (update) =>
        progress({
          maxStep,
          step: update.step,
          message: update.message,
        }),
      token: cancellationTokenSource.token,
    });
  } finally {
    // The token is only needed while the query runs; release it on every path
    // (success or failure) so the source is not leaked.
    cancellationTokenSource.dispose();
  }

  if (!queryResult) {
    throw new Error("Query failed");
  }

  progress({
    maxStep,
    step: 1100,
    message: "Retrieving source location prefix",
  });

  const sourceLocationPrefix = await databaseItem.getSourceLocationPrefix(
    cliServer,
  );
  const sourceArchiveUri = databaseItem.sourceArchive;
  // Source info is only passed on when the database has a source archive.
  const sourceInfo =
    sourceArchiveUri === undefined
      ? undefined
      : {
          sourceArchive: sourceArchiveUri.fsPath,
          sourceLocationPrefix,
        };

  progress({
    maxStep,
    step: 1200,
    message: "Interpreting results",
  });

  const sarif = await interpretResultsSarif(
    cliServer,
    {
      kind: "problem",
      id: "usage",
    },
    {
      resultsPath: queryResult.outputDir.bqrsPath,
      interpretedResultsPath: join(
        queryStorageDir,
        "interpreted-results.sarif",
      ),
    },
    sourceInfo,
    ["--sarif-add-snippets"],
  );

  progress({
    maxStep,
    step: 1400,
    message: "Parsing results",
  });

  const results = sarif.runs[0]?.results;
  if (!results) {
    throw new Error("No results");
  }

  // Group in a Map rather than a plain object: an `in` check on an object also
  // matches inherited keys (e.g. "constructor", "toString"), which would make
  // the subsequent `.push` blow up for such a signature.
  const snippetsBySignature = new Map<string, string[]>();
  for (const result of results) {
    const signature = result.message.text;
    const snippet =
      result.locations?.[0]?.physicalLocation?.contextRegion?.snippet?.text;

    if (!signature || !snippet) {
      continue;
    }

    const collected = snippetsBySignature.get(signature);
    if (collected === undefined) {
      snippetsBySignature.set(signature, [snippet]);
    } else {
      collected.push(snippet);
    }
  }

  return Object.fromEntries(snippetsBySignature);
}

View File

@@ -11,6 +11,7 @@ export function createAutoModelRequest(
language: string,
externalApiUsages: ExternalApiUsage[],
modeledMethods: Record<string, ModeledMethod>,
usages: Record<string, string[]>,
): ModelRequest {
const request: ModelRequest = {
language,
@@ -29,6 +30,10 @@ export function createAutoModelRequest(
type: "none",
};
const usagesForMethod =
usages[externalApiUsage.signature] ??
externalApiUsage.usages.map((usage) => usage.label);
const numberOfArguments =
externalApiUsage.methodParameters === "()"
? 0
@@ -48,9 +53,7 @@ export function createAutoModelRequest(
modeledMethod.type === "none"
? undefined
: toMethodClassification(modeledMethod),
usages: externalApiUsage.usages
.slice(0, 10)
.map((usage) => usage.label),
usages: usagesForMethod.slice(0, 10),
input: `Argument[${argumentIndex}]`,
};

View File

@@ -45,6 +45,7 @@ import {
parsePredictedClassifications,
} from "./auto-model";
import { showLlmGeneration } from "../config";
import { getAutoModelUsages } from "./auto-model-usages-query";
function getQlSubmoduleFolder(): WorkspaceFolder | undefined {
const workspaceFolder = workspace.workspaceFolders?.find(
@@ -242,7 +243,7 @@ export class DataExtensionsEditorView extends AbstractWebview<
const cancellationTokenSource = new CancellationTokenSource();
try {
const queryResult = await runQuery({
const queryResult = await runQuery("mainQuery", {
cliServer: this.cliServer,
queryRunner: this.queryRunner,
databaseItem: this.databaseItem,
@@ -385,23 +386,66 @@ export class DataExtensionsEditorView extends AbstractWebview<
externalApiUsages: ExternalApiUsage[],
modeledMethods: Record<string, ModeledMethod>,
): Promise<void> {
const maxStep = 3000;
await this.showProgress({
step: 0,
maxStep,
message: "Retrieving usages",
});
const usages = await getAutoModelUsages({
cliServer: this.cliServer,
queryRunner: this.queryRunner,
queryStorageDir: this.queryStorageDir,
databaseItem: this.databaseItem,
progress: (update) => this.showProgress(update, maxStep),
});
await this.showProgress({
step: 1800,
maxStep,
message: "Creating request",
});
const request = createAutoModelRequest(
this.databaseItem.language,
externalApiUsages,
modeledMethods,
usages,
);
await this.showProgress({
step: 2000,
maxStep,
message: "Sending request",
});
const response = await autoModel(this.app.credentials, request);
await this.showProgress({
step: 2500,
maxStep,
message: "Parsing response",
});
const predictedModeledMethods = parsePredictedClassifications(
response.predicted,
);
await this.showProgress({
step: 2800,
maxStep,
message: "Applying results",
});
await this.postMessage({
t: "addModeledMethods",
modeledMethods: predictedModeledMethods,
overrideNone: true,
});
await this.clearProgress();
}
/*

View File

@@ -16,6 +16,7 @@ import { QueryResultType } from "../pure/new-messages";
import { join } from "path";
import { redactableError } from "../pure/errors";
import { QueryLanguage } from "../common/query-language";
import { Query } from "./queries/query";
export type RunQueryOptions = {
cliServer: Pick<CodeQLCliServer, "resolveQlpacks">;
@@ -27,14 +28,17 @@ export type RunQueryOptions = {
token: CancellationToken;
};
export async function runQuery({
export async function runQuery(
queryName: keyof Omit<Query, "dependencies">,
{
cliServer,
queryRunner,
databaseItem,
queryStorageDir,
progress,
token,
}: RunQueryOptions): Promise<CoreCompletedQuery | undefined> {
}: RunQueryOptions,
): Promise<CoreCompletedQuery | undefined> {
// The below code is temporary to allow for rapid prototyping of the queries. Once the queries are stabilized, we will
// move these queries into the `github/codeql` repository and use them like any other contextual (e.g. AST) queries.
// This is intentionally not pretty code, as it will be removed soon.
@@ -51,7 +55,7 @@ export async function runQuery({
const queryDir = (await dir({ unsafeCleanup: true })).path;
const queryFile = join(queryDir, "FetchExternalApis.ql");
await writeFile(queryFile, query.mainQuery, "utf8");
await writeFile(queryFile, query[queryName], "utf8");
if (query.dependencies) {
for (const [filename, contents] of Object.entries(query.dependencies)) {

View File

@@ -9,7 +9,6 @@ export const fetchExternalApisQuery: Query = {
*/
import csharp
import semmle.code.csharp.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl
import ExternalApi
private Call aUsage(ExternalApi api) {
@@ -29,6 +28,26 @@ export const fetchExternalApisQuery: Query = {
supported = isSupported(api) and
usage = aUsage(api)
select apiName, supported, usage
`,
usagesQuery: `/**
* @name Usage of APIs coming from external libraries
* @description A list of 3rd party APIs used in the codebase.
* @kind problem
* @id cs/telemetry/fetch-external-api-usages
*/
import csharp
import ExternalApi
private Call aUsage(ExternalApi api) {
result.getTarget().getUnboundDeclaration() = api
}
from ExternalApi api, string apiName, Call usage
where
apiName = api.getApiName() and
usage = aUsage(api)
select usage, apiName
`,
dependencies: {
"ExternalApi.qll": `/** Provides classes and predicates related to handling APIs from external libraries. */

View File

@@ -9,7 +9,6 @@ export const fetchExternalApisQuery: Query = {
*/
import java
import semmle.code.java.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl
import ExternalApi
private Call aUsage(ExternalApi api) {
@@ -29,6 +28,27 @@ where
supported = isSupported(api) and
usage = aUsage(api)
select apiName, supported, usage
`,
usagesQuery: `/**
* @name Usage of APIs coming from external libraries
* @description A list of 3rd party APIs used in the codebase. Excludes test and generated code.
* @kind problem
* @id java/telemetry/fetch-external-api-usages
*/
import java
import ExternalApi
private Call aUsage(ExternalApi api) {
result.getCallee().getSourceDeclaration() = api and
not result.getFile() instanceof GeneratedFile
}
from ExternalApi api, string apiName, Call usage
where
apiName = api.getApiName() and
usage = aUsage(api)
select usage, apiName
`,
dependencies: {
"ExternalApi.qll": `/** Provides classes and predicates related to handling APIs from external libraries. */

View File

@@ -1,5 +1,6 @@
export type Query = {
mainQuery: string;
usagesQuery: string;
dependencies?: {
[filename: string]: string;
};

View File

@@ -121,7 +121,9 @@ export class DatabaseItemImpl implements DatabaseItem {
/**
* Returns information about a database.
*/
private async getDbInfo(server: cli.CodeQLCliServer): Promise<cli.DbInfo> {
private async getDbInfo(
server: Pick<cli.CodeQLCliServer, "resolveDatabase">,
): Promise<cli.DbInfo> {
if (this._dbinfo === undefined) {
this._dbinfo = await server.resolveDatabase(this.databaseUri.fsPath);
}
@@ -133,7 +135,7 @@ export class DatabaseItemImpl implements DatabaseItem {
* has a `.dbinfo` file, which is the source of the prefix.
*/
public async getSourceLocationPrefix(
server: cli.CodeQLCliServer,
server: Pick<cli.CodeQLCliServer, "resolveDatabase">,
): Promise<string> {
const dbInfo = await this.getDbInfo(server);
return dbInfo.sourceLocationPrefix;
@@ -142,7 +144,9 @@ export class DatabaseItemImpl implements DatabaseItem {
/**
* Returns path to dataset folder of database.
*/
public async getDatasetFolder(server: cli.CodeQLCliServer): Promise<string> {
public async getDatasetFolder(
  server: Pick<cli.CodeQLCliServer, "resolveDatabase">,
): Promise<string> {
  // The dataset folder is read off the database info obtained via getDbInfo.
  const { datasetFolder } = await this.getDbInfo(server);
  return datasetFolder;
}

View File

@@ -43,12 +43,16 @@ export interface DatabaseItem {
/**
* Returns `sourceLocationPrefix` of exported database.
*/
getSourceLocationPrefix(server: cli.CodeQLCliServer): Promise<string>;
getSourceLocationPrefix(
server: Pick<cli.CodeQLCliServer, "resolveDatabase">,
): Promise<string>;
/**
* Returns dataset folder of exported database.
*/
getDatasetFolder(server: cli.CodeQLCliServer): Promise<string>;
getDatasetFolder(
server: Pick<cli.CodeQLCliServer, "resolveDatabase">,
): Promise<string>;
/**
* Returns the root uri of the virtual filesystem for this database's source archive,

View File

@@ -786,7 +786,7 @@ export async function askForLanguage(
* @returns A promise that resolves to the query metadata, if available.
*/
export async function tryGetQueryMetadata(
cliServer: CodeQLCliServer,
cliServer: Pick<CodeQLCliServer, "resolveMetadata">,
queryPath: string,
): Promise<QueryMetadata | undefined> {
try {

View File

@@ -135,10 +135,11 @@ export class CompletedQueryInfo implements QueryWithResults {
* Call cli command to interpret SARIF results.
*/
export async function interpretResultsSarif(
cli: cli.CodeQLCliServer,
cli: Pick<cli.CodeQLCliServer, "interpretBqrsSarif">,
metadata: QueryMetadata | undefined,
resultsPaths: ResultsPaths,
sourceInfo?: cli.SourceInfo,
args?: string[],
): Promise<SarifInterpretationData> {
const { resultsPath, interpretedResultsPath } = resultsPaths;
let res;
@@ -150,6 +151,7 @@ export async function interpretResultsSarif(
resultsPath,
interpretedResultsPath,
sourceInfo,
args,
);
}
return { ...res, t: "SarifInterpretationData" };

View File

@@ -200,9 +200,36 @@ describe("createAutoModelRequest", () => {
},
};
const usages: Record<string, string[]> = {
"org.springframework.boot.SpringApplication#run(Class,String[])": [
"public class Sql2oExampleApplication {\n public static void main(String[] args) {\n SpringApplication.run(Sql2oExampleApplication.class, args);\n }\n}",
],
"org.sql2o.Connection#createQuery(String)": [
' public String index(@RequestParam("id") String id) {\n try (var con = sql2o.open()) {\n con.createQuery("select 1 where id = " + id).executeScalar(Integer.class);\n }\n\n',
'\n try (var con = sql2o.open()) {\n con.createQuery("select 1").executeScalar(Integer.class);\n }\n\n',
],
"org.sql2o.Query#executeScalar(Class)": [
' public String index(@RequestParam("id") String id) {\n try (var con = sql2o.open()) {\n con.createQuery("select 1 where id = " + id).executeScalar(Integer.class);\n }\n\n',
'\n try (var con = sql2o.open()) {\n con.createQuery("select 1").executeScalar(Integer.class);\n }\n\n',
],
"org.sql2o.Sql2o#open()": [
' @GetMapping("/")\n public String index(@RequestParam("id") String id) {\n try (var con = sql2o.open()) {\n con.createQuery("select 1 where id = " + id).executeScalar(Integer.class);\n }\n',
' Sql2o sql2o = new Sql2o(url);\n\n try (var con = sql2o.open()) {\n con.createQuery("select 1").executeScalar(Integer.class);\n }\n',
],
"java.io.PrintStream#println(String)": [
' }\n\n System.out.println("Connected to " + url);\n\n return "Greetings from Spring Boot!";\n',
],
"org.sql2o.Sql2o#Sql2o(String,String,String)": [
'@RestController\npublic class HelloController {\n private final Sql2o sql2o = new Sql2o("jdbc:h2:mem:test;DB_CLOSE_DELAY=-1","sa", "");\n\n @GetMapping("/")\n',
],
"org.sql2o.Sql2o#Sql2o(String)": [
' @GetMapping("/connect")\n public String connect(@RequestParam("url") String url) {\n Sql2o sql2o = new Sql2o(url);\n\n try (var con = sql2o.open()) {\n',
],
};
it("creates a matching request", () => {
expect(
createAutoModelRequest("java", externalApiUsages, modeledMethods),
createAutoModelRequest("java", externalApiUsages, modeledMethods, usages),
).toEqual({
language: "java",
samples: [
@@ -216,7 +243,7 @@ describe("createAutoModelRequest", () => {
kind: "jndi-injection",
explanation: "",
},
usages: ["new Sql2o(...)"],
usages: usages["org.sql2o.Sql2o#Sql2o(String)"],
input: "Argument[0]",
},
],
@@ -226,64 +253,78 @@ describe("createAutoModelRequest", () => {
type: "Connection",
name: "createQuery",
signature: "(String)",
usages: ["createQuery(...)", "createQuery(...)"],
usages: usages["org.sql2o.Connection#createQuery(String)"],
input: "Argument[0]",
classification: undefined,
},
{
package: "org.sql2o",
type: "Query",
name: "executeScalar",
signature: "(Class)",
usages: ["executeScalar(...)", "executeScalar(...)"],
usages: usages["org.sql2o.Query#executeScalar(Class)"],
input: "Argument[0]",
classification: undefined,
},
{
package: "org.springframework.boot",
type: "SpringApplication",
name: "run",
signature: "(Class,String[])",
usages: ["run(...)"],
usages:
usages[
"org.springframework.boot.SpringApplication#run(Class,String[])"
],
input: "Argument[0]",
classification: undefined,
},
{
package: "org.springframework.boot",
type: "SpringApplication",
name: "run",
signature: "(Class,String[])",
usages: ["run(...)"],
usages:
usages[
"org.springframework.boot.SpringApplication#run(Class,String[])"
],
input: "Argument[1]",
classification: undefined,
},
{
package: "java.io",
type: "PrintStream",
name: "println",
signature: "(String)",
usages: ["println(...)"],
usages: usages["java.io.PrintStream#println(String)"],
input: "Argument[0]",
classification: undefined,
},
{
package: "org.sql2o",
type: "Sql2o",
name: "Sql2o",
signature: "(String,String,String)",
usages: ["new Sql2o(...)"],
usages: usages["org.sql2o.Sql2o#Sql2o(String,String,String)"],
input: "Argument[0]",
classification: undefined,
},
{
package: "org.sql2o",
type: "Sql2o",
name: "Sql2o",
signature: "(String,String,String)",
usages: ["new Sql2o(...)"],
usages: usages["org.sql2o.Sql2o#Sql2o(String,String,String)"],
input: "Argument[1]",
classification: undefined,
},
{
package: "org.sql2o",
type: "Sql2o",
name: "Sql2o",
signature: "(String,String,String)",
usages: ["new Sql2o(...)"],
usages: usages["org.sql2o.Sql2o#Sql2o(String,String,String)"],
input: "Argument[2]",
classification: undefined,
},
],
});

View File

@@ -66,7 +66,7 @@ describe("runQuery", () => {
onCancellationRequested: jest.fn(),
},
};
const result = await runQuery(options);
const result = await runQuery("mainQuery", options);
expect(result?.resultType).toEqual(QueryResultType.SUCCESS);

View File

@@ -225,6 +225,7 @@ describe("query-results", () => {
resultsPath,
interpretedResultsPath,
sourceInfo,
undefined,
);
},
2 * 60 * 1000, // up to 2 minutes per test
@@ -249,6 +250,7 @@ describe("query-results", () => {
resultsPath,
interpretedResultsPath,
sourceInfo,
undefined,
);
},
2 * 60 * 1000, // up to 2 minutes per test