Merge pull request #2670 from github/starcke/apply-slice-filter
Use filtering queries to do batched AI querying
This commit is contained in:
@@ -17,6 +17,10 @@ import { redactableError } from "../common/errors";
|
||||
import { interpretResultsSarif } from "../query-results";
|
||||
import { join } from "path";
|
||||
import { assertNever } from "../common/helpers-pure";
|
||||
import { dir } from "tmp-promise";
|
||||
import { writeFile, outputFile } from "fs-extra";
|
||||
import { dump as dumpYaml } from "js-yaml";
|
||||
import { MethodSignature } from "./external-api-usage";
|
||||
|
||||
type AutoModelQueryOptions = {
|
||||
queryTag: string;
|
||||
@@ -26,6 +30,7 @@ type AutoModelQueryOptions = {
|
||||
databaseItem: DatabaseItem;
|
||||
qlpack: QlPacksForLanguage;
|
||||
sourceInfo: SourceInfo | undefined;
|
||||
additionalPacks: string[];
|
||||
extensionPacks: string[];
|
||||
queryStorageDir: string;
|
||||
|
||||
@@ -52,6 +57,7 @@ async function runAutoModelQuery({
|
||||
databaseItem,
|
||||
qlpack,
|
||||
sourceInfo,
|
||||
additionalPacks,
|
||||
extensionPacks,
|
||||
queryStorageDir,
|
||||
progress,
|
||||
@@ -99,7 +105,7 @@ async function runAutoModelQuery({
|
||||
quickEvalCountOnly: false,
|
||||
},
|
||||
false,
|
||||
getOnDiskWorkspaceFolders(),
|
||||
additionalPacks,
|
||||
extensionPacks,
|
||||
queryStorageDir,
|
||||
undefined,
|
||||
@@ -147,6 +153,7 @@ async function runAutoModelQuery({
|
||||
|
||||
type AutoModelQueriesOptions = {
|
||||
mode: Mode;
|
||||
candidateMethods: MethodSignature[];
|
||||
cliServer: CodeQLCliServer;
|
||||
queryRunner: QueryRunner;
|
||||
databaseItem: DatabaseItem;
|
||||
@@ -161,6 +168,7 @@ export type AutoModelQueriesResult = {
|
||||
|
||||
export async function runAutoModelQueries({
|
||||
mode,
|
||||
candidateMethods,
|
||||
cliServer,
|
||||
queryRunner,
|
||||
databaseItem,
|
||||
@@ -189,7 +197,13 @@ export async function runAutoModelQueries({
|
||||
sourceLocationPrefix,
|
||||
};
|
||||
|
||||
const additionalPacks = getOnDiskWorkspaceFolders();
|
||||
// Generate a pack containing the candidate filters
|
||||
const filterPackDir = await generateCandidateFilterPack(
|
||||
databaseItem.language,
|
||||
candidateMethods,
|
||||
);
|
||||
|
||||
const additionalPacks = [...getOnDiskWorkspaceFolders(), filterPackDir];
|
||||
const extensionPacks = Object.keys(
|
||||
await cliServer.resolveQlpacks(additionalPacks, true),
|
||||
);
|
||||
@@ -208,6 +222,7 @@ export async function runAutoModelQueries({
|
||||
databaseItem,
|
||||
qlpack,
|
||||
sourceInfo,
|
||||
additionalPacks,
|
||||
extensionPacks,
|
||||
queryStorageDir,
|
||||
progress: (update) => {
|
||||
@@ -228,3 +243,59 @@ export async function runAutoModelQueries({
|
||||
candidates,
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Writes a throwaway CodeQL extension pack that restricts the automodel
 * queries to a given set of candidate methods.
 *
 * Two files are written into a freshly created temporary directory:
 * - `codeql-pack.yml`, declaring a library pack named `codeql/automodel-filter`
 *   that targets `codeql/<language>-queries`;
 * - `filter.yml`, adding one row per candidate method to the extensible
 *   predicate `automodelCandidateFilter`.
 *
 * @param language Language name spliced into the target pack name
 *   `codeql/<language>-queries`.
 * @param candidateMethods Methods to emit as rows of the filter predicate.
 * @returns Path of the temporary directory that holds the generated pack.
 */
export async function generateCandidateFilterPack(
  language: string,
  candidateMethods: MethodSignature[],
): Promise<string> {
  // Both the pack manifest and the filter extension refer to the same query pack.
  const targetPack = `codeql/${language}-queries`;

  // Keep the pack in a temporary directory so the workspace is left untouched.
  const { path: packDir } = await dir({ unsafeCleanup: true });

  const packManifest = dumpYaml({
    name: "codeql/automodel-filter",
    version: "0.0.0",
    library: true,
    extensionTargets: {
      [targetPack]: "*",
    },
    dataExtensions: ["filter.yml"],
  });
  await outputFile(join(packDir, "codeql-pack.yml"), packManifest, "utf8");

  // Each row follows the column order of the predicate definition:
  // extensible predicate automodelCandidateFilter(string package, string type, string name, string signature)
  const filterRows = candidateMethods.map(
    ({ packageName, typeName, methodName, methodParameters }) => [
      packageName,
      typeName,
      methodName,
      methodParameters,
    ],
  );

  const filterExtension = dumpYaml({
    extensions: [
      {
        addsTo: {
          pack: targetPack,
          extensible: "automodelCandidateFilter",
        },
        data: filterRows,
      },
    ],
  });
  await writeFile(join(packDir, "filter.yml"), filterExtension, "utf8");

  return packDir;
}
|
||||
|
||||
@@ -4,6 +4,63 @@ import { AutoModelQueriesResult } from "./auto-model-codeml-queries";
|
||||
import { assertNever } from "../common/helpers-pure";
|
||||
import * as Sarif from "sarif";
|
||||
import { gzipEncode } from "../common/zlib";
|
||||
import { ExternalApiUsage, MethodSignature } from "./external-api-usage";
|
||||
import { ModeledMethod } from "./modeled-method";
|
||||
import { groupMethods, sortGroupNames, sortMethods } from "./shared/sorting";
|
||||
|
||||
// Soft cap on how many candidates are sent to the model.
// The model may come back with fewer candidates than this.
const candidateLimit = 20;

/**
 * Select the methods the model should be run on.
 *
 * Usages are ordered the same way as in the UI, anything that already has a
 * model or is supported is dropped, and the remainder is truncated to the
 * candidate limit.
 *
 * @param mode Whether it is application or framework mode.
 * @param externalApiUsages All external API usages.
 * @param modeledMethods The currently modeled methods, keyed by signature.
 * @returns The first unmodeled, unsupported usages in UI order, at most
 *   `candidateLimit` of them.
 */
export function getCandidates(
  mode: Mode,
  externalApiUsages: ExternalApiUsage[],
  modeledMethods: Record<string, ModeledMethod>,
): MethodSignature[] {
  // Mirror the UI ordering so the methods listed first in the UI are sent first.
  const methodsByGroup = groupMethods(externalApiUsages, mode);
  const orderedUsages = sortGroupNames(methodsByGroup).flatMap((groupName) =>
    sortMethods(methodsByGroup[groupName]),
  );

  const isCandidate = (usage: ExternalApiUsage): boolean => {
    // An existing entry whose type is anything other than "none" means the
    // method is already modeled; a missing entry counts as "none".
    const existingModel = modeledMethods[usage.signature];
    if (existingModel != null && existingModel.type !== "none") {
      return false;
    }

    // A supported method is modeled outside of the model file, so it is not a candidate.
    return !usage.supported;
  };

  // Keep only the leading candidates, up to the limit.
  return orderedUsages.filter(isCandidate).slice(0, candidateLimit);
}
|
||||
|
||||
/**
|
||||
* Encode a SARIF log to the format expected by the server: JSON, GZIP-compressed, base64-encoded
|
||||
|
||||
@@ -56,9 +56,10 @@ import { join } from "path";
|
||||
import { pickExtensionPack } from "./extension-pack-picker";
|
||||
import { getLanguageDisplayName } from "../common/query-language";
|
||||
import { runAutoModelQueries } from "./auto-model-codeml-queries";
|
||||
import { createAutoModelV2Request } from "./auto-model-v2";
|
||||
import { createAutoModelV2Request, getCandidates } from "./auto-model-v2";
|
||||
import { load as loadYaml } from "js-yaml";
|
||||
import { loadDataExtensionYaml } from "./yaml";
|
||||
import { extLogger } from "../common/logging/vscode";
|
||||
|
||||
export class DataExtensionsEditorView extends AbstractWebview<
|
||||
ToDataExtensionsEditorMessage,
|
||||
@@ -377,8 +378,22 @@ export class DataExtensionsEditorView extends AbstractWebview<
|
||||
let predictedModeledMethods: Record<string, ModeledMethod>;
|
||||
|
||||
if (useLlmGenerationV2()) {
|
||||
// Fetch the candidates to send to the model
|
||||
const candidateMethods = getCandidates(
|
||||
this.mode,
|
||||
externalApiUsages,
|
||||
modeledMethods,
|
||||
);
|
||||
|
||||
// If there are no candidates, there is nothing to model and we just return
|
||||
if (candidateMethods.length === 0) {
|
||||
void extLogger.log("No candidates to model. Stopping.");
|
||||
return;
|
||||
}
|
||||
|
||||
const usages = await runAutoModelQueries({
|
||||
mode: this.mode,
|
||||
candidateMethods,
|
||||
cliServer: this.cliServer,
|
||||
queryRunner: this.queryRunner,
|
||||
queryStorageDir: this.queryStorageDir,
|
||||
@@ -418,12 +433,33 @@ export class DataExtensionsEditorView extends AbstractWebview<
|
||||
filename: "auto-model.yml",
|
||||
});
|
||||
|
||||
const modeledMethods = loadDataExtensionYaml(models);
|
||||
if (!modeledMethods) {
|
||||
const loadedMethods = loadDataExtensionYaml(models);
|
||||
if (!loadedMethods) {
|
||||
return;
|
||||
}
|
||||
|
||||
predictedModeledMethods = modeledMethods;
|
||||
// Any candidate that was not part of the response is a negative result
|
||||
// meaning that the candidate is not a sink for the kinds that the LLM is checking for.
|
||||
// For now we model this as a sink neutral method, however this is subject
|
||||
// to discussion.
|
||||
for (const candidate of candidateMethods) {
|
||||
if (!(candidate.signature in loadedMethods)) {
|
||||
loadedMethods[candidate.signature] = {
|
||||
type: "neutral",
|
||||
kind: "sink",
|
||||
input: "",
|
||||
output: "",
|
||||
provenance: "ai-generated",
|
||||
signature: candidate.signature,
|
||||
packageName: candidate.packageName,
|
||||
typeName: candidate.typeName,
|
||||
methodName: candidate.methodName,
|
||||
methodParameters: candidate.methodParameters,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
predictedModeledMethods = loadedMethods;
|
||||
} else {
|
||||
const usages = await getAutoModelUsages({
|
||||
cliServer: this.cliServer,
|
||||
|
||||
@@ -1,12 +1,15 @@
|
||||
import {
|
||||
createAutoModelV2Request,
|
||||
encodeSarif,
|
||||
getCandidates,
|
||||
} from "../../../src/data-extensions-editor/auto-model-v2";
|
||||
import { Mode } from "../../../src/data-extensions-editor/shared/mode";
|
||||
import { AutomodelMode } from "../../../src/data-extensions-editor/auto-model-api-v2";
|
||||
import { AutoModelQueriesResult } from "../../../src/data-extensions-editor/auto-model-codeml-queries";
|
||||
import * as sarif from "sarif";
|
||||
import { gzipDecode } from "../../../src/common/zlib";
|
||||
import { ExternalApiUsage } from "../../../src/data-extensions-editor/external-api-usage";
|
||||
import { ModeledMethod } from "../../../src/data-extensions-editor/modeled-method";
|
||||
|
||||
describe("createAutoModelV2Request", () => {
|
||||
const createSarifLog = (queryId: string): sarif.Log => {
|
||||
@@ -80,3 +83,110 @@ describe("createAutoModelV2Request", () => {
|
||||
expect(parsed).toEqual(result.candidates);
|
||||
});
|
||||
});
|
||||
|
||||
describe("getCandidates", () => {
  // Fixture factory: an unsupported usage of `org.my.A#<methodName>()` with no
  // recorded usages. Individual fields can be overridden per test.
  const makeUsage = (
    methodName: string,
    overrides: Partial<ExternalApiUsage> = {},
  ): ExternalApiUsage => ({
    library: "my.jar",
    signature: `org.my.A#${methodName}()`,
    packageName: "org.my",
    typeName: "A",
    methodName,
    methodParameters: "()",
    supported: false,
    supportedType: "none",
    usages: [],
    ...overrides,
  });

  it("doesn't return methods that are already modelled", () => {
    const usage = makeUsage("x");
    // A manually written neutral model for the very same signature.
    const modeledMethods: Record<string, ModeledMethod> = {
      [usage.signature]: {
        type: "neutral",
        kind: "",
        input: "",
        output: "",
        provenance: "manual",
        signature: "org.my.A#x()",
        packageName: "org.my",
        typeName: "A",
        methodName: "x",
        methodParameters: "()",
      },
    };

    const candidates = getCandidates(Mode.Application, [usage], modeledMethods);

    expect(candidates).toHaveLength(0);
  });

  it("doesn't return methods that are supported from other sources", () => {
    const supportedUsage = makeUsage("x", { supported: true });

    const candidates = getCandidates(Mode.Application, [supportedUsage], {});

    expect(candidates).toHaveLength(0);
  });

  it("returns methods that are neither modeled nor supported from other sources", () => {
    const candidates = getCandidates(Mode.Application, [makeUsage("x")], {});

    expect(candidates).toHaveLength(1);
  });

  it("respects the limit", () => {
    // 30 distinct unmodeled usages, more than the limit of 20.
    const externalApiUsages = Array.from({ length: 30 }, (_, i) =>
      makeUsage(`x${i}`),
    );

    const candidates = getCandidates(Mode.Application, externalApiUsages, {});

    expect(candidates).toHaveLength(20);
  });
});
|
||||
|
||||
@@ -5,13 +5,20 @@ import {
|
||||
} from "../../../../src/databases/local-databases";
|
||||
import { file } from "tmp-promise";
|
||||
import { QueryResultType } from "../../../../src/query-server/new-messages";
|
||||
import { runAutoModelQueries } from "../../../../src/data-extensions-editor/auto-model-codeml-queries";
|
||||
import {
|
||||
generateCandidateFilterPack,
|
||||
runAutoModelQueries,
|
||||
} from "../../../../src/data-extensions-editor/auto-model-codeml-queries";
|
||||
import { Mode } from "../../../../src/data-extensions-editor/shared/mode";
|
||||
import { mockedObject, mockedUri } from "../../utils/mocking.helpers";
|
||||
import { CodeQLCliServer } from "../../../../src/codeql-cli/cli";
|
||||
import { QueryRunner } from "../../../../src/query-server";
|
||||
import * as queryResolver from "../../../../src/local-queries/query-resolver";
|
||||
import * as standardQueries from "../../../../src/local-queries/standard-queries";
|
||||
import { MethodSignature } from "../../../../src/data-extensions-editor/external-api-usage";
|
||||
import { join } from "path";
|
||||
import { exists, readFile } from "fs-extra";
|
||||
import { load as loadYaml } from "js-yaml";
|
||||
|
||||
describe("runAutoModelQueries", () => {
|
||||
const qlpack = {
|
||||
@@ -60,6 +67,7 @@ describe("runAutoModelQueries", () => {
|
||||
|
||||
const options = {
|
||||
mode: Mode.Application,
|
||||
candidateMethods: [],
|
||||
cliServer: mockedObject<CodeQLCliServer>({
|
||||
resolveQlpacks: jest.fn().mockResolvedValue({
|
||||
"/a/b/c/my-extension-pack": {},
|
||||
@@ -140,7 +148,10 @@ describe("runAutoModelQueries", () => {
|
||||
expect(result).not.toBeUndefined();
|
||||
|
||||
expect(options.cliServer.resolveQlpacks).toHaveBeenCalledTimes(1);
|
||||
expect(options.cliServer.resolveQlpacks).toHaveBeenCalledWith([], true);
|
||||
expect(options.cliServer.resolveQlpacks).toHaveBeenCalledWith(
|
||||
expect.arrayContaining([expect.stringContaining("tmp")]),
|
||||
true,
|
||||
);
|
||||
expect(resolveQueriesSpy).toHaveBeenCalledTimes(1);
|
||||
expect(resolveQueriesSpy).toHaveBeenCalledWith(
|
||||
options.cliServer,
|
||||
@@ -165,7 +176,7 @@ describe("runAutoModelQueries", () => {
|
||||
quickEvalCountOnly: false,
|
||||
},
|
||||
false,
|
||||
[],
|
||||
expect.arrayContaining([expect.stringContaining("tmp")]),
|
||||
["/a/b/c/my-extension-pack"],
|
||||
"/tmp/queries",
|
||||
undefined,
|
||||
@@ -173,3 +184,34 @@ describe("runAutoModelQueries", () => {
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe("generateCandidateFilterPack", () => {
  it("should create a temp pack containing the candidate filters", async () => {
    const candidateMethods: MethodSignature[] = [
      {
        signature: "org.my.A#x()",
        packageName: "org.my",
        typeName: "A",
        methodName: "x",
        methodParameters: "()",
      },
    ];

    const packDir = await generateCandidateFilterPack("java", candidateMethods);
    expect(packDir).not.toBeUndefined();

    // Both the pack manifest and the filter data file must have been written.
    const qlpackFile = join(packDir, "codeql-pack.yml");
    expect(await exists(qlpackFile)).toBe(true);
    const filterFile = join(packDir, "filter.yml");
    expect(await exists(filterFile)).toBe(true);

    // Read the contents of filterFile and parse as yaml.
    // `loadYaml` is synchronous, so only the file read is awaited.
    const filterContents = await readFile(filterFile, "utf8");
    const yaml = loadYaml(filterContents);

    // Exactly one extension, targeting the Java query pack's filter predicate.
    const extensions = yaml.extensions;
    expect(extensions).toBeInstanceOf(Array);
    expect(extensions).toHaveLength(1);
    const extension = extensions[0];
    expect(extension.addsTo.pack).toEqual("codeql/java-queries");
    expect(extension.addsTo.extensible).toEqual("automodelCandidateFilter");

    // One data row per candidate: [package, type, name, parameters].
    expect(extension.data).toBeInstanceOf(Array);
    expect(extension.data).toHaveLength(1);
    expect(extension.data[0]).toEqual(["org.my", "A", "x", "()"]);
  });
});
|
||||
|
||||
Reference in New Issue
Block a user