From 6fd2579c9c15ab21ccfd17edf392cf2bdc768e04 Mon Sep 17 00:00:00 2001 From: Koen Vlaswinkel Date: Tue, 2 Jul 2024 15:42:10 +0200 Subject: [PATCH] Use simplified paths and endpoint kinds for Python model editor --- extensions/ql-vscode/src/model-editor/bqrs.ts | 22 +- .../model-editor/languages/models-as-data.ts | 3 + .../languages/python/access-paths.ts | 155 ++++++++---- .../model-editor/languages/python/index.ts | 95 ++++---- .../ql-vscode/src/model-editor/method.ts | 2 + .../src/model-editor/queries/query.ts | 4 + .../languages/python/access-paths.test.ts | 230 ++++++++++++++++-- 7 files changed, 390 insertions(+), 121 deletions(-) diff --git a/extensions/ql-vscode/src/model-editor/bqrs.ts b/extensions/ql-vscode/src/model-editor/bqrs.ts index a606f5c33..5cab2bdf1 100644 --- a/extensions/ql-vscode/src/model-editor/bqrs.ts +++ b/extensions/ql-vscode/src/model-editor/bqrs.ts @@ -33,6 +33,7 @@ export function decodeBqrsToMethods( let libraryVersion: string | undefined; let type: ModeledMethodType; let classification: CallClassification; + let endpointKindColumn: string | BqrsEntityValue | undefined; let endpointType: EndpointType | undefined = undefined; if (mode === Mode.Application) { @@ -47,6 +48,7 @@ export function decodeBqrsToMethods( libraryVersion, type, classification, + endpointKindColumn, ] = tuple as ApplicationModeTuple; } else { [ @@ -58,6 +60,7 @@ export function decodeBqrsToMethods( supported, library, type, + endpointKindColumn, ] = tuple as FrameworkModeTuple; classification = CallClassification.Unknown; @@ -68,13 +71,18 @@ export function decodeBqrsToMethods( } if (definition.endpointTypeForEndpoint) { - endpointType = definition.endpointTypeForEndpoint({ - endpointType, - packageName, - typeName, - methodName, - methodParameters, - }); + endpointType = definition.endpointTypeForEndpoint( + { + endpointType, + packageName, + typeName, + methodName, + methodParameters, + }, + typeof endpointKindColumn === "object" + ? endpointKindColumn.label + : endpointKindColumn, + ); } if (endpointType === undefined) { diff --git a/extensions/ql-vscode/src/model-editor/languages/models-as-data.ts b/extensions/ql-vscode/src/model-editor/languages/models-as-data.ts index c0a68c188..9ce524dc8 100644 --- a/extensions/ql-vscode/src/model-editor/languages/models-as-data.ts +++ b/extensions/ql-vscode/src/model-editor/languages/models-as-data.ts @@ -174,11 +174,14 @@ export type ModelsAsDataLanguage = { * be determined by heuristics. * @param method The method to get the endpoint type for. The endpoint type can be undefined if the * query does not return an endpoint type. + * @param endpointKind An optional column that may be provided by the query to help determine the + * endpoint type. */ endpointTypeForEndpoint?: ( method: Omit & { endpointType: EndpointType | undefined; }, + endpointKind: string | undefined, ) => EndpointType | undefined; predicates: ModelsAsDataLanguagePredicates; modelGeneration?: ModelsAsDataLanguageModelGeneration; diff --git a/extensions/ql-vscode/src/model-editor/languages/python/access-paths.ts b/extensions/ql-vscode/src/model-editor/languages/python/access-paths.ts index db9d40b8c..010719b57 100644 --- a/extensions/ql-vscode/src/model-editor/languages/python/access-paths.ts +++ b/extensions/ql-vscode/src/model-editor/languages/python/access-paths.ts @@ -4,7 +4,26 @@ import { EndpointType } from "../../method"; const memberTokenRegex = /^Member\[(.+)]$/; -export function parsePythonAccessPath(path: string): { +// In Python, the type can contain both the package name and the type name. +export function parsePythonType(type: string) { + // The first part is always the package name. All remaining parts are the type + // name. + + const parts = type.split("."); + const packageName = parts.shift() ?? ""; + + return { + packageName, + typeName: parts.join("."), + }; +} + +// The type name can also be specified in the type, so this will combine +// the already parsed type name and the type name from the access path. +export function parsePythonAccessPath( + path: string, + shortTypeName: string, +): { typeName: string; methodName: string; endpointType: EndpointType; @@ -13,8 +32,12 @@ export function parsePythonAccessPath(path: string): { const tokens = parseAccessPathTokens(path); if (tokens.length === 0) { + const typeName = shortTypeName.endsWith("!") + ? shortTypeName.slice(0, -1) + : shortTypeName; + return { - typeName: "", + typeName, methodName: "", endpointType: EndpointType.Method, path: "", @@ -23,6 +46,10 @@ export function parsePythonAccessPath(path: string): { const typeParts = []; let endpointType = EndpointType.Function; + // If a short type name was given and it doesn't end in a `!`, then this refers to a method. + if (shortTypeName !== "" && !shortTypeName.endsWith("!")) { + endpointType = EndpointType.Method; + } let remainingTokens: typeof tokens = []; @@ -32,6 +59,7 @@ export function parsePythonAccessPath(path: string): { if (memberMatch) { typeParts.push(memberMatch[1]); } else if (token.text === "Instance") { + // Alternative way of specifying that this refers to a method. endpointType = EndpointType.Method; } else { remainingTokens = tokens.slice(i); @@ -40,9 +68,22 @@ export function parsePythonAccessPath(path: string): { } const methodName = typeParts.pop() ?? ""; - const typeName = typeParts.join("."); + let typeName = typeParts.join("."); const remainingPath = remainingTokens.map((token) => token.text).join("."); + if (shortTypeName !== "") { + if (shortTypeName.endsWith("!")) { + // The actual type name is the name without the `!`. + shortTypeName = shortTypeName.slice(0, -1); + } + + if (typeName !== "") { + typeName = `${shortTypeName}.${typeName}`; + } else { + typeName = shortTypeName; + } + } + return { methodName, typeName, @@ -51,53 +92,59 @@ export function parsePythonAccessPath(path: string): { }; } +export function parsePythonTypeAndPath( + type: string, + path: string, +): { + packageName: string; + typeName: string; + methodName: string; + endpointType: EndpointType; + path: string; +} { + const { packageName, typeName: shortTypeName } = parsePythonType(type); + const { + typeName, + methodName, + endpointType, + path: remainingPath, + } = parsePythonAccessPath(path, shortTypeName); + + return { + packageName, + typeName, + methodName, + endpointType, + path: remainingPath, + }; +} + export function pythonMethodSignature(typeName: string, methodName: string) { return `${typeName}#${methodName}`; } -function pythonTypePath(typeName: string) { - if (typeName === "") { +export function pythonType( + packageName: string, + typeName: string, + endpointType: EndpointType, +) { + if (typeName !== "" && packageName !== "") { + return `${packageName}.${typeName}${endpointType === EndpointType.Function ? "!" : ""}`; + } + + return `${packageName}${typeName}`; +} + +export function pythonMethodPath(methodName: string) { + if (methodName === "") { return ""; } - return typeName - .split(".") - .map((part) => `Member[${part}]`) - .join("."); + return `Member[${methodName}]`; } -export function pythonMethodPath( - typeName: string, - methodName: string, - endpointType: EndpointType, -) { - if (methodName === "") { - return pythonTypePath(typeName); - } - - const typePath = pythonTypePath(typeName); - - let result = typePath; - if (typePath !== "" && endpointType === EndpointType.Method) { - result += ".Instance"; - } - - if (result !== "") { - result += "."; - } - - result += `Member[${methodName}]`; - - return result; -} - -export function pythonPath( - typeName: string, - methodName: string, - endpointType: EndpointType, - path: string, -) { - const methodPath = pythonMethodPath(typeName, methodName, endpointType); +export function pythonPath(methodName: string, path: string) { + const methodPath = pythonMethodPath(methodName); if (methodPath === "") { return path; } @@ -111,7 +158,24 @@ export function pythonPath( export function pythonEndpointType( method: Omit, + endpointKind: string | undefined, ): EndpointType { + switch (endpointKind) { + case "Function": + return EndpointType.Function; + case "InstanceMethod": + return EndpointType.Method; + case "ClassMethod": + return EndpointType.ClassMethod; + case "StaticMethod": + return EndpointType.StaticMethod; + case "InitMethod": + return EndpointType.Constructor; + case "Class": + return EndpointType.Class; + } + + // Legacy behavior for when the kind column is missing. if ( method.methodParameters.startsWith("(self,") || method.methodParameters === "(self)" @@ -120,3 +184,12 @@ export function pythonEndpointType( } return EndpointType.Function; } + +export function hasPythonSelfArgument(endpointType: EndpointType): boolean { + // Instance methods and class methods both use `Argument[self]` for the first parameter. The first + // parameter after self is called `Argument[0]`. + return ( + endpointType === EndpointType.Method || + endpointType === EndpointType.ClassMethod + ); +} diff --git a/extensions/ql-vscode/src/model-editor/languages/python/index.ts b/extensions/ql-vscode/src/model-editor/languages/python/index.ts index 946aac294..691687f1b 100644 --- a/extensions/ql-vscode/src/model-editor/languages/python/index.ts +++ b/extensions/ql-vscode/src/model-editor/languages/python/index.ts @@ -4,44 +4,48 @@ import { Mode } from "../../shared/mode"; import type { MethodArgument } from "../../method"; import { EndpointType, getArgumentsList } from "../../method"; import { - parsePythonAccessPath, + hasPythonSelfArgument, + parsePythonTypeAndPath, pythonEndpointType, pythonMethodPath, pythonMethodSignature, pythonPath, + pythonType, } from "./access-paths"; export const python: ModelsAsDataLanguage = { availableModes: [Mode.Framework], createMethodSignature: ({ typeName, methodName }) => `${typeName}#${methodName}`, - endpointTypeForEndpoint: (method) => pythonEndpointType(method), + endpointTypeForEndpoint: (method, endpointKind) => + pythonEndpointType(method, endpointKind), predicates: { source: { extensiblePredicate: sharedExtensiblePredicates.source, supportedKinds: sharedKinds.source, - supportedEndpointTypes: [EndpointType.Method, EndpointType.Function], + supportedEndpointTypes: [ + EndpointType.Method, + EndpointType.Function, + EndpointType.Constructor, + EndpointType.ClassMethod, + EndpointType.StaticMethod, + ], // extensible predicate sourceModel( // string type, string path, string kind // ); generateMethodDefinition: (method) => [ - method.packageName, - pythonPath( - method.typeName, - method.methodName, - method.endpointType, - method.output, - ), + pythonType(method.packageName, method.typeName, method.endpointType), + pythonPath(method.methodName, method.output), method.kind, ], readModeledMethod: (row) => { - const packageName = row[0] as string; const { + packageName, typeName, methodName, endpointType, path: output, - } = parsePythonAccessPath(row[1] as string); + } = parsePythonTypeAndPath(row[0] as string, row[1] as string); return { type: "source", output, @@ -59,30 +63,31 @@ export const python: ModelsAsDataLanguage = { sink: { extensiblePredicate: sharedExtensiblePredicates.sink, supportedKinds: sharedKinds.sink, - supportedEndpointTypes: [EndpointType.Method, EndpointType.Function], + supportedEndpointTypes: [ + EndpointType.Method, + EndpointType.Function, + EndpointType.Constructor, + EndpointType.ClassMethod, + EndpointType.StaticMethod, + ], // extensible predicate sinkModel( // string type, string path, string kind // ); generateMethodDefinition: (method) => { return [ - method.packageName, - pythonPath( - method.typeName, - method.methodName, - method.endpointType, - method.input, - ), + pythonType(method.packageName, method.typeName, method.endpointType), + pythonPath(method.methodName, method.input), method.kind, ]; }, readModeledMethod: (row) => { - const packageName = row[0] as string; const { + packageName, typeName, methodName, endpointType, path: input, - } = parsePythonAccessPath(row[1] as string); + } = parsePythonTypeAndPath(row[0] as string, row[1] as string); return { type: "sink", input, @@ -100,25 +105,26 @@ export const python: ModelsAsDataLanguage = { summary: { extensiblePredicate: sharedExtensiblePredicates.summary, supportedKinds: sharedKinds.summary, - supportedEndpointTypes: [EndpointType.Method, EndpointType.Function], + supportedEndpointTypes: [ + EndpointType.Method, + EndpointType.Function, + EndpointType.Constructor, + EndpointType.ClassMethod, + EndpointType.StaticMethod, + ], // extensible predicate summaryModel( // string type, string path, string input, string output, string kind // ); generateMethodDefinition: (method) => [ - method.packageName, - pythonMethodPath( - method.typeName, - method.methodName, - method.endpointType, - ), + pythonType(method.packageName, method.typeName, method.endpointType), + pythonMethodPath(method.methodName), method.input, method.output, method.kind, ], readModeledMethod: (row) => { - const packageName = row[0] as string; - const { typeName, methodName, endpointType, path } = - parsePythonAccessPath(row[1] as string); + const { packageName, typeName, methodName, endpointType, path } = + parsePythonTypeAndPath(row[0] as string, row[1] as string); if (path !== "") { throw new Error("Summary path must be a method"); } @@ -144,18 +150,13 @@ export const python: ModelsAsDataLanguage = { // string type, string path, string kind // ); generateMethodDefinition: (method) => [ - method.packageName, - pythonMethodPath( - method.typeName, - method.methodName, - method.endpointType, - ), + pythonType(method.packageName, method.typeName, method.endpointType), + pythonMethodPath(method.methodName), method.kind, ], readModeledMethod: (row) => { - const packageName = row[0] as string; - const { typeName, methodName, endpointType, path } = - parsePythonAccessPath(row[1] as string); + const { packageName, typeName, methodName, endpointType, path } = + parsePythonTypeAndPath(row[0] as string, row[1] as string); if (path !== "") { throw new Error("Neutral path must be a method"); } @@ -177,20 +178,16 @@ export const python: ModelsAsDataLanguage = { // Argument and Parameter are equivalent in Python, but we'll use Argument in the model editor const argumentsList = getArgumentsList(method.methodParameters).map( (argument, index): MethodArgument => { - if ( - method.endpointType === EndpointType.Method && - argument === "self" && - index === 0 - ) { + if (hasPythonSelfArgument(method.endpointType) && index === 0) { return { path: "Argument[self]", - label: "Argument[self]: self", + label: `Argument[self]: ${argument}`, }; } - // If this is a method, self does not count as an argument index, so we + // If this endpoint has a self argument, self does not count as an argument index so we // should start at 0 for the second argument - if (method.endpointType === EndpointType.Method) { + if (hasPythonSelfArgument(method.endpointType)) { index -= 1; } diff --git a/extensions/ql-vscode/src/model-editor/method.ts b/extensions/ql-vscode/src/model-editor/method.ts index 1f324f587..e62449d20 100644 --- a/extensions/ql-vscode/src/model-editor/method.ts +++ b/extensions/ql-vscode/src/model-editor/method.ts @@ -29,6 +29,8 @@ export enum EndpointType { Method = "method", Constructor = "constructor", Function = "function", + StaticMethod = "staticMethod", + ClassMethod = "classMethod", } export interface MethodDefinition { diff --git a/extensions/ql-vscode/src/model-editor/queries/query.ts b/extensions/ql-vscode/src/model-editor/queries/query.ts index a24d96773..3fe4926bf 100644 --- a/extensions/ql-vscode/src/model-editor/queries/query.ts +++ b/extensions/ql-vscode/src/model-editor/queries/query.ts @@ -17,6 +17,7 @@ export type Query = { * - libraryVersion: the version of the library that contains the external API. This is a string and can be empty if the version cannot be determined. * - type: the modeled kind of the method, either "sink", "source", "summary", or "neutral" * - classification: the classification of the use of the method, either "source", "test", "generated", or "unknown" + * - kind: the kind of the endpoint, language-specific, e.g. "method" or "function" */ applicationModeQuery: string; /** @@ -32,6 +33,7 @@ export type Query = { * - supported: whether this method is modeled. This should be a string representation of a boolean to satify the result pattern for a problem query. * - libraryName: the name of the file or library that contains the method. This is a string and usually the basename of a file. * - type: the modeled kind of the method, either "sink", "source", "summary", or "neutral" + * - kind: the kind of the endpoint, language-specific, e.g. "method" or "function" */ frameworkModeQuery: string; dependencies?: { @@ -50,6 +52,7 @@ export type ApplicationModeTuple = [ string, ModeledMethodType, CallClassification, + string | BqrsEntityValue | undefined, ]; export type FrameworkModeTuple = [ @@ -61,4 +64,5 @@ export type FrameworkModeTuple = [ boolean, string, ModeledMethodType, + string | BqrsEntityValue | undefined, ]; diff --git a/extensions/ql-vscode/test/unit-tests/model-editor/languages/python/access-paths.test.ts b/extensions/ql-vscode/test/unit-tests/model-editor/languages/python/access-paths.test.ts index 34df7f79d..5cb3f5245 100644 --- a/extensions/ql-vscode/test/unit-tests/model-editor/languages/python/access-paths.test.ts +++ b/extensions/ql-vscode/test/unit-tests/model-editor/languages/python/access-paths.test.ts @@ -1,16 +1,59 @@ import { parsePythonAccessPath, + parsePythonType, pythonEndpointType, pythonPath, } from "../../../../../src/model-editor/languages/python/access-paths"; import { EndpointType } from "../../../../../src/model-editor/method"; +describe("parsePythonType", () => { + it("parses a type with a package", () => { + expect(parsePythonType("requests.utils")).toEqual({ + packageName: "requests", + typeName: "utils", + }); + }); + + it("parses a nested type with a package", () => { + expect(parsePythonType("requests.adapters.HTTPAdapter")).toEqual({ + packageName: "requests", + typeName: "adapters.HTTPAdapter", + }); + }); + + it("parses a package without a type", () => { + expect(parsePythonType("requests")).toEqual({ + packageName: "requests", + typeName: "", + }); + }); + + it("parses an empty string", () => { + expect(parsePythonType("")).toEqual({ + packageName: "", + typeName: "", + }); + }); +}); + const testCases: Array<{ path: string; + shortTypeName: string; method: ReturnType; }> = [ { path: "Member[CommonTokens].Member[Class].Instance.Member[foo]", + shortTypeName: "", + method: { + typeName: "CommonTokens.Class", + methodName: "foo", + endpointType: EndpointType.Method, + path: "", + }, + }, + { + path: "Member[foo]", + shortTypeName: "CommonTokens.Class", method: { typeName: "CommonTokens.Class", methodName: "foo", @@ -20,6 +63,17 @@ const testCases: Array<{ }, { path: "Member[CommonTokens].Member[Class].Instance.Member[foo].Parameter[self]", + shortTypeName: "", + method: { + typeName: "CommonTokens.Class", + methodName: "foo", + endpointType: EndpointType.Method, + path: "Parameter[self]", + }, + }, + { + path: "Member[foo].Parameter[self]", + shortTypeName: "CommonTokens.Class", method: { typeName: "CommonTokens.Class", methodName: "foo", @@ -29,6 +83,7 @@ const testCases: Array<{ }, { path: "Member[getSource].ReturnValue", + shortTypeName: "", method: { typeName: "", methodName: "getSource", @@ -38,6 +93,17 @@ const testCases: Array<{ }, { path: "Member[CommonTokens].Member[makePromise].ReturnValue.Awaited", + shortTypeName: "", + method: { + typeName: "CommonTokens", + methodName: "makePromise", + endpointType: EndpointType.Function, + path: "ReturnValue.Awaited", + }, + }, + { + path: "Member[makePromise].ReturnValue.Awaited", + shortTypeName: "CommonTokens!", method: { typeName: "CommonTokens", methodName: "makePromise", @@ -47,6 +113,17 @@ const testCases: Array<{ }, { path: "Member[ArgPos].Member[anyParam].Argument[any]", + shortTypeName: "", + method: { + typeName: "ArgPos", + methodName: "anyParam", + endpointType: EndpointType.Function, + path: "Argument[any]", + }, + }, + { + path: "Member[anyParam].Argument[any]", + shortTypeName: "ArgPos!", method: { typeName: "ArgPos", methodName: "anyParam", @@ -56,6 +133,17 @@ const testCases: Array<{ }, { path: "Member[ArgPos].Instance.Member[self_thing].Argument[self]", + shortTypeName: "", + method: { + typeName: "ArgPos", + methodName: "self_thing", + endpointType: EndpointType.Method, + path: "Argument[self]", + }, + }, + { + path: "Member[self_thing].Argument[self]", + shortTypeName: "ArgPos", method: { typeName: "ArgPos", methodName: "self_thing", @@ -66,44 +154,138 @@ const testCases: Array<{ ]; describe("parsePythonAccessPath", () => { - it.each(testCases)("parses $path", ({ path, method }) => { - expect(parsePythonAccessPath(path)).toEqual(method); - }); + it.each(testCases)( + "parses $path with $shortTypeName", + ({ path, shortTypeName, method }) => { + expect(parsePythonAccessPath(path, shortTypeName)).toEqual(method); + }, + ); }); describe("pythonPath", () => { - it.each(testCases)("constructs $path", ({ path, method }) => { - expect( - pythonPath( - method.typeName, - method.methodName, - method.endpointType, - method.path, - ), - ).toEqual(path); + it("returns empty for an empty method name", () => { + expect(pythonPath("", "ReturnValue")).toEqual("ReturnValue"); + }); + + it("returns empty for an empty path", () => { + expect(pythonPath("foo", "")).toEqual("Member[foo]"); + }); + + it("returns correctly for a full method name and path", () => { + expect(pythonPath("foo", "ReturnValue")).toEqual("Member[foo].ReturnValue"); }); }); describe("pythonEndpointType", () => { it("returns method for a method", () => { expect( - pythonEndpointType({ - packageName: "testlib", - typeName: "CommonTokens", - methodName: "foo", - methodParameters: "(self,a)", - }), + pythonEndpointType( + { + packageName: "testlib", + typeName: "CommonTokens", + methodName: "foo", + methodParameters: "(self,a)", + }, + "InstanceMethod", + ), ).toEqual(EndpointType.Method); }); + it("returns class method for a class method", () => { + expect( + pythonEndpointType( + { + packageName: "testlib", + typeName: "CommonTokens", + methodName: "foo", + methodParameters: "(cls,a)", + }, + "ClassMethod", + ), + ).toEqual(EndpointType.ClassMethod); + }); + + it("returns static method for a static method", () => { + expect( + pythonEndpointType( + { + packageName: "testlib", + typeName: "CommonTokens", + methodName: "foo", + methodParameters: "(a)", + }, + "StaticMethod", + ), + ).toEqual(EndpointType.StaticMethod); + }); + it("returns function for a function", () => { expect( - pythonEndpointType({ - packageName: "testlib", - typeName: "CommonTokens", - methodName: "foo", - methodParameters: "(a)", - }), + pythonEndpointType( + { + packageName: "testlib", + typeName: "", + methodName: "foo", + methodParameters: "(a)", + }, + "Function", + ), + ).toEqual(EndpointType.Function); + }); + + it("returns constructor for an init method", () => { + expect( + pythonEndpointType( + { + packageName: "testlib", + typeName: "CommonTokens", + methodName: "foo", + methodParameters: "(a)", + }, + "InitMethod", + ), + ).toEqual(EndpointType.Constructor); + }); + + it("returns class for a class", () => { + expect( + pythonEndpointType( + { + packageName: "testlib", + typeName: "CommonTokens", + methodName: "", + methodParameters: "", + }, + "Class", + ), + ).toEqual(EndpointType.Class); + }); + + it("returns method for a method without endpoint kind", () => { + expect( + pythonEndpointType( + { + packageName: "testlib", + typeName: "CommonTokens", + methodName: "foo", + methodParameters: "(self,a)", + }, + undefined, + ), + ).toEqual(EndpointType.Method); + }); + + it("returns function for a function without endpoint kind", () => { + expect( + pythonEndpointType( + { + packageName: "testlib", + typeName: "CommonTokens", + methodName: "foo", + methodParameters: "(a)", + }, + undefined, + ), ).toEqual(EndpointType.Function); }); });