From 1c910550e6c3da3095796dfbf53c03b83a9288e4 Mon Sep 17 00:00:00 2001 From: Asger F Date: Mon, 14 Nov 2022 14:23:39 +0100 Subject: [PATCH] Python: merge package/type columns --- .../lib/semmle/python/frameworks/Asyncpg.qll | 23 +- .../data/internal/ApiGraphModels.qll | 312 ++++++++---------- .../data/internal/ApiGraphModelsSpecific.qll | 40 +-- .../library-tests/frameworks/data/test.ql | 91 ++--- .../frameworks/data/warnings.expected | 4 +- .../library-tests/frameworks/data/warnings.ql | 14 +- 6 files changed, 227 insertions(+), 257 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/Asyncpg.qll b/python/ql/lib/semmle/python/frameworks/Asyncpg.qll index ca28dca550f..7ded3730788 100644 --- a/python/ql/lib/semmle/python/frameworks/Asyncpg.qll +++ b/python/ql/lib/semmle/python/frameworks/Asyncpg.qll @@ -17,14 +17,15 @@ private module Asyncpg { row = [ // a `ConnectionPool` that is created when the result of `asyncpg.create_pool()` is awaited. - "asyncpg;ConnectionPool;asyncpg;;Member[create_pool].ReturnValue.Awaited", + "asyncpg.ConnectionPool;asyncpg;Member[create_pool].ReturnValue.Awaited", // a `Connection` that is created when // * - the result of `asyncpg.connect()` is awaited. // * - the result of calling `acquire` on a `ConnectionPool` is awaited. - "asyncpg;Connection;asyncpg;;Member[connect].ReturnValue.Awaited", - "asyncpg;Connection;asyncpg;ConnectionPool;Member[acquire].ReturnValue.Awaited", + "asyncpg.Connection;asyncpg;Member[connect].ReturnValue.Awaited", + "asyncpg.Connection;asyncpg.ConnectionPool;Member[acquire].ReturnValue.Awaited", // Creating an internal `~Connection` type that contains both `Connection` and `ConnectionPool`. - "asyncpg;~Connection;asyncpg;Connection;", "asyncpg;~Connection;asyncpg;ConnectionPool;" + "asyncpg.~Connection;asyncpg.Connection;", // + "asyncpg.~Connection;asyncpg.ConnectionPool;" ] } } @@ -35,13 +36,13 @@ private module Asyncpg { row = [ // `Connection`s and `ConnectionPool`s provide some methods that execute SQL. - "asyncpg;~Connection;Member[copy_from_query,execute,fetch,fetchrow,fetchval].Argument[0,query:];sql-injection", - "asyncpg;~Connection;Member[executemany].Argument[0,command:];sql-injection", + "asyncpg.~Connection;Member[copy_from_query,execute,fetch,fetchrow,fetchval].Argument[0,query:];sql-injection", + "asyncpg.~Connection;Member[executemany].Argument[0,command:];sql-injection", // A model of `Connection` and `ConnectionPool`, which provide some methods that access the file system. - "asyncpg;~Connection;Member[copy_from_query,copy_from_table].Argument[output:];path-injection", - "asyncpg;~Connection;Member[copy_to_table].Argument[source:];path-injection", + "asyncpg.~Connection;Member[copy_from_query,copy_from_table].Argument[output:];path-injection", + "asyncpg.~Connection;Member[copy_to_table].Argument[source:];path-injection", // the `PreparedStatement` class in `asyncpg`. - "asyncpg;Connection;Member[prepare].Argument[0,query:];sql-injection", + "asyncpg.Connection;Member[prepare].Argument[0,query:];sql-injection", ] } } @@ -58,7 +59,7 @@ private module Asyncpg { module Cursor { class CursorConstruction extends SqlConstruction::Range, API::CallNode { CursorConstruction() { - this = ModelOutput::getATypeNode("asyncpg", "Connection").getMember("cursor").getACall() + this = ModelOutput::getATypeNode("asyncpg.Connection").getMember("cursor").getACall() } override DataFlow::Node getSql() { result = this.getParameter(0, "query").asSink() } @@ -76,7 +77,7 @@ private module Asyncpg { or exists(API::CallNode prepareCall | prepareCall = - ModelOutput::getATypeNode("asyncpg", "Connection").getMember("prepare").getACall() + ModelOutput::getATypeNode("asyncpg.Connection").getMember("prepare").getACall() | sql = prepareCall.getParameter(0, "query").asSink() and this = diff --git a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll index 9b2428f3413..0e744ba801a 100644 --- a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll +++ b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll @@ -5,23 +5,20 @@ * * The CSV specification has the following columns: * - Sources: - * `package; type; path; kind` + * `type; path; kind` * - Sinks: - * `package; type; path; kind` + * `type; path; kind` * - Summaries: - * `package; type; path; input; output; kind` + * `type; path; input; output; kind` * - Types: - * `package1; type1; package2; type2; path` + * `type1; type2; path` * * The interpretation of a row is similar to API-graphs with a left-to-right * reading. - * 1. The `package` column selects a package name, as it would be referenced in the source code, - * such as an NPM package, PIP package, or Ruby gem. (See `ModelsAsData.qll` for language-specific details). - * It may also be a synthetic package used for a type definition (see type definitions below). - * 2. The `type` column selects all instances of a named type originating from that package, - * or the empty string if referring to the package itself. + * 1. The `type` column selects all instances of a named type. The syntax of this column is language-specific. + * The language defines some type names that the analysis knows how to identify without models. * It can also be a synthetic type name defined by a type definition (see type definitions below). - * 3. The `path` column is a `.`-separated list of "access path tokens" to resolve, starting at the node selected by `package` and `type`. + * 2. The `path` column is a `.`-separated list of "access path tokens" to resolve, starting at the node selected by `type`. * * Every language supports the following tokens: * - Argument[n]: the n-th argument to a call. May be a range of form `x..y` (inclusive) and/or a comma-separated list. @@ -42,10 +39,10 @@ * * For the time being, please consult `ApiGraphModelsSpecific.qll` to see which language-specific tokens are currently supported. * - * 4. The `input` and `output` columns specify how data enters and leaves the element selected by the - * first `(package, type, path)` tuple. Both strings are `.`-separated access paths + * 3. The `input` and `output` columns specify how data enters and leaves the element selected by the + * first `(type, path)` tuple. Both strings are `.`-separated access paths * of the same syntax as the `path` column. - * 5. The `kind` column is a tag that can be referenced from QL to determine to + * 4. The `kind` column is a tag that can be referenced from QL to determine to * which classes the interpreted elements should be added. For example, for * sources `"remote"` indicates a default remote flow source, and for summaries * `"taint"` indicates a default additional taint step and `"value"` indicates a @@ -53,17 +50,17 @@ * * ### Types * - * A type row of form `package1; type1; package2; type2; path` indicates that `package2; type2; path` - * should be seen as an instance of the type `package1; type1`. + * A type row of form `type1; type2; path` indicates that `type2; path` + * should be seen as an instance of the type `type1`. * - * A `(package,type)` pair may refer to a static type or a synthetic type name used internally in the model. + * A type may refer to a static type or a synthetic type name used internally in the model. * Synthetic type names can be used to reuse intermediate sub-paths, when there are multiple ways to access the same * element. - * See `ModelsAsData.qll` for the language-specific interpretation of packages and static type names. + * See `ModelsAsData.qll` for the language-specific interpretation of type names. * - * By convention, if one wants to avoid clashes with static types from the package, the type name - * should be prefixed with a tilde character (`~`). For example, `(foo, ~Bar)` can be used to indicate that - * the type is related to the `foo` package but is not intended to match a static type. + * By convention, if one wants to avoid clashes with static types, the type name + * should be prefixed with a tilde character (`~`). For example, `~Bar` can be used to indicate that + * the type is not intended to match a static type. */ private import ApiGraphModelsSpecific as Specific @@ -89,9 +86,9 @@ module ModelInput { * * A row of form * ``` - * package;type;path;kind + * type;path;kind * ``` - * indicates that the value at `(package, type, path)` should be seen as a flow + * indicates that the value at `(type, path)` should be seen as a flow * source of the given `kind`. * * The kind `remote` represents a general remote flow source. @@ -110,9 +107,9 @@ module ModelInput { * * A row of form * ``` - * package;type;path;kind + * type;path;kind * ``` - * indicates that the value at `(package, type, path)` should be seen as a sink + * indicates that the value at `(type, path)` should be seen as a sink * of the given `kind`. */ abstract predicate row(string row); @@ -129,9 +126,9 @@ module ModelInput { * * A row of form * ``` - * package;type;path;input;output;kind + * type;path;input;output;kind * ``` - * indicates that for each call to `(package, type, path)`, the value referred to by `input` + * indicates that for each call to `(type, path)`, the value referred to by `input` * can flow to the value referred to by `output`. * * `kind` should be either `value` or `taint`, for value-preserving or taint-preserving steps, @@ -151,9 +148,9 @@ module ModelInput { * * A row of form, * ``` - * package1;type1;package2;type2;path + * type1;type2;path * ``` - * indicates that `(package2, type2, path)` should be seen as an instance of `(package1, type1)`. + * indicates that `(type2, path)` should be seen as an instance of `type1`. */ abstract predicate row(string row); } @@ -163,28 +160,28 @@ module ModelInput { */ class TypeModel extends Unit { /** - * Gets a data-flow node that is a source of the type `package;type`. + * Gets a data-flow node that is a source of the given `type`. * * This must not depend on API graphs, but ensures that an API node is generated for * the source. */ - DataFlow::Node getASource(string package, string type) { none() } + DataFlow::Node getASource(string type) { none() } /** - * Gets a data-flow node that is a sink of the type `package;type`, + * Gets a data-flow node that is a sink of the given `type`, * usually because it is an argument passed to a parameter of that type. * * This must not depend on API graphs, but ensures that an API node is generated for * the sink. */ - DataFlow::Node getASink(string package, string type) { none() } + DataFlow::Node getASink(string type) { none() } /** - * Gets an API node that is a source or sink of the type `package;type`. + * Gets an API node that is a source or sink of the given `type`. * * Unlike `getASource` and `getASink`, this may depend on API graphs. */ - API::Node getAnApiNode(string package, string type) { none() } + API::Node getAnApiNode(string type) { none() } } /** @@ -209,7 +206,7 @@ private import ModelInput /** * An empty class, except in specific tests. * - * If this is non-empty, all models are parsed even if the package is not + * If this is non-empty, all models are parsed even if the type name is not * considered relevant for the current database. */ abstract class TestAllModels extends Unit { } @@ -232,53 +229,44 @@ private predicate typeModel(string row) { any(TypeModelCsv s).row(inversePad(row private predicate typeVariableModel(string row) { any(TypeVariableModelCsv s).row(inversePad(row)) } /** Holds if a source model exists for the given parameters. */ -predicate sourceModel(string package, string type, string path, string kind) { +predicate sourceModel(string type, string path, string kind) { exists(string row | sourceModel(row) and - row.splitAt(";", 0) = package and - row.splitAt(";", 1) = type and - row.splitAt(";", 2) = path and - row.splitAt(";", 3) = kind + row.splitAt(";", 0) = type and + row.splitAt(";", 1) = path and + row.splitAt(";", 2) = kind ) } /** Holds if a sink model exists for the given parameters. */ -private predicate sinkModel(string package, string type, string path, string kind) { +private predicate sinkModel(string type, string path, string kind) { exists(string row | sinkModel(row) and - row.splitAt(";", 0) = package and - row.splitAt(";", 1) = type and - row.splitAt(";", 2) = path and - row.splitAt(";", 3) = kind + row.splitAt(";", 0) = type and + row.splitAt(";", 1) = path and + row.splitAt(";", 2) = kind ) } /** Holds if a summary model `row` exists for the given parameters. */ -private predicate summaryModel( - string package, string type, string path, string input, string output, string kind -) { +private predicate summaryModel(string type, string path, string input, string output, string kind) { exists(string row | summaryModel(row) and - row.splitAt(";", 0) = package and - row.splitAt(";", 1) = type and - row.splitAt(";", 2) = path and - row.splitAt(";", 3) = input and - row.splitAt(";", 4) = output and - row.splitAt(";", 5) = kind + row.splitAt(";", 0) = type and + row.splitAt(";", 1) = path and + row.splitAt(";", 2) = input and + row.splitAt(";", 3) = output and + row.splitAt(";", 4) = kind ) } /** Holds if a type model exists for the given parameters. */ -private predicate typeModel( - string package1, string type1, string package2, string type2, string path -) { +private predicate typeModel(string type1, string type2, string path) { exists(string row | typeModel(row) and - row.splitAt(";", 0) = package1 and - row.splitAt(";", 1) = type1 and - row.splitAt(";", 2) = package2 and - row.splitAt(";", 3) = type2 and - row.splitAt(";", 4) = path + row.splitAt(";", 0) = type1 and + row.splitAt(";", 1) = type2 and + row.splitAt(";", 2) = path ) } @@ -292,61 +280,50 @@ private predicate typeVariableModel(string name, string path) { } /** - * Gets a package that should be seen as an alias for the given other `package`, - * or the `package` itself. + * Holds if CSV rows involving `type` might be relevant for the analysis of this database. */ -bindingset[package] -bindingset[result] -string getAPackageAlias(string package) { - typeModel(package, "", result, "", "") - or - result = package -} - -/** - * Holds if CSV rows involving `package` might be relevant for the analysis of this database. - */ -private predicate isRelevantPackage(string package) { +predicate isRelevantType(string type) { ( - sourceModel(package, _, _, _) or - sinkModel(package, _, _, _) or - summaryModel(package, _, _, _, _, _) or - typeModel(_, _, package, _, _) + sourceModel(type, _, _) or + sinkModel(type, _, _) or + summaryModel(type, _, _, _, _) or + typeModel(_, type, _) ) and ( - Specific::isPackageUsed(package) + Specific::isTypeUsed(type) or exists(TestAllModels t) ) or - exists(string other | - isRelevantPackage(other) and - typeModel(package, _, other, _, _) + exists(string other | isRelevantType(other) | + typeModel(type, other, _) + or + Specific::hasImplicitTypeModel(type, other) ) } /** - * Holds if `package,type,path` is used in some CSV row. + * Holds if `type,path` is used in some CSV row. */ pragma[nomagic] -predicate isRelevantFullPath(string package, string type, string path) { - isRelevantPackage(package) and +predicate isRelevantFullPath(string type, string path) { + isRelevantType(type) and ( - sourceModel(package, type, path, _) or - sinkModel(package, type, path, _) or - summaryModel(package, type, path, _, _, _) or - typeModel(_, _, package, type, path) + sourceModel(type, path, _) or + sinkModel(type, path, _) or + summaryModel(type, path, _, _, _) or + typeModel(_, type, path) ) } /** A string from a CSV row that should be parsed as an access path. */ private class AccessPathRange extends AccessPath::Range { AccessPathRange() { - isRelevantFullPath(_, _, this) + isRelevantFullPath(_, this) or - exists(string package | isRelevantPackage(package) | - summaryModel(package, _, _, this, _, _) or - summaryModel(package, _, _, _, this, _) + exists(string type | isRelevantType(type) | + summaryModel(type, _, this, _, _) or + summaryModel(type, _, _, this, _) ) or typeVariableModel(_, this) @@ -400,83 +377,73 @@ private predicate invocationMatchesCallSiteFilter(Specific::InvokeNode invoke, A } private class TypeModelUseEntry extends API::EntryPoint { - private string package; private string type; TypeModelUseEntry() { - exists(any(TypeModel tm).getASource(package, type)) and - this = "TypeModelUseEntry;" + package + ";" + type + exists(any(TypeModel tm).getASource(type)) and + this = "TypeModelUseEntry;" + type } - override DataFlow::LocalSourceNode getASource() { - result = any(TypeModel tm).getASource(package, type) - } + override DataFlow::LocalSourceNode getASource() { result = any(TypeModel tm).getASource(type) } - API::Node getNodeForType(string package_, string type_) { - package = package_ and type = type_ and result = this.getANode() - } + API::Node getNodeForType(string type_) { type = type_ and result = this.getANode() } } private class TypeModelDefEntry extends API::EntryPoint { - private string package; private string type; TypeModelDefEntry() { - exists(any(TypeModel tm).getASink(package, type)) and - this = "TypeModelDefEntry;" + package + ";" + type + exists(any(TypeModel tm).getASink(type)) and + this = "TypeModelDefEntry;" + type } - override DataFlow::Node getASink() { result = any(TypeModel tm).getASink(package, type) } + override DataFlow::Node getASink() { result = any(TypeModel tm).getASink(type) } - API::Node getNodeForType(string package_, string type_) { - package = package_ and type = type_ and result = this.getANode() - } + API::Node getNodeForType(string type_) { type = type_ and result = this.getANode() } } /** - * Gets an API node identified by the given `(package,type)` pair. + * Gets an API node identified by the given `type`. */ pragma[nomagic] -private API::Node getNodeFromType(string package, string type) { - exists(string package2, string type2, AccessPath path2 | - typeModel(package, type, package2, type2, path2) and - result = getNodeFromPath(package2, type2, path2) +private API::Node getNodeFromType(string type) { + exists(string type2, AccessPath path2 | + typeModel(type, type2, path2) and + result = getNodeFromPath(type2, path2) ) or - result = any(TypeModelUseEntry e).getNodeForType(package, type) + result = any(TypeModelUseEntry e).getNodeForType(type) or - result = any(TypeModelDefEntry e).getNodeForType(package, type) + result = any(TypeModelDefEntry e).getNodeForType(type) or - result = any(TypeModel t).getAnApiNode(package, type) + result = any(TypeModel t).getAnApiNode(type) or - result = Specific::getExtraNodeFromType(package, type) + result = Specific::getExtraNodeFromType(type) } /** - * Gets the API node identified by the first `n` tokens of `path` in the given `(package, type, path)` tuple. + * Gets the API node identified by the first `n` tokens of `path` in the given `(type, path)` tuple. */ pragma[nomagic] -private API::Node getNodeFromPath(string package, string type, AccessPath path, int n) { - isRelevantFullPath(package, type, path) and +private API::Node getNodeFromPath(string type, AccessPath path, int n) { + isRelevantFullPath(type, path) and ( n = 0 and - result = getNodeFromType(package, type) + result = getNodeFromType(type) or - result = Specific::getExtraNodeFromPath(package, type, path, n) + result = Specific::getExtraNodeFromPath(type, path, n) ) or - result = getSuccessorFromNode(getNodeFromPath(package, type, path, n - 1), path.getToken(n - 1)) + result = getSuccessorFromNode(getNodeFromPath(type, path, n - 1), path.getToken(n - 1)) or // Similar to the other recursive case, but where the path may have stepped through one or more call-site filters - result = - getSuccessorFromInvoke(getInvocationFromPath(package, type, path, n - 1), path.getToken(n - 1)) + result = getSuccessorFromInvoke(getInvocationFromPath(type, path, n - 1), path.getToken(n - 1)) or // Apply a subpath - result = - getNodeFromSubPath(getNodeFromPath(package, type, path, n - 1), getSubPathAt(path, n - 1)) + result = getNodeFromSubPath(getNodeFromPath(type, path, n - 1), getSubPathAt(path, n - 1)) or // Apply a type step - typeStep(getNodeFromPath(package, type, path, n), result) + typeStep(getNodeFromPath(type, path, n), result) } /** @@ -496,15 +463,15 @@ private AccessPath getSubPathAt(AccessPath path, int n) { pragma[nomagic] private API::Node getNodeFromSubPath(API::Node base, AccessPath subPath, int n) { exists(AccessPath path, int k | - base = [getNodeFromPath(_, _, path, k), getNodeFromSubPath(_, path, k)] and + base = [getNodeFromPath(_, path, k), getNodeFromSubPath(_, path, k)] and subPath = getSubPathAt(path, k) and result = base and n = 0 ) or - exists(string package, string type, AccessPath basePath | - typeStepModel(package, type, basePath, subPath) and - base = getNodeFromPath(package, type, basePath) and + exists(string type, AccessPath basePath | + typeStepModel(type, basePath, subPath) and + base = getNodeFromPath(type, basePath) and result = base and n = 0 ) @@ -543,42 +510,40 @@ private API::Node getNodeFromSubPath(API::Node base, AccessPath subPath) { result = getNodeFromSubPath(base, subPath, subPath.getNumToken()) } -/** Gets the node identified by the given `(package, type, path)` tuple. */ -private API::Node getNodeFromPath(string package, string type, AccessPath path) { - result = getNodeFromPath(package, type, path, path.getNumToken()) +/** Gets the node identified by the given `(type, path)` tuple. */ +private API::Node getNodeFromPath(string type, AccessPath path) { + result = getNodeFromPath(type, path, path.getNumToken()) } pragma[nomagic] -private predicate typeStepModel(string package, string type, AccessPath basePath, AccessPath output) { - summaryModel(package, type, basePath, "", output, "type") +private predicate typeStepModel(string type, AccessPath basePath, AccessPath output) { + summaryModel(type, basePath, "", output, "type") } pragma[nomagic] private predicate typeStep(API::Node pred, API::Node succ) { - exists(string package, string type, AccessPath basePath, AccessPath output | - typeStepModel(package, type, basePath, output) and - pred = getNodeFromPath(package, type, basePath) and + exists(string type, AccessPath basePath, AccessPath output | + typeStepModel(type, basePath, output) and + pred = getNodeFromPath(type, basePath) and succ = getNodeFromSubPath(pred, output) ) } /** - * Gets an invocation identified by the given `(package, type, path)` tuple. + * Gets an invocation identified by the given `(type, path)` tuple. * * Unlike `getNodeFromPath`, the `path` may end with one or more call-site filters. */ -private Specific::InvokeNode getInvocationFromPath( - string package, string type, AccessPath path, int n -) { - result = Specific::getAnInvocationOf(getNodeFromPath(package, type, path, n)) +private Specific::InvokeNode getInvocationFromPath(string type, AccessPath path, int n) { + result = Specific::getAnInvocationOf(getNodeFromPath(type, path, n)) or - result = getInvocationFromPath(package, type, path, n - 1) and + result = getInvocationFromPath(type, path, n - 1) and invocationMatchesCallSiteFilter(result, path.getToken(n - 1)) } -/** Gets an invocation identified by the given `(package, type, path)` tuple. */ -private Specific::InvokeNode getInvocationFromPath(string package, string type, AccessPath path) { - result = getInvocationFromPath(package, type, path, path.getNumToken()) +/** Gets an invocation identified by the given `(type, path)` tuple. */ +private Specific::InvokeNode getInvocationFromPath(string type, AccessPath path) { + result = getInvocationFromPath(type, path, path.getNumToken()) } /** @@ -631,9 +596,9 @@ module ModelOutput { */ cached API::Node getASourceNode(string kind) { - exists(string package, string type, string path | - sourceModel(package, type, path, kind) and - result = getNodeFromPath(package, type, path) + exists(string type, string path | + sourceModel(type, path, kind) and + result = getNodeFromPath(type, path) ) } @@ -642,9 +607,9 @@ module ModelOutput { */ cached API::Node getASinkNode(string kind) { - exists(string package, string type, string path | - sinkModel(package, type, path, kind) and - result = getNodeFromPath(package, type, path) + exists(string type, string path | + sinkModel(type, path, kind) and + result = getNodeFromPath(type, path) ) } @@ -653,32 +618,31 @@ module ModelOutput { */ cached predicate relevantSummaryModel( - string package, string type, string path, string input, string output, string kind + string type, string path, string input, string output, string kind ) { - isRelevantPackage(package) and - summaryModel(package, type, path, input, output, kind) + isRelevantType(type) and + summaryModel(type, path, input, output, kind) } /** - * Holds if a `baseNode` is an invocation identified by the `package,type,path` part of a summary row. + * Holds if a `baseNode` is an invocation identified by the `type,path` part of a summary row. */ cached - predicate resolvedSummaryBase( - string package, string type, string path, Specific::InvokeNode baseNode - ) { - summaryModel(package, type, path, _, _, _) and - baseNode = getInvocationFromPath(package, type, path) + predicate resolvedSummaryBase(string type, string path, Specific::InvokeNode baseNode) { + summaryModel(type, path, _, _, _) and + baseNode = getInvocationFromPath(type, path) } /** - * Holds if `node` is seen as an instance of `(package,type)` due to a type definition + * Holds if `node` is seen as an instance of `type` due to a type definition * contributed by a CSV model. */ cached - API::Node getATypeNode(string package, string type) { result = getNodeFromType(package, type) } + API::Node getATypeNode(string type) { result = getNodeFromType(type) } } import Cached + import Specific::ModelOutputSpecific /** * Gets an error message relating to an invalid CSV row in a model. @@ -686,13 +650,13 @@ module ModelOutput { string getAWarning() { // Check number of columns exists(string row, string kind, int expectedArity, int actualArity | - any(SourceModelCsv csv).row(row) and kind = "source" and expectedArity = 4 + any(SourceModelCsv csv).row(row) and kind = "source" and expectedArity = 3 or - any(SinkModelCsv csv).row(row) and kind = "sink" and expectedArity = 4 + any(SinkModelCsv csv).row(row) and kind = "sink" and expectedArity = 3 or - any(SummaryModelCsv csv).row(row) and kind = "summary" and expectedArity = 6 + any(SummaryModelCsv csv).row(row) and kind = "summary" and expectedArity = 5 or - any(TypeModelCsv csv).row(row) and kind = "type" and expectedArity = 5 + any(TypeModelCsv csv).row(row) and kind = "type" and expectedArity = 3 or any(TypeVariableModelCsv csv).row(row) and kind = "type-variable" and expectedArity = 2 | @@ -705,7 +669,7 @@ module ModelOutput { or // Check names and arguments of access path tokens exists(AccessPath path, AccessPathToken token | - (isRelevantFullPath(_, _, path) or typeVariableModel(_, path)) and + (isRelevantFullPath(_, path) or typeVariableModel(_, path)) and token = path.getToken(_) | not isValidTokenNameInIdentifyingAccessPath(token.getName()) and diff --git a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll index 2f53c4fdeea..eb23efe4aff 100644 --- a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll +++ b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll @@ -31,19 +31,22 @@ import semmle.python.dataflow.new.DataFlow::DataFlow as DataFlow private import AccessPathSyntax /** - * Holds if models describing `package` may be relevant for the analysis of this database. + * Holds if models describing `type` may be relevant for the analysis of this database. */ -predicate isPackageUsed(string package) { API::moduleImportExists(package) } +predicate isTypeUsed(string type) { API::moduleImportExists(type) } -/** Gets a Python-specific interpretation of the `(package, type, path)` tuple after resolving the first `n` access path tokens. */ -bindingset[package, type, path] -API::Node getExtraNodeFromPath(string package, string type, AccessPath path, int n) { none() } +/** + * Holds if `type` can be obtained from an instance of `otherType` due to + * language semantics modeled by `getExtraNodeFromType`. + */ +predicate hasImplicitTypeModel(string type, string otherType) { none() } -/** Gets a Python-specific interpretation of the `(package, type)` tuple. */ -API::Node getExtraNodeFromType(string package, string type) { - type = "" and - result = API::moduleImport(package) -} +/** Gets a Python-specific interpretation of the `(type, path)` tuple after resolving the first `n` access path tokens. */ +bindingset[type, path] +API::Node getExtraNodeFromPath(string type, AccessPath path, int n) { none() } + +/** Gets a Python-specific interpretation of the given `type`. */ +API::Node getExtraNodeFromType(string type) { result = API::moduleImport(type) } /** * Gets a Python-specific API graph successor of `node` reachable by resolving `token`. @@ -121,9 +124,9 @@ predicate invocationMatchesExtraCallSiteFilter(API::CallNode invoke, AccessPathT */ pragma[nomagic] private predicate relevantInputOutputPath(API::CallNode base, AccessPath inputOrOutput) { - exists(string package, string type, string input, string output, string path | - ModelOutput::relevantSummaryModel(package, type, path, input, output, _) and - ModelOutput::resolvedSummaryBase(package, type, path, base) and + exists(string type, string input, string output, string path | + ModelOutput::relevantSummaryModel(type, path, input, output, _) and + ModelOutput::resolvedSummaryBase(type, path, base) and inputOrOutput = [input, output] ) } @@ -153,12 +156,9 @@ private API::Node getNodeFromInputOutputPath(API::CallNode baseNode, AccessPath * Holds if a CSV summary contributed the step `pred -> succ` of the given `kind`. */ predicate summaryStep(API::Node pred, API::Node succ, string kind) { - exists( - string package, string type, string path, API::CallNode base, AccessPath input, - AccessPath output - | - ModelOutput::relevantSummaryModel(package, type, path, input, output, kind) and - ModelOutput::resolvedSummaryBase(package, type, path, base) and + exists(string type, string path, API::CallNode base, AccessPath input, AccessPath output | + ModelOutput::relevantSummaryModel(type, path, input, output, kind) and + ModelOutput::resolvedSummaryBase(type, path, base) and pred = getNodeFromInputOutputPath(base, input) and succ = getNodeFromInputOutputPath(base, output) ) @@ -201,3 +201,5 @@ predicate isExtraValidTokenArgumentInIdentifyingAccessPath(string name, string a argument.regexpMatch("\\w+:") // keyword argument ) } + +module ModelOutputSpecific { } diff --git a/python/ql/test/library-tests/frameworks/data/test.ql b/python/ql/test/library-tests/frameworks/data/test.ql index 00411a5f95f..6c7639dc0c3 100644 --- a/python/ql/test/library-tests/frameworks/data/test.ql +++ b/python/ql/test/library-tests/frameworks/data/test.ql @@ -7,78 +7,78 @@ private import semmle.python.ApiGraphs class Steps extends ModelInput::SummaryModelCsv { override predicate row(string row) { - // package;type;path;input;output;kind + // type;path;input;output;kind row = [ - "testlib;;Member[Steps].Member[preserveTaint].Call;Argument[0];ReturnValue;taint", - "testlib;;Member[Steps].Member[taintIntoCallback];Argument[0];Argument[1..2].Parameter[0];taint", - "testlib;;Member[Steps].Member[preserveArgZeroAndTwo];Argument[0,2];ReturnValue;taint", - "testlib;;Member[Steps].Member[preserveAllButFirstArgument].Call;Argument[1..];ReturnValue;taint", + "testlib;Member[Steps].Member[preserveTaint].Call;Argument[0];ReturnValue;taint", + "testlib;Member[Steps].Member[taintIntoCallback];Argument[0];Argument[1..2].Parameter[0];taint", + "testlib;Member[Steps].Member[preserveArgZeroAndTwo];Argument[0,2];ReturnValue;taint", + "testlib;Member[Steps].Member[preserveAllButFirstArgument].Call;Argument[1..];ReturnValue;taint", ] } } class Types extends ModelInput::TypeModelCsv { override predicate row(string row) { - // package1;type1;package2;type2;path + // type1;type2;path row = [ - "testlib;Alias;testlib;;Member[alias].ReturnValue", - "testlib;Alias;testlib;Alias;Member[chain].ReturnValue", + "testlib.Alias;testlib;Member[alias].ReturnValue", + "testlib.Alias;testlib.Alias;Member[chain].ReturnValue", ] } } class Sinks extends ModelInput::SinkModelCsv { override predicate row(string row) { - // package;type;path;kind + // type;path;kind row = [ - "testlib;;Member[mySink].Argument[0,sinkName:];test-sink", + "testlib;Member[mySink].Argument[0,sinkName:];test-sink", // testing argument syntax - "testlib;;Member[Args].Member[arg0].Argument[0];test-sink", // - "testlib;;Member[Args].Member[arg1to3].Argument[1..3];test-sink", // - "testlib;;Member[Args].Member[lastarg].Argument[N-1];test-sink", // - "testlib;;Member[Args].Member[nonFist].Argument[1..];test-sink", // + "testlib;Member[Args].Member[arg0].Argument[0];test-sink", // + "testlib;Member[Args].Member[arg1to3].Argument[1..3];test-sink", // + "testlib;Member[Args].Member[lastarg].Argument[N-1];test-sink", // + "testlib;Member[Args].Member[nonFist].Argument[1..];test-sink", // // callsite filter. - "testlib;;Member[CallFilter].Member[arityOne].WithArity[1].Argument[any];test-sink", // - "testlib;;Member[CallFilter].Member[twoOrMore].WithArity[2..].Argument[0..];test-sink", // + "testlib;Member[CallFilter].Member[arityOne].WithArity[1].Argument[any];test-sink", // + "testlib;Member[CallFilter].Member[twoOrMore].WithArity[2..].Argument[0..];test-sink", // // testing non-positional arguments - "testlib;;Member[ArgPos].Instance.Member[self_thing].Argument[self];test-sink", // + "testlib;Member[ArgPos].Instance.Member[self_thing].Argument[self];test-sink", // // any argument - "testlib;;Member[ArgPos].Member[anyParam].Argument[any];test-sink", // - "testlib;;Member[ArgPos].Member[anyNamed].Argument[any-named];test-sink", // + "testlib;Member[ArgPos].Member[anyParam].Argument[any];test-sink", // + "testlib;Member[ArgPos].Member[anyNamed].Argument[any-named];test-sink", // // testing package syntax - "foo1.bar;;Member[baz1].Argument[any];test-sink", // - "foo2;;Member[bar].Member[baz2].Argument[any];test-sink", // + "foo1.bar;Member[baz1].Argument[any];test-sink", // + "foo2;Member[bar].Member[baz2].Argument[any];test-sink", // ] } } class Sources extends ModelInput::SourceModelCsv { - // package;type;path;kind + // type;path;kind override predicate row(string row) { row = [ - "testlib;;Member[getSource].ReturnValue;test-source", // - "testlib;Alias;;test-source", + "testlib;Member[getSource].ReturnValue;test-source", // + "testlib.Alias;;test-source", // testing parameter syntax - "testlib;;Member[Callbacks].Member[first].Argument[0].Parameter[0];test-source", // - "testlib;;Member[Callbacks].Member[param1to3].Argument[0].Parameter[1..3];test-source", // - "testlib;;Member[Callbacks].Member[nonFirst].Argument[0].Parameter[1..];test-source", // + "testlib;Member[Callbacks].Member[first].Argument[0].Parameter[0];test-source", // + "testlib;Member[Callbacks].Member[param1to3].Argument[0].Parameter[1..3];test-source", // + "testlib;Member[Callbacks].Member[nonFirst].Argument[0].Parameter[1..];test-source", // // Common tokens. - "testlib;;Member[CommonTokens].Member[makePromise].ReturnValue.Awaited;test-source", // - "testlib;;Member[CommonTokens].Member[Class].Instance;test-source", // - "testlib;;Member[CommonTokens].Member[Super].Subclass.Instance;test-source", // + "testlib;Member[CommonTokens].Member[makePromise].ReturnValue.Awaited;test-source", // + "testlib;Member[CommonTokens].Member[Class].Instance;test-source", // + "testlib;Member[CommonTokens].Member[Super].Subclass.Instance;test-source", // // method - "testlib;;Member[CommonTokens].Member[Class].Instance.Method[foo];test-source", // + "testlib;Member[CommonTokens].Member[Class].Instance.Method[foo];test-source", // // testing non-positional arguments - "testlib;;Member[ArgPos].Member[MyClass].Subclass.Member[foo].Parameter[self];test-source", // - "testlib;;Member[ArgPos].Member[MyClass].Subclass.Member[foo].Parameter[named:];test-source", // - "testlib;;Member[ArgPos].Member[MyClass].Subclass.Member[secondAndAfter].Parameter[1..];test-source", // - "testlib;;Member[ArgPos].Member[MyClass].Subclass.Member[otherSelfTest].Parameter[0];test-source", // - "testlib;;Member[ArgPos].Member[MyClass].Subclass.Member[anyParam].Parameter[any];test-source", // - "testlib;;Member[ArgPos].Member[MyClass].Subclass.Member[anyNamed].Parameter[any-named];test-source", // + "testlib;Member[ArgPos].Member[MyClass].Subclass.Member[foo].Parameter[self];test-source", // + "testlib;Member[ArgPos].Member[MyClass].Subclass.Member[foo].Parameter[named:];test-source", // + "testlib;Member[ArgPos].Member[MyClass].Subclass.Member[secondAndAfter].Parameter[1..];test-source", // + "testlib;Member[ArgPos].Member[MyClass].Subclass.Member[otherSelfTest].Parameter[0];test-source", // + "testlib;Member[ArgPos].Member[MyClass].Subclass.Member[anyParam].Parameter[any];test-source", // + "testlib;Member[ArgPos].Member[MyClass].Subclass.Member[anyNamed].Parameter[any-named];test-source", // ] } } @@ -111,13 +111,16 @@ class SyntaxErrorTest extends ModelInput::SinkModelCsv { override predicate row(string row) { row = [ - "testlib;;Member[foo],Member[bar];test-sink", "testlib;;Member[foo] Member[bar];test-sink", - "testlib;;Member[foo]. Member[bar];test-sink", - "testlib;;Member[foo], Member[bar];test-sink", - "testlib;;Member[foo]..Member[bar];test-sink", - "testlib;;Member[foo] .Member[bar];test-sink", "testlib;;Member[foo]Member[bar];test-sink", - "testlib;;Member[foo;test-sink", "testlib;;Member[foo]];test-sink", - "testlib;;Member[foo]].Member[bar];test-sink" + "testlib;Member[foo],Member[bar];test-sink", // + "testlib;Member[foo] Member[bar];test-sink", // + "testlib;Member[foo]. Member[bar];test-sink", // + "testlib;Member[foo], Member[bar];test-sink", // + "testlib;Member[foo]..Member[bar];test-sink", // + "testlib;Member[foo] .Member[bar];test-sink", // + "testlib;Member[foo]Member[bar];test-sink", // + "testlib;Member[foo;test-sink", // + "testlib;Member[foo]];test-sink", // + "testlib;Member[foo]].Member[bar];test-sink", // ] } } diff --git a/python/ql/test/library-tests/frameworks/data/warnings.expected b/python/ql/test/library-tests/frameworks/data/warnings.expected index 5cebb548358..23ce426abb9 100644 --- a/python/ql/test/library-tests/frameworks/data/warnings.expected +++ b/python/ql/test/library-tests/frameworks/data/warnings.expected @@ -1,5 +1,5 @@ -| CSV type row should have 5 columns but has 2: test;TooFewColumns | -| CSV type row should have 5 columns but has 8: test;TooManyColumns;;;Member[Foo].Instance;too;many;columns | +| CSV type row should have 3 columns but has 1: test.TooFewColumns | +| CSV type row should have 3 columns but has 6: test.TooManyColumns;;Member[Foo].Instance;too;many;columns | | Invalid argument '0-1' in token 'Argument[0-1]' in access path: Method[foo].Argument[0-1] | | Invalid argument '*' in token 'Argument[*]' in access path: Method[foo].Argument[*] | | Invalid token 'Argument' is missing its arguments, in access path: Method[foo].Argument | diff --git a/python/ql/test/library-tests/frameworks/data/warnings.ql b/python/ql/test/library-tests/frameworks/data/warnings.ql index 20b87211765..c6561797164 100644 --- a/python/ql/test/library-tests/frameworks/data/warnings.ql +++ b/python/ql/test/library-tests/frameworks/data/warnings.ql @@ -7,13 +7,13 @@ private class InvalidTypeModel extends ModelInput::TypeModelCsv { override predicate row(string row) { row = [ - "test;TooManyColumns;;;Member[Foo].Instance;too;many;columns", // - "test;TooFewColumns", // - "test;X;test;Y;Method[foo].Arg[0]", // - "test;X;test;Y;Method[foo].Argument[0-1]", // - "test;X;test;Y;Method[foo].Argument[*]", // - "test;X;test;Y;Method[foo].Argument", // - "test;X;test;Y;Method[foo].Member", // + "test.TooManyColumns;;Member[Foo].Instance;too;many;columns", // + "test.TooFewColumns", // + "test.X;test.Y;Method[foo].Arg[0]", // + "test.X;test.Y;Method[foo].Argument[0-1]", // + "test.X;test.Y;Method[foo].Argument[*]", // + "test.X;test.Y;Method[foo].Argument", // + "test.X;test.Y;Method[foo].Member", // ] } }