Python: merge package/type columns

This commit is contained in:
Asger F
2022-11-14 14:23:39 +01:00
parent 22316ee4fe
commit 1c910550e6
6 changed files with 227 additions and 257 deletions

View File

@@ -17,14 +17,15 @@ private module Asyncpg {
row =
[
// a `ConnectionPool` that is created when the result of `asyncpg.create_pool()` is awaited.
"asyncpg;ConnectionPool;asyncpg;;Member[create_pool].ReturnValue.Awaited",
"asyncpg.ConnectionPool;asyncpg;Member[create_pool].ReturnValue.Awaited",
// a `Connection` that is created when
// * - the result of `asyncpg.connect()` is awaited.
// * - the result of calling `acquire` on a `ConnectionPool` is awaited.
"asyncpg;Connection;asyncpg;;Member[connect].ReturnValue.Awaited",
"asyncpg;Connection;asyncpg;ConnectionPool;Member[acquire].ReturnValue.Awaited",
"asyncpg.Connection;asyncpg;Member[connect].ReturnValue.Awaited",
"asyncpg.Connection;asyncpg.ConnectionPool;Member[acquire].ReturnValue.Awaited",
// Creating an internal `~Connection` type that contains both `Connection` and `ConnectionPool`.
"asyncpg;~Connection;asyncpg;Connection;", "asyncpg;~Connection;asyncpg;ConnectionPool;"
"asyncpg.~Connection;asyncpg.Connection;", //
"asyncpg.~Connection;asyncpg.ConnectionPool;"
]
}
}
@@ -35,13 +36,13 @@ private module Asyncpg {
row =
[
// `Connection`s and `ConnectionPool`s provide some methods that execute SQL.
"asyncpg;~Connection;Member[copy_from_query,execute,fetch,fetchrow,fetchval].Argument[0,query:];sql-injection",
"asyncpg;~Connection;Member[executemany].Argument[0,command:];sql-injection",
"asyncpg.~Connection;Member[copy_from_query,execute,fetch,fetchrow,fetchval].Argument[0,query:];sql-injection",
"asyncpg.~Connection;Member[executemany].Argument[0,command:];sql-injection",
// A model of `Connection` and `ConnectionPool`, which provide some methods that access the file system.
"asyncpg;~Connection;Member[copy_from_query,copy_from_table].Argument[output:];path-injection",
"asyncpg;~Connection;Member[copy_to_table].Argument[source:];path-injection",
"asyncpg.~Connection;Member[copy_from_query,copy_from_table].Argument[output:];path-injection",
"asyncpg.~Connection;Member[copy_to_table].Argument[source:];path-injection",
// the `PreparedStatement` class in `asyncpg`.
"asyncpg;Connection;Member[prepare].Argument[0,query:];sql-injection",
"asyncpg.Connection;Member[prepare].Argument[0,query:];sql-injection",
]
}
}
@@ -58,7 +59,7 @@ private module Asyncpg {
module Cursor {
class CursorConstruction extends SqlConstruction::Range, API::CallNode {
CursorConstruction() {
this = ModelOutput::getATypeNode("asyncpg", "Connection").getMember("cursor").getACall()
this = ModelOutput::getATypeNode("asyncpg.Connection").getMember("cursor").getACall()
}
override DataFlow::Node getSql() { result = this.getParameter(0, "query").asSink() }
@@ -76,7 +77,7 @@ private module Asyncpg {
or
exists(API::CallNode prepareCall |
prepareCall =
ModelOutput::getATypeNode("asyncpg", "Connection").getMember("prepare").getACall()
ModelOutput::getATypeNode("asyncpg.Connection").getMember("prepare").getACall()
|
sql = prepareCall.getParameter(0, "query").asSink() and
this =

View File

@@ -5,23 +5,20 @@
*
* The CSV specification has the following columns:
* - Sources:
* `package; type; path; kind`
* `type; path; kind`
* - Sinks:
* `package; type; path; kind`
* `type; path; kind`
* - Summaries:
* `package; type; path; input; output; kind`
* `type; path; input; output; kind`
* - Types:
* `package1; type1; package2; type2; path`
* `type1; type2; path`
*
* The interpretation of a row is similar to API-graphs with a left-to-right
* reading.
* 1. The `package` column selects a package name, as it would be referenced in the source code,
* such as an NPM package, PIP package, or Ruby gem. (See `ModelsAsData.qll` for language-specific details).
* It may also be a synthetic package used for a type definition (see type definitions below).
* 2. The `type` column selects all instances of a named type originating from that package,
* or the empty string if referring to the package itself.
* 1. The `type` column selects all instances of a named type. The syntax of this column is language-specific.
* The language defines some type names that the analysis knows how to identify without models.
* It can also be a synthetic type name defined by a type definition (see type definitions below).
* 3. The `path` column is a `.`-separated list of "access path tokens" to resolve, starting at the node selected by `package` and `type`.
* 2. The `path` column is a `.`-separated list of "access path tokens" to resolve, starting at the node selected by `type`.
*
* Every language supports the following tokens:
* - Argument[n]: the n-th argument to a call. May be a range of form `x..y` (inclusive) and/or a comma-separated list.
@@ -42,10 +39,10 @@
*
* For the time being, please consult `ApiGraphModelsSpecific.qll` to see which language-specific tokens are currently supported.
*
* 4. The `input` and `output` columns specify how data enters and leaves the element selected by the
* first `(package, type, path)` tuple. Both strings are `.`-separated access paths
* 3. The `input` and `output` columns specify how data enters and leaves the element selected by the
* first `(type, path)` tuple. Both strings are `.`-separated access paths
* of the same syntax as the `path` column.
* 5. The `kind` column is a tag that can be referenced from QL to determine to
* 4. The `kind` column is a tag that can be referenced from QL to determine to
* which classes the interpreted elements should be added. For example, for
* sources `"remote"` indicates a default remote flow source, and for summaries
* `"taint"` indicates a default additional taint step and `"value"` indicates a
@@ -53,17 +50,17 @@
*
* ### Types
*
* A type row of form `package1; type1; package2; type2; path` indicates that `package2; type2; path`
* should be seen as an instance of the type `package1; type1`.
* A type row of form `type1; type2; path` indicates that `type2; path`
* should be seen as an instance of the type `type1`.
*
* A `(package,type)` pair may refer to a static type or a synthetic type name used internally in the model.
* A type may refer to a static type or a synthetic type name used internally in the model.
* Synthetic type names can be used to reuse intermediate sub-paths, when there are multiple ways to access the same
* element.
* See `ModelsAsData.qll` for the language-specific interpretation of packages and static type names.
* See `ModelsAsData.qll` for the language-specific interpretation of type names.
*
* By convention, if one wants to avoid clashes with static types from the package, the type name
* should be prefixed with a tilde character (`~`). For example, `(foo, ~Bar)` can be used to indicate that
* the type is related to the `foo` package but is not intended to match a static type.
* By convention, if one wants to avoid clashes with static types, the type name
* should be prefixed with a tilde character (`~`). For example, `~Bar` can be used to indicate that
* the type is not intended to match a static type.
*/
private import ApiGraphModelsSpecific as Specific
@@ -89,9 +86,9 @@ module ModelInput {
*
* A row of form
* ```
* package;type;path;kind
* type;path;kind
* ```
* indicates that the value at `(package, type, path)` should be seen as a flow
* indicates that the value at `(type, path)` should be seen as a flow
* source of the given `kind`.
*
* The kind `remote` represents a general remote flow source.
@@ -110,9 +107,9 @@ module ModelInput {
*
* A row of form
* ```
* package;type;path;kind
* type;path;kind
* ```
* indicates that the value at `(package, type, path)` should be seen as a sink
* indicates that the value at `(type, path)` should be seen as a sink
* of the given `kind`.
*/
abstract predicate row(string row);
@@ -129,9 +126,9 @@ module ModelInput {
*
* A row of form
* ```
* package;type;path;input;output;kind
* type;path;input;output;kind
* ```
* indicates that for each call to `(package, type, path)`, the value referred to by `input`
* indicates that for each call to `(type, path)`, the value referred to by `input`
* can flow to the value referred to by `output`.
*
* `kind` should be either `value` or `taint`, for value-preserving or taint-preserving steps,
@@ -151,9 +148,9 @@ module ModelInput {
*
* A row of form,
* ```
* package1;type1;package2;type2;path
* type1;type2;path
* ```
* indicates that `(package2, type2, path)` should be seen as an instance of `(package1, type1)`.
* indicates that `(type2, path)` should be seen as an instance of `type1`.
*/
abstract predicate row(string row);
}
@@ -163,28 +160,28 @@ module ModelInput {
*/
class TypeModel extends Unit {
/**
* Gets a data-flow node that is a source of the type `package;type`.
* Gets a data-flow node that is a source of the given `type`.
*
* This must not depend on API graphs, but ensures that an API node is generated for
* the source.
*/
DataFlow::Node getASource(string package, string type) { none() }
DataFlow::Node getASource(string type) { none() }
/**
* Gets a data-flow node that is a sink of the type `package;type`,
* Gets a data-flow node that is a sink of the given `type`,
* usually because it is an argument passed to a parameter of that type.
*
* This must not depend on API graphs, but ensures that an API node is generated for
* the sink.
*/
DataFlow::Node getASink(string package, string type) { none() }
DataFlow::Node getASink(string type) { none() }
/**
* Gets an API node that is a source or sink of the type `package;type`.
* Gets an API node that is a source or sink of the given `type`.
*
* Unlike `getASource` and `getASink`, this may depend on API graphs.
*/
API::Node getAnApiNode(string package, string type) { none() }
API::Node getAnApiNode(string type) { none() }
}
/**
@@ -209,7 +206,7 @@ private import ModelInput
/**
* An empty class, except in specific tests.
*
* If this is non-empty, all models are parsed even if the package is not
* If this is non-empty, all models are parsed even if the type name is not
* considered relevant for the current database.
*/
abstract class TestAllModels extends Unit { }
@@ -232,53 +229,44 @@ private predicate typeModel(string row) { any(TypeModelCsv s).row(inversePad(row
private predicate typeVariableModel(string row) { any(TypeVariableModelCsv s).row(inversePad(row)) }
/** Holds if a source model exists for the given parameters. */
predicate sourceModel(string package, string type, string path, string kind) {
predicate sourceModel(string type, string path, string kind) {
exists(string row |
sourceModel(row) and
row.splitAt(";", 0) = package and
row.splitAt(";", 1) = type and
row.splitAt(";", 2) = path and
row.splitAt(";", 3) = kind
row.splitAt(";", 0) = type and
row.splitAt(";", 1) = path and
row.splitAt(";", 2) = kind
)
}
/** Holds if a sink model exists for the given parameters. */
private predicate sinkModel(string package, string type, string path, string kind) {
private predicate sinkModel(string type, string path, string kind) {
exists(string row |
sinkModel(row) and
row.splitAt(";", 0) = package and
row.splitAt(";", 1) = type and
row.splitAt(";", 2) = path and
row.splitAt(";", 3) = kind
row.splitAt(";", 0) = type and
row.splitAt(";", 1) = path and
row.splitAt(";", 2) = kind
)
}
/** Holds if a summary model `row` exists for the given parameters. */
private predicate summaryModel(
string package, string type, string path, string input, string output, string kind
) {
private predicate summaryModel(string type, string path, string input, string output, string kind) {
exists(string row |
summaryModel(row) and
row.splitAt(";", 0) = package and
row.splitAt(";", 1) = type and
row.splitAt(";", 2) = path and
row.splitAt(";", 3) = input and
row.splitAt(";", 4) = output and
row.splitAt(";", 5) = kind
row.splitAt(";", 0) = type and
row.splitAt(";", 1) = path and
row.splitAt(";", 2) = input and
row.splitAt(";", 3) = output and
row.splitAt(";", 4) = kind
)
}
/** Holds if a type model exists for the given parameters. */
private predicate typeModel(
string package1, string type1, string package2, string type2, string path
) {
private predicate typeModel(string type1, string type2, string path) {
exists(string row |
typeModel(row) and
row.splitAt(";", 0) = package1 and
row.splitAt(";", 1) = type1 and
row.splitAt(";", 2) = package2 and
row.splitAt(";", 3) = type2 and
row.splitAt(";", 4) = path
row.splitAt(";", 0) = type1 and
row.splitAt(";", 1) = type2 and
row.splitAt(";", 2) = path
)
}
@@ -292,61 +280,50 @@ private predicate typeVariableModel(string name, string path) {
}
/**
* Gets a package that should be seen as an alias for the given other `package`,
* or the `package` itself.
* Holds if CSV rows involving `type` might be relevant for the analysis of this database.
*/
bindingset[package]
bindingset[result]
string getAPackageAlias(string package) {
typeModel(package, "", result, "", "")
or
result = package
}
/**
* Holds if CSV rows involving `package` might be relevant for the analysis of this database.
*/
private predicate isRelevantPackage(string package) {
predicate isRelevantType(string type) {
(
sourceModel(package, _, _, _) or
sinkModel(package, _, _, _) or
summaryModel(package, _, _, _, _, _) or
typeModel(_, _, package, _, _)
sourceModel(type, _, _) or
sinkModel(type, _, _) or
summaryModel(type, _, _, _, _) or
typeModel(_, type, _)
) and
(
Specific::isPackageUsed(package)
Specific::isTypeUsed(type)
or
exists(TestAllModels t)
)
or
exists(string other |
isRelevantPackage(other) and
typeModel(package, _, other, _, _)
exists(string other | isRelevantType(other) |
typeModel(type, other, _)
or
Specific::hasImplicitTypeModel(type, other)
)
}
/**
* Holds if `package,type,path` is used in some CSV row.
* Holds if `type,path` is used in some CSV row.
*/
pragma[nomagic]
predicate isRelevantFullPath(string package, string type, string path) {
isRelevantPackage(package) and
predicate isRelevantFullPath(string type, string path) {
isRelevantType(type) and
(
sourceModel(package, type, path, _) or
sinkModel(package, type, path, _) or
summaryModel(package, type, path, _, _, _) or
typeModel(_, _, package, type, path)
sourceModel(type, path, _) or
sinkModel(type, path, _) or
summaryModel(type, path, _, _, _) or
typeModel(_, type, path)
)
}
/** A string from a CSV row that should be parsed as an access path. */
private class AccessPathRange extends AccessPath::Range {
AccessPathRange() {
isRelevantFullPath(_, _, this)
isRelevantFullPath(_, this)
or
exists(string package | isRelevantPackage(package) |
summaryModel(package, _, _, this, _, _) or
summaryModel(package, _, _, _, this, _)
exists(string type | isRelevantType(type) |
summaryModel(type, _, this, _, _) or
summaryModel(type, _, _, this, _)
)
or
typeVariableModel(_, this)
@@ -400,83 +377,73 @@ private predicate invocationMatchesCallSiteFilter(Specific::InvokeNode invoke, A
}
private class TypeModelUseEntry extends API::EntryPoint {
private string package;
private string type;
TypeModelUseEntry() {
exists(any(TypeModel tm).getASource(package, type)) and
this = "TypeModelUseEntry;" + package + ";" + type
exists(any(TypeModel tm).getASource(type)) and
this = "TypeModelUseEntry;" + type
}
override DataFlow::LocalSourceNode getASource() {
result = any(TypeModel tm).getASource(package, type)
}
override DataFlow::LocalSourceNode getASource() { result = any(TypeModel tm).getASource(type) }
API::Node getNodeForType(string package_, string type_) {
package = package_ and type = type_ and result = this.getANode()
}
API::Node getNodeForType(string type_) { type = type_ and result = this.getANode() }
}
private class TypeModelDefEntry extends API::EntryPoint {
private string package;
private string type;
TypeModelDefEntry() {
exists(any(TypeModel tm).getASink(package, type)) and
this = "TypeModelDefEntry;" + package + ";" + type
exists(any(TypeModel tm).getASink(type)) and
this = "TypeModelDefEntry;" + type
}
override DataFlow::Node getASink() { result = any(TypeModel tm).getASink(package, type) }
override DataFlow::Node getASink() { result = any(TypeModel tm).getASink(type) }
API::Node getNodeForType(string package_, string type_) {
package = package_ and type = type_ and result = this.getANode()
}
API::Node getNodeForType(string type_) { type = type_ and result = this.getANode() }
}
/**
* Gets an API node identified by the given `(package,type)` pair.
* Gets an API node identified by the given `type`.
*/
pragma[nomagic]
private API::Node getNodeFromType(string package, string type) {
exists(string package2, string type2, AccessPath path2 |
typeModel(package, type, package2, type2, path2) and
result = getNodeFromPath(package2, type2, path2)
private API::Node getNodeFromType(string type) {
exists(string type2, AccessPath path2 |
typeModel(type, type2, path2) and
result = getNodeFromPath(type2, path2)
)
or
result = any(TypeModelUseEntry e).getNodeForType(package, type)
result = any(TypeModelUseEntry e).getNodeForType(type)
or
result = any(TypeModelDefEntry e).getNodeForType(package, type)
result = any(TypeModelDefEntry e).getNodeForType(type)
or
result = any(TypeModel t).getAnApiNode(package, type)
result = any(TypeModel t).getAnApiNode(type)
or
result = Specific::getExtraNodeFromType(package, type)
result = Specific::getExtraNodeFromType(type)
}
/**
* Gets the API node identified by the first `n` tokens of `path` in the given `(package, type, path)` tuple.
* Gets the API node identified by the first `n` tokens of `path` in the given `(type, path)` tuple.
*/
pragma[nomagic]
private API::Node getNodeFromPath(string package, string type, AccessPath path, int n) {
isRelevantFullPath(package, type, path) and
private API::Node getNodeFromPath(string type, AccessPath path, int n) {
isRelevantFullPath(type, path) and
(
n = 0 and
result = getNodeFromType(package, type)
result = getNodeFromType(type)
or
result = Specific::getExtraNodeFromPath(package, type, path, n)
result = Specific::getExtraNodeFromPath(type, path, n)
)
or
result = getSuccessorFromNode(getNodeFromPath(package, type, path, n - 1), path.getToken(n - 1))
result = getSuccessorFromNode(getNodeFromPath(type, path, n - 1), path.getToken(n - 1))
or
// Similar to the other recursive case, but where the path may have stepped through one or more call-site filters
result =
getSuccessorFromInvoke(getInvocationFromPath(package, type, path, n - 1), path.getToken(n - 1))
result = getSuccessorFromInvoke(getInvocationFromPath(type, path, n - 1), path.getToken(n - 1))
or
// Apply a subpath
result =
getNodeFromSubPath(getNodeFromPath(package, type, path, n - 1), getSubPathAt(path, n - 1))
result = getNodeFromSubPath(getNodeFromPath(type, path, n - 1), getSubPathAt(path, n - 1))
or
// Apply a type step
typeStep(getNodeFromPath(package, type, path, n), result)
typeStep(getNodeFromPath(type, path, n), result)
}
/**
@@ -496,15 +463,15 @@ private AccessPath getSubPathAt(AccessPath path, int n) {
pragma[nomagic]
private API::Node getNodeFromSubPath(API::Node base, AccessPath subPath, int n) {
exists(AccessPath path, int k |
base = [getNodeFromPath(_, _, path, k), getNodeFromSubPath(_, path, k)] and
base = [getNodeFromPath(_, path, k), getNodeFromSubPath(_, path, k)] and
subPath = getSubPathAt(path, k) and
result = base and
n = 0
)
or
exists(string package, string type, AccessPath basePath |
typeStepModel(package, type, basePath, subPath) and
base = getNodeFromPath(package, type, basePath) and
exists(string type, AccessPath basePath |
typeStepModel(type, basePath, subPath) and
base = getNodeFromPath(type, basePath) and
result = base and
n = 0
)
@@ -543,42 +510,40 @@ private API::Node getNodeFromSubPath(API::Node base, AccessPath subPath) {
result = getNodeFromSubPath(base, subPath, subPath.getNumToken())
}
/** Gets the node identified by the given `(package, type, path)` tuple. */
private API::Node getNodeFromPath(string package, string type, AccessPath path) {
result = getNodeFromPath(package, type, path, path.getNumToken())
/** Gets the node identified by the given `(type, path)` tuple. */
private API::Node getNodeFromPath(string type, AccessPath path) {
result = getNodeFromPath(type, path, path.getNumToken())
}
pragma[nomagic]
private predicate typeStepModel(string package, string type, AccessPath basePath, AccessPath output) {
summaryModel(package, type, basePath, "", output, "type")
private predicate typeStepModel(string type, AccessPath basePath, AccessPath output) {
summaryModel(type, basePath, "", output, "type")
}
pragma[nomagic]
private predicate typeStep(API::Node pred, API::Node succ) {
exists(string package, string type, AccessPath basePath, AccessPath output |
typeStepModel(package, type, basePath, output) and
pred = getNodeFromPath(package, type, basePath) and
exists(string type, AccessPath basePath, AccessPath output |
typeStepModel(type, basePath, output) and
pred = getNodeFromPath(type, basePath) and
succ = getNodeFromSubPath(pred, output)
)
}
/**
* Gets an invocation identified by the given `(package, type, path)` tuple.
* Gets an invocation identified by the given `(type, path)` tuple.
*
* Unlike `getNodeFromPath`, the `path` may end with one or more call-site filters.
*/
private Specific::InvokeNode getInvocationFromPath(
string package, string type, AccessPath path, int n
) {
result = Specific::getAnInvocationOf(getNodeFromPath(package, type, path, n))
private Specific::InvokeNode getInvocationFromPath(string type, AccessPath path, int n) {
result = Specific::getAnInvocationOf(getNodeFromPath(type, path, n))
or
result = getInvocationFromPath(package, type, path, n - 1) and
result = getInvocationFromPath(type, path, n - 1) and
invocationMatchesCallSiteFilter(result, path.getToken(n - 1))
}
/** Gets an invocation identified by the given `(package, type, path)` tuple. */
private Specific::InvokeNode getInvocationFromPath(string package, string type, AccessPath path) {
result = getInvocationFromPath(package, type, path, path.getNumToken())
/** Gets an invocation identified by the given `(type, path)` tuple. */
private Specific::InvokeNode getInvocationFromPath(string type, AccessPath path) {
result = getInvocationFromPath(type, path, path.getNumToken())
}
/**
@@ -631,9 +596,9 @@ module ModelOutput {
*/
cached
API::Node getASourceNode(string kind) {
exists(string package, string type, string path |
sourceModel(package, type, path, kind) and
result = getNodeFromPath(package, type, path)
exists(string type, string path |
sourceModel(type, path, kind) and
result = getNodeFromPath(type, path)
)
}
@@ -642,9 +607,9 @@ module ModelOutput {
*/
cached
API::Node getASinkNode(string kind) {
exists(string package, string type, string path |
sinkModel(package, type, path, kind) and
result = getNodeFromPath(package, type, path)
exists(string type, string path |
sinkModel(type, path, kind) and
result = getNodeFromPath(type, path)
)
}
@@ -653,32 +618,31 @@ module ModelOutput {
*/
cached
predicate relevantSummaryModel(
string package, string type, string path, string input, string output, string kind
string type, string path, string input, string output, string kind
) {
isRelevantPackage(package) and
summaryModel(package, type, path, input, output, kind)
isRelevantType(type) and
summaryModel(type, path, input, output, kind)
}
/**
* Holds if a `baseNode` is an invocation identified by the `package,type,path` part of a summary row.
* Holds if a `baseNode` is an invocation identified by the `type,path` part of a summary row.
*/
cached
predicate resolvedSummaryBase(
string package, string type, string path, Specific::InvokeNode baseNode
) {
summaryModel(package, type, path, _, _, _) and
baseNode = getInvocationFromPath(package, type, path)
predicate resolvedSummaryBase(string type, string path, Specific::InvokeNode baseNode) {
summaryModel(type, path, _, _, _) and
baseNode = getInvocationFromPath(type, path)
}
/**
* Holds if `node` is seen as an instance of `(package,type)` due to a type definition
* Holds if `node` is seen as an instance of `type` due to a type definition
* contributed by a CSV model.
*/
cached
API::Node getATypeNode(string package, string type) { result = getNodeFromType(package, type) }
API::Node getATypeNode(string type) { result = getNodeFromType(type) }
}
import Cached
import Specific::ModelOutputSpecific
/**
* Gets an error message relating to an invalid CSV row in a model.
@@ -686,13 +650,13 @@ module ModelOutput {
string getAWarning() {
// Check number of columns
exists(string row, string kind, int expectedArity, int actualArity |
any(SourceModelCsv csv).row(row) and kind = "source" and expectedArity = 4
any(SourceModelCsv csv).row(row) and kind = "source" and expectedArity = 3
or
any(SinkModelCsv csv).row(row) and kind = "sink" and expectedArity = 4
any(SinkModelCsv csv).row(row) and kind = "sink" and expectedArity = 3
or
any(SummaryModelCsv csv).row(row) and kind = "summary" and expectedArity = 6
any(SummaryModelCsv csv).row(row) and kind = "summary" and expectedArity = 5
or
any(TypeModelCsv csv).row(row) and kind = "type" and expectedArity = 5
any(TypeModelCsv csv).row(row) and kind = "type" and expectedArity = 3
or
any(TypeVariableModelCsv csv).row(row) and kind = "type-variable" and expectedArity = 2
|
@@ -705,7 +669,7 @@ module ModelOutput {
or
// Check names and arguments of access path tokens
exists(AccessPath path, AccessPathToken token |
(isRelevantFullPath(_, _, path) or typeVariableModel(_, path)) and
(isRelevantFullPath(_, path) or typeVariableModel(_, path)) and
token = path.getToken(_)
|
not isValidTokenNameInIdentifyingAccessPath(token.getName()) and

View File

@@ -31,19 +31,22 @@ import semmle.python.dataflow.new.DataFlow::DataFlow as DataFlow
private import AccessPathSyntax
/**
* Holds if models describing `package` may be relevant for the analysis of this database.
* Holds if models describing `type` may be relevant for the analysis of this database.
*/
predicate isPackageUsed(string package) { API::moduleImportExists(package) }
predicate isTypeUsed(string type) { API::moduleImportExists(type) }
/** Gets a Python-specific interpretation of the `(package, type, path)` tuple after resolving the first `n` access path tokens. */
bindingset[package, type, path]
API::Node getExtraNodeFromPath(string package, string type, AccessPath path, int n) { none() }
/**
* Holds if `type` can be obtained from an instance of `otherType` due to
* language semantics modeled by `getExtraNodeFromType`.
*/
predicate hasImplicitTypeModel(string type, string otherType) { none() }
/** Gets a Python-specific interpretation of the `(package, type)` tuple. */
API::Node getExtraNodeFromType(string package, string type) {
type = "" and
result = API::moduleImport(package)
}
/** Gets a Python-specific interpretation of the `(type, path)` tuple after resolving the first `n` access path tokens. */
bindingset[type, path]
API::Node getExtraNodeFromPath(string type, AccessPath path, int n) { none() }
/** Gets a Python-specific interpretation of the given `type`. */
API::Node getExtraNodeFromType(string type) { result = API::moduleImport(type) }
/**
* Gets a Python-specific API graph successor of `node` reachable by resolving `token`.
@@ -121,9 +124,9 @@ predicate invocationMatchesExtraCallSiteFilter(API::CallNode invoke, AccessPathT
*/
pragma[nomagic]
private predicate relevantInputOutputPath(API::CallNode base, AccessPath inputOrOutput) {
exists(string package, string type, string input, string output, string path |
ModelOutput::relevantSummaryModel(package, type, path, input, output, _) and
ModelOutput::resolvedSummaryBase(package, type, path, base) and
exists(string type, string input, string output, string path |
ModelOutput::relevantSummaryModel(type, path, input, output, _) and
ModelOutput::resolvedSummaryBase(type, path, base) and
inputOrOutput = [input, output]
)
}
@@ -153,12 +156,9 @@ private API::Node getNodeFromInputOutputPath(API::CallNode baseNode, AccessPath
* Holds if a CSV summary contributed the step `pred -> succ` of the given `kind`.
*/
predicate summaryStep(API::Node pred, API::Node succ, string kind) {
exists(
string package, string type, string path, API::CallNode base, AccessPath input,
AccessPath output
|
ModelOutput::relevantSummaryModel(package, type, path, input, output, kind) and
ModelOutput::resolvedSummaryBase(package, type, path, base) and
exists(string type, string path, API::CallNode base, AccessPath input, AccessPath output |
ModelOutput::relevantSummaryModel(type, path, input, output, kind) and
ModelOutput::resolvedSummaryBase(type, path, base) and
pred = getNodeFromInputOutputPath(base, input) and
succ = getNodeFromInputOutputPath(base, output)
)
@@ -201,3 +201,5 @@ predicate isExtraValidTokenArgumentInIdentifyingAccessPath(string name, string a
argument.regexpMatch("\\w+:") // keyword argument
)
}
module ModelOutputSpecific { }

View File

@@ -7,78 +7,78 @@ private import semmle.python.ApiGraphs
class Steps extends ModelInput::SummaryModelCsv {
override predicate row(string row) {
// package;type;path;input;output;kind
// type;path;input;output;kind
row =
[
"testlib;;Member[Steps].Member[preserveTaint].Call;Argument[0];ReturnValue;taint",
"testlib;;Member[Steps].Member[taintIntoCallback];Argument[0];Argument[1..2].Parameter[0];taint",
"testlib;;Member[Steps].Member[preserveArgZeroAndTwo];Argument[0,2];ReturnValue;taint",
"testlib;;Member[Steps].Member[preserveAllButFirstArgument].Call;Argument[1..];ReturnValue;taint",
"testlib;Member[Steps].Member[preserveTaint].Call;Argument[0];ReturnValue;taint",
"testlib;Member[Steps].Member[taintIntoCallback];Argument[0];Argument[1..2].Parameter[0];taint",
"testlib;Member[Steps].Member[preserveArgZeroAndTwo];Argument[0,2];ReturnValue;taint",
"testlib;Member[Steps].Member[preserveAllButFirstArgument].Call;Argument[1..];ReturnValue;taint",
]
}
}
class Types extends ModelInput::TypeModelCsv {
override predicate row(string row) {
// package1;type1;package2;type2;path
// type1;type2;path
row =
[
"testlib;Alias;testlib;;Member[alias].ReturnValue",
"testlib;Alias;testlib;Alias;Member[chain].ReturnValue",
"testlib.Alias;testlib;Member[alias].ReturnValue",
"testlib.Alias;testlib.Alias;Member[chain].ReturnValue",
]
}
}
class Sinks extends ModelInput::SinkModelCsv {
override predicate row(string row) {
// package;type;path;kind
// type;path;kind
row =
[
"testlib;;Member[mySink].Argument[0,sinkName:];test-sink",
"testlib;Member[mySink].Argument[0,sinkName:];test-sink",
// testing argument syntax
"testlib;;Member[Args].Member[arg0].Argument[0];test-sink", //
"testlib;;Member[Args].Member[arg1to3].Argument[1..3];test-sink", //
"testlib;;Member[Args].Member[lastarg].Argument[N-1];test-sink", //
"testlib;;Member[Args].Member[nonFist].Argument[1..];test-sink", //
"testlib;Member[Args].Member[arg0].Argument[0];test-sink", //
"testlib;Member[Args].Member[arg1to3].Argument[1..3];test-sink", //
"testlib;Member[Args].Member[lastarg].Argument[N-1];test-sink", //
"testlib;Member[Args].Member[nonFist].Argument[1..];test-sink", //
// callsite filter.
"testlib;;Member[CallFilter].Member[arityOne].WithArity[1].Argument[any];test-sink", //
"testlib;;Member[CallFilter].Member[twoOrMore].WithArity[2..].Argument[0..];test-sink", //
"testlib;Member[CallFilter].Member[arityOne].WithArity[1].Argument[any];test-sink", //
"testlib;Member[CallFilter].Member[twoOrMore].WithArity[2..].Argument[0..];test-sink", //
// testing non-positional arguments
"testlib;;Member[ArgPos].Instance.Member[self_thing].Argument[self];test-sink", //
"testlib;Member[ArgPos].Instance.Member[self_thing].Argument[self];test-sink", //
// any argument
"testlib;;Member[ArgPos].Member[anyParam].Argument[any];test-sink", //
"testlib;;Member[ArgPos].Member[anyNamed].Argument[any-named];test-sink", //
"testlib;Member[ArgPos].Member[anyParam].Argument[any];test-sink", //
"testlib;Member[ArgPos].Member[anyNamed].Argument[any-named];test-sink", //
// testing package syntax
"foo1.bar;;Member[baz1].Argument[any];test-sink", //
"foo2;;Member[bar].Member[baz2].Argument[any];test-sink", //
"foo1.bar;Member[baz1].Argument[any];test-sink", //
"foo2;Member[bar].Member[baz2].Argument[any];test-sink", //
]
}
}
class Sources extends ModelInput::SourceModelCsv {
// package;type;path;kind
// type;path;kind
override predicate row(string row) {
row =
[
"testlib;;Member[getSource].ReturnValue;test-source", //
"testlib;Alias;;test-source",
"testlib;Member[getSource].ReturnValue;test-source", //
"testlib.Alias;;test-source",
// testing parameter syntax
"testlib;;Member[Callbacks].Member[first].Argument[0].Parameter[0];test-source", //
"testlib;;Member[Callbacks].Member[param1to3].Argument[0].Parameter[1..3];test-source", //
"testlib;;Member[Callbacks].Member[nonFirst].Argument[0].Parameter[1..];test-source", //
"testlib;Member[Callbacks].Member[first].Argument[0].Parameter[0];test-source", //
"testlib;Member[Callbacks].Member[param1to3].Argument[0].Parameter[1..3];test-source", //
"testlib;Member[Callbacks].Member[nonFirst].Argument[0].Parameter[1..];test-source", //
// Common tokens.
"testlib;;Member[CommonTokens].Member[makePromise].ReturnValue.Awaited;test-source", //
"testlib;;Member[CommonTokens].Member[Class].Instance;test-source", //
"testlib;;Member[CommonTokens].Member[Super].Subclass.Instance;test-source", //
"testlib;Member[CommonTokens].Member[makePromise].ReturnValue.Awaited;test-source", //
"testlib;Member[CommonTokens].Member[Class].Instance;test-source", //
"testlib;Member[CommonTokens].Member[Super].Subclass.Instance;test-source", //
// method
"testlib;;Member[CommonTokens].Member[Class].Instance.Method[foo];test-source", //
"testlib;Member[CommonTokens].Member[Class].Instance.Method[foo];test-source", //
// testing non-positional arguments
"testlib;;Member[ArgPos].Member[MyClass].Subclass.Member[foo].Parameter[self];test-source", //
"testlib;;Member[ArgPos].Member[MyClass].Subclass.Member[foo].Parameter[named:];test-source", //
"testlib;;Member[ArgPos].Member[MyClass].Subclass.Member[secondAndAfter].Parameter[1..];test-source", //
"testlib;;Member[ArgPos].Member[MyClass].Subclass.Member[otherSelfTest].Parameter[0];test-source", //
"testlib;;Member[ArgPos].Member[MyClass].Subclass.Member[anyParam].Parameter[any];test-source", //
"testlib;;Member[ArgPos].Member[MyClass].Subclass.Member[anyNamed].Parameter[any-named];test-source", //
"testlib;Member[ArgPos].Member[MyClass].Subclass.Member[foo].Parameter[self];test-source", //
"testlib;Member[ArgPos].Member[MyClass].Subclass.Member[foo].Parameter[named:];test-source", //
"testlib;Member[ArgPos].Member[MyClass].Subclass.Member[secondAndAfter].Parameter[1..];test-source", //
"testlib;Member[ArgPos].Member[MyClass].Subclass.Member[otherSelfTest].Parameter[0];test-source", //
"testlib;Member[ArgPos].Member[MyClass].Subclass.Member[anyParam].Parameter[any];test-source", //
"testlib;Member[ArgPos].Member[MyClass].Subclass.Member[anyNamed].Parameter[any-named];test-source", //
]
}
}
@@ -111,13 +111,16 @@ class SyntaxErrorTest extends ModelInput::SinkModelCsv {
override predicate row(string row) {
row =
[
"testlib;;Member[foo],Member[bar];test-sink", "testlib;;Member[foo] Member[bar];test-sink",
"testlib;;Member[foo]. Member[bar];test-sink",
"testlib;;Member[foo], Member[bar];test-sink",
"testlib;;Member[foo]..Member[bar];test-sink",
"testlib;;Member[foo] .Member[bar];test-sink", "testlib;;Member[foo]Member[bar];test-sink",
"testlib;;Member[foo;test-sink", "testlib;;Member[foo]];test-sink",
"testlib;;Member[foo]].Member[bar];test-sink"
"testlib;Member[foo],Member[bar];test-sink", //
"testlib;Member[foo] Member[bar];test-sink", //
"testlib;Member[foo]. Member[bar];test-sink", //
"testlib;Member[foo], Member[bar];test-sink", //
"testlib;Member[foo]..Member[bar];test-sink", //
"testlib;Member[foo] .Member[bar];test-sink", //
"testlib;Member[foo]Member[bar];test-sink", //
"testlib;Member[foo;test-sink", //
"testlib;Member[foo]];test-sink", //
"testlib;Member[foo]].Member[bar];test-sink", //
]
}
}

View File

@@ -1,5 +1,5 @@
| CSV type row should have 5 columns but has 2: test;TooFewColumns |
| CSV type row should have 5 columns but has 8: test;TooManyColumns;;;Member[Foo].Instance;too;many;columns |
| CSV type row should have 3 columns but has 1: test.TooFewColumns |
| CSV type row should have 3 columns but has 6: test.TooManyColumns;;Member[Foo].Instance;too;many;columns |
| Invalid argument '0-1' in token 'Argument[0-1]' in access path: Method[foo].Argument[0-1] |
| Invalid argument '*' in token 'Argument[*]' in access path: Method[foo].Argument[*] |
| Invalid token 'Argument' is missing its arguments, in access path: Method[foo].Argument |

View File

@@ -7,13 +7,13 @@ private class InvalidTypeModel extends ModelInput::TypeModelCsv {
override predicate row(string row) {
row =
[
"test;TooManyColumns;;;Member[Foo].Instance;too;many;columns", //
"test;TooFewColumns", //
"test;X;test;Y;Method[foo].Arg[0]", //
"test;X;test;Y;Method[foo].Argument[0-1]", //
"test;X;test;Y;Method[foo].Argument[*]", //
"test;X;test;Y;Method[foo].Argument", //
"test;X;test;Y;Method[foo].Member", //
"test.TooManyColumns;;Member[Foo].Instance;too;many;columns", //
"test.TooFewColumns", //
"test.X;test.Y;Method[foo].Arg[0]", //
"test.X;test.Y;Method[foo].Argument[0-1]", //
"test.X;test.Y;Method[foo].Argument[*]", //
"test.X;test.Y;Method[foo].Argument", //
"test.X;test.Y;Method[foo].Member", //
]
}
}