mirror of
https://github.com/github/codeql.git
synced 2025-12-21 11:16:30 +01:00
Merge pull request #8781 from yoff/python-dataflow/flow-summaries-from-scratch
Python dataflow: flow summaries restart
This commit is contained in:
24
python/ql/lib/design.md
Normal file
24
python/ql/lib/design.md
Normal file
@@ -0,0 +1,24 @@
|
||||
# The Python libraries
|
||||
|
||||
The Python libraries are a collection of libraries for analysing Python code.
|
||||
Everythng can be imported by importing `python.qll`.
|
||||
|
||||
## The analysis layers
|
||||
|
||||
The analysis is built up in layers. the stack looks like this:
|
||||
|
||||
- AST (coms from the extractor)
|
||||
- Control flow graph (CFG) (built by the extractor)
|
||||
- SSA
|
||||
- Call graph
|
||||
- Data flow
|
||||
|
||||
## Avoiding non-monotonic recursion
|
||||
|
||||
Given the many interactivg layers, it is imprtant to decie which predicates are allowed to be mutually recursive in order to avoid non-monotonic recursion when negation is used to express the predicates.
|
||||
As an example, we have defined local source as those whcih do not receive local flow. This means that the local flow relation is not allowed to be recursive with anything depending on local sources.
|
||||
|
||||
Some particular reatrictions to keep in mind:
|
||||
|
||||
- Typetracking needs to use a local flow step not including summaries
|
||||
- Typetracking needs to use a call graph not including summaries
|
||||
@@ -380,6 +380,21 @@ module API {
|
||||
not m.matches("%.%")
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if an import of module `m` exists.
|
||||
*
|
||||
* This is determined without referring to `Node`,
|
||||
* allowing this predicate to be used in a negative
|
||||
* context when constructing new nodes.
|
||||
*/
|
||||
predicate moduleImportExists(string m) {
|
||||
Impl::isImported(m) and
|
||||
// restrict `moduleImport` so it will never give results for a dotted name. Note
|
||||
// that we cannot move this logic to the `MkModuleImport` construction, since we
|
||||
// need the intermediate API graph nodes for the prefixes in `import foo.bar.baz`.
|
||||
not m.matches("%.%")
|
||||
}
|
||||
|
||||
/** Gets a node corresponding to the built-in with the given name, if any. */
|
||||
Node builtin(string n) { result = moduleImport("builtins").getMember(n) }
|
||||
|
||||
@@ -605,14 +620,38 @@ module API {
|
||||
*
|
||||
* Ignores relative imports, such as `from ..foo.bar import baz`.
|
||||
*/
|
||||
private predicate imports(DataFlow::Node imp, string name) {
|
||||
private predicate imports(DataFlow::CfgNode imp, string name) {
|
||||
exists(PY::ImportExprNode iexpr |
|
||||
imp.asCfgNode() = iexpr and
|
||||
imp.getNode() = iexpr and
|
||||
not iexpr.getNode().isRelative() and
|
||||
name = iexpr.getNode().getImportedModuleName()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if the module `name` is imported.
|
||||
*
|
||||
* This is determined syntactically.
|
||||
*/
|
||||
cached
|
||||
predicate isImported(string name) {
|
||||
// Ignore the following module name for Python 2, as we alias `__builtin__` to `builtins` elsewhere
|
||||
(name != "__builtin__" or PY::major_version() = 3) and
|
||||
(
|
||||
exists(PY::ImportExpr iexpr |
|
||||
not iexpr.isRelative() and
|
||||
name = iexpr.getImportedModuleName()
|
||||
)
|
||||
or
|
||||
// When we `import foo.bar.baz` we want to create API graph nodes also for the prefixes
|
||||
// `foo` and `foo.bar`:
|
||||
name = any(PY::ImportExpr e | not e.isRelative()).getAnImportedModuleName()
|
||||
)
|
||||
or
|
||||
// The `builtins` module should always be implicitly available
|
||||
name = "builtins"
|
||||
}
|
||||
|
||||
private import semmle.python.dataflow.new.internal.Builtins
|
||||
private import semmle.python.dataflow.new.internal.ImportStar
|
||||
|
||||
@@ -631,7 +670,7 @@ module API {
|
||||
*/
|
||||
private TApiNode potential_import_star_base(PY::Scope s) {
|
||||
exists(DataFlow::Node n |
|
||||
n.asCfgNode() = ImportStar::potentialImportStarBase(s) and
|
||||
n.(DataFlow::CfgNode).getNode() = ImportStar::potentialImportStarBase(s) and
|
||||
use(result, n)
|
||||
)
|
||||
}
|
||||
@@ -653,17 +692,17 @@ module API {
|
||||
or
|
||||
// TODO: I had expected `DataFlow::AttrWrite` to contain the attribute writes from a dict, that's how JS works.
|
||||
exists(PY::Dict dict, PY::KeyValuePair item |
|
||||
dict = pred.asExpr() and
|
||||
dict = pred.(DataFlow::ExprNode).getNode().getNode() and
|
||||
dict.getItem(_) = item and
|
||||
lbl = Label::member(item.getKey().(PY::StrConst).getS()) and
|
||||
rhs.asExpr() = item.getValue()
|
||||
rhs.(DataFlow::ExprNode).getNode().getNode() = item.getValue()
|
||||
)
|
||||
or
|
||||
exists(PY::CallableExpr fn | fn = pred.asExpr() |
|
||||
exists(PY::CallableExpr fn | fn = pred.(DataFlow::ExprNode).getNode().getNode() |
|
||||
not fn.getInnerScope().isAsync() and
|
||||
lbl = Label::return() and
|
||||
exists(PY::Return ret |
|
||||
rhs.asExpr() = ret.getValue() and
|
||||
rhs.(DataFlow::ExprNode).getNode().getNode() = ret.getValue() and
|
||||
ret.getScope() = fn.getInnerScope()
|
||||
)
|
||||
)
|
||||
@@ -716,9 +755,9 @@ module API {
|
||||
// "benign" and let subclasses edges flow through anyway.
|
||||
// see example in https://github.com/django/django/blob/c2250cfb80e27cdf8d098428824da2800a18cadf/tests/auth_tests/test_views.py#L40-L46
|
||||
(
|
||||
ref.asExpr() = clsExpr
|
||||
ref.(DataFlow::ExprNode).getNode().getNode() = clsExpr
|
||||
or
|
||||
ref.asExpr() = clsExpr.getADecoratorCall()
|
||||
ref.(DataFlow::ExprNode).getNode().getNode() = clsExpr.getADecoratorCall()
|
||||
)
|
||||
)
|
||||
or
|
||||
@@ -731,7 +770,7 @@ module API {
|
||||
)
|
||||
or
|
||||
exists(DataFlow::Node def, PY::CallableExpr fn |
|
||||
rhs(base, def) and fn = trackDefNode(def).asExpr()
|
||||
rhs(base, def) and fn = trackDefNode(def).(DataFlow::ExprNode).getNode().getNode()
|
||||
|
|
||||
exists(int i, int offset |
|
||||
if exists(PY::Parameter p | p = fn.getInnerScope().getAnArg() and p.isSelf())
|
||||
@@ -739,18 +778,19 @@ module API {
|
||||
else offset = 0
|
||||
|
|
||||
lbl = Label::parameter(i - offset) and
|
||||
ref.asExpr() = fn.getInnerScope().getArg(i)
|
||||
ref.(DataFlow::ExprNode).getNode().getNode() = fn.getInnerScope().getArg(i)
|
||||
)
|
||||
or
|
||||
exists(string name, PY::Parameter param |
|
||||
lbl = Label::keywordParameter(name) and
|
||||
param = fn.getInnerScope().getArgByName(name) and
|
||||
not param.isSelf() and
|
||||
ref.asExpr() = param
|
||||
ref.(DataFlow::ExprNode).getNode().getNode() = param
|
||||
)
|
||||
or
|
||||
lbl = Label::selfParameter() and
|
||||
ref.asExpr() = any(PY::Parameter p | p = fn.getInnerScope().getAnArg() and p.isSelf())
|
||||
ref.(DataFlow::ExprNode).getNode().getNode() =
|
||||
any(PY::Parameter p | p = fn.getInnerScope().getAnArg() and p.isSelf())
|
||||
)
|
||||
or
|
||||
// Built-ins, treated as members of the module `builtins`
|
||||
@@ -762,7 +802,7 @@ module API {
|
||||
base = potential_import_star_base(s) and
|
||||
lbl =
|
||||
Label::member(any(string name |
|
||||
ImportStar::namePossiblyDefinedInImportStar(ref.asCfgNode(), name, s)
|
||||
ImportStar::namePossiblyDefinedInImportStar(ref.(DataFlow::CfgNode).getNode(), name, s)
|
||||
))
|
||||
)
|
||||
or
|
||||
@@ -854,7 +894,7 @@ module API {
|
||||
DataFlow::LocalSourceNode trackUseNode(DataFlow::LocalSourceNode src) {
|
||||
Stages::TypeTracking::ref() and
|
||||
result = trackUseNode(src, DataFlow::TypeTracker::end()) and
|
||||
not result instanceof DataFlow::ModuleVariableNode
|
||||
result instanceof DataFlow::ExprNode
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1044,7 +1084,7 @@ module API {
|
||||
ApiLabel memberFromRef(DataFlow::AttrRef ref) {
|
||||
result = member(ref.getAttributeName())
|
||||
or
|
||||
not exists(ref.getAttributeName()) and
|
||||
ref.unknownAttribute() and
|
||||
result = unknownMember()
|
||||
}
|
||||
|
||||
|
||||
107
python/ql/lib/semmle/python/dataflow/new/FlowSummary.qll
Normal file
107
python/ql/lib/semmle/python/dataflow/new/FlowSummary.qll
Normal file
@@ -0,0 +1,107 @@
|
||||
/** Provides classes and predicates for defining flow summaries. */
|
||||
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.frameworks.data.ModelsAsData
|
||||
private import semmle.python.ApiGraphs
|
||||
private import internal.FlowSummaryImpl as Impl
|
||||
private import internal.DataFlowUtil
|
||||
private import internal.DataFlowPrivate
|
||||
|
||||
// import all instances below
|
||||
private module Summaries {
|
||||
private import semmle.python.Frameworks
|
||||
}
|
||||
|
||||
class SummaryComponent = Impl::Public::SummaryComponent;
|
||||
|
||||
/** Provides predicates for constructing summary components. */
|
||||
module SummaryComponent {
|
||||
private import Impl::Public::SummaryComponent as SC
|
||||
|
||||
predicate parameter = SC::parameter/1;
|
||||
|
||||
predicate argument = SC::argument/1;
|
||||
|
||||
predicate content = SC::content/1;
|
||||
|
||||
/** Gets a summary component that represents a list element. */
|
||||
SummaryComponent listElement() { result = content(any(ListElementContent c)) }
|
||||
|
||||
/** Gets a summary component that represents the return value of a call. */
|
||||
SummaryComponent return() { result = SC::return(any(ReturnKind rk)) }
|
||||
}
|
||||
|
||||
class SummaryComponentStack = Impl::Public::SummaryComponentStack;
|
||||
|
||||
/** Provides predicates for constructing stacks of summary components. */
|
||||
module SummaryComponentStack {
|
||||
private import Impl::Public::SummaryComponentStack as SCS
|
||||
|
||||
predicate singleton = SCS::singleton/1;
|
||||
|
||||
predicate push = SCS::push/2;
|
||||
|
||||
predicate argument = SCS::argument/1;
|
||||
|
||||
/** Gets a singleton stack representing the return value of a call. */
|
||||
SummaryComponentStack return() { result = singleton(SummaryComponent::return()) }
|
||||
}
|
||||
|
||||
/** A callable with a flow summary, identified by a unique string. */
|
||||
abstract class SummarizedCallable extends LibraryCallable, Impl::Public::SummarizedCallable {
|
||||
bindingset[this]
|
||||
SummarizedCallable() { any() }
|
||||
|
||||
/**
|
||||
* Same as
|
||||
*
|
||||
* ```ql
|
||||
* propagatesFlow(
|
||||
* SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
|
||||
* )
|
||||
* ```
|
||||
*
|
||||
* but uses an external (string) representation of the input and output stacks.
|
||||
*/
|
||||
pragma[nomagic]
|
||||
predicate propagatesFlowExt(string input, string output, boolean preservesValue) { none() }
|
||||
}
|
||||
|
||||
class RequiredSummaryComponentStack = Impl::Public::RequiredSummaryComponentStack;
|
||||
// // This gives access to getNodeFromPath, which is not constrained to `CallNode`s
|
||||
// // as `resolvedSummaryBase` is.
|
||||
// private import semmle.python.frameworks.data.internal.ApiGraphModels as AGM
|
||||
//
|
||||
// private class SummarizedCallableFromModel extends SummarizedCallable {
|
||||
// string package;
|
||||
// string type;
|
||||
// string path;
|
||||
// SummarizedCallableFromModel() {
|
||||
// ModelOutput::relevantSummaryModel(package, type, path, _, _, _) and
|
||||
// this = package + ";" + type + ";" + path
|
||||
// }
|
||||
// override CallCfgNode getACall() {
|
||||
// exists(API::CallNode base |
|
||||
// ModelOutput::resolvedSummaryBase(package, type, path, base) and
|
||||
// result = base.getACall()
|
||||
// )
|
||||
// }
|
||||
// override ArgumentNode getACallback() {
|
||||
// exists(API::Node base |
|
||||
// base = AGM::getNodeFromPath(package, type, path) and
|
||||
// result = base.getAValueReachableFromSource()
|
||||
// )
|
||||
// }
|
||||
// override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
|
||||
// exists(string kind |
|
||||
// ModelOutput::relevantSummaryModel(package, type, path, input, output, kind)
|
||||
// |
|
||||
// kind = "value" and
|
||||
// preservesValue = true
|
||||
// or
|
||||
// kind = "taint" and
|
||||
// preservesValue = false
|
||||
// )
|
||||
// }
|
||||
// }
|
||||
@@ -40,7 +40,7 @@ abstract class AttrRef extends Node {
|
||||
or
|
||||
exists(LocalSourceNode nodeFrom |
|
||||
nodeFrom.flowsTo(this.getAttributeNameExpr()) and
|
||||
attrName = nodeFrom.asExpr().(StrConst).getText()
|
||||
attrName = nodeFrom.(CfgNode).getNode().getNode().(StrConst).getText()
|
||||
)
|
||||
}
|
||||
|
||||
@@ -50,6 +50,9 @@ abstract class AttrRef extends Node {
|
||||
* better results.
|
||||
*/
|
||||
abstract string getAttributeName();
|
||||
|
||||
/** Holds if a name could not be determined for this attribute. */
|
||||
predicate unknownAttribute() { not exists(this.getAttributeName()) }
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -175,7 +178,7 @@ private class SetAttrCallAsAttrWrite extends AttrWrite, CfgNode {
|
||||
override ExprNode getAttributeNameExpr() { result.asCfgNode() = node.getName() }
|
||||
|
||||
override string getAttributeName() {
|
||||
result = this.getAttributeNameExpr().asExpr().(StrConst).getText()
|
||||
result = this.getAttributeNameExpr().(CfgNode).getNode().getNode().(StrConst).getText()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -251,7 +254,7 @@ private class GetAttrCallAsAttrRead extends AttrRead, CfgNode {
|
||||
override ExprNode getAttributeNameExpr() { result.asCfgNode() = node.getName() }
|
||||
|
||||
override string getAttributeName() {
|
||||
result = this.getAttributeNameExpr().asExpr().(StrConst).getText()
|
||||
result = this.getAttributeNameExpr().(CfgNode).getNode().getNode().(StrConst).getText()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -60,9 +60,9 @@ module Builtins {
|
||||
* Currently this is an over-approximation, and may not account for things like overwriting a
|
||||
* built-in with a different value.
|
||||
*/
|
||||
DataFlow::Node likelyBuiltin(string name) {
|
||||
DataFlow::CfgNode likelyBuiltin(string name) {
|
||||
exists(Module m |
|
||||
result.asCfgNode() =
|
||||
result.getNode() =
|
||||
any(NameNode n |
|
||||
possible_builtin_accessed_in_module(n, name, m) and
|
||||
not possible_builtin_defined_in_module(name, m)
|
||||
|
||||
@@ -7,15 +7,22 @@
|
||||
private import python
|
||||
private import DataFlowPublic
|
||||
private import semmle.python.SpecialMethods
|
||||
private import FlowSummaryImpl as FlowSummaryImpl
|
||||
|
||||
/** A parameter position represented by an integer. */
|
||||
class ParameterPosition extends int {
|
||||
ParameterPosition() { exists(any(DataFlowCallable c).getParameter(this)) }
|
||||
|
||||
/** Holds if this position represents a positional parameter at position `pos`. */
|
||||
predicate isPositional(int pos) { this = pos } // with the current representation, all parameters are positional
|
||||
}
|
||||
|
||||
/** An argument position represented by an integer. */
|
||||
class ArgumentPosition extends int {
|
||||
ArgumentPosition() { exists(any(DataFlowCall c).getArg(this)) }
|
||||
ArgumentPosition() { this in [-2, -1] or exists(any(Call c).getArg(this)) }
|
||||
|
||||
/** Holds if this position represents a positional argument at position `pos`. */
|
||||
predicate isPositional(int pos) { this = pos } // with the current representation, all arguments are positional
|
||||
}
|
||||
|
||||
/** Holds if arguments at position `apos` match parameters at position `ppos`. */
|
||||
@@ -96,7 +103,7 @@ module ArgumentPassing {
|
||||
* Used to limit the size of predicates.
|
||||
*/
|
||||
predicate connects(CallNode call, CallableValue callable) {
|
||||
exists(DataFlowCall c |
|
||||
exists(NormalCall c |
|
||||
call = c.getNode() and
|
||||
callable = c.getCallable().getCallableValue()
|
||||
)
|
||||
@@ -268,6 +275,18 @@ module ArgumentPassing {
|
||||
|
||||
import ArgumentPassing
|
||||
|
||||
/** A callable defined in library code, identified by a unique string. */
|
||||
abstract class LibraryCallable extends string {
|
||||
bindingset[this]
|
||||
LibraryCallable() { any() }
|
||||
|
||||
/** Gets a call to this library callable. */
|
||||
abstract CallCfgNode getACall();
|
||||
|
||||
/** Gets a data-flow node, where this library callable is used as a call-back. */
|
||||
abstract ArgumentNode getACallback();
|
||||
}
|
||||
|
||||
/**
|
||||
* IPA type for DataFlowCallable.
|
||||
*
|
||||
@@ -282,27 +301,33 @@ newtype TDataFlowCallable =
|
||||
callable instanceof ClassValue
|
||||
} or
|
||||
TLambda(Function lambda) { lambda.isLambda() } or
|
||||
TModule(Module m)
|
||||
TModule(Module m) or
|
||||
TLibraryCallable(LibraryCallable callable)
|
||||
|
||||
/** A callable. */
|
||||
abstract class DataFlowCallable extends TDataFlowCallable {
|
||||
class DataFlowCallable extends TDataFlowCallable {
|
||||
/** Gets a textual representation of this element. */
|
||||
abstract string toString();
|
||||
string toString() { result = "DataFlowCallable" }
|
||||
|
||||
/** Gets a call to this callable. */
|
||||
abstract CallNode getACall();
|
||||
CallNode getACall() { none() }
|
||||
|
||||
/** Gets the scope of this callable */
|
||||
abstract Scope getScope();
|
||||
Scope getScope() { none() }
|
||||
|
||||
/** Gets the specified parameter of this callable */
|
||||
abstract NameNode getParameter(int n);
|
||||
NameNode getParameter(int n) { none() }
|
||||
|
||||
/** Gets the name of this callable. */
|
||||
abstract string getName();
|
||||
string getName() { none() }
|
||||
|
||||
/** Gets a callable value for this callable, if one exists. */
|
||||
abstract CallableValue getCallableValue();
|
||||
/** Gets a callable value for this callable, if any. */
|
||||
CallableValue getCallableValue() { none() }
|
||||
|
||||
/** Gets the underlying library callable, if any. */
|
||||
LibraryCallable asLibraryCallable() { this = TLibraryCallable(result) }
|
||||
|
||||
Location getLocation() { none() }
|
||||
}
|
||||
|
||||
/** A class representing a callable value. */
|
||||
@@ -343,6 +368,8 @@ class DataFlowLambda extends DataFlowCallable, TLambda {
|
||||
override FunctionValue getCallableValue() {
|
||||
result.getOrigin().getNode() = lambda.getDefinition()
|
||||
}
|
||||
|
||||
Expr getDefinition() { result = lambda.getDefinition() }
|
||||
}
|
||||
|
||||
/** A class representing the scope in which a `ModuleVariableNode` appears. */
|
||||
@@ -364,6 +391,27 @@ class DataFlowModuleScope extends DataFlowCallable, TModule {
|
||||
override CallableValue getCallableValue() { none() }
|
||||
}
|
||||
|
||||
class LibraryCallableValue extends DataFlowCallable, TLibraryCallable {
|
||||
LibraryCallable callable;
|
||||
|
||||
LibraryCallableValue() { this = TLibraryCallable(callable) }
|
||||
|
||||
override string toString() { result = callable.toString() }
|
||||
|
||||
override CallNode getACall() { result = callable.getACall().getNode() }
|
||||
|
||||
/** Gets a data-flow node, where this library callable is used as a call-back. */
|
||||
ArgumentNode getACallback() { result = callable.getACallback() }
|
||||
|
||||
override Scope getScope() { none() }
|
||||
|
||||
override NameNode getParameter(int n) { none() }
|
||||
|
||||
override string getName() { result = callable }
|
||||
|
||||
override LibraryCallable asLibraryCallable() { result = callable }
|
||||
}
|
||||
|
||||
/**
|
||||
* IPA type for DataFlowCall.
|
||||
*
|
||||
@@ -379,90 +427,140 @@ class DataFlowModuleScope extends DataFlowCallable, TModule {
|
||||
* TODO: Add `TClassMethodCall` mapping `cls` appropriately.
|
||||
*/
|
||||
newtype TDataFlowCall =
|
||||
TFunctionCall(CallNode call) { call = any(FunctionValue f).getAFunctionCall() } or
|
||||
/** Bound methods need to make room for the explicit self parameter */
|
||||
TMethodCall(CallNode call) { call = any(FunctionValue f).getAMethodCall() } or
|
||||
TClassCall(CallNode call) { call = any(ClassValue c | not c.isAbsent()).getACall() } or
|
||||
TSpecialCall(SpecialMethodCallNode special)
|
||||
/**
|
||||
* Includes function calls, method calls, class calls and library calls.
|
||||
* All these will be associated with a `CallNode`.
|
||||
*/
|
||||
TNormalCall(CallNode call) or
|
||||
/**
|
||||
* Includes calls to special methods.
|
||||
* These will be associated with a `SpecialMethodCallNode`.
|
||||
*/
|
||||
TSpecialCall(SpecialMethodCallNode special) or
|
||||
/** A synthesized call inside a summarized callable */
|
||||
TSummaryCall(FlowSummaryImpl::Public::SummarizedCallable c, Node receiver) {
|
||||
FlowSummaryImpl::Private::summaryCallbackRange(c, receiver)
|
||||
}
|
||||
|
||||
/** A call. */
|
||||
/** A call found in the program source (as opposed to a synthesised summary call). */
|
||||
class TExtractedDataFlowCall = TSpecialCall or TNormalCall;
|
||||
|
||||
/** A call that is taken into account by the global data flow computation. */
|
||||
abstract class DataFlowCall extends TDataFlowCall {
|
||||
/** Gets a textual representation of this element. */
|
||||
abstract string toString();
|
||||
|
||||
/** Get the callable to which this call goes. */
|
||||
/** Get the callable to which this call goes, if such exists. */
|
||||
abstract DataFlowCallable getCallable();
|
||||
|
||||
/**
|
||||
* Gets the argument to this call that will be sent
|
||||
* to the `n`th parameter of the callable.
|
||||
* to the `n`th parameter of the callable, if any.
|
||||
*/
|
||||
abstract Node getArg(int n);
|
||||
|
||||
/** Get the control flow node representing this call. */
|
||||
/** Get the control flow node representing this call, if any. */
|
||||
abstract ControlFlowNode getNode();
|
||||
|
||||
/** Gets the enclosing callable of this call. */
|
||||
abstract DataFlowCallable getEnclosingCallable();
|
||||
|
||||
/** Gets the location of this dataflow call. */
|
||||
Location getLocation() { result = this.getNode().getLocation() }
|
||||
abstract Location getLocation();
|
||||
|
||||
/**
|
||||
* Holds if this element is at the specified location.
|
||||
* The location spans column `startcolumn` of line `startline` to
|
||||
* column `endcolumn` of line `endline` in file `filepath`.
|
||||
* For more information, see
|
||||
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
|
||||
*/
|
||||
predicate hasLocationInfo(
|
||||
string filepath, int startline, int startcolumn, int endline, int endcolumn
|
||||
) {
|
||||
this.getLocation().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
|
||||
}
|
||||
}
|
||||
|
||||
/** A call found in the program source (as opposed to a synthesised call). */
|
||||
abstract class ExtractedDataFlowCall extends DataFlowCall, TExtractedDataFlowCall {
|
||||
final override Location getLocation() { result = this.getNode().getLocation() }
|
||||
|
||||
abstract override DataFlowCallable getCallable();
|
||||
|
||||
abstract override Node getArg(int n);
|
||||
|
||||
abstract override ControlFlowNode getNode();
|
||||
}
|
||||
|
||||
/** A call associated with a `CallNode`. */
|
||||
class NormalCall extends ExtractedDataFlowCall, TNormalCall {
|
||||
CallNode call;
|
||||
|
||||
NormalCall() { this = TNormalCall(call) }
|
||||
|
||||
override string toString() { result = call.toString() }
|
||||
|
||||
abstract override Node getArg(int n);
|
||||
|
||||
override CallNode getNode() { result = call }
|
||||
|
||||
abstract override DataFlowCallable getCallable();
|
||||
|
||||
override DataFlowCallable getEnclosingCallable() { result.getScope() = call.getNode().getScope() }
|
||||
}
|
||||
|
||||
/**
|
||||
* A call to a function/lambda.
|
||||
* A call to a function.
|
||||
* This excludes calls to bound methods, classes, and special methods.
|
||||
* Bound method calls and class calls insert an argument for the explicit
|
||||
* `self` parameter, and special method calls have special argument passing.
|
||||
*/
|
||||
class FunctionCall extends DataFlowCall, TFunctionCall {
|
||||
CallNode call;
|
||||
DataFlowCallable callable;
|
||||
class FunctionCall extends NormalCall {
|
||||
DataFlowCallableValue callable;
|
||||
|
||||
FunctionCall() {
|
||||
this = TFunctionCall(call) and
|
||||
call = any(FunctionValue f).getAFunctionCall() and
|
||||
call = callable.getACall()
|
||||
}
|
||||
|
||||
override string toString() { result = call.toString() }
|
||||
override Node getArg(int n) { result = getArg(call, TNoShift(), callable.getCallableValue(), n) }
|
||||
|
||||
override DataFlowCallable getCallable() { result = callable }
|
||||
}
|
||||
|
||||
/** A call to a lambda. */
|
||||
class LambdaCall extends NormalCall {
|
||||
DataFlowLambda callable;
|
||||
|
||||
LambdaCall() {
|
||||
call = callable.getACall() and
|
||||
callable = TLambda(any(Function f))
|
||||
}
|
||||
|
||||
override Node getArg(int n) { result = getArg(call, TNoShift(), callable.getCallableValue(), n) }
|
||||
|
||||
override ControlFlowNode getNode() { result = call }
|
||||
|
||||
override DataFlowCallable getCallable() { result = callable }
|
||||
|
||||
override DataFlowCallable getEnclosingCallable() { result.getScope() = call.getNode().getScope() }
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents a call to a bound method call.
|
||||
* The node representing the instance is inserted as argument to the `self` parameter.
|
||||
*/
|
||||
class MethodCall extends DataFlowCall, TMethodCall {
|
||||
CallNode call;
|
||||
class MethodCall extends NormalCall {
|
||||
FunctionValue bm;
|
||||
|
||||
MethodCall() {
|
||||
this = TMethodCall(call) and
|
||||
call = bm.getACall()
|
||||
}
|
||||
MethodCall() { call = bm.getAMethodCall() }
|
||||
|
||||
private CallableValue getCallableValue() { result = bm }
|
||||
|
||||
override string toString() { result = call.toString() }
|
||||
|
||||
override Node getArg(int n) {
|
||||
n > 0 and result = getArg(call, TShiftOneUp(), this.getCallableValue(), n)
|
||||
or
|
||||
n = 0 and result = TCfgNode(call.getFunction().(AttrNode).getObject())
|
||||
}
|
||||
|
||||
override ControlFlowNode getNode() { result = call }
|
||||
|
||||
override DataFlowCallable getCallable() { result = TCallableValue(this.getCallableValue()) }
|
||||
|
||||
override DataFlowCallable getEnclosingCallable() { result.getScope() = call.getScope() }
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -471,34 +569,27 @@ class MethodCall extends DataFlowCall, TMethodCall {
|
||||
* That makes the call node be the post-update node holding the value of the object
|
||||
* after the constructor has run.
|
||||
*/
|
||||
class ClassCall extends DataFlowCall, TClassCall {
|
||||
CallNode call;
|
||||
class ClassCall extends NormalCall {
|
||||
ClassValue c;
|
||||
|
||||
ClassCall() {
|
||||
this = TClassCall(call) and
|
||||
not c.isAbsent() and
|
||||
call = c.getACall()
|
||||
}
|
||||
|
||||
private CallableValue getCallableValue() { c.getScope().getInitMethod() = result.getScope() }
|
||||
|
||||
override string toString() { result = call.toString() }
|
||||
|
||||
override Node getArg(int n) {
|
||||
n > 0 and result = getArg(call, TShiftOneUp(), this.getCallableValue(), n)
|
||||
or
|
||||
n = 0 and result = TSyntheticPreUpdateNode(TCfgNode(call))
|
||||
}
|
||||
|
||||
override ControlFlowNode getNode() { result = call }
|
||||
|
||||
override DataFlowCallable getCallable() { result = TCallableValue(this.getCallableValue()) }
|
||||
|
||||
override DataFlowCallable getEnclosingCallable() { result.getScope() = call.getScope() }
|
||||
}
|
||||
|
||||
/** A call to a special method. */
|
||||
class SpecialCall extends DataFlowCall, TSpecialCall {
|
||||
class SpecialCall extends ExtractedDataFlowCall, TSpecialCall {
|
||||
SpecialMethodCallNode special;
|
||||
|
||||
SpecialCall() { this = TSpecialCall(special) }
|
||||
@@ -518,8 +609,172 @@ class SpecialCall extends DataFlowCall, TSpecialCall {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A call to a summarized callable, a `LibraryCallable`.
|
||||
*
|
||||
* We currently exclude all resolved calls. This means that a call to, say, `map`, which
|
||||
* is a `ClassCall`, cannot currently be given a summary.
|
||||
* We hope to lift this restriction in the future and include all potential calls to summaries
|
||||
* in this class.
|
||||
*/
|
||||
class LibraryCall extends NormalCall {
|
||||
LibraryCall() {
|
||||
// TODO: share this with `resolvedCall`
|
||||
not (
|
||||
call = any(DataFlowCallableValue cv).getACall()
|
||||
or
|
||||
call = any(DataFlowLambda l).getACall()
|
||||
or
|
||||
// TODO: this should be covered by `DataFlowCallableValue`, but a `ClassValue` is not a `CallableValue`.
|
||||
call = any(ClassValue c).getACall()
|
||||
)
|
||||
}
|
||||
|
||||
// TODO: Implement Python calling convention?
|
||||
override Node getArg(int n) { result = TCfgNode(call.getArg(n)) }
|
||||
|
||||
// We cannot refer to a `LibraryCallable` here,
|
||||
// as that could in turn refer to type tracking.
|
||||
// This call will be tied to a `LibraryCallable` via
|
||||
// `getViableCallabe` when the global data flow is assembled.
|
||||
override DataFlowCallable getCallable() { none() }
|
||||
}
|
||||
|
||||
/**
|
||||
* A synthesized call inside a callable with a flow summary.
|
||||
*
|
||||
* For example, in
|
||||
* ```python
|
||||
* map(lambda x: x + 1, [1, 2, 3])
|
||||
* ```
|
||||
*
|
||||
* there is a synthesized call to the lambda argument inside `map`.
|
||||
*/
|
||||
class SummaryCall extends DataFlowCall, TSummaryCall {
|
||||
private FlowSummaryImpl::Public::SummarizedCallable c;
|
||||
private Node receiver;
|
||||
|
||||
SummaryCall() { this = TSummaryCall(c, receiver) }
|
||||
|
||||
/** Gets the data flow node that this call targets. */
|
||||
Node getReceiver() { result = receiver }
|
||||
|
||||
override DataFlowCallable getEnclosingCallable() { result.asLibraryCallable() = c }
|
||||
|
||||
override DataFlowCallable getCallable() { none() }
|
||||
|
||||
override Node getArg(int n) { none() }
|
||||
|
||||
override ControlFlowNode getNode() { none() }
|
||||
|
||||
override string toString() { result = "[summary] call to " + receiver + " in " + c }
|
||||
|
||||
override Location getLocation() { none() }
|
||||
}
|
||||
|
||||
/**
|
||||
* The value of a parameter at function entry, viewed as a node in a data
|
||||
* flow graph.
|
||||
*/
|
||||
abstract class ParameterNodeImpl extends Node {
|
||||
abstract Parameter getParameter();
|
||||
|
||||
/**
|
||||
* Holds if this node is the parameter of callable `c` at the
|
||||
* (zero-based) index `i`.
|
||||
*/
|
||||
abstract predicate isParameterOf(DataFlowCallable c, int i);
|
||||
}
|
||||
|
||||
/** A parameter for a library callable with a flow summary. */
|
||||
class SummaryParameterNode extends ParameterNodeImpl, TSummaryParameterNode {
|
||||
private FlowSummaryImpl::Public::SummarizedCallable sc;
|
||||
private int pos;
|
||||
|
||||
SummaryParameterNode() { this = TSummaryParameterNode(sc, pos) }
|
||||
|
||||
override Parameter getParameter() { none() }
|
||||
|
||||
override predicate isParameterOf(DataFlowCallable c, int i) {
|
||||
sc = c.asLibraryCallable() and i = pos
|
||||
}
|
||||
|
||||
override DataFlowCallable getEnclosingCallable() { result.asLibraryCallable() = sc }
|
||||
|
||||
override string toString() { result = "parameter " + pos + " of " + sc }
|
||||
|
||||
// Hack to return "empty location"
|
||||
override predicate hasLocationInfo(
|
||||
string file, int startline, int startcolumn, int endline, int endcolumn
|
||||
) {
|
||||
file = "" and
|
||||
startline = 0 and
|
||||
startcolumn = 0 and
|
||||
endline = 0 and
|
||||
endcolumn = 0
|
||||
}
|
||||
}
|
||||
|
||||
/** A data-flow node used to model flow summaries. */
|
||||
class SummaryNode extends Node, TSummaryNode {
|
||||
private FlowSummaryImpl::Public::SummarizedCallable c;
|
||||
private FlowSummaryImpl::Private::SummaryNodeState state;
|
||||
|
||||
SummaryNode() { this = TSummaryNode(c, state) }
|
||||
|
||||
override DataFlowCallable getEnclosingCallable() { result.asLibraryCallable() = c }
|
||||
|
||||
override string toString() { result = "[summary] " + state + " in " + c }
|
||||
|
||||
// Hack to return "empty location"
|
||||
override predicate hasLocationInfo(
|
||||
string file, int startline, int startcolumn, int endline, int endcolumn
|
||||
) {
|
||||
file = "" and
|
||||
startline = 0 and
|
||||
startcolumn = 0 and
|
||||
endline = 0 and
|
||||
endcolumn = 0
|
||||
}
|
||||
}
|
||||
|
||||
private class SummaryReturnNode extends SummaryNode, ReturnNode {
|
||||
private ReturnKind rk;
|
||||
|
||||
SummaryReturnNode() { FlowSummaryImpl::Private::summaryReturnNode(this, rk) }
|
||||
|
||||
override ReturnKind getKind() { result = rk }
|
||||
}
|
||||
|
||||
private class SummaryArgumentNode extends SummaryNode, ArgumentNode {
|
||||
SummaryArgumentNode() { FlowSummaryImpl::Private::summaryArgumentNode(_, this, _) }
|
||||
|
||||
override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) {
|
||||
FlowSummaryImpl::Private::summaryArgumentNode(call, this, pos)
|
||||
}
|
||||
}
|
||||
|
||||
private class SummaryPostUpdateNode extends SummaryNode, PostUpdateNode {
|
||||
private Node pre;
|
||||
|
||||
SummaryPostUpdateNode() { FlowSummaryImpl::Private::summaryPostUpdateNode(this, pre) }
|
||||
|
||||
override Node getPreUpdateNode() { result = pre }
|
||||
}
|
||||
|
||||
/** Gets a viable run-time target for the call `call`. */
|
||||
DataFlowCallable viableCallable(DataFlowCall call) { result = call.getCallable() }
|
||||
DataFlowCallable viableCallable(ExtractedDataFlowCall call) {
|
||||
result = call.getCallable()
|
||||
or
|
||||
// A call to a library callable with a flow summary
|
||||
// In this situation we can not resolve the callable from the call,
|
||||
// as that would make data flow depend on type tracking.
|
||||
// Instead we reolve the call from the summary.
|
||||
exists(LibraryCallable callable |
|
||||
result = TLibraryCallable(callable) and
|
||||
call.getNode() = callable.getACall().getNode()
|
||||
)
|
||||
}
|
||||
|
||||
private newtype TReturnKind = TNormalReturnKind()
|
||||
|
||||
@@ -533,26 +788,51 @@ class ReturnKind extends TReturnKind {
|
||||
}
|
||||
|
||||
/** A data flow node that represents a value returned by a callable. */
|
||||
class ReturnNode extends CfgNode {
|
||||
Return ret;
|
||||
|
||||
// See `TaintTrackingImplementation::returnFlowStep`
|
||||
ReturnNode() { node = ret.getValue().getAFlowNode() }
|
||||
|
||||
abstract class ReturnNode extends Node {
|
||||
/** Gets the kind of this return node. */
|
||||
ReturnKind getKind() { any() }
|
||||
}
|
||||
|
||||
/** A data flow node that represents the output of a call. */
|
||||
class OutNode extends CfgNode {
|
||||
OutNode() { node instanceof CallNode }
|
||||
/** A data flow node that represents a value returned by a callable. */
|
||||
class ExtractedReturnNode extends ReturnNode, CfgNode {
|
||||
// See `TaintTrackingImplementation::returnFlowStep`
|
||||
ExtractedReturnNode() { node = any(Return ret).getValue().getAFlowNode() }
|
||||
|
||||
override ReturnKind getKind() { any() }
|
||||
}
|
||||
|
||||
/** A data-flow node that represents the output of a call. */
|
||||
abstract class OutNode extends Node {
|
||||
/** Gets the underlying call, where this node is a corresponding output of kind `kind`. */
|
||||
abstract DataFlowCall getCall(ReturnKind kind);
|
||||
}
|
||||
|
||||
private module OutNodes {
|
||||
/**
|
||||
* A data-flow node that reads a value returned directly by a callable.
|
||||
*/
|
||||
class ExprOutNode extends OutNode, ExprNode {
|
||||
private DataFlowCall call;
|
||||
|
||||
ExprOutNode() { call.(ExtractedDataFlowCall).getNode() = this.getNode() }
|
||||
|
||||
override DataFlowCall getCall(ReturnKind kind) {
|
||||
result = call and
|
||||
kind = kind
|
||||
}
|
||||
}
|
||||
|
||||
private class SummaryOutNode extends SummaryNode, OutNode {
|
||||
SummaryOutNode() { FlowSummaryImpl::Private::summaryOutNode(_, this, _) }
|
||||
|
||||
override DataFlowCall getCall(ReturnKind kind) {
|
||||
FlowSummaryImpl::Private::summaryOutNode(result, this, kind)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a node that can read the value returned from `call` with return kind
|
||||
* `kind`.
|
||||
*/
|
||||
OutNode getAnOutNode(DataFlowCall call, ReturnKind kind) {
|
||||
call.getNode() = result.getNode() and
|
||||
kind = TNormalReturnKind()
|
||||
}
|
||||
OutNode getAnOutNode(DataFlowCall call, ReturnKind kind) { call = result.getCall(kind) }
|
||||
|
||||
@@ -2,6 +2,7 @@ private import python
|
||||
private import DataFlowPublic
|
||||
private import semmle.python.essa.SsaCompute
|
||||
private import semmle.python.dataflow.new.internal.ImportResolution
|
||||
private import FlowSummaryImpl as FlowSummaryImpl
|
||||
// Since we allow extra data-flow steps from modeled frameworks, we import these
|
||||
// up-front, to ensure these are included. This provides a more seamless experience from
|
||||
// a user point of view, since they don't need to know they need to import a specific
|
||||
@@ -21,7 +22,7 @@ import DataFlowDispatchPointsTo
|
||||
DataFlowCallable nodeGetEnclosingCallable(Node n) { result = n.getEnclosingCallable() }
|
||||
|
||||
/** Holds if `p` is a `ParameterNode` of `c` with position `pos`. */
|
||||
predicate isParameterNode(ParameterNode p, DataFlowCallable c, ParameterPosition pos) {
|
||||
predicate isParameterNode(ParameterNodeImpl p, DataFlowCallable c, ParameterPosition pos) {
|
||||
p.isParameterOf(c, pos)
|
||||
}
|
||||
|
||||
@@ -77,7 +78,7 @@ module SyntheticPreUpdateNode {
|
||||
* that is mapped to the `self` parameter. That way, constructor calls represent the value of the
|
||||
* object after the constructor (currently only `__init__`) has run.
|
||||
*/
|
||||
CfgNode objectCreationNode() { result.getNode().(CallNode) = any(ClassCall c).getNode() }
|
||||
CfgNode objectCreationNode() { result.getNode() = any(ClassCall c).getNode() }
|
||||
}
|
||||
|
||||
import SyntheticPreUpdateNode
|
||||
@@ -87,6 +88,8 @@ deprecated module syntheticPostUpdateNode = SyntheticPostUpdateNode;
|
||||
|
||||
/** A module collecting the different reasons for synthesising a post-update node. */
|
||||
module SyntheticPostUpdateNode {
|
||||
private import semmle.python.SpecialMethods
|
||||
|
||||
/** A post-update node is synthesized for all nodes which satisfy `NeedsSyntheticPostUpdateNode`. */
|
||||
class SyntheticPostUpdateNode extends PostUpdateNode, TSyntheticPostUpdateNode {
|
||||
NeedsSyntheticPostUpdateNode pre;
|
||||
@@ -136,6 +139,8 @@ module SyntheticPostUpdateNode {
|
||||
Node argumentPreUpdateNode() {
|
||||
result = any(FunctionCall c).getArg(_)
|
||||
or
|
||||
result = any(LambdaCall c).getArg(_)
|
||||
or
|
||||
// Avoid argument 0 of method calls as those have read post-update nodes.
|
||||
exists(MethodCall c, int n | n > 0 | result = c.getArg(n))
|
||||
or
|
||||
@@ -145,11 +150,18 @@ module SyntheticPostUpdateNode {
|
||||
exists(ClassCall c, int n | n > 0 | result = c.getArg(n))
|
||||
or
|
||||
// any argument of any call that we have not been able to resolve
|
||||
exists(CallNode call | not call = any(DataFlowCall c).getNode() |
|
||||
exists(CallNode call | not resolvedCall(call) |
|
||||
result.(CfgNode).getNode() in [call.getArg(_), call.getArgByName(_)]
|
||||
)
|
||||
}
|
||||
|
||||
/** Holds if `call` can be resolved as a normal call */
|
||||
private predicate resolvedCall(CallNode call) {
|
||||
call = any(DataFlowCallableValue cv).getACall()
|
||||
or
|
||||
call = any(DataFlowLambda l).getACall()
|
||||
}
|
||||
|
||||
/** Gets the pre-update node associated with a store. This is used for when an object might have its value changed after a store. */
|
||||
CfgNode storePreUpdateNode() {
|
||||
exists(Attribute a |
|
||||
@@ -287,10 +299,22 @@ module EssaFlow {
|
||||
* This is the local flow predicate that is used as a building block in global
|
||||
* data flow.
|
||||
*
|
||||
* It includes flow steps from flow summaries.
|
||||
*/
|
||||
predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
|
||||
simpleLocalFlowStepForTypetracking(nodeFrom, nodeTo)
|
||||
or
|
||||
summaryFlowSteps(nodeFrom, nodeTo)
|
||||
}
|
||||
|
||||
/**
|
||||
* This is the local flow predicate that is used as a building block in
|
||||
* type tracking, it does _not_ include steps from flow summaries.
|
||||
*
|
||||
* Local flow can happen either at import time, when the module is initialised
|
||||
* or at runtime when callables in the module are called.
|
||||
*/
|
||||
predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
|
||||
predicate simpleLocalFlowStepForTypetracking(Node nodeFrom, Node nodeTo) {
|
||||
// If there is local flow out of a node `node`, we want flow
|
||||
// both out of `node` and any post-update node of `node`.
|
||||
exists(Node node |
|
||||
@@ -326,6 +350,34 @@ predicate runtimeLocalFlowStep(Node nodeFrom, Node nodeTo) {
|
||||
EssaFlow::essaFlowStep(nodeFrom, nodeTo)
|
||||
}
|
||||
|
||||
predicate summaryFlowSteps(Node nodeFrom, Node nodeTo) {
|
||||
// If there is local flow out of a node `node`, we want flow
|
||||
// both out of `node` and any post-update node of `node`.
|
||||
exists(Node node |
|
||||
nodeFrom = update(node) and
|
||||
(
|
||||
importTimeSummaryFlowStep(node, nodeTo) or
|
||||
runtimeSummaryFlowStep(node, nodeTo)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
predicate importTimeSummaryFlowStep(Node nodeFrom, Node nodeTo) {
|
||||
// As a proxy for whether statements can be executed at import time,
|
||||
// we check if they appear at the top level.
|
||||
// This will miss statements inside functions called from the top level.
|
||||
isTopLevel(nodeFrom) and
|
||||
isTopLevel(nodeTo) and
|
||||
FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom, nodeTo, true)
|
||||
}
|
||||
|
||||
predicate runtimeSummaryFlowStep(Node nodeFrom, Node nodeTo) {
|
||||
// Anything not at the top level can be executed at runtime.
|
||||
not isTopLevel(nodeFrom) and
|
||||
not isTopLevel(nodeTo) and
|
||||
FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom, nodeTo, true)
|
||||
}
|
||||
|
||||
/** `ModuleVariable`s are accessed via jump steps at runtime. */
|
||||
predicate runtimeJumpStep(Node nodeFrom, Node nodeTo) {
|
||||
// Module variable read
|
||||
@@ -474,6 +526,8 @@ predicate storeStep(Node nodeFrom, Content c, Node nodeTo) {
|
||||
matchStoreStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
any(Orm::AdditionalOrmSteps es).storeStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
FlowSummaryImpl::Private::Steps::summaryStoreStep(nodeFrom, c, nodeTo)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -667,6 +721,8 @@ predicate readStep(Node nodeFrom, Content c, Node nodeTo) {
|
||||
attributeReadStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
kwUnpackReadStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
FlowSummaryImpl::Private::Steps::summaryReadStep(nodeFrom, c, nodeTo)
|
||||
}
|
||||
|
||||
/** Data flows from a sequence to a subscript of the sequence. */
|
||||
@@ -791,6 +847,8 @@ predicate clearsContent(Node n, Content c) {
|
||||
matchClearStep(n, c)
|
||||
or
|
||||
attributeClearStep(n, c)
|
||||
or
|
||||
FlowSummaryImpl::Private::Steps::summaryClearsContent(n, c)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -842,15 +900,38 @@ int accessPathLimit() { result = 5 }
|
||||
predicate forceHighPrecision(Content c) { none() }
|
||||
|
||||
/** Holds if `n` should be hidden from path explanations. */
|
||||
predicate nodeIsHidden(Node n) { none() }
|
||||
predicate nodeIsHidden(Node n) {
|
||||
n instanceof SummaryNode
|
||||
or
|
||||
n instanceof SummaryParameterNode
|
||||
}
|
||||
|
||||
class LambdaCallKind = Unit;
|
||||
|
||||
/** Holds if `creation` is an expression that creates a lambda of kind `kind` for `c`. */
|
||||
predicate lambdaCreation(Node creation, LambdaCallKind kind, DataFlowCallable c) { none() }
|
||||
predicate lambdaCreation(Node creation, LambdaCallKind kind, DataFlowCallable c) {
|
||||
// lambda
|
||||
kind = kind and
|
||||
creation.asExpr() = c.(DataFlowLambda).getDefinition()
|
||||
or
|
||||
// normal function
|
||||
exists(FunctionDef def |
|
||||
def.defines(creation.asVar().getSourceVariable()) and
|
||||
def.getDefinedFunction() = c.(DataFlowCallableValue).getCallableValue().getScope()
|
||||
)
|
||||
or
|
||||
// summarized function
|
||||
exists(Call call |
|
||||
creation.asExpr() = call.getAnArg() and
|
||||
creation = c.(LibraryCallableValue).getACallback()
|
||||
)
|
||||
}
|
||||
|
||||
/** Holds if `call` is a lambda call of kind `kind` where `receiver` is the lambda expression. */
|
||||
predicate lambdaCall(DataFlowCall call, LambdaCallKind kind, Node receiver) { none() }
|
||||
predicate lambdaCall(DataFlowCall call, LambdaCallKind kind, Node receiver) {
|
||||
receiver = call.(SummaryCall).getReceiver() and
|
||||
exists(kind)
|
||||
}
|
||||
|
||||
/** Extra data-flow steps needed for lambda flow analysis. */
|
||||
predicate additionalLambdaFlowStep(Node nodeFrom, Node nodeTo, boolean preservesValue) { none() }
|
||||
@@ -862,4 +943,6 @@ predicate additionalLambdaFlowStep(Node nodeFrom, Node nodeTo, boolean preserves
|
||||
* One example would be to allow flow like `p.foo = p.bar;`, which is disallowed
|
||||
* by default as a heuristic.
|
||||
*/
|
||||
predicate allowParameterReturnInSelf(ParameterNode p) { none() }
|
||||
predicate allowParameterReturnInSelf(ParameterNode p) {
|
||||
FlowSummaryImpl::Private::summaryAllowParameterReturnInSelf(p)
|
||||
}
|
||||
|
||||
@@ -9,6 +9,7 @@ import Attributes
|
||||
import LocalSources
|
||||
private import semmle.python.essa.SsaCompute
|
||||
private import semmle.python.dataflow.new.internal.ImportStar
|
||||
private import FlowSummaryImpl as FlowSummaryImpl
|
||||
|
||||
/**
|
||||
* IPA type for data flow nodes.
|
||||
@@ -100,7 +101,17 @@ newtype TNode =
|
||||
//
|
||||
// So for now we live with having these synthetic ORM nodes for _all_ classes, which
|
||||
// is a bit wasteful, but we don't think it will hurt too much.
|
||||
TSyntheticOrmModelNode(Class cls)
|
||||
TSyntheticOrmModelNode(Class cls) or
|
||||
TSummaryNode(
|
||||
FlowSummaryImpl::Public::SummarizedCallable c, FlowSummaryImpl::Private::SummaryNodeState state
|
||||
) {
|
||||
FlowSummaryImpl::Private::summaryNodeRange(c, state)
|
||||
} or
|
||||
TSummaryParameterNode(FlowSummaryImpl::Public::SummarizedCallable c, ParameterPosition pos) {
|
||||
FlowSummaryImpl::Private::summaryParameterNodeRange(c, pos)
|
||||
}
|
||||
|
||||
class TParameterNode = TCfgNode or TSummaryParameterNode;
|
||||
|
||||
/** Helper for `Node::getEnclosingCallable`. */
|
||||
private DataFlowCallable getCallableScope(Scope s) {
|
||||
@@ -277,40 +288,56 @@ ExprNode exprNode(DataFlowExpr e) { result.getNode().getNode() = e }
|
||||
* The value of a parameter at function entry, viewed as a node in a data
|
||||
* flow graph.
|
||||
*/
|
||||
class ParameterNode extends CfgNode, LocalSourceNode {
|
||||
class ParameterNode extends Node, TParameterNode instanceof ParameterNodeImpl {
|
||||
/** Gets the parameter corresponding to this node, if any. */
|
||||
final Parameter getParameter() { result = super.getParameter() }
|
||||
}
|
||||
|
||||
/** A parameter node found in the source code (not in a summary). */
|
||||
class ExtractedParameterNode extends ParameterNodeImpl, CfgNode {
|
||||
//, LocalSourceNode {
|
||||
ParameterDefinition def;
|
||||
|
||||
ParameterNode() {
|
||||
ExtractedParameterNode() {
|
||||
node = def.getDefiningNode() and
|
||||
// Disregard parameters that we cannot resolve
|
||||
// TODO: Make this unnecessary
|
||||
exists(DataFlowCallable c | node = c.getParameter(_))
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if this node is the parameter of callable `c` at the
|
||||
* (zero-based) index `i`.
|
||||
*/
|
||||
predicate isParameterOf(DataFlowCallable c, int i) { node = c.getParameter(i) }
|
||||
override predicate isParameterOf(DataFlowCallable c, int i) { node = c.getParameter(i) }
|
||||
|
||||
override DataFlowCallable getEnclosingCallable() { this.isParameterOf(result, _) }
|
||||
|
||||
/** Gets the `Parameter` this `ParameterNode` represents. */
|
||||
Parameter getParameter() { result = def.getParameter() }
|
||||
override Parameter getParameter() { result = def.getParameter() }
|
||||
}
|
||||
|
||||
class LocalSourceParameterNode extends ExtractedParameterNode, LocalSourceNode { }
|
||||
|
||||
/** Gets a node corresponding to parameter `p`. */
|
||||
ParameterNode parameterNode(Parameter p) { result.getParameter() = p }
|
||||
ExtractedParameterNode parameterNode(Parameter p) { result.getParameter() = p }
|
||||
|
||||
/** A data flow node that represents a call argument. */
|
||||
class ArgumentNode extends Node {
|
||||
ArgumentNode() { this = any(DataFlowCall c).getArg(_) }
|
||||
|
||||
abstract class ArgumentNode extends Node {
|
||||
/** Holds if this argument occurs at the given position in the given call. */
|
||||
predicate argumentOf(DataFlowCall call, int pos) { this = call.getArg(pos) }
|
||||
abstract predicate argumentOf(DataFlowCall call, ArgumentPosition pos);
|
||||
|
||||
/** Gets the call in which this node is an argument. */
|
||||
final DataFlowCall getCall() { this.argumentOf(result, _) }
|
||||
/** Gets the call in which this node is an argument, if any. */
|
||||
final ExtractedDataFlowCall getCall() { this.argumentOf(result, _) }
|
||||
}
|
||||
|
||||
/** A data flow node that represents a call argument found in the source code. */
|
||||
class ExtractedArgumentNode extends ArgumentNode {
|
||||
ExtractedArgumentNode() { this = any(ExtractedDataFlowCall c).getArg(_) }
|
||||
|
||||
final override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) {
|
||||
this.extractedArgumentOf(call, pos)
|
||||
}
|
||||
|
||||
predicate extractedArgumentOf(ExtractedDataFlowCall call, ArgumentPosition pos) {
|
||||
this = call.getArg(pos)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -390,7 +417,7 @@ class ModuleVariableNode extends Node, TModuleVariableNode {
|
||||
|
||||
/** Gets an `EssaNode` that corresponds to an assignment of this global variable. */
|
||||
EssaNode getAWrite() {
|
||||
result.asVar().getDefinition().(EssaNodeDefinition).definedBy(var, any(DefinitionNode defn))
|
||||
result.getVar().getDefinition().(EssaNodeDefinition).definedBy(var, any(DefinitionNode defn))
|
||||
}
|
||||
|
||||
/** Gets the possible values of the variable at the end of import time */
|
||||
|
||||
@@ -5,12 +5,20 @@
|
||||
private import python
|
||||
private import DataFlowPrivate
|
||||
import DataFlowPublic
|
||||
private import FlowSummaryImpl as FlowSummaryImpl
|
||||
|
||||
/**
|
||||
* Holds if data flows from `nodeFrom` to `nodeTo` in exactly one local
|
||||
* (intra-procedural) step.
|
||||
*/
|
||||
predicate localFlowStep(Node nodeFrom, Node nodeTo) { simpleLocalFlowStep(nodeFrom, nodeTo) }
|
||||
predicate localFlowStep(Node nodeFrom, Node nodeTo) {
|
||||
simpleLocalFlowStep(nodeFrom, nodeTo)
|
||||
or
|
||||
// Simple flow through library code is included in the exposed local
|
||||
// step relation, even though flow is technically inter-procedural.
|
||||
// This is a convention followed across languages.
|
||||
FlowSummaryImpl::Private::Steps::summaryThroughStepValue(nodeFrom, nodeTo, _)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if data flows from `source` to `sink` in zero or more local
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,225 @@
|
||||
/**
|
||||
* Provides Python specific classes and predicates for defining flow summaries.
|
||||
*
|
||||
* Flow summaries are defined for callables that are not extracted.
|
||||
* Such callables go by different names in different parts of our codebase:
|
||||
*
|
||||
* - in `FlowSummary.qll`, which is user facing, they are called `SummarizedCallable`s.
|
||||
* These contain summaries, implemented by the user via the predicates `propagatesFlow` and `propagatesFlowExt`.
|
||||
*
|
||||
* - in the data flow layer, they are called `LibraryCallable`s (as in the Ruby codebase).
|
||||
* These are identified by strings and has predicates for finding calls to them.
|
||||
*
|
||||
* Having both extracted and non-extracted callables means that we now have three types of calls:
|
||||
* - Extracted calls to extracted callables, either `NormalCall` or `SpecialCall`. These are handled by standard data flow.
|
||||
* - Extracted calls to non-extracted callables, `LibraryCall`. These are handled by loking up the relevant summary when the
|
||||
* global data flwo graph is connected up via `getViableCallable`.
|
||||
* - Non-extracted calls, `SummaryCall`. These are synthesised by the flow summary framework.
|
||||
*
|
||||
* The first two can be referred to as `ExtractedDataFlowCall`. In fact, `LibraryCall` is a subclass of `NormalCall`, where
|
||||
* `getCallable` is set to `none()`. The member predicate `ExtractedDataFlowCall::getCallable` is _not_ the mechanism for
|
||||
* call resolution in global data flow. That mechanism is `getViableCallable`.
|
||||
* Resolving a call to a non-extracted callable goes via `LibraryCallable::getACall`, which may involve type tracking.
|
||||
* To avoid that type tracking becomes mutualy recursive with data flow, type tracking must use a call graph not including summaries.
|
||||
* Type tracking sees the callgraph given by `ExtractedDataFlowCall::getACallable`.
|
||||
*
|
||||
* We do not support summaries of special methods via the special methods framework,
|
||||
* the summary would have to identify the call.
|
||||
*
|
||||
* We might, while we still extract the standard library, want to support flow summaries of
|
||||
* extracted callables, so that we can model part of the standard library with flow summaries.
|
||||
* For this to work, we have be careful with the enclosing callable predicate.
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import DataFlowPrivate
|
||||
private import DataFlowPublic
|
||||
private import DataFlowImplCommon
|
||||
private import FlowSummaryImpl::Private
|
||||
private import FlowSummaryImpl::Public
|
||||
private import semmle.python.dataflow.new.FlowSummary as FlowSummary
|
||||
|
||||
class SummarizedCallableBase = string;
|
||||
|
||||
/** View a `SummarizedCallable` as a `DataFlowCallable`. */
|
||||
DataFlowCallable inject(SummarizedCallable c) { result.asLibraryCallable() = c }
|
||||
|
||||
/** Gets the parameter position of the instance parameter. */
|
||||
ArgumentPosition instanceParameterPosition() { none() } // disables implicit summary flow to `this` for callbacks
|
||||
|
||||
/** Gets the synthesized summary data-flow node for the given values. */
|
||||
Node summaryNode(SummarizedCallable c, SummaryNodeState state) { result = TSummaryNode(c, state) }
|
||||
|
||||
/** Gets the synthesized data-flow call for `receiver`. */
|
||||
SummaryCall summaryDataFlowCall(Node receiver) { receiver = result.getReceiver() }
|
||||
|
||||
/** Gets the type of content `c`. */
|
||||
DataFlowType getContentType(Content c) { any() }
|
||||
|
||||
/** Gets the return type of kind `rk` for callable `c`. */
|
||||
bindingset[c, rk]
|
||||
DataFlowType getReturnType(SummarizedCallable c, ReturnKind rk) { any() }
|
||||
|
||||
/**
|
||||
* Gets the type of the `i`th parameter in a synthesized call that targets a
|
||||
* callback of type `t`.
|
||||
*/
|
||||
bindingset[t, i]
|
||||
DataFlowType getCallbackParameterType(DataFlowType t, int i) { any() }
|
||||
|
||||
/**
|
||||
* Gets the return type of kind `rk` in a synthesized call that targets a
|
||||
* callback of type `t`.
|
||||
*/
|
||||
DataFlowType getCallbackReturnType(DataFlowType t, ReturnKind rk) { any() }
|
||||
|
||||
/**
|
||||
* Holds if an external flow summary exists for `c` with input specification
|
||||
* `input`, output specification `output`, kind `kind`, and a flag `generated`
|
||||
* stating whether the summary is autogenerated.
|
||||
*/
|
||||
predicate summaryElement(
|
||||
FlowSummary::SummarizedCallable c, string input, string output, string kind, boolean generated
|
||||
) {
|
||||
exists(boolean preservesValue |
|
||||
c.propagatesFlowExt(input, output, preservesValue) and
|
||||
(if preservesValue = true then kind = "value" else kind = "taint") and
|
||||
generated = false
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if a negative flow summary exists for `c`, which means that there is no
|
||||
* flow through `c`. The flag `generated` states whether the summary is autogenerated.
|
||||
* Note. Negative flow summaries has not been implemented for Python.
|
||||
*/
|
||||
predicate negativeSummaryElement(FlowSummary::SummarizedCallable c, boolean generated) { none() }
|
||||
|
||||
/**
|
||||
* Gets the summary component for specification component `c`, if any.
|
||||
*
|
||||
* This covers all the Python-specific components of a flow summary.
|
||||
*/
|
||||
SummaryComponent interpretComponentSpecific(AccessPathToken c) {
|
||||
c = "ListElement" and
|
||||
result = FlowSummary::SummaryComponent::listElement()
|
||||
}
|
||||
|
||||
/** Gets the textual representation of a summary component in the format used for flow summaries. */
|
||||
string getComponentSpecificCsv(SummaryComponent sc) {
|
||||
sc = TContentSummaryComponent(any(ListElementContent c)) and
|
||||
result = "ListElement"
|
||||
}
|
||||
|
||||
/** Gets the textual representation of a parameter position in the format used for flow summaries. */
|
||||
string getParameterPositionCsv(ParameterPosition pos) { result = pos.toString() }
|
||||
|
||||
/** Gets the textual representation of an argument position in the format used for flow summaries. */
|
||||
string getArgumentPositionCsv(ArgumentPosition pos) { result = pos.toString() }
|
||||
|
||||
/** Holds if input specification component `c` needs a reference. */
|
||||
predicate inputNeedsReferenceSpecific(string c) { none() }
|
||||
|
||||
/** Holds if output specification component `c` needs a reference. */
|
||||
predicate outputNeedsReferenceSpecific(string c) { none() }
|
||||
|
||||
/** Gets the return kind corresponding to specification `"ReturnValue"`. */
|
||||
ReturnKind getReturnValueKind() { any() }
|
||||
|
||||
/**
|
||||
* All definitions in this module are required by the shared implementation
|
||||
* (for source/sink interpretation), but they are unused for Python, where
|
||||
* we rely on API graphs instead.
|
||||
*/
|
||||
private module UnusedSourceSinkInterpretation {
|
||||
/**
|
||||
* Holds if an external source specification exists for `n` with output specification
|
||||
* `output`, kind `kind`, and a flag `generated` stating whether the source specification is
|
||||
* autogenerated.
|
||||
*/
|
||||
predicate sourceElement(AstNode n, string output, string kind, boolean generated) { none() }
|
||||
|
||||
/**
|
||||
* Holds if an external sink specification exists for `n` with input specification
|
||||
* `input`, kind `kind` and a flag `generated` stating whether the sink specification is
|
||||
* autogenerated.
|
||||
*/
|
||||
predicate sinkElement(AstNode n, string input, string kind, boolean generated) { none() }
|
||||
|
||||
class SourceOrSinkElement = AstNode;
|
||||
|
||||
/** An entity used to interpret a source/sink specification. */
|
||||
class InterpretNode extends AstNode_ {
|
||||
// InterpretNode is going away, this is just a dummy implementation.
|
||||
// However, we have some old location tests picking them up, so we
|
||||
// explicitly define them to not exist.
|
||||
InterpretNode() { none() }
|
||||
|
||||
/** Gets the element that this node corresponds to, if any. */
|
||||
SourceOrSinkElement asElement() { none() }
|
||||
|
||||
/** Gets the data-flow node that this node corresponds to, if any. */
|
||||
Node asNode() { none() }
|
||||
|
||||
/** Gets the call that this node corresponds to, if any. */
|
||||
DataFlowCall asCall() { none() }
|
||||
|
||||
/** Gets the callable that this node corresponds to, if any. */
|
||||
DataFlowCallable asCallable() { none() }
|
||||
|
||||
/** Gets the target of this call, if any. */
|
||||
SourceOrSinkElement getCallTarget() { none() }
|
||||
}
|
||||
|
||||
/** Provides additional sink specification logic. */
|
||||
predicate interpretOutputSpecific(string c, InterpretNode mid, InterpretNode node) { none() }
|
||||
|
||||
/** Provides additional source specification logic. */
|
||||
predicate interpretInputSpecific(string c, InterpretNode mid, InterpretNode node) { none() }
|
||||
}
|
||||
|
||||
import UnusedSourceSinkInterpretation
|
||||
|
||||
module ParsePositions {
|
||||
private import FlowSummaryImpl
|
||||
|
||||
private predicate isParamBody(string body) {
|
||||
exists(AccessPathToken tok |
|
||||
tok.getName() = "Parameter" and
|
||||
body = tok.getAnArgument()
|
||||
)
|
||||
}
|
||||
|
||||
private predicate isArgBody(string body) {
|
||||
exists(AccessPathToken tok |
|
||||
tok.getName() = "Argument" and
|
||||
body = tok.getAnArgument()
|
||||
)
|
||||
}
|
||||
|
||||
predicate isParsedParameterPosition(string c, int i) {
|
||||
isParamBody(c) and
|
||||
i = AccessPath::parseInt(c)
|
||||
}
|
||||
|
||||
predicate isParsedArgumentPosition(string c, int i) {
|
||||
isArgBody(c) and
|
||||
i = AccessPath::parseInt(c)
|
||||
}
|
||||
}
|
||||
|
||||
/** Gets the argument position obtained by parsing `X` in `Parameter[X]`. */
|
||||
ArgumentPosition parseParamBody(string s) {
|
||||
exists(int i |
|
||||
ParsePositions::isParsedParameterPosition(s, i) and
|
||||
result.isPositional(i)
|
||||
)
|
||||
}
|
||||
|
||||
/** Gets the parameter position obtained by parsing `X` in `Argument[X]`. */
|
||||
ParameterPosition parseArgBody(string s) {
|
||||
exists(int i |
|
||||
ParsePositions::isParsedArgumentPosition(s, i) and
|
||||
result.isPositional(i)
|
||||
)
|
||||
}
|
||||
@@ -244,7 +244,7 @@ class UnpackingAssignmentSequenceTarget extends UnpackingAssignmentTarget instan
|
||||
*/
|
||||
predicate iterableUnpackingAssignmentFlowStep(Node nodeFrom, Node nodeTo) {
|
||||
exists(AssignmentTarget target |
|
||||
nodeFrom.asExpr() = target.getValue() and
|
||||
nodeFrom.(CfgNode).getNode().getNode() = target.getValue() and
|
||||
nodeTo = TIterableSequenceNode(target)
|
||||
)
|
||||
}
|
||||
@@ -255,7 +255,7 @@ predicate iterableUnpackingAssignmentFlowStep(Node nodeFrom, Node nodeTo) {
|
||||
*/
|
||||
predicate iterableUnpackingForReadStep(CfgNode nodeFrom, Content c, Node nodeTo) {
|
||||
exists(ForTarget target |
|
||||
nodeFrom.asExpr() = target.getSource() and
|
||||
nodeFrom.getNode().getNode() = target.getSource() and
|
||||
target instanceof SequenceNode and
|
||||
nodeTo = TIterableSequenceNode(target)
|
||||
) and
|
||||
@@ -273,7 +273,7 @@ predicate iterableUnpackingForReadStep(CfgNode nodeFrom, Content c, Node nodeTo)
|
||||
predicate iterableUnpackingTupleFlowStep(Node nodeFrom, Node nodeTo) {
|
||||
exists(UnpackingAssignmentSequenceTarget target |
|
||||
nodeFrom = TIterableSequenceNode(target) and
|
||||
nodeTo.asCfgNode() = target
|
||||
nodeTo.(CfgNode).getNode() = target
|
||||
)
|
||||
}
|
||||
|
||||
@@ -305,7 +305,7 @@ predicate iterableUnpackingConvertingReadStep(Node nodeFrom, Content c, Node nod
|
||||
predicate iterableUnpackingConvertingStoreStep(Node nodeFrom, Content c, Node nodeTo) {
|
||||
exists(UnpackingAssignmentSequenceTarget target |
|
||||
nodeFrom = TIterableElementNode(target) and
|
||||
nodeTo.asCfgNode() = target and
|
||||
nodeTo.(CfgNode).getNode() = target and
|
||||
exists(int index | exists(target.getElement(index)) |
|
||||
c.(TupleElementContent).getIndex() = index
|
||||
)
|
||||
@@ -331,7 +331,7 @@ predicate iterableUnpackingElementReadStep(Node nodeFrom, Content c, Node nodeTo
|
||||
not exists(target.getAnElement().(StarredNode)) and
|
||||
starIndex = -1
|
||||
|
|
||||
nodeFrom.asCfgNode() = target and
|
||||
nodeFrom.(CfgNode).getNode() = target and
|
||||
element = target.getElement(index) and
|
||||
(
|
||||
if starIndex = -1 or index < starIndex
|
||||
|
||||
@@ -36,7 +36,7 @@ class LocalSourceNode extends Node {
|
||||
LocalSourceNode() {
|
||||
Stages::DataFlow::ref() and
|
||||
this instanceof ExprNode and
|
||||
not simpleLocalFlowStep(_, this)
|
||||
not simpleLocalFlowStepForTypetracking(_, this)
|
||||
or
|
||||
// We include all module variable nodes, as these act as stepping stones between writes and
|
||||
// reads of global variables. Without them, type tracking based on `LocalSourceNode`s would be
|
||||
|
||||
@@ -69,8 +69,8 @@ predicate matchSubjectFlowStep(Node nodeFrom, Node nodeTo) {
|
||||
subject = match.getSubject() and
|
||||
target = match.getCase(_).(Case).getPattern()
|
||||
|
|
||||
nodeFrom.asExpr() = subject and
|
||||
nodeTo.asCfgNode().getNode() = target
|
||||
nodeFrom.(CfgNode).getNode().getNode() = subject and
|
||||
nodeTo.(CfgNode).getNode().getNode() = target
|
||||
)
|
||||
}
|
||||
|
||||
@@ -84,12 +84,13 @@ predicate matchAsFlowStep(Node nodeFrom, Node nodeTo) {
|
||||
// That way, information can propagate from the interior pattern to the alias.
|
||||
//
|
||||
// the subject flows to the interior pattern
|
||||
nodeFrom.asCfgNode().getNode() = subject and
|
||||
nodeTo.asCfgNode().getNode() = subject.getPattern()
|
||||
nodeFrom.(CfgNode).getNode().getNode() = subject and
|
||||
nodeTo.(CfgNode).getNode().getNode() = subject.getPattern()
|
||||
or
|
||||
// the interior pattern flows to the alias
|
||||
nodeFrom.asCfgNode().getNode() = subject.getPattern() and
|
||||
nodeTo.asVar().getDefinition().(PatternAliasDefinition).getDefiningNode().getNode() = alias
|
||||
nodeFrom.(CfgNode).getNode().getNode() = subject.getPattern() and
|
||||
nodeTo.(EssaNode).getVar().getDefinition().(PatternAliasDefinition).getDefiningNode().getNode() =
|
||||
alias
|
||||
)
|
||||
}
|
||||
|
||||
@@ -99,8 +100,8 @@ predicate matchAsFlowStep(Node nodeFrom, Node nodeTo) {
|
||||
*/
|
||||
predicate matchOrFlowStep(Node nodeFrom, Node nodeTo) {
|
||||
exists(MatchOrPattern subject, Pattern pattern | pattern = subject.getAPattern() |
|
||||
nodeFrom.asCfgNode().getNode() = subject and
|
||||
nodeTo.asCfgNode().getNode() = pattern
|
||||
nodeFrom.(CfgNode).getNode().getNode() = subject and
|
||||
nodeTo.(CfgNode).getNode().getNode() = pattern
|
||||
)
|
||||
}
|
||||
|
||||
@@ -110,8 +111,8 @@ predicate matchOrFlowStep(Node nodeFrom, Node nodeTo) {
|
||||
*/
|
||||
predicate matchLiteralFlowStep(Node nodeFrom, Node nodeTo) {
|
||||
exists(MatchLiteralPattern pattern, Expr literal | literal = pattern.getLiteral() |
|
||||
nodeFrom.asExpr() = literal and
|
||||
nodeTo.asCfgNode().getNode() = pattern
|
||||
nodeFrom.(CfgNode).getNode().getNode() = literal and
|
||||
nodeTo.(CfgNode).getNode().getNode() = pattern
|
||||
)
|
||||
}
|
||||
|
||||
@@ -121,8 +122,14 @@ predicate matchLiteralFlowStep(Node nodeFrom, Node nodeTo) {
|
||||
*/
|
||||
predicate matchCaptureFlowStep(Node nodeFrom, Node nodeTo) {
|
||||
exists(MatchCapturePattern capture, Name var | capture.getVariable() = var |
|
||||
nodeFrom.asCfgNode().getNode() = capture and
|
||||
nodeTo.asVar().getDefinition().(PatternCaptureDefinition).getDefiningNode().getNode() = var
|
||||
nodeFrom.(CfgNode).getNode().getNode() = capture and
|
||||
nodeTo
|
||||
.(EssaNode)
|
||||
.getVar()
|
||||
.getDefinition()
|
||||
.(PatternCaptureDefinition)
|
||||
.getDefiningNode()
|
||||
.getNode() = var
|
||||
)
|
||||
}
|
||||
|
||||
@@ -132,8 +139,8 @@ predicate matchCaptureFlowStep(Node nodeFrom, Node nodeTo) {
|
||||
*/
|
||||
predicate matchValueFlowStep(Node nodeFrom, Node nodeTo) {
|
||||
exists(MatchValuePattern pattern, Expr value | value = pattern.getValue() |
|
||||
nodeFrom.asExpr() = value and
|
||||
nodeTo.asCfgNode().getNode() = pattern
|
||||
nodeFrom.(CfgNode).getNode().getNode() = value and
|
||||
nodeTo.(CfgNode).getNode().getNode() = pattern
|
||||
)
|
||||
}
|
||||
|
||||
@@ -145,8 +152,8 @@ predicate matchSequenceReadStep(Node nodeFrom, Content c, Node nodeTo) {
|
||||
exists(MatchSequencePattern subject, int index, Pattern element |
|
||||
element = subject.getPattern(index)
|
||||
|
|
||||
nodeFrom.asCfgNode().getNode() = subject and
|
||||
nodeTo.asCfgNode().getNode() = element and
|
||||
nodeFrom.(CfgNode).getNode().getNode() = subject and
|
||||
nodeTo.(CfgNode).getNode().getNode() = element and
|
||||
(
|
||||
// tuple content
|
||||
c.(TupleElementContent).getIndex() = index
|
||||
@@ -173,7 +180,7 @@ predicate matchStarReadStep(Node nodeFrom, Content c, Node nodeTo) {
|
||||
exists(MatchSequencePattern subject, int index, MatchStarPattern star |
|
||||
star = subject.getPattern(index)
|
||||
|
|
||||
nodeFrom.asCfgNode().getNode() = subject and
|
||||
nodeFrom.(CfgNode).getNode().getNode() = subject and
|
||||
nodeTo = TStarPatternElementNode(star) and
|
||||
(
|
||||
// tuple content
|
||||
@@ -200,7 +207,7 @@ predicate matchStarReadStep(Node nodeFrom, Content c, Node nodeTo) {
|
||||
predicate matchStarStoreStep(Node nodeFrom, Content c, Node nodeTo) {
|
||||
exists(MatchStarPattern star |
|
||||
nodeFrom = TStarPatternElementNode(star) and
|
||||
nodeTo.asCfgNode().getNode() = star.getTarget() and
|
||||
nodeTo.(CfgNode).getNode().getNode() = star.getTarget() and
|
||||
c instanceof ListElementContent
|
||||
)
|
||||
}
|
||||
@@ -218,8 +225,8 @@ predicate matchMappingReadStep(Node nodeFrom, Content c, Node nodeTo) {
|
||||
key = keyValue.getKey() and
|
||||
value = keyValue.getValue()
|
||||
|
|
||||
nodeFrom.asCfgNode().getNode() = subject and
|
||||
nodeTo.asCfgNode().getNode() = value and
|
||||
nodeFrom.(CfgNode).getNode().getNode() = subject and
|
||||
nodeTo.(CfgNode).getNode().getNode() = value and
|
||||
c.(DictionaryElementContent).getKey() = key.getLiteral().(StrConst).getText()
|
||||
)
|
||||
}
|
||||
@@ -233,8 +240,8 @@ predicate matchMappingReadStep(Node nodeFrom, Content c, Node nodeTo) {
|
||||
*/
|
||||
predicate matchMappingFlowStep(Node nodeFrom, Node nodeTo) {
|
||||
exists(MatchMappingPattern subject, MatchDoubleStarPattern dstar | dstar = subject.getAMapping() |
|
||||
nodeFrom.asCfgNode().getNode() = subject and
|
||||
nodeTo.asCfgNode().getNode() = dstar.getTarget()
|
||||
nodeFrom.(CfgNode).getNode().getNode() = subject and
|
||||
nodeTo.(CfgNode).getNode().getNode() = dstar.getTarget()
|
||||
)
|
||||
}
|
||||
|
||||
@@ -251,7 +258,7 @@ predicate matchMappingClearStep(Node n, Content c) {
|
||||
key = keyValue.getKey() and
|
||||
dstar = subject.getAMapping()
|
||||
|
|
||||
n.asCfgNode().getNode() = dstar.getTarget() and
|
||||
n.(CfgNode).getNode().getNode() = dstar.getTarget() and
|
||||
c.(DictionaryElementContent).getKey() = key.getLiteral().(StrConst).getText()
|
||||
)
|
||||
}
|
||||
@@ -266,8 +273,8 @@ predicate matchClassReadStep(Node nodeFrom, Content c, Node nodeTo) {
|
||||
attr = keyword.getAttribute() and
|
||||
value = keyword.getValue()
|
||||
|
|
||||
nodeFrom.asCfgNode().getNode() = subject and
|
||||
nodeTo.asCfgNode().getNode() = value and
|
||||
nodeFrom.(CfgNode).getNode().getNode() = subject and
|
||||
nodeTo.(CfgNode).getNode().getNode() = value and
|
||||
c.(AttributeContent).getAttribute() = attr.getId()
|
||||
)
|
||||
}
|
||||
|
||||
@@ -11,7 +11,7 @@ class Node = DataFlowPublic::Node;
|
||||
|
||||
class TypeTrackingNode = DataFlowPublic::TypeTrackingNode;
|
||||
|
||||
predicate simpleLocalFlowStep = DataFlowPrivate::simpleLocalFlowStep/2;
|
||||
predicate simpleLocalFlowStep = DataFlowPrivate::simpleLocalFlowStepForTypetracking/2;
|
||||
|
||||
predicate jumpStep = DataFlowPrivate::jumpStepSharedWithTypeTracker/2;
|
||||
|
||||
@@ -35,16 +35,22 @@ string getPossibleContentName() {
|
||||
*/
|
||||
pragma[nomagic]
|
||||
private DataFlowPrivate::DataFlowCallable getCallableForArgument(
|
||||
DataFlowPublic::ArgumentNode nodeFrom, int i
|
||||
DataFlowPublic::ExtractedArgumentNode nodeFrom, int i
|
||||
) {
|
||||
exists(DataFlowPrivate::DataFlowCall call |
|
||||
nodeFrom.argumentOf(call, i) and
|
||||
exists(DataFlowPrivate::ExtractedDataFlowCall call |
|
||||
nodeFrom.extractedArgumentOf(call, i) and
|
||||
result = call.getCallable()
|
||||
)
|
||||
}
|
||||
|
||||
/** Holds if `nodeFrom` steps to `nodeTo` by being passed as a parameter in a call. */
|
||||
predicate callStep(DataFlowPublic::ArgumentNode nodeFrom, DataFlowPublic::ParameterNode nodeTo) {
|
||||
/**
|
||||
* Holds if `nodeFrom` steps to `nodeTo` by being passed as a parameter in a call.
|
||||
*
|
||||
* Flow into summarized library methods is not included, as that will lead to negative
|
||||
* recursion (or, at best, terrible performance), since identifying calls to library
|
||||
* methods is done using API graphs (which uses type tracking).
|
||||
*/
|
||||
predicate callStep(DataFlowPublic::ArgumentNode nodeFrom, DataFlowPrivate::ParameterNodeImpl nodeTo) {
|
||||
// TODO: Support special methods?
|
||||
exists(DataFlowPrivate::DataFlowCallable callable, int i |
|
||||
callable = getCallableForArgument(nodeFrom, i) and
|
||||
@@ -54,8 +60,9 @@ predicate callStep(DataFlowPublic::ArgumentNode nodeFrom, DataFlowPublic::Parame
|
||||
|
||||
/** Holds if `nodeFrom` steps to `nodeTo` by being returned from a call. */
|
||||
predicate returnStep(DataFlowPrivate::ReturnNode nodeFrom, Node nodeTo) {
|
||||
exists(DataFlowPrivate::DataFlowCall call |
|
||||
nodeFrom.getEnclosingCallable() = call.getCallable() and nodeTo.asCfgNode() = call.getNode()
|
||||
exists(DataFlowPrivate::ExtractedDataFlowCall call |
|
||||
nodeFrom.getEnclosingCallable() = call.getCallable() and
|
||||
nodeTo.(DataFlowPublic::CfgNode).getNode() = call.getNode()
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@ private import semmle.python.dataflow.new.TaintTracking
|
||||
private import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import semmle.python.Concepts
|
||||
private import semmle.python.ApiGraphs
|
||||
private import semmle.python.dataflow.new.FlowSummary
|
||||
private import semmle.python.frameworks.PEP249
|
||||
private import semmle.python.frameworks.internal.PoorMansFunctionResolution
|
||||
private import semmle.python.frameworks.internal.SelfRefMixin
|
||||
@@ -3670,6 +3671,23 @@ private module StdlibPrivate {
|
||||
|
||||
override DataFlow::Node getAPathArgument() { result = this.getAnInput() }
|
||||
}
|
||||
|
||||
/** A flow summary for `reversed`. */
|
||||
class ReversedSummary extends SummarizedCallable {
|
||||
ReversedSummary() { this = "builtins.reversed" }
|
||||
|
||||
override DataFlow::CallCfgNode getACall() { result = API::builtin("reversed").getACall() }
|
||||
|
||||
override DataFlow::ArgumentNode getACallback() {
|
||||
result = API::builtin("reversed").getAValueReachableFromSource()
|
||||
}
|
||||
|
||||
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
|
||||
input = "Argument[0].ListElement" and
|
||||
output = "ReturnValue.ListElement" and
|
||||
preservesValue = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
@@ -26,14 +26,14 @@ import semmle.python.ApiGraphs::API as API
|
||||
class Unit = PY::Unit;
|
||||
|
||||
// Re-export libraries needed by ApiGraphModels.qll
|
||||
import semmle.python.frameworks.data.internal.AccessPathSyntax as AccessPathSyntax
|
||||
import semmle.python.dataflow.new.internal.AccessPathSyntax as AccessPathSyntax
|
||||
import semmle.python.dataflow.new.DataFlow::DataFlow as DataFlow
|
||||
private import AccessPathSyntax
|
||||
|
||||
/**
|
||||
* Holds if models describing `package` may be relevant for the analysis of this database.
|
||||
*/
|
||||
predicate isPackageUsed(string package) { exists(API::moduleImport(package)) }
|
||||
predicate isPackageUsed(string package) { API::moduleImportExists(package) }
|
||||
|
||||
/** Gets a Python-specific interpretation of the `(package, type, path)` tuple after resolving the first `n` access path tokens. */
|
||||
bindingset[package, type, path]
|
||||
|
||||
@@ -65,13 +65,17 @@ private class DefaultSafeExternalApi extends SafeExternalApi {
|
||||
|
||||
/** A node representing data being passed to an external API through a call. */
|
||||
class ExternalApiDataNode extends DataFlow::Node {
|
||||
DataFlowPrivate::DataFlowCall call;
|
||||
DataFlowPrivate::DataFlowCallable callable;
|
||||
int i;
|
||||
|
||||
ExternalApiDataNode() {
|
||||
exists(call.getLocation().getFile().getRelativePath()) and
|
||||
callable = call.getCallable() and
|
||||
exists(DataFlowPrivate::DataFlowCall call |
|
||||
exists(call.getLocation().getFile().getRelativePath())
|
||||
|
|
||||
callable = call.getCallable() and
|
||||
// TODO: this ignores some complexity of keyword arguments (especially keyword-only args)
|
||||
this = call.getArg(i)
|
||||
) and
|
||||
not any(SafeExternalApi safe).getSafeCallable() = callable and
|
||||
exists(Value cv | cv = callable.getCallableValue() |
|
||||
cv.isAbsent()
|
||||
@@ -82,8 +86,6 @@ class ExternalApiDataNode extends DataFlow::Node {
|
||||
or
|
||||
not exists(cv.(CallableValue).getScope().getLocation().getFile().getRelativePath())
|
||||
) and
|
||||
// TODO: this ignores some complexity of keyword arguments (especially keyword-only args)
|
||||
this = call.getArg(i) and
|
||||
// Not already modeled as a taint step
|
||||
not exists(DataFlow::Node next | TaintTrackingPrivate::defaultAdditionalTaintStep(this, next)) and
|
||||
// for `list.append(x)`, we have a additional taint step from x -> [post] list.
|
||||
|
||||
@@ -0,0 +1,33 @@
|
||||
import python
|
||||
import experimental.dataflow.TestUtil.FlowTest
|
||||
import experimental.dataflow.testTaintConfig
|
||||
private import semmle.python.dataflow.new.internal.PrintNode
|
||||
|
||||
class DataFlowTest extends FlowTest {
|
||||
DataFlowTest() { this = "DataFlowTest" }
|
||||
|
||||
override string flowTag() { result = "flow" }
|
||||
|
||||
override predicate relevantFlow(DataFlow::Node source, DataFlow::Node sink) {
|
||||
exists(TestConfiguration cfg | cfg.hasFlow(source, sink))
|
||||
}
|
||||
}
|
||||
|
||||
query predicate missingAnnotationOnSink(Location location, string error, string element) {
|
||||
error = "ERROR, you should add `# $ MISSING: flow` annotation" and
|
||||
exists(DataFlow::Node sink |
|
||||
exists(DataFlow::CallCfgNode call |
|
||||
// note: we only care about `SINK` and not `SINK_F`, so we have to reconstruct manually.
|
||||
call.getFunction().asCfgNode().(NameNode).getId() = "SINK" and
|
||||
(sink = call.getArg(_) or sink = call.getArgByName(_))
|
||||
) and
|
||||
location = sink.getLocation() and
|
||||
element = prettyExpr(sink.asExpr()) and
|
||||
not any(TestConfiguration config).hasFlow(_, sink) and
|
||||
not exists(FalseNegativeExpectation missingResult |
|
||||
missingResult.getTag() = "flow" and
|
||||
missingResult.getLocation().getFile() = location.getFile() and
|
||||
missingResult.getLocation().getStartLine() = location.getStartLine()
|
||||
)
|
||||
)
|
||||
}
|
||||
@@ -2,6 +2,7 @@ import python
|
||||
import semmle.python.dataflow.new.DataFlow
|
||||
import TestUtilities.InlineExpectationsTest
|
||||
private import semmle.python.dataflow.new.internal.PrintNode
|
||||
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
|
||||
|
||||
/**
|
||||
* A routing test is designed to test that values are routed to the
|
||||
|
||||
@@ -12,7 +12,13 @@ class UnresolvedCallExpectations extends InlineExpectationsTest {
|
||||
override predicate hasActualResult(Location location, string element, string tag, string value) {
|
||||
exists(location.getFile().getRelativePath()) and
|
||||
exists(CallNode call |
|
||||
not exists(DataFlowPrivate::DataFlowCall dfc | dfc.getNode() = call) and
|
||||
not exists(DataFlowPrivate::DataFlowCall dfc | dfc.getNode() = call |
|
||||
// For every `CallNode`, there is a `DataFlowCall` in the form of a `NormalCall`.
|
||||
// It does not really count, as it has some abstract overrides. For instance, it does not
|
||||
// define `getCallable`, so checking for the existence of this guarantees that we are in a
|
||||
// properly resolved call.
|
||||
exists(dfc.getCallable())
|
||||
) and
|
||||
not call = API::builtin(_).getACall().asCfgNode() and
|
||||
location = call.getLocation() and
|
||||
tag = "unresolved_call" and
|
||||
|
||||
@@ -1,2 +1,3 @@
|
||||
| file://:0:0:0:0 | parameter 0 of builtins.reversed |
|
||||
| test.py:1:19:1:19 | ControlFlowNode for x |
|
||||
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
|
||||
|
||||
@@ -1,2 +1,3 @@
|
||||
| file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed |
|
||||
| test.py:4:10:4:10 | ControlFlowNode for z |
|
||||
| test.py:7:19:7:19 | ControlFlowNode for a |
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
| file://:0:0:0:0 | [summary] read: argument 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
|
||||
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
|
||||
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
|
||||
| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
| file://:0:0:0:0 | [summary] read: argument 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
|
||||
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
|
||||
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
|
||||
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
|
||||
|
||||
@@ -1,3 +1,8 @@
|
||||
| file://:0:0:0:0 | [summary] read: argument 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] read: argument 0.List element in builtins.reversed |
|
||||
| file://:0:0:0:0 | [summary] read: argument 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
|
||||
| file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed |
|
||||
| file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
|
||||
| file://:0:0:0:0 | parameter 0 of builtins.reversed | file://:0:0:0:0 | parameter 0 of builtins.reversed |
|
||||
| test.py:0:0:0:0 | GSSA Variable __name__ | test.py:0:0:0:0 | GSSA Variable __name__ |
|
||||
| test.py:0:0:0:0 | GSSA Variable __package__ | test.py:0:0:0:0 | GSSA Variable __package__ |
|
||||
| test.py:0:0:0:0 | GSSA Variable b | test.py:0:0:0:0 | GSSA Variable b |
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
| file://:0:0:0:0 | [summary] read: argument 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
|
||||
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
|
||||
| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
|
||||
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:1:19:1:19 | SSA variable x |
|
||||
|
||||
@@ -1,3 +1,7 @@
|
||||
| file://:0:0:0:0 | [summary] read: argument 0.List element in builtins.reversed |
|
||||
| file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed |
|
||||
| file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
|
||||
| file://:0:0:0:0 | parameter 0 of builtins.reversed |
|
||||
| test.py:0:0:0:0 | GSSA Variable __name__ |
|
||||
| test.py:0:0:0:0 | GSSA Variable __package__ |
|
||||
| test.py:0:0:0:0 | GSSA Variable b |
|
||||
|
||||
@@ -1,3 +1,7 @@
|
||||
| file://:0:0:0:0 | [summary] read: argument 0.List element in builtins.reversed |
|
||||
| file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed |
|
||||
| file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
|
||||
| file://:0:0:0:0 | parameter 0 of builtins.reversed |
|
||||
| test.py:0:0:0:0 | GSSA Variable __name__ |
|
||||
| test.py:0:0:0:0 | GSSA Variable __package__ |
|
||||
| test.py:0:0:0:0 | GSSA Variable b |
|
||||
|
||||
@@ -31,7 +31,7 @@ try:
|
||||
# `mypkg.foo` is a `missing module variable`, but `mypkg.subpkg.bar` is compeltely
|
||||
# ignored.
|
||||
import mypkg
|
||||
mypkg.foo(42)
|
||||
mypkg.subpkg.bar(43)
|
||||
mypkg.foo(42) # $ call=mypkg.foo(..) qlclass=NormalCall
|
||||
mypkg.subpkg.bar(43) # $ call=mypkg.subpkg.bar(..) qlclass=LibraryCall arg_0=43
|
||||
except:
|
||||
pass
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
| classes.py:14:17:14:60 | ControlFlowNode for Attribute() | classes.py:14:17:14:60 | ControlFlowNode for Attribute() |
|
||||
| classes.py:45:16:45:35 | ControlFlowNode for Attribute() | classes.py:45:16:45:35 | ControlFlowNode for Attribute() |
|
||||
| classes.py:60:17:60:27 | [pre objCreate] ControlFlowNode for With_init() | classes.py:54:18:54:21 | ControlFlowNode for self |
|
||||
| classes.py:242:9:242:24 | ControlFlowNode for set() | classes.py:242:9:242:24 | ControlFlowNode for set() |
|
||||
| classes.py:247:9:247:30 | ControlFlowNode for frozenset() | classes.py:247:9:247:30 | ControlFlowNode for frozenset() |
|
||||
| classes.py:252:9:252:28 | ControlFlowNode for dict() | classes.py:252:9:252:28 | ControlFlowNode for dict() |
|
||||
| classes.py:559:16:559:17 | ControlFlowNode for Str | classes.py:565:5:565:22 | ControlFlowNode for Subscript |
|
||||
| classes.py:565:5:565:16 | ControlFlowNode for with_getitem | classes.py:555:21:555:24 | ControlFlowNode for self |
|
||||
| classes.py:565:18:565:21 | ControlFlowNode for arg2 | classes.py:555:27:555:29 | ControlFlowNode for key |
|
||||
| classes.py:581:5:581:16 | ControlFlowNode for with_setitem | classes.py:570:21:570:24 | ControlFlowNode for self |
|
||||
@@ -11,29 +13,68 @@
|
||||
| classes.py:595:9:595:20 | ControlFlowNode for with_delitem | classes.py:586:21:586:24 | ControlFlowNode for self |
|
||||
| classes.py:595:22:595:25 | ControlFlowNode for arg2 | classes.py:586:27:586:29 | ControlFlowNode for key |
|
||||
| classes.py:618:16:618:28 | ControlFlowNode for Attribute() | classes.py:618:16:618:28 | ControlFlowNode for Attribute() |
|
||||
| classes.py:659:15:659:18 | ControlFlowNode for self | classes.py:667:5:667:19 | ControlFlowNode for BinaryExpr |
|
||||
| classes.py:661:16:661:19 | ControlFlowNode for self | classes.py:667:5:667:19 | ControlFlowNode for BinaryExpr |
|
||||
| classes.py:667:5:667:12 | ControlFlowNode for with_add | classes.py:657:17:657:20 | ControlFlowNode for self |
|
||||
| classes.py:667:5:667:12 | ControlFlowNode for with_add | classes.py:667:5:667:19 | ControlFlowNode for BinaryExpr |
|
||||
| classes.py:667:16:667:19 | ControlFlowNode for arg2 | classes.py:657:23:657:27 | ControlFlowNode for other |
|
||||
| classes.py:674:15:674:18 | ControlFlowNode for self | classes.py:682:5:682:19 | ControlFlowNode for BinaryExpr |
|
||||
| classes.py:676:16:676:19 | ControlFlowNode for self | classes.py:682:5:682:19 | ControlFlowNode for BinaryExpr |
|
||||
| classes.py:682:5:682:12 | ControlFlowNode for with_sub | classes.py:672:17:672:20 | ControlFlowNode for self |
|
||||
| classes.py:682:5:682:12 | ControlFlowNode for with_sub | classes.py:682:5:682:19 | ControlFlowNode for BinaryExpr |
|
||||
| classes.py:682:16:682:19 | ControlFlowNode for arg2 | classes.py:672:23:672:27 | ControlFlowNode for other |
|
||||
| classes.py:689:15:689:18 | ControlFlowNode for self | classes.py:697:5:697:19 | ControlFlowNode for BinaryExpr |
|
||||
| classes.py:691:16:691:19 | ControlFlowNode for self | classes.py:697:5:697:19 | ControlFlowNode for BinaryExpr |
|
||||
| classes.py:697:5:697:12 | ControlFlowNode for with_mul | classes.py:687:17:687:20 | ControlFlowNode for self |
|
||||
| classes.py:697:5:697:12 | ControlFlowNode for with_mul | classes.py:697:5:697:19 | ControlFlowNode for BinaryExpr |
|
||||
| classes.py:697:16:697:19 | ControlFlowNode for arg2 | classes.py:687:23:687:27 | ControlFlowNode for other |
|
||||
| classes.py:704:15:704:18 | ControlFlowNode for self | classes.py:712:5:712:22 | ControlFlowNode for BinaryExpr |
|
||||
| classes.py:706:16:706:19 | ControlFlowNode for self | classes.py:712:5:712:22 | ControlFlowNode for BinaryExpr |
|
||||
| classes.py:712:5:712:15 | ControlFlowNode for with_matmul | classes.py:702:20:702:23 | ControlFlowNode for self |
|
||||
| classes.py:712:5:712:15 | ControlFlowNode for with_matmul | classes.py:712:5:712:22 | ControlFlowNode for BinaryExpr |
|
||||
| classes.py:712:19:712:22 | ControlFlowNode for arg2 | classes.py:702:26:702:30 | ControlFlowNode for other |
|
||||
| classes.py:719:15:719:18 | ControlFlowNode for self | classes.py:727:5:727:23 | ControlFlowNode for BinaryExpr |
|
||||
| classes.py:721:16:721:19 | ControlFlowNode for self | classes.py:727:5:727:23 | ControlFlowNode for BinaryExpr |
|
||||
| classes.py:727:5:727:16 | ControlFlowNode for with_truediv | classes.py:717:21:717:24 | ControlFlowNode for self |
|
||||
| classes.py:727:5:727:16 | ControlFlowNode for with_truediv | classes.py:727:5:727:23 | ControlFlowNode for BinaryExpr |
|
||||
| classes.py:727:20:727:23 | ControlFlowNode for arg2 | classes.py:717:27:717:31 | ControlFlowNode for other |
|
||||
| classes.py:734:15:734:18 | ControlFlowNode for self | classes.py:742:5:742:25 | ControlFlowNode for BinaryExpr |
|
||||
| classes.py:736:16:736:19 | ControlFlowNode for self | classes.py:742:5:742:25 | ControlFlowNode for BinaryExpr |
|
||||
| classes.py:742:5:742:17 | ControlFlowNode for with_floordiv | classes.py:732:22:732:25 | ControlFlowNode for self |
|
||||
| classes.py:742:5:742:17 | ControlFlowNode for with_floordiv | classes.py:742:5:742:25 | ControlFlowNode for BinaryExpr |
|
||||
| classes.py:742:22:742:25 | ControlFlowNode for arg2 | classes.py:732:28:732:32 | ControlFlowNode for other |
|
||||
| classes.py:749:15:749:18 | ControlFlowNode for self | classes.py:757:5:757:19 | ControlFlowNode for BinaryExpr |
|
||||
| classes.py:751:16:751:19 | ControlFlowNode for self | classes.py:757:5:757:19 | ControlFlowNode for BinaryExpr |
|
||||
| classes.py:757:5:757:12 | ControlFlowNode for with_mod | classes.py:747:17:747:20 | ControlFlowNode for self |
|
||||
| classes.py:757:5:757:12 | ControlFlowNode for with_mod | classes.py:757:5:757:19 | ControlFlowNode for BinaryExpr |
|
||||
| classes.py:757:16:757:19 | ControlFlowNode for arg2 | classes.py:747:23:747:27 | ControlFlowNode for other |
|
||||
| classes.py:779:15:779:18 | ControlFlowNode for self | classes.py:793:5:793:20 | ControlFlowNode for BinaryExpr |
|
||||
| classes.py:781:16:781:19 | ControlFlowNode for self | classes.py:793:5:793:20 | ControlFlowNode for BinaryExpr |
|
||||
| classes.py:793:5:793:12 | ControlFlowNode for with_pow | classes.py:777:17:777:20 | ControlFlowNode for self |
|
||||
| classes.py:793:5:793:12 | ControlFlowNode for with_pow | classes.py:793:5:793:20 | ControlFlowNode for BinaryExpr |
|
||||
| classes.py:793:17:793:20 | ControlFlowNode for arg2 | classes.py:777:23:777:27 | ControlFlowNode for other |
|
||||
| classes.py:800:15:800:18 | ControlFlowNode for self | classes.py:808:5:808:23 | ControlFlowNode for BinaryExpr |
|
||||
| classes.py:802:16:802:19 | ControlFlowNode for self | classes.py:808:5:808:23 | ControlFlowNode for BinaryExpr |
|
||||
| classes.py:808:5:808:15 | ControlFlowNode for with_lshift | classes.py:798:20:798:23 | ControlFlowNode for self |
|
||||
| classes.py:808:5:808:15 | ControlFlowNode for with_lshift | classes.py:808:5:808:23 | ControlFlowNode for BinaryExpr |
|
||||
| classes.py:808:20:808:23 | ControlFlowNode for arg2 | classes.py:798:26:798:30 | ControlFlowNode for other |
|
||||
| classes.py:815:15:815:18 | ControlFlowNode for self | classes.py:823:5:823:23 | ControlFlowNode for BinaryExpr |
|
||||
| classes.py:817:16:817:19 | ControlFlowNode for self | classes.py:823:5:823:23 | ControlFlowNode for BinaryExpr |
|
||||
| classes.py:823:5:823:15 | ControlFlowNode for with_rshift | classes.py:813:20:813:23 | ControlFlowNode for self |
|
||||
| classes.py:823:5:823:15 | ControlFlowNode for with_rshift | classes.py:823:5:823:23 | ControlFlowNode for BinaryExpr |
|
||||
| classes.py:823:20:823:23 | ControlFlowNode for arg2 | classes.py:813:26:813:30 | ControlFlowNode for other |
|
||||
| classes.py:830:15:830:18 | ControlFlowNode for self | classes.py:838:5:838:19 | ControlFlowNode for BinaryExpr |
|
||||
| classes.py:832:16:832:19 | ControlFlowNode for self | classes.py:838:5:838:19 | ControlFlowNode for BinaryExpr |
|
||||
| classes.py:838:5:838:12 | ControlFlowNode for with_and | classes.py:828:17:828:20 | ControlFlowNode for self |
|
||||
| classes.py:838:5:838:12 | ControlFlowNode for with_and | classes.py:838:5:838:19 | ControlFlowNode for BinaryExpr |
|
||||
| classes.py:838:16:838:19 | ControlFlowNode for arg2 | classes.py:828:23:828:27 | ControlFlowNode for other |
|
||||
| classes.py:845:15:845:18 | ControlFlowNode for self | classes.py:853:5:853:19 | ControlFlowNode for BinaryExpr |
|
||||
| classes.py:847:16:847:19 | ControlFlowNode for self | classes.py:853:5:853:19 | ControlFlowNode for BinaryExpr |
|
||||
| classes.py:853:5:853:12 | ControlFlowNode for with_xor | classes.py:843:17:843:20 | ControlFlowNode for self |
|
||||
| classes.py:853:5:853:12 | ControlFlowNode for with_xor | classes.py:853:5:853:19 | ControlFlowNode for BinaryExpr |
|
||||
| classes.py:853:16:853:19 | ControlFlowNode for arg2 | classes.py:843:23:843:27 | ControlFlowNode for other |
|
||||
| classes.py:860:15:860:18 | ControlFlowNode for self | classes.py:868:5:868:18 | ControlFlowNode for BinaryExpr |
|
||||
| classes.py:862:16:862:19 | ControlFlowNode for self | classes.py:868:5:868:18 | ControlFlowNode for BinaryExpr |
|
||||
| classes.py:868:5:868:11 | ControlFlowNode for with_or | classes.py:858:16:858:19 | ControlFlowNode for self |
|
||||
| classes.py:868:5:868:11 | ControlFlowNode for with_or | classes.py:868:5:868:18 | ControlFlowNode for BinaryExpr |
|
||||
| classes.py:868:15:868:18 | ControlFlowNode for arg2 | classes.py:858:22:858:26 | ControlFlowNode for other |
|
||||
|
||||
@@ -20,7 +20,7 @@ class CallGraphConfig extends DataFlow::Configuration {
|
||||
node instanceof DataFlow::ParameterNode and
|
||||
// exclude parameters to the SINK-functions
|
||||
not exists(DataFlowPrivate::DataFlowCallable c |
|
||||
node.(DataFlow::ParameterNode).isParameterOf(c, _) and
|
||||
c.getParameter(_) = node.asCfgNode() and
|
||||
c.getName().matches("SINK_")
|
||||
)
|
||||
}
|
||||
|
||||
@@ -0,0 +1,2 @@
|
||||
missingAnnotationOnSink
|
||||
failures
|
||||
@@ -0,0 +1,3 @@
|
||||
import python
|
||||
private import TestSummaries
|
||||
import experimental.dataflow.TestUtil.NormalTaintTrackingTest
|
||||
134
python/ql/test/experimental/dataflow/summaries/TestSummaries.qll
Normal file
134
python/ql/test/experimental/dataflow/summaries/TestSummaries.qll
Normal file
@@ -0,0 +1,134 @@
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.FlowSummary
|
||||
private import semmle.python.ApiGraphs
|
||||
|
||||
/**
|
||||
* This module ensures that the `callStep` predicate in
|
||||
* our type tracker implelemtation does not refer to the
|
||||
* `getACall` predicate on `SummarizedCallable`.
|
||||
*/
|
||||
module RecursionGuard {
|
||||
private import semmle.python.dataflow.new.internal.TypeTrackerSpecific as TT
|
||||
|
||||
private class RecursionGuard extends SummarizedCallable {
|
||||
RecursionGuard() { this = "RecursionGuard" }
|
||||
|
||||
override DataFlow::CallCfgNode getACall() {
|
||||
result.getFunction().asCfgNode().(NameNode).getId() = this and
|
||||
(TT::callStep(_, _) implies any())
|
||||
}
|
||||
|
||||
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
|
||||
}
|
||||
}
|
||||
|
||||
private class SummarizedCallableIdentity extends SummarizedCallable {
|
||||
SummarizedCallableIdentity() { this = "identity" }
|
||||
|
||||
override DataFlow::CallCfgNode getACall() {
|
||||
result.getFunction().asCfgNode().(NameNode).getId() = this
|
||||
}
|
||||
|
||||
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
|
||||
|
||||
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
|
||||
input = "Argument[0]" and
|
||||
output = "ReturnValue" and
|
||||
preservesValue = true
|
||||
}
|
||||
}
|
||||
|
||||
// For lambda flow to work, implement lambdaCall and lambdaCreation
|
||||
private class SummarizedCallableApplyLambda extends SummarizedCallable {
|
||||
SummarizedCallableApplyLambda() { this = "apply_lambda" }
|
||||
|
||||
override DataFlow::CallCfgNode getACall() {
|
||||
result.getFunction().asCfgNode().(NameNode).getId() = this
|
||||
}
|
||||
|
||||
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
|
||||
|
||||
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
|
||||
input = "Argument[1]" and
|
||||
output = "Argument[0].Parameter[0]" and
|
||||
preservesValue = true
|
||||
or
|
||||
input = "Argument[0].ReturnValue" and
|
||||
output = "ReturnValue" and
|
||||
preservesValue = true
|
||||
}
|
||||
}
|
||||
|
||||
private class SummarizedCallableReversed extends SummarizedCallable {
|
||||
SummarizedCallableReversed() { this = "reversed" }
|
||||
|
||||
override DataFlow::CallCfgNode getACall() {
|
||||
result.getFunction().asCfgNode().(NameNode).getId() = this
|
||||
}
|
||||
|
||||
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
|
||||
|
||||
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
|
||||
input = "Argument[0].ListElement" and
|
||||
output = "ReturnValue.ListElement" and
|
||||
preservesValue = true
|
||||
}
|
||||
}
|
||||
|
||||
private class SummarizedCallableMap extends SummarizedCallable {
|
||||
SummarizedCallableMap() { this = "list_map" }
|
||||
|
||||
override DataFlow::CallCfgNode getACall() {
|
||||
result.getFunction().asCfgNode().(NameNode).getId() = this
|
||||
}
|
||||
|
||||
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
|
||||
|
||||
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
|
||||
input = "Argument[1].ListElement" and
|
||||
output = "Argument[0].Parameter[0]" and
|
||||
preservesValue = true
|
||||
or
|
||||
input = "Argument[0].ReturnValue" and
|
||||
output = "ReturnValue.ListElement" and
|
||||
preservesValue = true
|
||||
}
|
||||
}
|
||||
|
||||
private class SummarizedCallableAppend extends SummarizedCallable {
|
||||
SummarizedCallableAppend() { this = "append_to_list" }
|
||||
|
||||
override DataFlow::CallCfgNode getACall() {
|
||||
result.getFunction().asCfgNode().(NameNode).getId() = this
|
||||
}
|
||||
|
||||
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
|
||||
|
||||
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
|
||||
input = "Argument[0]" and
|
||||
output = "ReturnValue" and
|
||||
preservesValue = false
|
||||
or
|
||||
input = "Argument[1]" and
|
||||
output = "ReturnValue.ListElement" and
|
||||
preservesValue = true
|
||||
}
|
||||
}
|
||||
|
||||
private class SummarizedCallableJsonLoads extends SummarizedCallable {
|
||||
SummarizedCallableJsonLoads() { this = "json.loads" }
|
||||
|
||||
override DataFlow::CallCfgNode getACall() {
|
||||
result = API::moduleImport("json").getMember("loads").getACall()
|
||||
}
|
||||
|
||||
override DataFlow::ArgumentNode getACallback() {
|
||||
result = API::moduleImport("json").getMember("loads").getAValueReachableFromSource()
|
||||
}
|
||||
|
||||
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
|
||||
input = "Argument[0]" and
|
||||
output = "ReturnValue.ListElement" and
|
||||
preservesValue = true
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,70 @@
|
||||
edges
|
||||
| summaries.py:32:11:32:26 | ControlFlowNode for identity() | summaries.py:33:6:33:12 | ControlFlowNode for tainted |
|
||||
| summaries.py:32:20:32:25 | ControlFlowNode for SOURCE | summaries.py:32:11:32:26 | ControlFlowNode for identity() |
|
||||
| summaries.py:36:18:36:54 | ControlFlowNode for apply_lambda() | summaries.py:37:6:37:19 | ControlFlowNode for tainted_lambda |
|
||||
| summaries.py:36:48:36:53 | ControlFlowNode for SOURCE | summaries.py:36:18:36:54 | ControlFlowNode for apply_lambda() |
|
||||
| summaries.py:44:25:44:32 | ControlFlowNode for List | summaries.py:45:6:45:20 | ControlFlowNode for Subscript |
|
||||
| summaries.py:44:26:44:31 | ControlFlowNode for SOURCE | summaries.py:44:25:44:32 | ControlFlowNode for List |
|
||||
| summaries.py:51:18:51:46 | ControlFlowNode for list_map() [List element] | summaries.py:52:6:52:19 | ControlFlowNode for tainted_mapped [List element] |
|
||||
| summaries.py:51:38:51:45 | ControlFlowNode for List [List element] | summaries.py:51:18:51:46 | ControlFlowNode for list_map() [List element] |
|
||||
| summaries.py:51:39:51:44 | ControlFlowNode for SOURCE | summaries.py:51:38:51:45 | ControlFlowNode for List [List element] |
|
||||
| summaries.py:52:6:52:19 | ControlFlowNode for tainted_mapped [List element] | summaries.py:52:6:52:22 | ControlFlowNode for Subscript |
|
||||
| summaries.py:57:27:57:63 | ControlFlowNode for list_map() [List element] | summaries.py:58:6:58:28 | ControlFlowNode for tainted_mapped_explicit [List element] |
|
||||
| summaries.py:57:55:57:62 | ControlFlowNode for List [List element] | summaries.py:57:27:57:63 | ControlFlowNode for list_map() [List element] |
|
||||
| summaries.py:57:56:57:61 | ControlFlowNode for SOURCE | summaries.py:57:55:57:62 | ControlFlowNode for List [List element] |
|
||||
| summaries.py:58:6:58:28 | ControlFlowNode for tainted_mapped_explicit [List element] | summaries.py:58:6:58:31 | ControlFlowNode for Subscript |
|
||||
| summaries.py:60:26:60:53 | ControlFlowNode for list_map() [List element] | summaries.py:61:6:61:27 | ControlFlowNode for tainted_mapped_summary [List element] |
|
||||
| summaries.py:60:45:60:52 | ControlFlowNode for List [List element] | summaries.py:60:26:60:53 | ControlFlowNode for list_map() [List element] |
|
||||
| summaries.py:60:46:60:51 | ControlFlowNode for SOURCE | summaries.py:60:45:60:52 | ControlFlowNode for List [List element] |
|
||||
| summaries.py:61:6:61:27 | ControlFlowNode for tainted_mapped_summary [List element] | summaries.py:61:6:61:30 | ControlFlowNode for Subscript |
|
||||
| summaries.py:63:16:63:41 | ControlFlowNode for append_to_list() [List element] | summaries.py:64:6:64:17 | ControlFlowNode for tainted_list [List element] |
|
||||
| summaries.py:63:35:63:40 | ControlFlowNode for SOURCE | summaries.py:63:16:63:41 | ControlFlowNode for append_to_list() [List element] |
|
||||
| summaries.py:64:6:64:17 | ControlFlowNode for tainted_list [List element] | summaries.py:64:6:64:20 | ControlFlowNode for Subscript |
|
||||
| summaries.py:67:22:67:39 | ControlFlowNode for json_loads() [List element] | summaries.py:68:6:68:23 | ControlFlowNode for tainted_resultlist [List element] |
|
||||
| summaries.py:67:33:67:38 | ControlFlowNode for SOURCE | summaries.py:67:22:67:39 | ControlFlowNode for json_loads() [List element] |
|
||||
| summaries.py:67:33:67:38 | ControlFlowNode for SOURCE | summaries.py:68:6:68:26 | ControlFlowNode for Subscript |
|
||||
| summaries.py:68:6:68:23 | ControlFlowNode for tainted_resultlist [List element] | summaries.py:68:6:68:26 | ControlFlowNode for Subscript |
|
||||
nodes
|
||||
| summaries.py:32:11:32:26 | ControlFlowNode for identity() | semmle.label | ControlFlowNode for identity() |
|
||||
| summaries.py:32:20:32:25 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
|
||||
| summaries.py:33:6:33:12 | ControlFlowNode for tainted | semmle.label | ControlFlowNode for tainted |
|
||||
| summaries.py:36:18:36:54 | ControlFlowNode for apply_lambda() | semmle.label | ControlFlowNode for apply_lambda() |
|
||||
| summaries.py:36:48:36:53 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
|
||||
| summaries.py:37:6:37:19 | ControlFlowNode for tainted_lambda | semmle.label | ControlFlowNode for tainted_lambda |
|
||||
| summaries.py:44:25:44:32 | ControlFlowNode for List | semmle.label | ControlFlowNode for List |
|
||||
| summaries.py:44:26:44:31 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
|
||||
| summaries.py:45:6:45:20 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
|
||||
| summaries.py:51:18:51:46 | ControlFlowNode for list_map() [List element] | semmle.label | ControlFlowNode for list_map() [List element] |
|
||||
| summaries.py:51:38:51:45 | ControlFlowNode for List [List element] | semmle.label | ControlFlowNode for List [List element] |
|
||||
| summaries.py:51:39:51:44 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
|
||||
| summaries.py:52:6:52:19 | ControlFlowNode for tainted_mapped [List element] | semmle.label | ControlFlowNode for tainted_mapped [List element] |
|
||||
| summaries.py:52:6:52:22 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
|
||||
| summaries.py:57:27:57:63 | ControlFlowNode for list_map() [List element] | semmle.label | ControlFlowNode for list_map() [List element] |
|
||||
| summaries.py:57:55:57:62 | ControlFlowNode for List [List element] | semmle.label | ControlFlowNode for List [List element] |
|
||||
| summaries.py:57:56:57:61 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
|
||||
| summaries.py:58:6:58:28 | ControlFlowNode for tainted_mapped_explicit [List element] | semmle.label | ControlFlowNode for tainted_mapped_explicit [List element] |
|
||||
| summaries.py:58:6:58:31 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
|
||||
| summaries.py:60:26:60:53 | ControlFlowNode for list_map() [List element] | semmle.label | ControlFlowNode for list_map() [List element] |
|
||||
| summaries.py:60:45:60:52 | ControlFlowNode for List [List element] | semmle.label | ControlFlowNode for List [List element] |
|
||||
| summaries.py:60:46:60:51 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
|
||||
| summaries.py:61:6:61:27 | ControlFlowNode for tainted_mapped_summary [List element] | semmle.label | ControlFlowNode for tainted_mapped_summary [List element] |
|
||||
| summaries.py:61:6:61:30 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
|
||||
| summaries.py:63:16:63:41 | ControlFlowNode for append_to_list() [List element] | semmle.label | ControlFlowNode for append_to_list() [List element] |
|
||||
| summaries.py:63:35:63:40 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
|
||||
| summaries.py:64:6:64:17 | ControlFlowNode for tainted_list [List element] | semmle.label | ControlFlowNode for tainted_list [List element] |
|
||||
| summaries.py:64:6:64:20 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
|
||||
| summaries.py:67:22:67:39 | ControlFlowNode for json_loads() [List element] | semmle.label | ControlFlowNode for json_loads() [List element] |
|
||||
| summaries.py:67:33:67:38 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
|
||||
| summaries.py:68:6:68:23 | ControlFlowNode for tainted_resultlist [List element] | semmle.label | ControlFlowNode for tainted_resultlist [List element] |
|
||||
| summaries.py:68:6:68:26 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
|
||||
subpaths
|
||||
invalidSpecComponent
|
||||
#select
|
||||
| summaries.py:33:6:33:12 | ControlFlowNode for tainted | summaries.py:32:20:32:25 | ControlFlowNode for SOURCE | summaries.py:33:6:33:12 | ControlFlowNode for tainted | $@ | summaries.py:32:20:32:25 | ControlFlowNode for SOURCE | ControlFlowNode for SOURCE |
|
||||
| summaries.py:37:6:37:19 | ControlFlowNode for tainted_lambda | summaries.py:36:48:36:53 | ControlFlowNode for SOURCE | summaries.py:37:6:37:19 | ControlFlowNode for tainted_lambda | $@ | summaries.py:36:48:36:53 | ControlFlowNode for SOURCE | ControlFlowNode for SOURCE |
|
||||
| summaries.py:45:6:45:20 | ControlFlowNode for Subscript | summaries.py:44:26:44:31 | ControlFlowNode for SOURCE | summaries.py:45:6:45:20 | ControlFlowNode for Subscript | $@ | summaries.py:44:26:44:31 | ControlFlowNode for SOURCE | ControlFlowNode for SOURCE |
|
||||
| summaries.py:52:6:52:22 | ControlFlowNode for Subscript | summaries.py:51:39:51:44 | ControlFlowNode for SOURCE | summaries.py:52:6:52:22 | ControlFlowNode for Subscript | $@ | summaries.py:51:39:51:44 | ControlFlowNode for SOURCE | ControlFlowNode for SOURCE |
|
||||
| summaries.py:58:6:58:31 | ControlFlowNode for Subscript | summaries.py:57:56:57:61 | ControlFlowNode for SOURCE | summaries.py:58:6:58:31 | ControlFlowNode for Subscript | $@ | summaries.py:57:56:57:61 | ControlFlowNode for SOURCE | ControlFlowNode for SOURCE |
|
||||
| summaries.py:61:6:61:30 | ControlFlowNode for Subscript | summaries.py:60:46:60:51 | ControlFlowNode for SOURCE | summaries.py:61:6:61:30 | ControlFlowNode for Subscript | $@ | summaries.py:60:46:60:51 | ControlFlowNode for SOURCE | ControlFlowNode for SOURCE |
|
||||
| summaries.py:64:6:64:20 | ControlFlowNode for Subscript | summaries.py:63:35:63:40 | ControlFlowNode for SOURCE | summaries.py:64:6:64:20 | ControlFlowNode for Subscript | $@ | summaries.py:63:35:63:40 | ControlFlowNode for SOURCE | ControlFlowNode for SOURCE |
|
||||
| summaries.py:68:6:68:26 | ControlFlowNode for Subscript | summaries.py:67:33:67:38 | ControlFlowNode for SOURCE | summaries.py:68:6:68:26 | ControlFlowNode for Subscript | $@ | summaries.py:67:33:67:38 | ControlFlowNode for SOURCE | ControlFlowNode for SOURCE |
|
||||
68
python/ql/test/experimental/dataflow/summaries/summaries.py
Normal file
68
python/ql/test/experimental/dataflow/summaries/summaries.py
Normal file
@@ -0,0 +1,68 @@
|
||||
|
||||
import sys
|
||||
import os
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname((__file__))))
|
||||
from testlib import expects
|
||||
|
||||
# These are defined so that we can evaluate the test code.
|
||||
NONSOURCE = "not a source"
|
||||
SOURCE = "source"
|
||||
|
||||
|
||||
def is_source(x):
|
||||
return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j
|
||||
|
||||
|
||||
def SINK(x):
|
||||
if is_source(x):
|
||||
print("OK")
|
||||
else:
|
||||
print("Unexpected flow", x)
|
||||
|
||||
|
||||
def SINK_F(x):
|
||||
if is_source(x):
|
||||
print("Unexpected flow", x)
|
||||
else:
|
||||
print("OK")
|
||||
|
||||
|
||||
# Simple summary
|
||||
tainted = identity(SOURCE)
|
||||
SINK(tainted) # $ flow="SOURCE, l:-1 -> tainted"
|
||||
|
||||
# Lambda summary
|
||||
tainted_lambda = apply_lambda(lambda x: x + 1, SOURCE)
|
||||
SINK(tainted_lambda) # $ flow="SOURCE, l:-1 -> tainted_lambda"
|
||||
|
||||
# A lambda that breaks the flow
|
||||
untainted_lambda = apply_lambda(lambda x: 1, SOURCE)
|
||||
SINK_F(untainted_lambda)
|
||||
|
||||
# Collection summaries
|
||||
tainted_list = reversed([SOURCE])
|
||||
SINK(tainted_list[0]) # $ flow="SOURCE, l:-1 -> tainted_list[0]"
|
||||
|
||||
# Complex summaries
|
||||
def add_colon(x):
|
||||
return x + ":"
|
||||
|
||||
tainted_mapped = list_map(add_colon, [SOURCE])
|
||||
SINK(tainted_mapped[0]) # $ flow="SOURCE, l:-1 -> tainted_mapped[0]"
|
||||
|
||||
def explicit_identity(x):
|
||||
return x
|
||||
|
||||
tainted_mapped_explicit = list_map(explicit_identity, [SOURCE])
|
||||
SINK(tainted_mapped_explicit[0]) # $ flow="SOURCE, l:-1 -> tainted_mapped_explicit[0]"
|
||||
|
||||
tainted_mapped_summary = list_map(identity, [SOURCE])
|
||||
SINK(tainted_mapped_summary[0]) # $ flow="SOURCE, l:-1 -> tainted_mapped_summary[0]"
|
||||
|
||||
tainted_list = append_to_list([], SOURCE)
|
||||
SINK(tainted_list[0]) # $ flow="SOURCE, l:-1 -> tainted_list[0]"
|
||||
|
||||
from json import loads as json_loads
|
||||
tainted_resultlist = json_loads(SOURCE)
|
||||
SINK(tainted_resultlist[0]) # $ flow="SOURCE, l:-1 -> tainted_resultlist[0]"
|
||||
21
python/ql/test/experimental/dataflow/summaries/summaries.ql
Normal file
21
python/ql/test/experimental/dataflow/summaries/summaries.ql
Normal file
@@ -0,0 +1,21 @@
|
||||
/**
|
||||
* @kind path-problem
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.dataflow.new.FlowSummary
|
||||
import DataFlow::PathGraph
|
||||
import semmle.python.dataflow.new.TaintTracking
|
||||
import semmle.python.dataflow.new.internal.FlowSummaryImpl
|
||||
import semmle.python.ApiGraphs
|
||||
import experimental.dataflow.testTaintConfig
|
||||
private import TestSummaries
|
||||
|
||||
query predicate invalidSpecComponent(SummarizedCallable sc, string s, string c) {
|
||||
(sc.propagatesFlowExt(s, _, _) or sc.propagatesFlowExt(_, s, _)) and
|
||||
Private::External::invalidSpecComponent(s, c)
|
||||
}
|
||||
|
||||
from DataFlow::PathNode source, DataFlow::PathNode sink, TestConfiguration conf
|
||||
where conf.hasFlowPath(source, sink)
|
||||
select sink, source, sink, "$@", source, source.toString()
|
||||
51
python/ql/test/experimental/dataflow/testTaintConfig.qll
Normal file
51
python/ql/test/experimental/dataflow/testTaintConfig.qll
Normal file
@@ -0,0 +1,51 @@
|
||||
/**
|
||||
* Configuration to test selected data flow
|
||||
* Sources in the source code are denoted by the special name `SOURCE`,
|
||||
* and sinks are denoted by arguments to the special function `SINK`.
|
||||
* For example, given the test code
|
||||
* ```python
|
||||
* def test():
|
||||
* s = SOURCE
|
||||
* SINK(s)
|
||||
* ```
|
||||
* `SOURCE` will be a source and the second occurrence of `s` will be a sink.
|
||||
*
|
||||
* In order to test literals, alternative sources are defined for each type:
|
||||
*
|
||||
* for | use
|
||||
* ----------
|
||||
* string | `"source"`
|
||||
* integer | `42`
|
||||
* float | `42.0`
|
||||
* complex | `42j` (not supported yet)
|
||||
*/
|
||||
|
||||
private import python
|
||||
import semmle.python.dataflow.new.DataFlow
|
||||
import semmle.python.dataflow.new.TaintTracking
|
||||
|
||||
class TestConfiguration extends TaintTracking::Configuration {
|
||||
TestConfiguration() { this = "TestConfiguration" }
|
||||
|
||||
override predicate isSource(DataFlow::Node node) {
|
||||
node.(DataFlow::CfgNode).getNode().(NameNode).getId() = "SOURCE"
|
||||
or
|
||||
node.(DataFlow::CfgNode).getNode().getNode().(StrConst).getS() = "source"
|
||||
or
|
||||
node.(DataFlow::CfgNode).getNode().getNode().(IntegerLiteral).getN() = "42"
|
||||
or
|
||||
node.(DataFlow::CfgNode).getNode().getNode().(FloatLiteral).getN() = "42.0"
|
||||
// No support for complex numbers
|
||||
}
|
||||
|
||||
override predicate isSink(DataFlow::Node node) {
|
||||
exists(CallNode call |
|
||||
call.getFunction().(NameNode).getId() in ["SINK", "SINK_F"] and
|
||||
node.(DataFlow::CfgNode).getNode() = call.getAnArg()
|
||||
)
|
||||
}
|
||||
|
||||
override predicate isSanitizerIn(DataFlow::Node node) { this.isSource(node) }
|
||||
|
||||
override int explorationLimit() { result = 5 }
|
||||
}
|
||||
@@ -1,5 +1,5 @@
|
||||
import python
|
||||
import semmle.python.frameworks.data.internal.AccessPathSyntax as AccessPathSyntax
|
||||
import semmle.python.dataflow.new.internal.AccessPathSyntax as AccessPathSyntax
|
||||
import semmle.python.frameworks.data.ModelsAsData
|
||||
import semmle.python.dataflow.new.TaintTracking
|
||||
import semmle.python.dataflow.new.DataFlow
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import python
|
||||
import semmle.python.frameworks.data.internal.AccessPathSyntax as AccessPathSyntax
|
||||
import semmle.python.dataflow.new.internal.AccessPathSyntax as AccessPathSyntax
|
||||
import semmle.python.frameworks.data.internal.ApiGraphModels as ApiGraphModels
|
||||
import semmle.python.frameworks.data.ModelsAsData
|
||||
|
||||
|
||||
Reference in New Issue
Block a user