Merge pull request #11376 from RasmusWL/call-graph-code

Python: New type-tracking based call-graph
This commit is contained in:
Taus
2023-02-27 14:51:21 +01:00
committed by GitHub
208 changed files with 5068 additions and 2412 deletions

View File

@@ -0,0 +1,4 @@
---
category: majorAnalysis
---
* We use a new analysis for the call-graph (determining which function is called). This can lead to changed results. In most cases this is much more accurate than the old call-graph that was based on points-to, but we do lose a few valid edges in the call-graph, especially around methods that are not defined inside their class.

View File

@@ -125,7 +125,7 @@ class ControlFlowNode extends @py_flow_node {
/** Gets a textual representation of this element. */
cached
string toString() {
Stages::DataFlow::ref() and
Stages::AST::ref() and
exists(Scope s | s.getEntryNode() = this | result = "Entry node for " + s.toString())
or
exists(Scope s | s.getANormalExit() = this | result = "Exit node for " + s.toString())
@@ -411,6 +411,12 @@ class CallNode extends ControlFlowNode {
result.getNode() = this.getNode().getStarArg() and
result.getBasicBlock().dominates(this.getBasicBlock())
}
/**
 * Gets the control flow node of a dictionary unpacking (`**`) argument of this call, if any.
 *
 * Only nodes whose basic block dominates the basic block of this call are returned.
 */
ControlFlowNode getKwargs() {
  exists(BasicBlock callBlock | callBlock = this.getBasicBlock() |
    result.getBasicBlock().dominates(callBlock) and
    this.getNode().getKwargs() = result.getNode()
  )
}
}
/** A control flow corresponding to an attribute expression, such as `value.attr` */

File diff suppressed because it is too large Load Diff

View File

@@ -1,838 +0,0 @@
/**
* INTERNAL: Do not use.
*
* Points-to based call-graph.
*/
private import python
private import DataFlowPublic
private import semmle.python.SpecialMethods
private import FlowSummaryImpl as FlowSummaryImpl
/** The position of a parameter within a callable, encoded as an integer. */
class ParameterPosition extends int {
  ParameterPosition() { exists(DataFlowCallable c | exists(c.getParameter(this))) }

  /** Holds if this position represents a positional parameter at position `pos`. */
  predicate isPositional(int pos) {
    // in the current representation every parameter position is positional
    pos = this
  }
}
/**
 * The position of an argument within a call, encoded as an integer.
 *
 * Besides every index at which some `Call` has an argument, the values `-1` and `-2`
 * are always included.
 */
class ArgumentPosition extends int {
  ArgumentPosition() {
    exists(Call c | exists(c.getArg(this)))
    or
    this = [-1, -2]
  }

  /** Holds if this position represents a positional argument at position `pos`. */
  predicate isPositional(int pos) {
    // in the current representation every argument position is positional
    pos = this
  }
}
/**
 * Holds if an argument at position `apos` is passed to the parameter at position `ppos`.
 * With the integer encoding used here, that is the case exactly when the two are equal.
 */
pragma[inline]
predicate parameterMatch(ParameterPosition ppos, ArgumentPosition apos) { apos = ppos }
/**
* Computes routing of arguments to parameters
*
* When a call contains more positional arguments than there are positional parameters,
* the extra positional arguments are passed as a tuple to a starred parameter. This is
* achieved by synthesizing a node `TPosOverflowNode(call, callable)`
* that represents the tuple of extra positional arguments. There is a store step from each
* extra positional argument to this node.
*
* CURRENTLY NOT SUPPORTED:
* When a call contains an iterable unpacking argument, such as `func(*args)`, it is expanded into positional arguments.
*
* CURRENTLY NOT SUPPORTED:
* If a call contains an iterable unpacking argument, such as `func(*args)`, and the callee contains a starred argument, any extra
* positional arguments are passed to the starred argument.
*
* When a call contains keyword arguments that do not correspond to keyword parameters, these
* extra keyword arguments are passed as a dictionary to a doubly starred parameter. This is
* achieved by synthesizing a node `TKwOverflowNode(call, callable)`
* that represents the dictionary of extra keyword arguments. There is a store step from each
* extra keyword argument to this node.
*
* When a call contains a dictionary unpacking argument, such as `func(**kwargs)`, with entries corresponding to a keyword parameter,
* the value at such a key is unpacked and passed to the parameter. This is achieved
* by synthesizing an argument node `TKwUnpacked(call, callable, name)` representing the unpacked
* value. This node is used as the argument passed to the matching keyword parameter. There is a read
* step from the dictionary argument to the synthesized argument node.
*
* When a call contains a dictionary unpacking argument, such as `func(**kwargs)`, and the callee contains a doubly starred parameter,
* entries which are not unpacked are passed to the doubly starred parameter. This is achieved by
* adding a dataflow step from the dictionary argument to `TKwOverflowNode(call, callable)` and a
* step to clear content of that node at any unpacked keys.
*
* ## Examples:
* Assume that we have the callable
* ```python
* def f(x, y, *t, **d):
* pass
* ```
* Then the call
* ```python
* f(0, 1, 2, a=3)
* ```
* will be modeled as
* ```python
* f(0, 1, [*t], [**d])
* ```
* where `[` and `]` denote synthesized nodes, so `[*t]` is the synthesized tuple argument
* `TPosOverflowNode` and `[**d]` is the synthesized dictionary argument `TKwOverflowNode`.
* There will be a store step from `2` to `[*t]` at pos `0` and one from `3` to `[**d]` at key
* `a`.
*
* For the call
* ```python
* f(0, **{"y": 1, "a": 3})
* ```
* no tuple argument is synthesized. It is modeled as
* ```python
* f(0, [y=1], [**d])
* ```
* where `[y=1]` is the synthesized unpacked argument `TKwUnpacked` (with `name` = `y`). There is
* a read step from `**{"y": 1, "a": 3}` to `[y=1]` at key `y` to get the value passed to the parameter
* `y`. There is a dataflow step from `**{"y": 1, "a": 3}` to `[**d]` to transfer the content and
* a clearing of content at key `y` for node `[**d]`, since that value has been unpacked.
*/
module ArgumentPassing {
/**
 * Holds if the `CallNode` `call` underlies a `NormalCall` whose target
 * `DataFlowCallable` has `callable` as its callable value.
 *
 * Note that `call = callable.getACall()` is _not_ guaranteed, e.g. when `call`
 * represents a `ClassCall`.
 *
 * Used to limit the size of predicates.
 */
predicate connects(CallNode call, CallableValue callable) {
  exists(NormalCall nc | nc.getNode() = call |
    nc.getCallable().getCallableValue() = callable
  )
}
/**
 * Gets the parameter of `callable` at position `n`.
 *
 * - Non-negative `n` denotes an ordinary positional parameter.
 * - `n = -1` denotes the starred parameter (say `*tuple`), if the callable has one.
 * - `n = -2` denotes the doubly starred parameter (say `**dict`), if the callable has one.
 *
 * Note that, unlike other languages, -1 is _not_ used for the position of `self` in
 * Python, as it is an explicit parameter at position 0.
 */
NameNode getParameter(CallableValue callable, int n) {
  // ordinary positional parameter
  callable.getParameter(n) = result
  or
  exists(Function func | func = callable.getScope() |
    // starred parameter, `*tuple`
    n = -1 and
    func.getVararg().getAFlowNode() = result
    or
    // doubly starred parameter, `**dict`
    n = -2 and
    func.getKwarg().getAFlowNode() = result
  )
}
/**
 * A type representing a mapping from argument indices to parameter indices.
 *
 * There are currently two mappings:
 * - `TNoShift`, the identity, used for ordinary function calls.
 * - `TShiftOneUp`, used for calls where an extra argument is inserted. These
 *   include method calls, constructor calls and class calls. In these calls, the
 *   argument at index `n` is mapped to the parameter at position `n+1`.
 */
newtype TArgParamMapping =
TNoShift() or
TShiftOneUp()
/**
 * A mapping from argument indices to parameter indices, used for parameter passing.
 *
 * Concrete subclasses provide the actual index translation through `getParamN`.
 */
abstract class ArgParamMapping extends TArgParamMapping {
/**
 * Gets the index of the parameter that corresponds to the argument at index `argN`.
 *
 * `argN` must be bound by the caller, as required by the `bindingset` annotation.
 */
bindingset[argN]
abstract int getParamN(int argN);
/** Gets a textual representation of this element. */
abstract string toString();
}
/** The identity mapping: the argument at index `n` is passed to the parameter at index `n`. */
class NoShift extends ArgParamMapping, TNoShift {
  NoShift() { TNoShift() = this }

  bindingset[argN]
  override int getParamN(int argN) { argN = result }

  override string toString() { "NoShift [n -> n]" = result }
}
/**
 * The shifting mapping: the argument at index `n` is passed to the parameter at index
 * `n+1`, leaving parameter 0 free for an inserted argument.
 */
class ShiftOneUp extends ArgParamMapping, TShiftOneUp {
  ShiftOneUp() { TShiftOneUp() = this }

  bindingset[argN]
  override int getParamN(int argN) { argN + 1 = result }

  override string toString() { "ShiftOneUp [n -> n+1]" = result }
}
/**
 * Gets the node representing the argument to `call` that is passed to the parameter at
 * (zero-based) index `paramN` in `callable`. If this is a positional argument, it must appear
 * at an index, `argN`, in `call` which satisfies `paramN = mapping.getParamN(argN)`.
 *
 * `mapping` will be the identity for function calls, but not for method- or constructor calls,
 * where the first parameter is `self` and the first positional argument is passed to the second positional parameter.
 * Similarly for classmethod calls, where the first parameter is `cls`.
 *
 * The result is one of five kinds of node: a positional argument, a keyword argument,
 * the synthesized positional-overflow node (`paramN = -1`), the synthesized
 * keyword-overflow node (`paramN = -2`), or a synthesized node for a value unpacked
 * from a `**` dictionary argument.
 *
 * Only holds for pairs of `call` and `callable` related by `connects`.
 *
 * NOT SUPPORTED: Keyword-only parameters.
 */
Node getArg(CallNode call, ArgParamMapping mapping, CallableValue callable, int paramN) {
connects(call, callable) and
(
// positional argument
exists(int argN |
paramN = mapping.getParamN(argN) and
result = TCfgNode(call.getArg(argN))
)
or
// keyword argument
// TODO: Since `getArgName` has no results for keyword-only parameters,
// these are currently not supported.
exists(Function f, string argName |
f = callable.getScope() and
f.getArgName(paramN) = argName and
// `unbind_string` is used instead of `argName` directly to prevent a bad join
result = TCfgNode(call.getArgByName(unbind_string(argName)))
)
or
// a synthesized argument passed to the starred parameter (at position -1)
callable.getScope().hasVarArg() and
paramN = -1 and
result = TPosOverflowNode(call, callable)
or
// a synthesized argument passed to the doubly starred parameter (at position -2)
callable.getScope().hasKwArg() and
paramN = -2 and
result = TKwOverflowNode(call, callable)
or
// argument unpacked from dict
exists(string name |
call_unpacks(call, mapping, callable, name, paramN) and
result = TKwUnpackedNode(call, callable, name)
)
)
}
/**
 * Gets a string equal to `s`, expressed as a pair of inequalities rather than as an
 * equality.
 *
 * Currently required in `getArg` in order to prevent a bad join.
 */
bindingset[result, s]
private string unbind_string(string s) { result <= s and s <= result }
/**
 * Gets the control flow node that is passed as the `n`th overflow positional argument,
 * that is, the positional argument of `call` located `n` places past the positional
 * parameters of `callable`. Only has results when `callable` has a starred parameter.
 */
ControlFlowNode getPositionalOverflowArg(CallNode call, CallableValue callable, int n) {
  connects(call, callable) and
  exists(Function func, int numPositional, int index |
    func = callable.getScope() and
    func.hasVarArg() and
    numPositional = func.getPositionalParameterCount()
  |
    index = numPositional + n and
    index >= numPositional and
    call.getArg(index) = result
  )
}
/**
 * Gets the control flow node that is passed as the overflow keyword argument with key
 * `key`: a keyword argument of `call` whose name does not match any named argument of
 * `callable`. Only has results when `callable` has a doubly starred parameter.
 */
ControlFlowNode getKeywordOverflowArg(CallNode call, CallableValue callable, string key) {
  exists(Function func | func = callable.getScope() and func.hasKwArg() |
    call.getArgByName(key) = result and
    not exists(func.getArgByName(key))
  ) and
  connects(call, callable)
}
/**
 * Holds if `call` unpacks a dictionary argument in order to pass it via `name`.
 * It will then be passed to the parameter of `callable` at index `paramN`.
 *
 * This requires that `call` has a `**` dictionary argument, that `name` is the name of
 * the parameter at index `paramN`, and that `call` supplies that parameter neither
 * positionally (under `mapping`) nor by an explicit keyword argument.
 */
predicate call_unpacks(
CallNode call, ArgParamMapping mapping, CallableValue callable, string name, int paramN
) {
connects(call, callable) and
exists(Function f |
f = callable.getScope() and
not exists(int argN | paramN = mapping.getParamN(argN) | exists(call.getArg(argN))) and // no positional argument available
name = f.getArgName(paramN) and
// not exists(call.getArgByName(name)) and // only matches keyword arguments not preceded by **
// TODO: make the below logic respect control flow splitting (by not going to the AST).
not call.getNode().getANamedArg().(Keyword).getArg() = name and // no keyword argument available
// restrict `paramN` to the positional and keyword-only parameters of `f`
paramN >= 0 and
paramN < f.getPositionalParameterCount() + f.getKeywordOnlyParameterCount() and
exists(call.getNode().getKwargs()) // dict argument available
)
}
}
import ArgumentPassing
/**
 * A callable defined in library code, identified by a unique string.
 *
 * Concrete subclasses specify where the callable is called and where it is passed as a
 * call-back.
 */
abstract class LibraryCallable extends string {
// The characteristic predicate places no restriction on the string; `bindingset[this]`
// requires `this` to be bound elsewhere (e.g. by a subclass).
bindingset[this]
LibraryCallable() { any() }
/** Gets a call to this library callable. */
abstract CallCfgNode getACall();
/** Gets a data-flow node, where this library callable is used as a call-back. */
abstract ArgumentNode getACallback();
}
/**
 * IPA type for DataFlowCallable.
 *
 * A callable is either a function value, a class value, or a module (for enclosing `ModuleVariableNode`s).
 * A module has no calls.
 *
 * In addition, there is a branch for lambdas (represented by their `Function`) and a
 * branch for library callables.
 */
newtype TDataFlowCallable =
TCallableValue(CallableValue callable) {
// lambdas are excluded here, since they have their own `TLambda` branch
callable instanceof FunctionValue and
not callable.(FunctionValue).isLambda()
or
callable instanceof ClassValue
} or
TLambda(Function lambda) { lambda.isLambda() } or
TModule(Module m) or
TLibraryCallable(LibraryCallable callable)
/**
 * A callable.
 *
 * Apart from `toString` and `asLibraryCallable`, the member predicates of this base class
 * have no results; subclasses override the ones that apply to them.
 */
class DataFlowCallable extends TDataFlowCallable {
/** Gets a textual representation of this element. */
string toString() { result = "DataFlowCallable" }
/** Gets a call to this callable. */
CallNode getACall() { none() }
/** Gets the scope of this callable */
Scope getScope() { none() }
/** Gets the specified parameter of this callable */
NameNode getParameter(int n) { none() }
/** Gets the name of this callable. */
string getName() { none() }
/** Gets a callable value for this callable, if any. */
CallableValue getCallableValue() { none() }
/** Gets the underlying library callable, if any. */
LibraryCallable asLibraryCallable() { this = TLibraryCallable(result) }
/** Gets the location of this callable. This base implementation has no result. */
Location getLocation() { none() }
}
/**
 * A `DataFlowCallable` backed by a `CallableValue`. All member predicates delegate to
 * that value.
 */
class DataFlowCallableValue extends DataFlowCallable, TCallableValue {
  CallableValue callable;

  DataFlowCallableValue() { TCallableValue(callable) = this }

  override CallableValue getCallableValue() { callable = result }

  override string getName() { callable.getName() = result }

  override Scope getScope() { callable.getScope() = result }

  override CallNode getACall() { callable.getACall() = result }

  override NameNode getParameter(int n) { getParameter(callable, n) = result }

  override string toString() { callable.toString() = result }
}
/**
 * A `DataFlowCallable` for a lambda, represented by the lambda's `Function`.
 *
 * Calls and parameters are obtained through the `FunctionValue` whose origin is the
 * expression defining the lambda.
 */
class DataFlowLambda extends DataFlowCallable, TLambda {
  Function lambda;

  DataFlowLambda() { TLambda(lambda) = this }

  /** Gets the expression that defines the underlying lambda. */
  Expr getDefinition() { lambda.getDefinition() = result }

  override FunctionValue getCallableValue() {
    lambda.getDefinition() = result.getOrigin().getNode()
  }

  override string getName() { "Lambda callable" = result }

  override Scope getScope() { lambda.getEvaluatingScope() = result }

  override CallNode getACall() { this.getCallableValue().getACall() = result }

  override NameNode getParameter(int n) { getParameter(this.getCallableValue(), n) = result }

  override string toString() { lambda.toString() = result }
}
/**
 * A class representing the scope in which a `ModuleVariableNode` appears.
 *
 * A module acts as a callable only to serve as an enclosing scope: it has no calls,
 * no parameters and no callable value.
 */
class DataFlowModuleScope extends DataFlowCallable, TModule {
// the module that this callable wraps
Module mod;
DataFlowModuleScope() { this = TModule(mod) }
override string toString() { result = mod.toString() }
override CallNode getACall() { none() }
override Scope getScope() { result = mod }
override NameNode getParameter(int n) { none() }
override string getName() { result = mod.getName() }
override CallableValue getCallableValue() { none() }
}
/**
 * A `DataFlowCallable` wrapping a `LibraryCallable`.
 *
 * It has no scope and no parameters; its name is the string identifying the library
 * callable.
 */
class LibraryCallableValue extends DataFlowCallable, TLibraryCallable {
// the library callable that this callable wraps
LibraryCallable callable;
LibraryCallableValue() { this = TLibraryCallable(callable) }
override string toString() { result = callable.toString() }
override CallNode getACall() { result = callable.getACall().getNode() }
/** Gets a data-flow node, where this library callable is used as a call-back. */
ArgumentNode getACallback() { result = callable.getACallback() }
override Scope getScope() { none() }
override NameNode getParameter(int n) { none() }
// `LibraryCallable` extends `string`, so the callable itself serves as the name
override string getName() { result = callable }
override LibraryCallable asLibraryCallable() { result = callable }
}
/**
 * IPA type for DataFlowCall.
 *
 * Calls corresponding to `CallNode`s are either to callable values or to classes.
 * The latter is directed to the callable corresponding to the `__init__` method of the class.
 *
 * An `__init__` method can also be called directly, so that the callable can be targeted by
 * different types of calls. In that case, the parameter mappings will be different,
 * as the class call will synthesize an argument node to be mapped to the `self` parameter.
 *
 * A call corresponding to a special method call is handled by the corresponding `SpecialMethodCallNode`.
 *
 * A third branch, `TSummaryCall`, represents calls synthesized inside summarized callables.
 *
 * TODO: Add `TClassMethodCall` mapping `cls` appropriately.
 */
newtype TDataFlowCall =
/**
 * Includes function calls, method calls, class calls and library calls.
 * All these will be associated with a `CallNode`.
 */
TNormalCall(CallNode call) or
/**
 * Includes calls to special methods.
 * These will be associated with a `SpecialMethodCallNode`.
 */
TSpecialCall(SpecialMethodCallNode special) or
/**
 * A synthesized call inside a summarized callable.
 * The valid pairs of `c` and `receiver` are those given by
 * `FlowSummaryImpl::Private::summaryCallbackRange`.
 */
TSummaryCall(FlowSummaryImpl::Public::SummarizedCallable c, Node receiver) {
FlowSummaryImpl::Private::summaryCallbackRange(c, receiver)
}
/**
 * A call found in the program source (as opposed to a synthesised summary call):
 * either a normal call or a special method call.
 */
class TExtractedDataFlowCall = TNormalCall or TSpecialCall;
/**
 * A call that is taken into account by the global data flow computation.
 *
 * This is the abstract interface; subclasses provide the implementations.
 */
abstract class DataFlowCall extends TDataFlowCall {
/** Gets a textual representation of this element. */
abstract string toString();
/** Gets the callable to which this call goes, if such exists. */
abstract DataFlowCallable getCallable();
/**
 * Gets the argument to this call that will be sent
 * to the `n`th parameter of the callable, if any.
 */
abstract Node getArg(int n);
/** Gets the control flow node representing this call, if any. */
abstract ControlFlowNode getNode();
/** Gets the enclosing callable of this call. */
abstract DataFlowCallable getEnclosingCallable();
/** Gets the location of this dataflow call. */
abstract Location getLocation();
/**
 * Holds if this element is at the specified location.
 * The location spans column `startcolumn` of line `startline` to
 * column `endcolumn` of line `endline` in file `filepath`.
 * For more information, see
 * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
 */
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
// delegates to the location reported by `getLocation`
this.getLocation().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
}
}
/**
 * A call found in the program source (as opposed to a synthesised call).
 *
 * The location of such a call is the location of its control flow node.
 */
abstract class ExtractedDataFlowCall extends DataFlowCall, TExtractedDataFlowCall {
// fixed here (`final`): the location is always taken from the control flow node
final override Location getLocation() { result = this.getNode().getLocation() }
// the following members remain abstract and must be provided by subclasses
abstract override DataFlowCallable getCallable();
abstract override Node getArg(int n);
abstract override ControlFlowNode getNode();
}
/**
 * A call associated with a `CallNode`.
 *
 * Argument routing (`getArg`) and target resolution (`getCallable`) are left to
 * subclasses.
 */
class NormalCall extends ExtractedDataFlowCall, TNormalCall {
// the control flow node of the call
CallNode call;
NormalCall() { this = TNormalCall(call) }
override string toString() { result = call.toString() }
abstract override Node getArg(int n);
override CallNode getNode() { result = call }
abstract override DataFlowCallable getCallable();
// the enclosing callable is any callable whose scope is the scope of the call's AST node
override DataFlowCallable getEnclosingCallable() { result.getScope() = call.getNode().getScope() }
}
/**
 * A call to a function.
 *
 * Calls to bound methods, classes, and special methods are not included: bound method
 * calls and class calls insert an argument for the explicit `self` parameter, and
 * special method calls have special argument passing.
 */
class FunctionCall extends NormalCall {
  DataFlowCallableValue callable;

  FunctionCall() {
    callable.getACall() = call and
    exists(FunctionValue fv | fv.getAFunctionCall() = call)
  }

  override DataFlowCallable getCallable() { callable = result }

  override Node getArg(int n) {
    // arguments map to parameters without any shift
    exists(CallableValue target | target = callable.getCallableValue() |
      result = getArg(call, TNoShift(), target, n)
    )
  }
}
/** A call to a lambda. Arguments are passed to parameters without any shift. */
class LambdaCall extends NormalCall {
  DataFlowLambda callable;

  LambdaCall() {
    callable = TLambda(_) and
    callable.getACall() = call
  }

  override DataFlowCallable getCallable() { callable = result }

  override Node getArg(int n) {
    exists(CallableValue target | target = callable.getCallableValue() |
      result = getArg(call, TNoShift(), target, n)
    )
  }
}
/**
 * A call to a bound method.
 * The node representing the instance is inserted as the argument for the `self` parameter.
 */
class MethodCall extends NormalCall {
  FunctionValue bm;

  MethodCall() { bm.getAMethodCall() = call }

  private CallableValue getCallableValue() { bm = result }

  override DataFlowCallable getCallable() { TCallableValue(this.getCallableValue()) = result }

  override Node getArg(int n) {
    // the object the method is looked up on is passed as `self`
    n = 0 and
    result = TCfgNode(call.getFunction().(AttrNode).getObject())
    or
    // the explicit arguments are shifted one position up
    n > 0 and
    result = getArg(call, TShiftOneUp(), this.getCallableValue(), n)
  }
}
/**
 * A call to a class.
 *
 * The call is directed to the `__init__` method of the class. The pre-update node for
 * the call is inserted as the argument for the `self` parameter, which makes the call
 * node the post-update node holding the value of the object after the constructor has
 * run.
 */
class ClassCall extends NormalCall {
  ClassValue c;

  ClassCall() {
    c.getACall() = call and
    not c.isAbsent()
  }

  private CallableValue getCallableValue() { result.getScope() = c.getScope().getInitMethod() }

  override DataFlowCallable getCallable() { TCallableValue(this.getCallableValue()) = result }

  override Node getArg(int n) {
    // the synthetic pre-update node of the call is passed as `self`
    n = 0 and
    result = TSyntheticPreUpdateNode(TCfgNode(call))
    or
    // the explicit arguments are shifted one position up
    n > 0 and
    result = getArg(call, TShiftOneUp(), this.getCallableValue(), n)
  }
}
/**
 * A call to a special method.
 *
 * Both the arguments and the target are taken from the underlying
 * `SpecialMethodCallNode`.
 */
class SpecialCall extends ExtractedDataFlowCall, TSpecialCall {
// the control flow node that gives rise to the special method call
SpecialMethodCallNode special;
SpecialCall() { this = TSpecialCall(special) }
override string toString() { result = special.toString() }
override Node getArg(int n) { result = TCfgNode(special.(SpecialMethod::Potential).getArg(n)) }
override ControlFlowNode getNode() { result = special }
override DataFlowCallable getCallable() {
result = TCallableValue(special.getResolvedSpecialMethod())
}
// the enclosing callable is any callable whose scope is the scope of the node's AST node
override DataFlowCallable getEnclosingCallable() {
result.getScope() = special.getNode().getScope()
}
}
/**
 * A call to a summarized callable, a `LibraryCallable`.
 *
 * We currently exclude all resolved calls. This means that a call to, say, `map`, which
 * is a `ClassCall`, cannot currently be given a summary.
 * We hope to lift this restriction in the future and include all potential calls to summaries
 * in this class.
 */
class LibraryCall extends NormalCall {
LibraryCall() {
// TODO: share this with `resolvedCall`
// Only calls that are not resolved to a callable value, a lambda, or a class are included.
not (
call = any(DataFlowCallableValue cv).getACall()
or
call = any(DataFlowLambda l).getACall()
or
// TODO: this should be covered by `DataFlowCallableValue`, but a `ClassValue` is not a `CallableValue`.
call = any(ClassValue c).getACall()
)
}
// TODO: Implement Python calling convention?
// For now, the argument at index `n` of the call is used directly.
override Node getArg(int n) { result = TCfgNode(call.getArg(n)) }
// We cannot refer to a `LibraryCallable` here,
// as that could in turn refer to type tracking.
// This call will be tied to a `LibraryCallable` via
// `viableCallable` when the global data flow is assembled.
override DataFlowCallable getCallable() { none() }
}
/**
 * A call synthesized inside a callable that has a flow summary.
 *
 * For example, in
 * ```python
 * map(lambda x: x + 1, [1, 2, 3])
 * ```
 *
 * there is a synthesized call to the lambda argument inside `map`.
 *
 * Such a call has no control flow node, no location, and no statically known callable
 * or arguments.
 */
class SummaryCall extends DataFlowCall, TSummaryCall {
  private FlowSummaryImpl::Public::SummarizedCallable summarized;
  private Node target;

  SummaryCall() { TSummaryCall(summarized, target) = this }

  /** Gets the data flow node that this call targets. */
  Node getReceiver() { target = result }

  override string toString() { result = "[summary] call to " + target + " in " + summarized }

  override DataFlowCallable getEnclosingCallable() { summarized = result.asLibraryCallable() }

  override DataFlowCallable getCallable() { none() }

  override Node getArg(int n) { none() }

  override ControlFlowNode getNode() { none() }

  override Location getLocation() { none() }
}
/**
 * The value of a parameter at function entry, viewed as a node in a data
 * flow graph.
 */
abstract class ParameterNodeImpl extends Node {
/** Gets the `Parameter` that this node corresponds to, if any. */
abstract Parameter getParameter();
/**
 * Holds if this node is the parameter of callable `c` at the
 * (zero-based) index `i`.
 */
abstract predicate isParameterOf(DataFlowCallable c, int i);
}
/**
 * A parameter node for a library callable with a flow summary.
 *
 * Such a node corresponds to no `Parameter` and reports an empty location.
 */
class SummaryParameterNode extends ParameterNodeImpl, TSummaryParameterNode {
  private FlowSummaryImpl::Public::SummarizedCallable summarized;
  private int index;

  SummaryParameterNode() { TSummaryParameterNode(summarized, index) = this }

  override Parameter getParameter() { none() }

  override predicate isParameterOf(DataFlowCallable c, int i) {
    i = index and
    c.asLibraryCallable() = summarized
  }

  override DataFlowCallable getEnclosingCallable() { summarized = result.asLibraryCallable() }

  override string toString() { result = "parameter " + index + " of " + summarized }

  // Hack to return "empty location"
  override predicate hasLocationInfo(
    string file, int startline, int startcolumn, int endline, int endcolumn
  ) {
    startline = 0 and
    startcolumn = 0 and
    endline = 0 and
    endcolumn = 0 and
    file = ""
  }
}
/**
 * A data-flow node used to model flow summaries, identified by a summarized callable
 * and a summary node state. It reports an empty location.
 */
class SummaryNode extends Node, TSummaryNode {
  private FlowSummaryImpl::Public::SummarizedCallable summarized;
  private FlowSummaryImpl::Private::SummaryNodeState nodeState;

  SummaryNode() { TSummaryNode(summarized, nodeState) = this }

  override DataFlowCallable getEnclosingCallable() { summarized = result.asLibraryCallable() }

  override string toString() { result = "[summary] " + nodeState + " in " + summarized }

  // Hack to return "empty location"
  override predicate hasLocationInfo(
    string file, int startline, int startcolumn, int endline, int endcolumn
  ) {
    startline = 0 and
    startcolumn = 0 and
    endline = 0 and
    endcolumn = 0 and
    file = ""
  }
}
/**
 * A summary node that acts as a return node, as determined by
 * `FlowSummaryImpl::Private::summaryReturnNode`.
 */
private class SummaryReturnNode extends SummaryNode, ReturnNode {
// the return kind that `summaryReturnNode` associates with this node
private ReturnKind rk;
SummaryReturnNode() { FlowSummaryImpl::Private::summaryReturnNode(this, rk) }
override ReturnKind getKind() { result = rk }
}
/**
 * A summary node that acts as an argument node, as determined by
 * `FlowSummaryImpl::Private::summaryArgumentNode`.
 */
private class SummaryArgumentNode extends SummaryNode, ArgumentNode {
SummaryArgumentNode() { FlowSummaryImpl::Private::summaryArgumentNode(_, this, _) }
// the call and position are exactly those reported by `summaryArgumentNode`
override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) {
FlowSummaryImpl::Private::summaryArgumentNode(call, this, pos)
}
}
/**
 * A summary node that acts as a post-update node, as determined by
 * `FlowSummaryImpl::Private::summaryPostUpdateNode`.
 */
private class SummaryPostUpdateNode extends SummaryNode, PostUpdateNode {
// the pre-update node that `summaryPostUpdateNode` associates with this node
private Node pre;
SummaryPostUpdateNode() { FlowSummaryImpl::Private::summaryPostUpdateNode(this, pre) }
override Node getPreUpdateNode() { result = pre }
}
/**
 * Gets a viable run-time target for the call `call`.
 *
 * This is either the callable the call itself resolves to, or a library callable that
 * lists the call's node among its calls.
 */
DataFlowCallable viableCallable(ExtractedDataFlowCall call) {
  // A call to a library callable with a flow summary
  // In this situation we can not resolve the callable from the call,
  // as that would make data flow depend on type tracking.
  // Instead we resolve the call from the summary.
  exists(LibraryCallable lib | lib.getACall().getNode() = call.getNode() |
    result = TLibraryCallable(lib)
  )
  or
  call.getCallable() = result
}
/** IPA type for `ReturnKind`. It has a single branch, for normal returns. */
private newtype TReturnKind = TNormalReturnKind()
/**
 * A return kind. A return kind describes how a value can be returned
 * from a callable. For Python, this is simply a method return.
 */
class ReturnKind extends TReturnKind {
/** Gets a textual representation of this element. */
string toString() { result = "return" }
}
/** A data flow node that represents a value returned by a callable. */
abstract class ReturnNode extends Node {
/**
 * Gets the kind of this return node.
 *
 * This default implementation places no restriction on the result, so it yields every
 * `ReturnKind`.
 */
ReturnKind getKind() { any() }
}
/**
 * A data flow node that represents a value returned by a callable in extracted code:
 * the flow node of the value of a `return` statement.
 */
class ExtractedReturnNode extends ReturnNode, CfgNode {
// See `TaintTrackingImplementation::returnFlowStep`
ExtractedReturnNode() { node = any(Return ret).getValue().getAFlowNode() }
// as in `ReturnNode`, every `ReturnKind` is a result
override ReturnKind getKind() { any() }
}
/**
 * A data-flow node that represents the output of a call.
 *
 * Subclasses specify which call, and for which return kind, the node is an output of.
 */
abstract class OutNode extends Node {
/** Gets the underlying call, where this node is a corresponding output of kind `kind`. */
abstract DataFlowCall getCall(ReturnKind kind);
}
/** A module collecting the concrete kinds of `OutNode`. */
private module OutNodes {
  /**
   * A data-flow node that reads a value returned directly by a callable: an expression
   * node sharing its control flow node with an extracted call.
   */
  class ExprOutNode extends OutNode, ExprNode {
    private DataFlowCall call;

    ExprOutNode() { this.getNode() = call.(ExtractedDataFlowCall).getNode() }

    override DataFlowCall getCall(ReturnKind kind) {
      // any return kind is accepted
      exists(kind) and
      call = result
    }
  }

  /**
   * A summary node that is the output of a call, as determined by
   * `FlowSummaryImpl::Private::summaryOutNode`.
   */
  private class SummaryOutNode extends SummaryNode, OutNode {
    SummaryOutNode() { FlowSummaryImpl::Private::summaryOutNode(_, this, _) }

    override DataFlowCall getCall(ReturnKind kind) {
      FlowSummaryImpl::Private::summaryOutNode(result, this, kind)
    }
  }
}
/**
 * Gets a node that can read the value returned from `call` with return kind `kind`,
 * that is, an `OutNode` that is an output of `call` for `kind`.
 */
OutNode getAnOutNode(DataFlowCall call, ReturnKind kind) { result.getCall(kind) = call }

View File

@@ -16,7 +16,7 @@ private import semmle.python.Frameworks
// make it more digestible.
import MatchUnpacking
import IterableUnpacking
import DataFlowDispatchPointsTo
import DataFlowDispatch
/** Gets the callable that encloses the data-flow node `n`. */
DataFlowCallable nodeGetEnclosingCallable(Node n) { n.getEnclosingCallable() = result }
@@ -39,162 +39,267 @@ predicate isArgumentNode(ArgumentNode arg, DataFlowCall c, ArgumentPosition pos)
//--------
/** Holds if the AST node underlying the control flow node `node` is an expression (`Expr`). */
predicate isExpressionNode(ControlFlowNode node) { node.getNode() instanceof Expr }
/** DEPRECATED: Alias for `SyntheticPreUpdateNode` */
deprecated module syntheticPreUpdateNode = SyntheticPreUpdateNode;
// =============================================================================
// SyntheticPreUpdateNode
// =============================================================================
class SyntheticPreUpdateNode extends Node, TSyntheticPreUpdateNode {
CallNode node;
/** A module collecting the different reasons for synthesising a pre-update node. */
module SyntheticPreUpdateNode {
class SyntheticPreUpdateNode extends Node, TSyntheticPreUpdateNode {
NeedsSyntheticPreUpdateNode post;
SyntheticPreUpdateNode() { this = TSyntheticPreUpdateNode(node) }
SyntheticPreUpdateNode() { this = TSyntheticPreUpdateNode(post) }
/** Gets the node for which this is a synthetic pre-update node. */
CfgNode getPostUpdateNode() { result.getNode() = node }
/** Gets the node for which this is a synthetic pre-update node. */
Node getPostUpdateNode() { result = post }
override string toString() { result = "[pre] " + node.toString() }
override string toString() { result = "[pre " + post.label() + "] " + post.toString() }
override Scope getScope() { result = node.getScope() }
override Scope getScope() { result = post.getScope() }
override Location getLocation() { result = post.getLocation() }
}
/**
 * A data flow node for which we should synthesise an associated pre-update node.
 *
 * Currently these are exactly the nodes given by `objectCreationNode()`.
 */
class NeedsSyntheticPreUpdateNode extends PostUpdateNode {
NeedsSyntheticPreUpdateNode() { this = objectCreationNode() }
// the pre-update node is the synthetic node that names this node as its post-update node
override Node getPreUpdateNode() { result.(SyntheticPreUpdateNode).getPostUpdateNode() = this }
/**
 * Gets the label for this kind of node. This will figure in the textual representation of the synthesized pre-update node.
 *
 * There is currently only one reason for needing a pre-update node, so we always use that as the label.
 */
string label() { result = "objCreate" }
}
/**
 * Gets a node for a call to a class, i.e. a node whose control flow node is that of
 * some `ClassCall`.
 *
 * Calls to constructors are treated as post-update nodes for the synthesized argument
 * that is mapped to the `self` parameter. That way, constructor calls represent the
 * value of the object after the constructor (currently only `__init__`) has run.
 */
CfgNode objectCreationNode() {
  exists(ClassCall classCall | classCall.getNode() = result.getNode())
}
override Location getLocation() { result = node.getLocation() }
}
import SyntheticPreUpdateNode
// =============================================================================
// *args (StarArgs) related
// =============================================================================
/**
* A (synthetic) data-flow parameter node to capture all positional arguments that
* should be passed to the `*args` parameter.
*
* To handle
* ```py
* def func(*args):
* for arg in args:
* sink(arg)
*
* func(source1, source2, ...)
* ```
*
* we add a synthetic parameter to `func` that accepts any positional argument at (or
* after) the index for the `*args` parameter. We add a store step (at any list index) to the real
* `*args` parameter. This means we can handle the code above, but if the code had done `sink(args[0])`
* we would (wrongly) add flow for `source2` as well.
*
* To solve this more precisely, we could add a synthetic argument with position `*args`
* that had store steps with the correct index (like we do for mapping keyword arguments to a
* `**kwargs` parameter). However, if a single call could go to 2 different
* targets with `*args` parameters at different positions, as in the example below, it's unclear what
* index to store `2` at. For the `foo` callable it should be 1, for the `bar` callable it should be 0.
* So this information would need to be encoded in the arguments of a `ArgumentPosition` branch, and
* one of the arguments would be which callable is the target. However, we cannot build `ArgumentPosition`
* branches based on the call-graph, so this strategy doesn't work.
*
* Another approach to solving it precisely is to add multiple synthetic parameters that have store steps
* to the real `*args` parameter. So for the example below, `foo` would need to have synthetic parameter
* nodes for indexes 1 and 2 (which would have store step for index 0 and 1 of the `*args` parameter),
* and `bar` would need it for indexes 1, 2, and 3. The question becomes how many synthetic parameters to
* create, which _must_ be `max(Call call, int i | exists(call.getArg(i)))`, since (again) we can't base
* this on the call-graph. And each function with a `*args` parameter would need this many extra synthetic
* nodes. My gut feeling is that this simple approach will be good enough, but if we need to get it more
* precise, it should be possible to do it like this.
*
* In PR review, @yoff suggested an alternative approach for more precise handling:
*
* - At the call site, all positional arguments are stored into a synthetic starArgs argument, always starting at index 0
* - This is sent to a synthetic star parameter
* - At the receiving end, we know the offset of a potential real star parameter, so we can define read steps accordingly: In foo, we read from the synthetic star parameter at index 1 and store to the real star parameter at index 0.
*
* ```py
* def foo(one, *args): ...
* def bar(*args): ...
*
* func = foo if <cond> else bar
* func(1, 2, 3)
* ```
*/
class SynthStarArgsElementParameterNode extends ParameterNodeImpl,
TSynthStarArgsElementParameterNode {
DataFlowCallable callable;
/** DEPRECATED: Alias for `SyntheticPostUpdateNode` */
deprecated module syntheticPostUpdateNode = SyntheticPostUpdateNode;
SynthStarArgsElementParameterNode() { this = TSynthStarArgsElementParameterNode(callable) }
/** A module collecting the different reasons for synthesising a post-update node. */
module SyntheticPostUpdateNode {
private import semmle.python.SpecialMethods
override string toString() { result = "SynthStarArgsElementParameterNode" }
/** A post-update node is synthesized for all nodes which satisfy `NeedsSyntheticPostUpdateNode`. */
class SyntheticPostUpdateNode extends PostUpdateNode, TSyntheticPostUpdateNode {
NeedsSyntheticPostUpdateNode pre;
override Scope getScope() { result = callable.getScope() }
SyntheticPostUpdateNode() { this = TSyntheticPostUpdateNode(pre) }
override Location getLocation() { result = callable.getLocation() }
override Node getPreUpdateNode() { result = pre }
override string toString() { result = "[post " + pre.label() + "] " + pre.toString() }
override Scope getScope() { result = pre.getScope() }
override Location getLocation() { result = pre.getLocation() }
}
/** A data flow node for which we should synthesise an associated post-update node. */
class NeedsSyntheticPostUpdateNode extends Node {
  NeedsSyntheticPostUpdateNode() {
    // A node qualifies if it is the pre-update node of an argument, a store, or a read.
    this = [argumentPreUpdateNode(), storePreUpdateNode(), readPreUpdateNode()]
  }

  /**
   * Gets the label for this kind of node. This will figure in the textual representation
   * of the synthesized post-update node.
   *
   * When several reasons apply, being an argument takes precedence over being the target
   * of a store, which in turn takes precedence over being read from.
   */
  string label() {
    this = argumentPreUpdateNode() and
    result = "arg"
    or
    not this = argumentPreUpdateNode() and
    (
      this = storePreUpdateNode() and
      result = "store"
      or
      not this = storePreUpdateNode() and
      result = "read"
    )
  }
}
/**
* Gets the pre-update node for this node.
*
* An argument might have its value changed as a result of a call.
* Certain arguments, such as implicit self arguments are already post-update nodes
* and should not have an extra node synthesised.
*/
Node argumentPreUpdateNode() {
  // Every argument of function calls, lambda calls and special calls.
  exists(FunctionCall call | result = call.getArg(_))
  or
  exists(LambdaCall call | result = call.getArg(_))
  or
  exists(SpecialCall call | result = call.getArg(_))
  or
  // Avoid argument 0 of method calls as those have read post-update nodes.
  exists(MethodCall call, int i | i > 0 and result = call.getArg(i))
  or
  // Avoid argument 0 of class calls as those have non-synthetic post-update nodes.
  exists(ClassCall call, int i | i > 0 and result = call.getArg(i))
  or
  // Any positional or keyword argument of a call that we have not been able to resolve.
  exists(CallNode call | not resolvedCall(call) |
    result.(CfgNode).getNode() = call.getArg(_)
    or
    result.(CfgNode).getNode() = call.getArgByName(_)
  )
}
/** Holds if `call` can be resolved as a normal call */
private predicate resolvedCall(CallNode call) {
  // `call` is a call to some `DataFlowCallableValue` ...
  exists(DataFlowCallableValue target | call = target.getACall())
  or
  // ... or a call to some `DataFlowLambda`.
  exists(DataFlowLambda target | call = target.getACall())
}
/** Gets the pre-update node associated with a store. This is used for when an object might have its value changed after a store. */
CfgNode storePreUpdateNode() {
  // The object `obj` of an attribute expression `obj.attr` that occurs in a store context.
  exists(Attribute attr | attr.getCtx() instanceof Store |
    result.getNode() = attr.getObject().getAFlowNode()
  )
}
/**
* Gets a node marking the state change of an object after a read.
*
* A reverse read happens when the result of a read is modified, e.g. in
* ```python
* l = [ mutable ]
* l[0].mutate()
* ```
* we may now have changed the content of `l`. To track this, there must be
* a postupdate node for `l`.
*/
CfgNode readPreUpdateNode() {
  // The dictionary argument is read from if the callable has parameters matching the keys.
  exists(Call call | result.getNode().getNode() = call.getKwargs())
  or
  // The object being subscripted.
  exists(SubscriptNode sub | result.getNode() = sub.getObject())
  or
  // The object `obj` of an attribute expression `obj.attr` that occurs in a load context.
  exists(Attribute attr | attr.getCtx() instanceof Load |
    result.getNode() = attr.getObject().getAFlowNode()
  )
}
override Parameter getParameter() { none() }
}
import SyntheticPostUpdateNode
/**
 * Holds if there is a store step from the synthetic `*args` element parameter node
 * `nodeFrom` of a callable to the parameter `nodeTo` that the same callable has at a
 * `*args` parameter position. The content `c` is not constrained, so the step holds for
 * every `ListElementContent`.
 */
predicate synthStarArgsElementParameterNodeStoreStep(
  SynthStarArgsElementParameterNode nodeFrom, ListElementContent c, ParameterNode nodeTo
) {
  exists(DataFlowCallable target, ParameterPosition starArgsPos |
    starArgsPos.isStarArgs(_) and
    nodeTo = target.getParameter(starArgsPos) and
    nodeFrom = TSynthStarArgsElementParameterNode(target)
  ) and
  exists(c) // suppress warning about unused parameter
}
// =============================================================================
// **kwargs (DictSplat) related
// =============================================================================
/**
* A (synthetic) data-flow node that represents all keyword arguments, as if they had
* been passed in a `**kwargs` argument.
*/
class SynthDictSplatArgumentNode extends Node, TSynthDictSplatArgumentNode {
/** The call node that this synthetic argument node is constructed from. */
CallNode node;
SynthDictSplatArgumentNode() { this = TSynthDictSplatArgumentNode(node) }
// Constant textual representation: the same string is used regardless of the call.
override string toString() { result = "SynthDictSplatArgumentNode" }
// Scope and location are taken from the underlying call node.
override Scope getScope() { result = node.getScope() }
override Location getLocation() { result = node.getLocation() }
}
/**
 * Holds if the argument `nodeFrom` is passed to a call at a keyword argument position,
 * and `nodeTo` is the synthetic `**kwargs` argument node for that same call. The value is
 * stored under the dictionary key `c` that equals the keyword name.
 */
private predicate synthDictSplatArgumentNodeStoreStep(
  ArgumentNode nodeFrom, DictionaryElementContent c, SynthDictSplatArgumentNode nodeTo
) {
  exists(CallNode call, ArgumentPosition apos |
    // the key of `c` is the name of the keyword argument
    apos.isKeyword(c.getKey()) and
    getCallArg(call, _, _, nodeFrom, apos) and
    nodeTo = TSynthDictSplatArgumentNode(call)
  )
}
/**
* Ensures that a `**kwargs` parameter will not contain elements with names of
* keyword parameters.
*
* For example, for the function below, it's not possible that the `kwargs` dictionary
* can contain an element with the name `a`, since that parameter can be given as a
* keyword argument.
*
* ```py
* def func(a, **kwargs):
* ...
* ```
*/
private predicate dictSplatParameterNodeClearStep(ParameterNode n, DictionaryElementContent c) {
  exists(DataFlowCallable callable |
    // `n` is the `**kwargs` parameter of `callable`: either the `getKwarg()` parameter of
    // the scope of a `DataFlowFunction`, or the summary parameter node at the dict-splat
    // position of `callable.asLibraryCallable()`.
    exists(ParameterPosition dictSplatPos | dictSplatPos.isDictSplat() |
      n.getParameter() = callable.(DataFlowFunction).getScope().getKwarg()
      or
      n = TSummaryParameterNode(callable.asLibraryCallable(), dictSplatPos)
    ) and
    // `callable` also has a parameter at the keyword position named by the key of `c`.
    exists(ParameterPosition keywordPos | keywordPos.isKeyword(c.getKey()) |
      exists(callable.getParameter(keywordPos))
    )
  )
}
/**
* A synthetic data-flow node to allow flow to keyword parameters from a `**kwargs` argument.
*
* Take the code snippet below as an example. Since the call only has a `**kwargs` argument,
* with a `**` argument position, we add this synthetic parameter node with `**` parameter position,
* and a read step to the `p1` parameter.
*
* ```py
* def foo(p1, p2): ...
*
* kwargs = {"p1": 42, "p2": 43}
* foo(**kwargs)
* ```
*
*
* Note that this will introduce a bit of redundancy in cases like
*
* ```py
* foo(p1=taint(1), p2=taint(2))
* ```
*
* where direct keyword matching is possible, since we construct a synthesized dict
* splat argument (`SynthDictSplatArgumentNode`) at the call site, which means that
* `taint(1)` will flow into `p1` both via normal keyword matching and via the synthesized
* nodes (and similarly for `p2`). However, this redundancy is OK since
* (a) it means that type-tracking through keyword arguments also works in most cases,
* (b) read/store steps can be avoided when direct keyword matching is possible, and
* hence access path limits are not a concern, and
* (c) since the synthesized nodes are hidden, the reported data-flow paths will be
* collapsed anyway.
*/
class SynthDictSplatParameterNode extends ParameterNodeImpl, TSynthDictSplatParameterNode {
/** The callable that this synthetic parameter node is constructed from. */
DataFlowCallable callable;
SynthDictSplatParameterNode() { this = TSynthDictSplatParameterNode(callable) }
// Constant textual representation: the same string is used regardless of the callable.
override string toString() { result = "SynthDictSplatParameterNode" }
// Scope and location are taken from the callable.
override Scope getScope() { result = callable.getScope() }
override Location getLocation() { result = callable.getLocation() }
// This node is synthetic, so it never has an associated `Parameter`.
override Parameter getParameter() { none() }
}
/**
* Flow step from the synthetic `**kwargs` parameter to the real `**kwargs` parameter.
* Due to a restriction in the dataflow library, we can only give one of them as result for
* `DataFlowCallable.getParameter`, so this is a workaround to ensure there is flow to
* _both_ of them.
*/
private predicate dictSplatParameterNodeFlowStep(
  ParameterNodeImpl nodeFrom, ParameterNodeImpl nodeTo
) {
  exists(DataFlowCallable target | nodeFrom = TSynthDictSplatParameterNode(target) |
    // `target` is a `DataFlowFunction`: step to the `getKwarg()` parameter of its scope.
    nodeTo.getParameter() = target.(DataFlowFunction).getScope().getKwarg()
    or
    // Otherwise, step to the summary parameter node at the dict-splat position of
    // `target.asLibraryCallable()`.
    exists(ParameterPosition dictSplatPos | dictSplatPos.isDictSplat() |
      nodeTo = TSummaryParameterNode(target.asLibraryCallable(), dictSplatPos)
    )
  )
}
/**
* Reads from the synthetic **kwargs parameter to each keyword parameter.
*/
predicate synthDictSplatParameterNodeReadStep(
  SynthDictSplatParameterNode nodeFrom, DictionaryElementContent c, ParameterNode nodeTo
) {
  exists(DataFlowCallable target, ParameterPosition keywordPos |
    // the key of `c` is the name of the keyword parameter being read into
    keywordPos.isKeyword(c.getKey()) and
    nodeTo = target.getParameter(keywordPos) and
    nodeFrom = TSynthDictSplatParameterNode(target)
  )
}
// =============================================================================
// PostUpdateNode
// =============================================================================
/**
 * The abstract implementation class for post-update nodes. Each concrete subclass
 * provides the corresponding pre-update node through `getPreUpdateNode`.
 */
abstract class PostUpdateNodeImpl extends Node {
/** Gets the node before the state update. */
abstract Node getPreUpdateNode();
}
/**
 * A synthetic post-update node constructed from a control-flow node. Its pre-update node
 * is the `CfgNode` for that same control-flow node.
 */
class SyntheticPostUpdateNode extends PostUpdateNodeImpl, TSyntheticPostUpdateNode {
/** The control-flow node that this post-update node is constructed from. */
ControlFlowNode node;
SyntheticPostUpdateNode() { this = TSyntheticPostUpdateNode(node) }
// The pre-update node is the `CfgNode` whose control-flow node is `node`.
override Node getPreUpdateNode() { result.(CfgNode).getNode() = node }
// Rendered as "[post] " followed by the textual representation of `node`.
override string toString() { result = "[post] " + node.toString() }
// Scope and location are taken from the underlying control-flow node.
override Scope getScope() { result = node.getScope() }
override Location getLocation() { result = node.getLocation() }
}
/**
 * A `CfgNode` that is itself a post-update node: some `SyntheticPreUpdateNode` reports
 * this node as its `getPostUpdateNode()`, and that synthetic node is the pre-update node.
 */
class NonSyntheticPostUpdateNode extends PostUpdateNodeImpl, CfgNode {
/** The synthetic pre-update node associated with this node. */
SyntheticPreUpdateNode pre;
NonSyntheticPostUpdateNode() { this = pre.getPostUpdateNode() }
override Node getPreUpdateNode() { result = pre }
}
class DataFlowExpr = Expr;
@@ -274,13 +379,6 @@ module EssaFlow {
iterableUnpackingFlowStep(nodeFrom, nodeTo)
or
matchFlowStep(nodeFrom, nodeTo)
or
// Overflow keyword argument
exists(CallNode call, CallableValue callable |
call = callable.getACall() and
nodeTo = TKwOverflowNode(call, callable) and
nodeFrom.asCfgNode() = call.getNode().getKwargs().getAFlowNode()
)
}
predicate useToNextUse(NameNode nodeFrom, NameNode nodeTo) {
@@ -305,6 +403,8 @@ predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
simpleLocalFlowStepForTypetracking(nodeFrom, nodeTo)
or
summaryFlowSteps(nodeFrom, nodeTo)
or
dictSplatParameterNodeFlowStep(nodeFrom, nodeTo)
}
/**
@@ -521,15 +621,15 @@ predicate storeStep(Node nodeFrom, Content c, Node nodeTo) {
or
attributeStoreStep(nodeFrom, c, nodeTo)
or
posOverflowStoreStep(nodeFrom, c, nodeTo)
or
kwOverflowStoreStep(nodeFrom, c, nodeTo)
or
matchStoreStep(nodeFrom, c, nodeTo)
or
any(Orm::AdditionalOrmSteps es).storeStep(nodeFrom, c, nodeTo)
or
FlowSummaryImpl::Private::Steps::summaryStoreStep(nodeFrom, c, nodeTo)
or
synthStarArgsElementParameterNodeStoreStep(nodeFrom, c, nodeTo)
or
synthDictSplatArgumentNodeStoreStep(nodeFrom, c, nodeTo)
}
/**
@@ -669,30 +769,6 @@ predicate attributeStoreStep(Node nodeFrom, AttributeContent c, PostUpdateNode n
)
}
/**
* Holds if `nodeFrom` flows into the synthesized positional overflow argument (`nodeTo`)
* at the position indicated by `c`.
*/
predicate posOverflowStoreStep(CfgNode nodeFrom, TupleElementContent c, Node nodeTo) {
exists(CallNode call, CallableValue callable, int n |
nodeFrom.asCfgNode() = getPositionalOverflowArg(call, callable, n) and
nodeTo = TPosOverflowNode(call, callable) and
c.getIndex() = n
)
}
/**
* Holds if `nodeFrom` flows into the synthesized keyword overflow argument (`nodeTo`)
* at the key indicated by `c`.
*/
predicate kwOverflowStoreStep(CfgNode nodeFrom, DictionaryElementContent c, Node nodeTo) {
exists(CallNode call, CallableValue callable, string key |
nodeFrom.asCfgNode() = getKeywordOverflowArg(call, callable, key) and
nodeTo = TKwOverflowNode(call, callable) and
c.getKey() = key
)
}
predicate defaultValueFlowStep(CfgNode nodeFrom, CfgNode nodeTo) {
exists(Function f, Parameter p, ParameterDefinition def |
// `getArgByName` supports, unlike `getAnArg`, keyword-only parameters
@@ -722,9 +798,9 @@ predicate readStep(Node nodeFrom, Content c, Node nodeTo) {
or
attributeReadStep(nodeFrom, c, nodeTo)
or
kwUnpackReadStep(nodeFrom, c, nodeTo)
or
FlowSummaryImpl::Private::Steps::summaryReadStep(nodeFrom, c, nodeTo)
or
synthDictSplatParameterNodeReadStep(nodeFrom, c, nodeTo)
}
/** Data flows from a sequence to a subscript of the sequence. */
@@ -814,43 +890,19 @@ predicate attributeReadStep(Node nodeFrom, AttributeContent c, AttrRead nodeTo)
nodeTo.accesses(nodeFrom, c.getAttribute())
}
/**
* Holds if `nodeFrom` is a dictionary argument being unpacked and `nodeTo` is the
* synthesized unpacked argument with the name indicated by `c`.
*/
predicate kwUnpackReadStep(CfgNode nodeFrom, DictionaryElementContent c, Node nodeTo) {
exists(CallNode call, string name |
nodeFrom.asCfgNode() = call.getNode().getKwargs().getAFlowNode() and
nodeTo = TKwUnpackedNode(call, _, name) and
name = c.getKey()
)
}
/**
* Clear content at key `name` of the synthesized dictionary `TKwOverflowNode(call, callable)`,
* whenever `call` unpacks `name`.
*/
predicate kwOverflowClearStep(Node n, Content c) {
exists(CallNode call, CallableValue callable, string name |
call_unpacks(call, _, callable, name, _) and
n = TKwOverflowNode(call, callable) and
c.(DictionaryElementContent).getKey() = name
)
}
/**
* Holds if values stored inside content `c` are cleared at node `n`. For example,
* any value stored inside `f` is cleared at the pre-update node associated with `x`
* in `x.f = newValue`.
*/
predicate clearsContent(Node n, Content c) {
kwOverflowClearStep(n, c)
or
matchClearStep(n, c)
or
attributeClearStep(n, c)
or
FlowSummaryImpl::Private::Steps::summaryClearsContent(n, c)
or
dictSplatParameterNodeClearStep(n, c)
}
/**
@@ -906,23 +958,24 @@ predicate nodeIsHidden(Node n) {
n instanceof SummaryNode
or
n instanceof SummaryParameterNode
or
n instanceof SynthStarArgsElementParameterNode
or
n instanceof SynthDictSplatArgumentNode
or
n instanceof SynthDictSplatParameterNode
}
class LambdaCallKind = Unit;
/** Holds if `creation` is an expression that creates a lambda of kind `kind` for `c`. */
predicate lambdaCreation(Node creation, LambdaCallKind kind, DataFlowCallable c) {
// lambda
// lambda and plain functions
kind = kind and
creation.asExpr() = c.(DataFlowLambda).getDefinition()
or
// normal function
exists(FunctionDef def |
def.defines(creation.asVar().getSourceVariable()) and
def.getDefinedFunction() = c.(DataFlowCallableValue).getCallableValue().getScope()
)
creation.asExpr() = c.(DataFlowPlainFunction).getScope().getDefinition()
or
// summarized function
exists(kind) and // avoid warning on unused 'kind'
exists(Call call |
creation.asExpr() = call.getAnArg() and
creation = c.(LibraryCallableValue).getACallback()

View File

@@ -31,10 +31,44 @@ newtype TNode =
or
node.getNode() instanceof Pattern
} or
/** A synthetic node representing the value of an object before a state change */
TSyntheticPreUpdateNode(NeedsSyntheticPreUpdateNode post) or
/** A synthetic node representing the value of an object after a state change. */
TSyntheticPostUpdateNode(NeedsSyntheticPostUpdateNode pre) or
/**
* A synthetic node representing the value of an object before a state change.
*
* For class calls we pass a synthetic self argument, so attribute writes in
* `__init__` are reflected on the resulting object (we need special logic for this
* since there is no `return` in `__init__`)
*/
// NOTE: since we can't rely on the call graph, but we want to have synthetic
// pre-update nodes for class calls, we end up getting synthetic pre-update nodes for
// ALL calls :|
TSyntheticPreUpdateNode(CallNode call) or
/**
* A synthetic node representing the value of an object after a state change.
* See QLDoc for `PostUpdateNode`.
*/
TSyntheticPostUpdateNode(ControlFlowNode node) {
exists(CallNode call |
node = call.getArg(_)
or
node = call.getArgByName(_)
or
// `self` argument when handling class instance calls (`__call__` special method)
node = call.getFunction()
)
or
node = any(AttrNode a).getObject()
or
node = any(SubscriptNode s).getObject()
or
// self parameter when used implicitly in `super()`
exists(Class cls, Function func, ParameterDefinition def |
func = cls.getAMethod() and
not isStaticmethod(func) and
// this matches what we do in ExtractedParameterNode
def.getDefiningNode() = node and
def.getParameter() = func.getArg(0)
)
} or
/** A node representing a global (module-level) variable in a specific module. */
TModuleVariableNode(Module m, GlobalVariable v) {
v.getScope() = m and
@@ -45,37 +79,6 @@ newtype TNode =
ImportStar::globalNameDefinedInModule(v.getId(), m)
)
} or
/**
* A node representing the overflow positional arguments to a call.
* That is, `call` contains more positional arguments than there are
* positional parameters in `callable`. The extra ones are passed as
* a tuple to a starred parameter; this synthetic node represents that tuple.
*/
TPosOverflowNode(CallNode call, CallableValue callable) {
exists(getPositionalOverflowArg(call, callable, _))
} or
/**
* A node representing the overflow keyword arguments to a call.
* That is, `call` contains keyword arguments for keys that do not have
* keyword parameters in `callable`. These extra ones are passed as
* a dictionary to a doubly starred parameter; this synthetic node
* represents that dictionary.
*/
TKwOverflowNode(CallNode call, CallableValue callable) {
exists(getKeywordOverflowArg(call, callable, _))
or
ArgumentPassing::connects(call, callable) and
exists(call.getNode().getKwargs()) and
callable.getScope().hasKwArg()
} or
/**
* A node representing an unpacked element of a dictionary argument.
* That is, `call` contains argument `**{"foo": bar}` which is passed
* to parameter `foo` of `callable`.
*/
TKwUnpackedNode(CallNode call, CallableValue callable, string name) {
call_unpacks(call, _, callable, name, _)
} or
/**
* A synthetic node representing that an iterable sequence flows to consumer.
*/
@@ -109,10 +112,18 @@ newtype TNode =
} or
TSummaryParameterNode(FlowSummaryImpl::Public::SummarizedCallable c, ParameterPosition pos) {
FlowSummaryImpl::Private::summaryParameterNodeRange(c, pos)
} or
/** A synthetic node to capture positional arguments that are passed to a `*args` parameter. */
TSynthStarArgsElementParameterNode(DataFlowCallable callable) {
exists(ParameterPosition ppos | ppos.isStarArgs(_) | exists(callable.getParameter(ppos)))
} or
/** A synthetic node to capture keyword arguments that are passed to a `**kwargs` parameter. */
TSynthDictSplatArgumentNode(CallNode call) { exists(call.getArgByName(_)) } or
/** A synthetic node to allow flow to keyword parameters from a `**kwargs` argument. */
TSynthDictSplatParameterNode(DataFlowCallable callable) {
exists(ParameterPosition ppos | ppos.isKeyword(_) | exists(callable.getParameter(ppos)))
}
class TParameterNode = TCfgNode or TSummaryParameterNode;
/** Helper for `Node::getEnclosingCallable`. */
private DataFlowCallable getCallableScope(Scope s) {
result.getScope() = s
@@ -288,7 +299,7 @@ ExprNode exprNode(DataFlowExpr e) { result.getNode().getNode() = e }
* The value of a parameter at function entry, viewed as a node in a data
* flow graph.
*/
class ParameterNode extends Node, TParameterNode instanceof ParameterNodeImpl {
class ParameterNode extends Node instanceof ParameterNodeImpl {
/** Gets the parameter corresponding to this node, if any. */
final Parameter getParameter() { result = super.getParameter() }
}
@@ -298,18 +309,8 @@ class ExtractedParameterNode extends ParameterNodeImpl, CfgNode {
//, LocalSourceNode {
ParameterDefinition def;
ExtractedParameterNode() {
node = def.getDefiningNode() and
// Disregard parameters that we cannot resolve
// TODO: Make this unnecessary
exists(DataFlowCallable c | node = c.getParameter(_))
}
ExtractedParameterNode() { node = def.getDefiningNode() }
override predicate isParameterOf(DataFlowCallable c, int i) { node = c.getParameter(i) }
override DataFlowCallable getEnclosingCallable() { this.isParameterOf(result, _) }
/** Gets the `Parameter` this `ParameterNode` represents. */
override Parameter getParameter() { result = def.getParameter() }
}
@@ -327,16 +328,24 @@ abstract class ArgumentNode extends Node {
final ExtractedDataFlowCall getCall() { this.argumentOf(result, _) }
}
/** A data flow node that represents a call argument found in the source code. */
/**
* A data flow node that represents a call argument found in the source code.
*/
class ExtractedArgumentNode extends ArgumentNode {
ExtractedArgumentNode() { this = any(ExtractedDataFlowCall c).getArg(_) }
final override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) {
this.extractedArgumentOf(call, pos)
ExtractedArgumentNode() {
// for resolved calls, we need to allow all argument nodes
getCallArg(_, _, _, this, _)
or
// for potential summaries we allow all normal call arguments
normalCallArg(_, this, _)
or
// and self arguments
this.asCfgNode() = any(CallNode c).getFunction().(AttrNode).getObject()
}
predicate extractedArgumentOf(ExtractedDataFlowCall call, ArgumentPosition pos) {
this = call.getArg(pos)
final override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) {
this = call.getArgument(pos) and
call instanceof ExtractedDataFlowCall
}
}
@@ -345,16 +354,17 @@ class ExtractedArgumentNode extends ArgumentNode {
* changed its state.
*
* This can be either the argument to a callable after the callable returns
* (which might have mutated the argument), or the qualifier of a field after
* an update to the field.
* (which might have mutated the argument), the qualifier of a field after
* an update to the field, or a container such as a list/dictionary after an element
* update.
*
* Nodes corresponding to AST elements, for example `ExprNode`s, usually refer
* to the value before the update with the exception of `ObjectCreationNode`s,
* to the value before the update with the exception of class calls,
* which represents the value _after_ the constructor has run.
*/
abstract class PostUpdateNode extends Node {
class PostUpdateNode extends Node instanceof PostUpdateNodeImpl {
/** Gets the node before the state update. */
abstract Node getPreUpdateNode();
Node getPreUpdateNode() { result = super.getPreUpdateNode() }
}
/**
@@ -448,70 +458,6 @@ private predicate resolved_import_star_module(Module m, string name, Node n) {
)
}
/**
* The node holding the extra positional arguments to a call. This node is passed as a tuple
* to the starred parameter of the callable.
*/
class PosOverflowNode extends Node, TPosOverflowNode {
CallNode call;
PosOverflowNode() { this = TPosOverflowNode(call, _) }
override string toString() { result = "PosOverflowNode for " + call.getNode().toString() }
override DataFlowCallable getEnclosingCallable() {
exists(Node node |
node = TCfgNode(call) and
result = node.getEnclosingCallable()
)
}
override Location getLocation() { result = call.getLocation() }
}
/**
* The node holding the extra keyword arguments to a call. This node is passed as a dictionary
* to the doubly starred parameter of the callable.
*/
class KwOverflowNode extends Node, TKwOverflowNode {
CallNode call;
KwOverflowNode() { this = TKwOverflowNode(call, _) }
override string toString() { result = "KwOverflowNode for " + call.getNode().toString() }
override DataFlowCallable getEnclosingCallable() {
exists(Node node |
node = TCfgNode(call) and
result = node.getEnclosingCallable()
)
}
override Location getLocation() { result = call.getLocation() }
}
/**
* The node representing the synthetic argument of a call that is unpacked from a dictionary
* argument.
*/
class KwUnpackedNode extends Node, TKwUnpackedNode {
CallNode call;
string name;
KwUnpackedNode() { this = TKwUnpackedNode(call, _, name) }
override string toString() { result = "KwUnpacked " + name }
override DataFlowCallable getEnclosingCallable() {
exists(Node node |
node = TCfgNode(call) and
result = node.getEnclosingCallable()
)
}
override Location getLocation() { result = call.getLocation() }
}
/**
* A synthetic node representing an iterable sequence. Used for changing content type
* for instance from a `ListElement` to a `TupleElement`, especially if the content is

View File

@@ -61,11 +61,11 @@ bindingset[c, rk]
DataFlowType getReturnType(SummarizedCallable c, ReturnKind rk) { any() }
/**
* Gets the type of the `i`th parameter in a synthesized call that targets a
* callback of type `t`.
* Gets the type of the parameter matching arguments at position `pos` in a
* synthesized call that targets a callback of type `t`.
*/
bindingset[t, i]
DataFlowType getCallbackParameterType(DataFlowType t, int i) { any() }
bindingset[t, pos]
DataFlowType getCallbackParameterType(DataFlowType t, ArgumentPosition pos) { any() }
/**
* Gets the return type of kind `rk` in a synthesized call that targets a
@@ -114,10 +114,34 @@ string getComponentSpecific(SummaryComponent sc) {
}
/** Gets the textual representation of a parameter position in the format used for flow summaries. */
string getParameterPosition(ParameterPosition pos) { result = pos.toString() }
string getParameterPosition(ParameterPosition pos) {
  // the `self` position
  result = "self" and
  pos.isSelf()
  or
  // a positional position is rendered as its index
  exists(int index | pos.isPositional(index) | result = index.toString())
  or
  // a keyword position is rendered as its name followed by a colon
  exists(string paramName | pos.isKeyword(paramName) | result = paramName + ":")
}
/** Gets the textual representation of an argument position in the format used for flow summaries. */
string getArgumentPosition(ArgumentPosition pos) { result = pos.toString() }
string getArgumentPosition(ArgumentPosition pos) {
  // the `self` position
  result = "self" and
  pos.isSelf()
  or
  // a positional position is rendered as its index
  exists(int index | pos.isPositional(index) | result = index.toString())
  or
  // a keyword position is rendered as its name followed by a colon
  exists(string argName | pos.isKeyword(argName) | result = argName + ":")
}
/** Holds if input specification component `c` needs a reference. */
predicate inputNeedsReferenceSpecific(string c) { none() }
@@ -197,29 +221,55 @@ module ParsePositions {
)
}
predicate isParsedParameterPosition(string c, int i) {
predicate isParsedPositionalParameterPosition(string c, int i) {
isParamBody(c) and
i = AccessPath::parseInt(c)
}
predicate isParsedArgumentPosition(string c, int i) {
predicate isParsedKeywordParameterPosition(string c, string paramName) {
isParamBody(c) and
c = paramName + ":"
}
predicate isParsedPositionalArgumentPosition(string c, int i) {
isArgBody(c) and
i = AccessPath::parseInt(c)
}
predicate isParsedKeywordArgumentPosition(string c, string argName) {
isArgBody(c) and
c = argName + ":"
}
}
/** Gets the argument position obtained by parsing `X` in `Parameter[X]`. */
ArgumentPosition parseParamBody(string s) {
exists(int i |
ParsePositions::isParsedParameterPosition(s, i) and
ParsePositions::isParsedPositionalParameterPosition(s, i) and
result.isPositional(i)
)
or
exists(string name |
ParsePositions::isParsedKeywordParameterPosition(s, name) and
result.isKeyword(name)
)
or
s = "self" and
result.isSelf()
}
/** Gets the parameter position obtained by parsing `X` in `Argument[X]`. */
ParameterPosition parseArgBody(string s) {
exists(int i |
ParsePositions::isParsedArgumentPosition(s, i) and
ParsePositions::isParsedPositionalArgumentPosition(s, i) and
result.isPositional(i)
)
or
exists(string name |
ParsePositions::isParsedKeywordArgumentPosition(s, name) and
result.isKeyword(name)
)
or
s = "self" and
result.isSelf()
}

View File

@@ -60,22 +60,6 @@ string getPossibleContentName() {
result = any(DataFlowPublic::AttrRef a).getAttributeName()
}
/**
* Gets a callable for the call where `nodeFrom` is used as the `i`'th argument.
*
* Helper predicate to avoid bad join order experienced in `callStep`.
* This happened when `isParameterOf` was joined _before_ `getCallable`.
*/
pragma[nomagic]
private DataFlowPrivate::DataFlowCallable getCallableForArgument(
DataFlowPublic::ExtractedArgumentNode nodeFrom, int i
) {
exists(DataFlowPrivate::ExtractedDataFlowCall call |
nodeFrom.extractedArgumentOf(call, i) and
result = call.getCallable()
)
}
/**
* Holds if `nodeFrom` steps to `nodeTo` by being passed as a parameter in a call.
*
@@ -83,11 +67,15 @@ private DataFlowPrivate::DataFlowCallable getCallableForArgument(
* recursion (or, at best, terrible performance), since identifying calls to library
* methods is done using API graphs (which uses type tracking).
*/
predicate callStep(DataFlowPublic::ArgumentNode nodeFrom, DataFlowPrivate::ParameterNodeImpl nodeTo) {
// TODO: Support special methods?
exists(DataFlowPrivate::DataFlowCallable callable, int i |
callable = getCallableForArgument(nodeFrom, i) and
nodeTo.isParameterOf(callable, i)
predicate callStep(DataFlowPublic::ArgumentNode nodeFrom, DataFlowPublic::ParameterNode nodeTo) {
exists(
DataFlowPrivate::DataFlowCall call, DataFlowPrivate::DataFlowCallable callable,
DataFlowPrivate::ArgumentPosition apos, DataFlowPrivate::ParameterPosition ppos
|
nodeFrom = call.getArgument(apos) and
nodeTo = callable.getParameter(ppos) and
DataFlowPrivate::parameterMatch(ppos, apos) and
callable = call.getCallable()
)
}

View File

@@ -1465,7 +1465,19 @@ private module StdlibPrivate {
t.start() and
result = openCall and
(
openCall instanceof OpenCall
openCall instanceof OpenCall and
// don't include the open call inside of Path.open in pathlib.py since
// the call to `path_obj.open` is covered by `PathLibOpenCall`.
not exists(Module mod, Class cls, Function func |
openCall.(OpenCall).asCfgNode().getScope() = func and
func.getName() = "open" and
func.getScope() = cls and
cls.getName() = "Path" and
cls.getScope() = mod and
mod.getName() = "pathlib" and
// do allow this call if we're analyzing pathlib.py as part of CPython though
not exists(mod.getFile().getRelativePath())
)
or
openCall instanceof PathLibOpenCall
)

View File

@@ -93,6 +93,8 @@ module Stages {
exists(PyFlow::DefinitionNode b)
or
exists(any(PyFlow::SequenceNode n).getElement(_))
or
exists(any(PyFlow::ControlFlowNode c).toString())
}
}
@@ -125,6 +127,45 @@ module Stages {
}
}
/**
* The points-to stage.
*
* Predicates opt in to this stage by calling `Stages::PointsTo::ref()` in their body,
* and `backref()` below refers back to those predicates.
*/
cached
module PointsTo {
/**
* Always holds.
* Ensures that a predicate is evaluated as part of the points-to stage.
*/
cached
predicate ref() { 1 = 1 }
// Imports of the modules whose predicates are referenced by `backref` below.
private import semmle.python.pointsto.Base as PointsToBase
private import semmle.python.types.Object as TypeObject
private import semmle.python.objects.TObject as TObject
private import semmle.python.objects.ObjectInternal as ObjectInternal
// have to alias since this module is also called PointsTo
private import semmle.python.pointsto.PointsTo as RealPointsTo
/**
* DONT USE!
* Contains references to each predicate that uses the above `ref` predicate.
*/
cached
predicate backref() {
1 = 1
or
PointsToBase::BaseFlow::scope_entry_value_transfer_from_earlier(_, _, _, _)
or
exists(TypeObject::Object a)
or
exists(TObject::TObject f)
or
exists(any(ObjectInternal::ObjectInternal o).toString())
or
RealPointsTo::AttributePointsTo::variableAttributePointsTo(_, _, _, _, _)
}
}
/**
* The `dataflow` stage.
*/
@@ -138,14 +179,9 @@ module Stages {
predicate ref() { 1 = 1 }
private import semmle.python.dataflow.new.internal.DataFlowPublic as DataFlowPublic
private import semmle.python.dataflow.new.internal.DataFlowDispatch as DataFlowDispatch
private import semmle.python.dataflow.new.internal.LocalSources as LocalSources
private import semmle.python.internal.Awaited as Awaited
private import semmle.python.pointsto.Base as PointsToBase
private import semmle.python.types.Object as TypeObject
private import semmle.python.objects.TObject as TObject
private import semmle.python.Flow as Flow
private import semmle.python.objects.ObjectInternal as ObjectInternal
private import semmle.python.pointsto.PointsTo as PointsTo
/**
* DONT USE!
@@ -159,21 +195,13 @@ module Stages {
or
any(DataFlowPublic::Node node).hasLocationInfo(_, _, _, _, _)
or
DataFlowDispatch::resolveCall(_, _, _)
or
DataFlowDispatch::getCallArg(_, _, _, _, _)
or
any(LocalSources::LocalSourceNode n).flowsTo(_)
or
exists(Awaited::awaited(_))
or
PointsToBase::BaseFlow::scope_entry_value_transfer_from_earlier(_, _, _, _)
or
exists(TypeObject::Object a)
or
exists(TObject::TObject f)
or
exists(any(Flow::ControlFlowNode c).toString())
or
exists(any(ObjectInternal::ObjectInternal o).toString())
or
PointsTo::AttributePointsTo::variableAttributePointsTo(_, _, _, _, _)
}
}
}

View File

@@ -216,7 +216,7 @@ class BuiltinOpaqueObjectInternal extends ObjectInternal, TBuiltinOpaqueObject {
override Builtin getBuiltin() { this = TBuiltinOpaqueObject(result) }
override string toString() {
Stages::DataFlow::ref() and
Stages::PointsTo::ref() and
result = this.getBuiltin().getClass().getName() + " object"
}

View File

@@ -318,7 +318,7 @@ module BaseFlow {
predicate scope_entry_value_transfer_from_earlier(
EssaVariable pred_var, Scope pred_scope, ScopeEntryDefinition succ_def, Scope succ_scope
) {
Stages::DataFlow::ref() and
Stages::PointsTo::ref() and
exists(SsaSourceVariable var |
essa_var_scope(var, pred_scope, pred_var) and
scope_entry_def_scope(var, succ_scope, succ_def)

View File

@@ -2566,7 +2566,7 @@ module AttributePointsTo {
predicate variableAttributePointsTo(
EssaVariable var, Context context, string name, ObjectInternal value, CfgOrigin origin
) {
Stages::DataFlow::ref() and
Stages::PointsTo::ref() and
definitionAttributePointsTo(var.getDefinition(), context, name, value, origin)
or
exists(EssaVariable prev |

View File

@@ -57,16 +57,43 @@ module CleartextLogging {
/** A piece of data printed, considered as a flow sink. */
class PrintedDataAsSink extends Sink {
PrintedDataAsSink() {
this = API::builtin("print").getACall().getArg(_)
or
// special handling of writing to `sys.stdout` and `sys.stderr`, which is
// essentially the same as printing
this =
API::moduleImport("sys")
.getMember(["stdout", "stderr"])
.getMember("write")
.getACall()
.getArg(0)
(
this = API::builtin("print").getACall().getArg(_)
or
// special handling of writing to `sys.stdout` and `sys.stderr`, which is
// essentially the same as printing
this =
API::moduleImport("sys")
.getMember(["stdout", "stderr"])
.getMember("write")
.getACall()
.getArg(0)
) and
// since some of the inner error handling implementation of the logging module is
// ```py
// sys.stderr.write('Message: %r\n'
// 'Arguments: %s\n' % (record.msg,
// record.args))
// ```
// any time we would report flow to such a logging sink, we can ALSO report
// the flow to the `record.msg`/`record.args` sinks -- obviously we
// don't want that.
//
// However, simply removing taint edges out of a sink is not a good enough solution,
// since we would only flag one of the `logging.info` calls in the following example
// due to use-use flow
// ```py
// logging.info(user_controlled)
// logging.info(user_controlled)
// ```
//
// The same approach is used in the command injection query.
not exists(Module loggingInit |
loggingInit.getName() = "logging.__init__" and
this.getScope().getEnclosingModule() = loggingInit and
// do allow this call if we're analyzing logging/__init__.py as part of CPython though
not exists(loggingInit.getFile().getRelativePath())
)
}
}
}

View File

@@ -50,7 +50,34 @@ module CleartextStorage {
/** The data written to a file, considered as a flow sink. */
class FileWriteDataAsSink extends Sink {
FileWriteDataAsSink() { this = any(FileSystemWriteAccess write).getADataNode() }
FileWriteDataAsSink() {
this = any(FileSystemWriteAccess write).getADataNode() and
// since implementation of Path.write_bytes in pathlib.py is like
// ```py
// def write_bytes(self, data):
// with self.open(mode='wb') as f:
// return f.write(data)
// ```
// any time we would report flow to the `Path.write_bytes` sink, we can ALSO report
// the flow from the `data` parameter to the `f.write` sink -- obviously we
// don't want that.
//
// However, simply removing taint edges out of a sink is not a good enough solution,
// since we would only flag one of the `p.write` calls in the following example
// due to use-use flow
// ```py
// p.write(user_controlled)
// p.write(user_controlled)
// ```
//
// The same approach is used in the command injection query.
not exists(Module pathlib |
pathlib.getName() = "pathlib" and
this.getScope().getEnclosingModule() = pathlib and
// do allow this call if we're analyzing pathlib.py as part of CPython though
not exists(pathlib.getFile().getRelativePath())
)
}
}
/** The data written to a cookie on a HTTP response, considered as a flow sink. */

View File

@@ -76,6 +76,9 @@ module CommandInjection {
// `subprocess`. See:
// https://github.com/python/cpython/blob/fa7ce080175f65d678a7d5756c94f82887fc9803/Lib/os.py#L974
// https://github.com/python/cpython/blob/fa7ce080175f65d678a7d5756c94f82887fc9803/Lib/subprocess.py#L341
//
// The same approach is used in the path-injection, cleartext-storage, and
// cleartext-logging queries.
not this.getScope().getEnclosingModule().getName() in [
"os", "subprocess", "platform", "popen2"
]

View File

@@ -58,7 +58,33 @@ module PathInjection {
* A file system access, considered as a flow sink.
*/
class FileSystemAccessAsSink extends Sink {
FileSystemAccessAsSink() { this = any(FileSystemAccess e).getAPathArgument() }
FileSystemAccessAsSink() {
this = any(FileSystemAccess e).getAPathArgument() and
// since implementation of Path.open in pathlib.py is like
// ```py
// def open(self, ...):
// return io.open(self, ...)
// ```
// any time we would report flow to the `path_obj.open` sink, we can ALSO report
// the flow from the `self` parameter to the `io.open` sink -- obviously we
// don't want that.
//
// However, simply removing taint edges out of a sink is not a good enough solution,
// since we would only flag one of the `p.open` calls in the following example
// due to use-use flow
// ```py
// p.open()
// p.open()
// ```
//
// The same approach is used in the command injection query.
not exists(Module pathlib |
pathlib.getName() = "pathlib" and
this.getScope().getEnclosingModule() = pathlib and
// do allow this call if we're analyzing pathlib.py as part of CPython though
not exists(pathlib.getFile().getRelativePath())
)
}
}
private import semmle.python.frameworks.data.ModelsAsData

View File

@@ -41,7 +41,32 @@ module StackTraceExposure {
/**
* A source of exception info, considered as a flow source.
*/
class ExceptionInfoAsSource extends Source instanceof ExceptionInfo { }
class ExceptionInfoAsSource extends Source instanceof ExceptionInfo {
ExceptionInfoAsSource() {
// since `traceback.format_exc()` in Python 2 is internally implemented as
// ```py
// def format_exc(limit=None):
// """Like print_exc() but return a string."""
// try:
// etype, value, tb = sys.exc_info()
// return ''.join(format_exception(etype, value, tb, limit))
// finally:
// etype = value = tb = None
// ```
// any time we would report flow from a call to `format_exc`, we can ALSO report
// the flow from the `sys.exc_info()` source -- obviously we don't want that.
//
//
// To avoid this, we use the same approach as for sinks in the command injection
// query (and others).
not exists(Module traceback |
traceback.getName() = "traceback" and
this.getScope().getEnclosingModule() = traceback and
// do allow this call if we're analyzing traceback.py as part of CPython though
not exists(traceback.getFile().getRelativePath())
)
}
}
/**
* The body of a HTTP response that will be returned from a server, considered as a flow sink.

View File

@@ -5,7 +5,7 @@ private import semmle.python.internal.CachedStages
cached
private predicate is_an_object(@py_object obj) {
Stages::DataFlow::ref() and
Stages::PointsTo::ref() and
/* CFG nodes for numeric literals, all of which have a @py_cobject for the value of that literal */
obj instanceof ControlFlowNode and
not obj.(ControlFlowNode).getNode() instanceof IntegerLiteral and
@@ -78,7 +78,7 @@ class Object extends @py_object {
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
Stages::DataFlow::ref() and
Stages::PointsTo::ref() and
this.hasOrigin() and
this.getOrigin()
.getLocation()
@@ -98,7 +98,7 @@ class Object extends @py_object {
/** Gets a textual representation of this element. */
cached
string toString() {
Stages::DataFlow::ref() and
Stages::PointsTo::ref() and
not this = undefinedVariable() and
not this = unknownValue() and
exists(ClassObject type | type.asBuiltin() = this.asBuiltin().getClass() |

View File

@@ -1,48 +1,36 @@
/**
* Definitions for reasoning about untrusted data used in APIs defined outside the
* database.
* user-written code.
*/
import python
private import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.Concepts
import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.ApiGraphs
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
private import semmle.python.dataflow.new.internal.TaintTrackingPrivate as TaintTrackingPrivate
private import semmle.python.types.Builtins
private import semmle.python.objects.ObjectInternal
// IMPLEMENTATION NOTES:
//
// This query uses *both* the new data-flow library, and points-to. Why? To get this
// finished quickly, so it can provide value for our field team and ourselves.
//
// In the long run, it should not need to use points-to for anything. Possibly this can
// even be helpful in figuring out what we need from TypeTrackers and the new data-flow
// library to be fully operational.
//
// At least it will allow us to provide a baseline comparison against a solution that
// doesn't use points-to at all
//
// There is a few dirty things we do here:
// 1. DataFlowPrivate: since `DataFlowCall` and `DataFlowCallable` are not exposed
// publicly, but we really want access to them.
// 2. points-to: we kinda need to do this since this is what powers `DataFlowCall` and
// `DataFlowCallable`
// 3. ObjectInternal: to provide better names for built-in functions and methods. If we
// really wanted to polish our points-to implementation, we could move this
// functionality into `BuiltinFunctionValue` and `BuiltinMethodValue`, but will
// probably require some more work: for this query, it's totally ok to use
// `builtins.open` for the code `open(f)`, but well, it requires a bit of thinking to
// figure out if that is desirable in general. I simply skipped a corner here!
// 4. TaintTrackingPrivate: Nothing else gives us access to `defaultAdditionalTaintStep` :(
/**
* A callable that is considered a "safe" external API from a security perspective.
* An external API that is considered "safe" from a security perspective.
*/
class SafeExternalApi extends Unit {
/** Gets a callable that is considered a "safe" external API from a security perspective. */
abstract DataFlowPrivate::DataFlowCallable getSafeCallable();
/**
* Gets a call that is considered "safe" from a security perspective. You can use API
* graphs to find calls to functions you know are safe, which works even when the
* external library isn't extracted.
*/
abstract DataFlow::CallCfgNode getSafeCall();
/**
* Gets a callable that is considered a "safe" external API from a security
* perspective.
*
* You probably want to define this as `none()` and use `getSafeCall` instead, since
* that can handle the external library not being extracted.
*/
DataFlowPrivate::DataFlowCallable getSafeCallable() { none() }
}
/** DEPRECATED: Alias for SafeExternalApi */
@@ -50,42 +38,127 @@ deprecated class SafeExternalAPI = SafeExternalApi;
/** The default set of "safe" external APIs. */
private class DefaultSafeExternalApi extends SafeExternalApi {
override DataFlowPrivate::DataFlowCallable getSafeCallable() {
exists(CallableValue cv | cv = result.getCallableValue() |
cv = Value::named(["len", "isinstance", "getattr", "hasattr"])
or
exists(ClassValue cls, string attr |
cls = Value::named("dict") and attr in ["__getitem__", "__setitem__"]
|
cls.lookup(attr) = cv
)
override DataFlow::CallCfgNode getSafeCall() {
result =
API::builtin([
"len", "enumerate", "isinstance", "getattr", "hasattr", "bool", "float", "int", "repr",
"str", "type"
]).getACall()
}
}
/**
* Gets a human readable representation of `node`.
*
* Note that this is only defined for API nodes that are allowed as external APIs,
* so `None.json.dumps` will for example not be allowed.
*/
string apiNodeToStringRepr(API::Node node) {
// base cases: the name of a builtin or of an imported module
node = API::builtin(result)
or
node = API::moduleImport(result)
or
// recursive case: extend the representation of a strictly shallower node `base`
exists(API::Node base, string basename |
base.getDepth() < node.getDepth() and
basename = apiNodeToStringRepr(base) and
// do not build names on top of these builtin constants (e.g. `None.json.dumps`)
not base = API::builtin(["None", "True", "False"])
|
// member access: `base.m`
exists(string m | node = base.getMember(m) | result = basename + "." + m)
or
// result of calling `base`: `base()`, unless a call to `base` is marked as safe
node = base.getReturn() and
result = basename + "()" and
not base.getACall() = any(SafeExternalApi safe).getSafeCall()
or
// awaiting a value does not change its representation
node = base.getAwaited() and
result = basename
)
}
/**
* Holds if `call` is resolved by the call graph, either through
* `DataFlowPrivate::resolveCall` or through `DataFlowPrivate::resolveClassCall`.
*/
predicate resolvedCall(CallNode call) {
DataFlowPrivate::resolveCall(call, _, _) or
DataFlowPrivate::resolveClassCall(call, _)
}
/**
* A call that is a candidate for being reported as a use of an external API.
*
* Both branches require the call itself to be in a file with a relative path, and
* exclude calls that a `SafeExternalApi` marks as safe.
*/
newtype TInterestingExternalApiCall =
/** A call that is not resolved to any target (see `resolvedCall`). */
TUnresolvedCall(DataFlow::CallCfgNode call) {
exists(call.getLocation().getFile().getRelativePath()) and
not resolvedCall(call.getNode()) and
not call = any(SafeExternalApi safe).getSafeCall()
} or
/** A call with a resolved callable whose file has no relative path. */
TResolvedCall(DataFlowPrivate::DataFlowCall call) {
exists(call.getLocation().getFile().getRelativePath()) and
exists(call.getCallable()) and
not call.getCallable() = any(SafeExternalApi safe).getSafeCallable() and
// ignore calls inside codebase, and ignore calls that are marked as safe. This is
// only needed as long as we extract dependencies. When we stop doing that, all
// targets of resolved calls will be from user-written code.
not exists(call.getCallable().getLocation().getFile().getRelativePath()) and
not exists(DataFlow::CallCfgNode callCfgNode | callCfgNode.getNode() = call.getNode() |
any(SafeExternalApi safe).getSafeCall() = callCfgNode
)
}
/**
* A call to an external API that is considered interesting, either an
* `UnresolvedCall` or a `ResolvedCall`.
*/
abstract class InterestingExternalApiCall extends TInterestingExternalApiCall {
/** Gets the argument at position `apos`, if any */
abstract DataFlow::Node getArgument(DataFlowPrivate::ArgumentPosition apos);
/** Gets a textual representation of this element. */
abstract string toString();
/**
* Gets a human-readable name for the external API.
*/
abstract string getApiName();
}
/** An interesting external API call that is not resolved to any target. */
class UnresolvedCall extends InterestingExternalApiCall, TUnresolvedCall {
DataFlow::CallCfgNode call;
UnresolvedCall() { this = TUnresolvedCall(call) }
// Only positional and keyword argument positions are mapped here; any other kind of
// argument position has no result.
override DataFlow::Node getArgument(DataFlowPrivate::ArgumentPosition apos) {
exists(int i | apos.isPositional(i) | result = call.getArg(i))
or
exists(string name | apos.isKeyword(name) | result = call.getArgByName(name))
}
override string toString() {
result = "ExternalAPI:UnresolvedCall: " + call.getNode().getNode().toString()
}
// The name is the string representation of an API node that has this call as one of
// its calls; there is no result if no such API node has a representation.
override string getApiName() {
exists(API::Node apiNode |
result = apiNodeToStringRepr(apiNode) and
apiNode.getACall() = call
)
}
}
/** An interesting external API call that is resolved to a callable. */
class ResolvedCall extends InterestingExternalApiCall, TResolvedCall {
DataFlowPrivate::DataFlowCall dfCall;
ResolvedCall() { this = TResolvedCall(dfCall) }
// Arguments are taken directly from the underlying data-flow call.
override DataFlow::Node getArgument(DataFlowPrivate::ArgumentPosition apos) {
result = dfCall.getArgument(apos)
}
override string toString() {
result = "ExternalAPI:ResolvedCall: " + dfCall.getNode().getNode().toString()
}
// The name is found via the `DataFlow::CallCfgNode` for the same underlying call node,
// using the string representation of an API node that has it as one of its calls.
override string getApiName() {
exists(DataFlow::CallCfgNode call, API::Node apiNode | dfCall.getNode() = call.getNode() |
result = apiNodeToStringRepr(apiNode) and
apiNode.getACall() = call
)
}
}
/** A node representing data being passed to an external API through a call. */
class ExternalApiDataNode extends DataFlow::Node {
DataFlowPrivate::DataFlowCallable callable;
int i;
ExternalApiDataNode() {
exists(DataFlowPrivate::DataFlowCall call |
exists(call.getLocation().getFile().getRelativePath())
|
callable = call.getCallable() and
// TODO: this ignores some complexity of keyword arguments (especially keyword-only args)
this = call.getArg(i)
) and
not any(SafeExternalApi safe).getSafeCallable() = callable and
exists(Value cv | cv = callable.getCallableValue() |
cv.isAbsent()
or
cv.isBuiltin()
or
cv.(CallableValue).getScope().getLocation().getFile().inStdlib()
or
not exists(cv.(CallableValue).getScope().getLocation().getFile().getRelativePath())
) and
exists(InterestingExternalApiCall call | this = call.getArgument(_)) and
// Not already modeled as a taint step
not TaintTrackingPrivate::defaultAdditionalTaintStep(this, _) and
// for `list.append(x)`, we have an additional taint step from x -> [post] list.
@@ -95,12 +168,6 @@ class ExternalApiDataNode extends DataFlow::Node {
TaintTrackingPrivate::defaultAdditionalTaintStep(_, post)
)
}
/** Gets the index for the parameter that will receive this untrusted data */
int getIndex() { result = i }
/** Gets the callable to which this argument is passed. */
DataFlowPrivate::DataFlowCallable getCallable() { result = callable }
}
/** DEPRECATED: Alias for ExternalApiDataNode */
@@ -133,19 +200,26 @@ deprecated class UntrustedExternalAPIDataNode = UntrustedExternalApiDataNode;
/** An external API which is used with untrusted data. */
private newtype TExternalApi =
/** An untrusted API method `m` where untrusted data is passed at `index`. */
TExternalApiParameter(DataFlowPrivate::DataFlowCallable callable, int index) {
exists(UntrustedExternalApiDataNode n |
callable = n.getCallable() and
index = n.getIndex()
MkExternalApi(string repr, DataFlowPrivate::ArgumentPosition apos) {
exists(UntrustedExternalApiDataNode ex, InterestingExternalApiCall call |
ex = call.getArgument(apos) and
repr = call.getApiName()
)
}
/** An external API which is used with untrusted data. */
class ExternalApiUsedWithUntrustedData extends TExternalApi {
/** An argument of an external API which is used with untrusted data. */
class ExternalApiUsedWithUntrustedData extends MkExternalApi {
string repr;
DataFlowPrivate::ArgumentPosition apos;
ExternalApiUsedWithUntrustedData() { this = MkExternalApi(repr, apos) }
/** Gets a possibly untrusted use of this external API. */
UntrustedExternalApiDataNode getUntrustedDataNode() {
this = TExternalApiParameter(result.getCallable(), result.getIndex())
exists(InterestingExternalApiCall call |
result = call.getArgument(apos) and
call.getApiName() = repr
)
}
/** Gets the number of untrusted sources used with this external API. */
@@ -154,63 +228,8 @@ class ExternalApiUsedWithUntrustedData extends TExternalApi {
}
/** Gets a textual representation of this element. */
string toString() {
exists(
DataFlowPrivate::DataFlowCallable callable, int index, string callableString,
string indexString
|
this = TExternalApiParameter(callable, index) and
indexString = "param " + index and
exists(CallableValue cv | cv = callable.getCallableValue() |
callableString =
cv.getScope().getEnclosingModule().getName() + "." + cv.getScope().getQualifiedName()
or
not exists(cv.getScope()) and
(
cv instanceof BuiltinFunctionValue and
callableString = pretty_builtin_function_value(cv)
or
cv instanceof BuiltinMethodValue and
callableString = pretty_builtin_method_value(cv)
or
not cv instanceof BuiltinFunctionValue and
not cv instanceof BuiltinMethodValue and
callableString = cv.toString()
)
) and
result = callableString + " [" + indexString + "]"
)
}
string toString() { result = repr + " [" + apos + "]" }
}
/** DEPRECATED: Alias for ExternalApiUsedWithUntrustedData */
deprecated class ExternalAPIUsedWithUntrustedData = ExternalApiUsedWithUntrustedData;
/** Gets the fully qualified name for the `BuiltinFunctionValue` bfv. */
private string pretty_builtin_function_value(BuiltinFunctionValue bfv) {
exists(Builtin b | b = bfv.(BuiltinFunctionObjectInternal).getBuiltin() |
result = prefix_with_module_if_found(b)
)
}
/** Gets the fully qualified name for the `BuiltinMethodValue` bmv. */
private string pretty_builtin_method_value(BuiltinMethodValue bmv) {
exists(Builtin b | b = bmv.(BuiltinMethodObjectInternal).getBuiltin() |
exists(Builtin cls | cls.isClass() and cls.getMember(b.getName()) = b |
result = prefix_with_module_if_found(cls) + "." + b.getName()
)
or
not exists(Builtin cls | cls.isClass() and cls.getMember(b.getName()) = b) and
result = b.getName()
)
}
/** Helper predicate that tries to adds module qualifier to `b`. Will succeed even if module not found. */
private string prefix_with_module_if_found(Builtin b) {
exists(Builtin mod | mod.isModule() and mod.getMember(b.getName()) = b |
result = mod.getName() + "." + b.getName()
)
or
not exists(Builtin mod | mod.isModule() and mod.getMember(b.getName()) = b) and
result = b.getName()
}

View File

@@ -11,11 +11,9 @@ relevant for security analysis of this application.</p>
<p>An external API is defined as a call to a method that is not defined in the source
code, and is not modeled as a taint step in the default taint library. External APIs may
be from the Python standard library or dependencies. The query will report the fully qualified name,
along with <code>[param x]</code>, where <code>x</code> indicates the position of
the parameter receiving the untrusted data. Note that for methods and
<code>classmethod</code>s, parameter 0 represents the class instance or class itself
respectively.</p>
be from the Python standard library or dependencies. The query will report the fully
qualified name, along with <code>[position index]</code> or <code>[keyword name]</code>,
to indicate the argument passing the untrusted data.</p>
<p>Note that an expected sink might not be included in the results, if it also defines a
taint step. This is the case for <code>pickle.loads</code> which is a sink for the
@@ -24,8 +22,6 @@ Unsafe Deserialization query, but is also a taint step for other queries.</p>
<p>Note: Compared to the Java version of this query, we currently do not give special
care to methods that are overridden in the source code.</p>
<p>Note: Currently this query will only report results for external packages that are extracted.</p>
</overview>
<recommendation>

View File

@@ -11,11 +11,9 @@ be modeled as either taint steps, or sinks for specific problems.</p>
<p>An external API is defined as a call to a method that is not defined in the source
code, and is not modeled as a taint step in the default taint library. External APIs may
be from the Python standard library or dependencies. The query will report the fully qualified name,
along with <code>[param x]</code>, where <code>x</code> indicates the position of
the parameter receiving the untrusted data. Note that for methods and
<code>classmethod</code>s, parameter 0 represents the class instance or class itself
respectively.</p>
be from the Python standard library or dependencies. The query will report the fully
qualified name, along with <code>[position index]</code> or <code>[keyword name]</code>,
to indicate the argument passing the untrusted data.</p>
<p>Note that an expected sink might not be included in the results, if it also defines a
taint step. This is the case for <code>pickle.loads</code> which is a sink for the
@@ -24,8 +22,6 @@ Unsafe Deserialization query, but is also a taint step for other queries.</p>
<p>Note: Compared to the Java version of this query, we currently do not give special
care to methods that are overridden in the source code.</p>
<p>Note: Currently this query will only report results for external packages that are extracted.</p>
</overview>
<recommendation>

View File

@@ -59,12 +59,11 @@ module InsecureRandomness {
*/
class RandomFnSink extends Sink {
RandomFnSink() {
exists(DataFlowCallable randomFn |
randomFn
.getName()
exists(Function func |
func.getName()
.regexpMatch("(?i).*(gen(erate)?|make|mk|create).*(nonce|salt|pepper|Password).*")
|
this.getEnclosingCallable() = randomFn
this.asExpr().getScope() = func
)
}
}

View File

@@ -1,9 +1,9 @@
/**
* @name Call graph
* @description An edge in the points-to call graph.
* @description An edge in the call graph.
* @kind problem
* @problem.severity recommendation
* @id py/meta/points-to-call-graph
* @id py/meta/call-graph
* @tags meta
* @precision very-low
*/
@@ -12,9 +12,9 @@ import python
import semmle.python.dataflow.new.internal.DataFlowPrivate
import meta.MetaMetrics
from DataFlowCall c, DataFlowCallableValue f
from DataFlowCall call, DataFlowCallable target
where
c.getCallable() = f and
not c.getLocation().getFile() instanceof IgnoredFile and
not f.getScope().getLocation().getFile() instanceof IgnoredFile
select c, "Call to $@", f.getScope(), f.toString()
target = viableCallable(call) and
not call.getLocation().getFile() instanceof IgnoredFile and
not target.getScope().getLocation().getFile() instanceof IgnoredFile
select call, "Call to $@", target.getScope(), target.toString()

View File

@@ -1,16 +1,55 @@
/**
* Provides predicates for measuring the quality of the call graph, that is,
* the number of calls that could be resolved to a callee.
* the number of calls that could be resolved to a target.
*/
import python
import meta.MetaMetrics
/** A possible target of a call: either a function or a class. */
newtype TTarget =
TFunction(Function func) or
TClass(Class cls)
/** A target of a call, that is, a function or a class. */
class Target extends TTarget {
/** Gets a textual representation of this element. */
abstract string toString();
/** Gets the location of this target. */
abstract Location getLocation();
/** Holds if this target is relevant, that is, its file has a relative path. */
predicate isRelevant() { exists(this.getLocation().getFile().getRelativePath()) }
}
/** A call target that is a function. */
class TargetFunction extends Target, TFunction {
Function func;
TargetFunction() { this = TFunction(func) }
override string toString() { result = func.toString() }
override Location getLocation() { result = func.getLocation() }
/** Gets the function that this target wraps. */
Function getFunction() { result = func }
}
/** A call target that is a class. */
class TargetClass extends Target, TClass {
Class cls;
TargetClass() { this = TClass(cls) }
override string toString() { result = cls.toString() }
override Location getLocation() { result = cls.getLocation() }
/** Gets the class that this target wraps. */
Class getClass() { result = cls }
}
/**
* A call that is (possibly) relevant for analysis quality.
* See `IgnoredFile` for details on what is excluded.
*/
class RelevantCall extends Call {
class RelevantCall extends CallNode {
RelevantCall() { not this.getLocation().getFile() instanceof IgnoredFile }
}
@@ -18,12 +57,16 @@ class RelevantCall extends Call {
module PointsToBasedCallGraph {
/** A call that can be resolved by points-to. */
class ResolvableCall extends RelevantCall {
Value callee;
Value targetValue;
ResolvableCall() { callee.getACall() = this.getAFlowNode() }
ResolvableCall() { targetValue.getACall() = this }
/** Gets a resolved callee of this call. */
Value getCallee() { result = callee }
/** Gets a resolved target of this call. */
Target getTarget() {
result.(TargetFunction).getFunction() = targetValue.(CallableValue).getScope()
or
result.(TargetClass).getClass() = targetValue.(ClassValue).getScope()
}
}
/** A call that cannot be resolved by points-to. */
@@ -32,34 +75,79 @@ module PointsToBasedCallGraph {
}
/**
* A call that can be resolved by points-to, where the resolved callee is relevant.
* Relevant callees include:
* - builtins
* - standard library
* A call that can be resolved by points-to, where the resolved target is relevant.
* Relevant targets include:
* - source code of the project
*/
class ResolvableCallRelevantCallee extends ResolvableCall {
ResolvableCallRelevantCallee() {
callee.isBuiltin()
or
exists(File file |
file = callee.(CallableValue).getScope().getLocation().getFile()
or
file = callee.(ClassValue).getScope().getLocation().getFile()
|
file.inStdlib()
or
// part of the source code of the project
exists(file.getRelativePath())
class ResolvableCallRelevantTarget extends ResolvableCall {
ResolvableCallRelevantTarget() {
exists(Target target | target = this.getTarget() |
exists(target.getLocation().getFile().getRelativePath())
)
}
}
/**
* A call that can be resolved by points-to, where the resolved callee is not considered relevant.
* See `ResolvableCallRelevantCallee` for the definition of relevance.
* A call that can be resolved by points-to, where the resolved target is not considered relevant.
* See `ResolvableCallRelevantTarget` for the definition of relevance.
*/
class ResolvableCallIrrelevantCallee extends ResolvableCall {
ResolvableCallIrrelevantCallee() { not this instanceof ResolvableCallRelevantCallee }
class ResolvableCallIrrelevantTarget extends ResolvableCall {
ResolvableCallIrrelevantTarget() { not this instanceof ResolvableCallRelevantTarget }
}
}
/** Provides classes for call-graph resolution by using type-tracking. */
module TypeTrackingBasedCallGraph {
private import semmle.python.dataflow.new.internal.DataFlowDispatch as TT
/** A call that can be resolved by type-tracking. */
class ResolvableCall extends RelevantCall {
ResolvableCall() {
exists(TT::TNormalCall(this, _, _))
or
TT::resolveClassCall(this, _)
}
/** Gets a resolved target of this call. */
Target getTarget() {
exists(TT::DataFlowCall call, TT::CallType ct, Function targetFunc |
call = TT::TNormalCall(this, targetFunc, ct) and
not ct instanceof TT::CallTypeClass and
targetFunc = result.(TargetFunction).getFunction()
)
or
// A `TT::TNormalCall` only exists when the call can be resolved to a function.
// Points-to just says the call goes directly to the class itself, whereas the
// type-tracking based call graph wants to resolve it to the constructor, which
// might not exist. So to do a proper comparison, we don't require the call to be
// resolved to a specific function.
TT::resolveClassCall(this, result.(TargetClass).getClass())
}
}
/** A call that cannot be resolved by type-tracking. */
class UnresolvableCall extends RelevantCall {
UnresolvableCall() { not this instanceof ResolvableCall }
}
/**
* A call that can be resolved by type-tracking, where the resolved target is relevant.
* Relevant targets include:
* - source code of the project
*/
class ResolvableCallRelevantTarget extends ResolvableCall {
ResolvableCallRelevantTarget() {
exists(Target target | target = this.getTarget() |
exists(target.getLocation().getFile().getRelativePath())
)
}
}
/**
* A call that can be resolved by type-tracking, where the resolved target is not considered relevant.
* See `ResolvableCallRelevantTarget` for the definition of relevance.
*/
class ResolvableCallIrrelevantTarget extends ResolvableCall {
ResolvableCallIrrelevantTarget() { not this instanceof ResolvableCallRelevantTarget }
}
}

View File

@@ -11,4 +11,4 @@
import python
import CallGraphQuality
select projectRoot(), count(PointsToBasedCallGraph::ResolvableCallRelevantCallee call)
select projectRoot(), count(PointsToBasedCallGraph::ResolvableCallRelevantTarget call)

View File

@@ -0,0 +1,17 @@
/**
 * @name Call graph edge from using type-tracking
 * @kind problem
 * @problem.severity recommendation
 * @id py/meta/type-tracking-call-graph
 * @tags meta
 * @precision very-low
 */

// Reports every edge of the type-tracking based call graph that goes to a relevant
// target. Unlike the `call-graph-new`/`-missing`/`-shared` queries, no comparison with
// the points-to based call graph is made, which is why the query name must not claim
// that the reported edges are "new".
import python
import CallGraphQuality

from CallNode call, Target target
where
  target.isRelevant() and
  call.(TypeTrackingBasedCallGraph::ResolvableCall).getTarget() = target
select call, "$@ to $@", call, "Call", target, target.toString()

View File

@@ -0,0 +1,18 @@
/**
 * @name Missing call graph edge from using type-tracking instead of points-to
 * @kind problem
 * @problem.severity recommendation
 * @id py/meta/call-graph-missing
 * @tags meta
 * @precision very-low
 */

import python
import CallGraphQuality

from CallNode call, Target target
where
  // points-to resolves `call` to `target` ...
  exists(PointsToBasedCallGraph::ResolvableCall pointsToCall |
    pointsToCall = call and target = pointsToCall.getTarget()
  ) and
  // ... but type-tracking has no such edge
  not exists(TypeTrackingBasedCallGraph::ResolvableCall typeTrackingCall |
    typeTrackingCall = call and target = typeTrackingCall.getTarget()
  ) and
  target.isRelevant()
select call, "MISSING: $@ to $@", call, "Call", target, target.toString()

View File

@@ -0,0 +1,18 @@
/**
 * @name New call graph edge from using type-tracking instead of points-to
 * @kind problem
 * @problem.severity recommendation
 * @id py/meta/call-graph-new
 * @tags meta
 * @precision very-low
 */

import python
import CallGraphQuality

from CallNode call, Target target
where
  // type-tracking resolves `call` to `target` ...
  exists(TypeTrackingBasedCallGraph::ResolvableCall typeTrackingCall |
    typeTrackingCall = call and target = typeTrackingCall.getTarget()
  ) and
  // ... whereas points-to has no such edge
  not exists(PointsToBasedCallGraph::ResolvableCall pointsToCall |
    pointsToCall = call and target = pointsToCall.getTarget()
  ) and
  target.isRelevant()
select call, "NEW: $@ to $@", call, "Call", target, target.toString()

View File

@@ -0,0 +1,19 @@
/**
 * @name New call graph edge from using type-tracking instead of points-to, that is ambiguous
 * @kind problem
 * @problem.severity recommendation
 * @id py/meta/call-graph-new-ambiguous
 * @tags meta
 * @precision very-low
 */

import python
import CallGraphQuality

from CallNode call, Target target
where
  exists(TypeTrackingBasedCallGraph::ResolvableCall typeTrackingCall |
    typeTrackingCall = call
  |
    // type-tracking resolves `call` to `target` ...
    target = typeTrackingCall.getTarget() and
    // ... and to at least one other target as well (any target counts here)
    count(typeTrackingCall.getTarget()) > 1
  ) and
  // points-to does not have the edge
  not exists(PointsToBasedCallGraph::ResolvableCall pointsToCall |
    pointsToCall = call and target = pointsToCall.getTarget()
  ) and
  target.isRelevant()
select call, "NEW: $@ to $@", call, "Call", target, target.toString()

View File

@@ -0,0 +1,35 @@
/**
 * @name Call graph edge overview from using type-tracking instead of points-to
 * @id py/meta/call-graph-overview
 * @precision very-low
 */

import python
import CallGraphQuality

/** Holds if points-to resolves `call` to the relevant target `target`. */
private predicate pointsToEdge(CallNode call, Target target) {
  target.isRelevant() and
  call.(PointsToBasedCallGraph::ResolvableCall).getTarget() = target
}

/** Holds if type-tracking resolves `call` to the relevant target `target`. */
private predicate typeTrackingEdge(CallNode call, Target target) {
  target.isRelevant() and
  call.(TypeTrackingBasedCallGraph::ResolvableCall).getTarget() = target
}

from string tag, int c
where
  // edges found by both analyses
  tag = "SHARED" and
  c =
    count(CallNode call, Target target |
      pointsToEdge(call, target) and typeTrackingEdge(call, target)
    )
  or
  // edges only found by type-tracking
  tag = "NEW" and
  c =
    count(CallNode call, Target target |
      typeTrackingEdge(call, target) and not pointsToEdge(call, target)
    )
  or
  // edges only found by points-to
  tag = "MISSING" and
  c =
    count(CallNode call, Target target |
      pointsToEdge(call, target) and not typeTrackingEdge(call, target)
    )
select tag, c

View File

@@ -0,0 +1,18 @@
/**
 * @name Shared call graph edge from using type-tracking instead of points-to
 * @kind problem
 * @problem.severity recommendation
 * @id py/meta/call-graph-shared
 * @tags meta
 * @precision very-low
 */

import python
import CallGraphQuality

from CallNode call, Target target
where
  // both analyses resolve `call` to the same `target`
  exists(TypeTrackingBasedCallGraph::ResolvableCall typeTrackingCall |
    typeTrackingCall = call and target = typeTrackingCall.getTarget()
  ) and
  exists(PointsToBasedCallGraph::ResolvableCall pointsToCall |
    pointsToCall = call and target = pointsToCall.getTarget()
  ) and
  target.isRelevant()
select call, "SHARED: $@ to $@", call, "Call", target, target.toString()

View File

@@ -0,0 +1,47 @@
import python
import semmle.python.dataflow.new.DataFlow::DataFlow
import semmle.python.dataflow.new.internal.DataFlowPrivate
import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
// TODO: this should be promoted to be a REAL consistency query by being placed in
// `python/ql/consistency-queries`. For now it resides here.
/**
* Exclusions applied to the data-flow consistency checks imported by this file.
* Each override below marks nodes that the corresponding check should not report
* (presumably the check named like the predicate without its `Exclude` suffix --
* confirm against `DataFlowImplConsistency`).
*/
private class MyConsistencyConfiguration extends ConsistencyConfiguration {
/** Holds if `n` is an argument passed at a star-args or dict-splat argument position. */
override predicate argHasPostUpdateExclude(ArgumentNode n) {
exists(ArgumentPosition apos | n.argumentOf(_, apos) and apos.isStarArgs(_))
or
exists(ArgumentPosition apos | n.argumentOf(_, apos) and apos.isDictSplat())
}
/** Holds if `n` is a synthetic dictionary-splat (`**kwargs`) parameter node. */
override predicate reverseReadExclude(Node n) {
// since `self`/`cls` parameters can be marked as implicit argument to `super()`,
// they will have PostUpdateNodes. We have a read-step from the synthetic `**kwargs`
// parameter, but dataflow-consistency queries should _not_ complain about there not
// being a post-update node for the synthetic `**kwargs` parameter.
n instanceof SynthDictSplatParameterNode
}
/**
* Holds if `p` is the dict-splat parameter at position `pos` of `c`, and `p` is located
* in a file without a relative path.
*/
override predicate uniqueParameterNodeAtPositionExclude(
DataFlowCallable c, ParameterPosition pos, Node p
) {
// TODO: This can be removed once we solve the overlap of dictionary splat parameters
c.getParameter(pos) = p and
pos.isDictSplat() and
not exists(p.getLocation().getFile().getRelativePath())
}
/**
* Holds if `p` is the parameter node at position `pos` of `c` for a parameter of the
* function `c.getScope()` that is both a positional argument (`getArg`) and reachable
* by name (`getArgByName`).
*/
override predicate uniqueParameterNodePositionExclude(
DataFlowCallable c, ParameterPosition pos, Node p
) {
// For normal parameters that can both be passed as positional arguments or keyword
// arguments, we currently have parameter positions for both cases.
//
// TODO: Figure out how bad breaking this consistency check is
exists(Function func, Parameter param |
c.getScope() = func and
p = parameterNode(param) and
c.getParameter(pos) = p and
param = func.getArg(_) and
param = func.getArgByName(_)
)
}
}

View File

@@ -26,29 +26,30 @@ abstract class RoutingTest extends InlineExpectationsTest {
element = fromNode.toString() and
(
tag = this.flowTag() and
if "\"" + tag + "\"" = this.fromValue(fromNode)
then value = ""
else value = this.fromValue(fromNode)
if "\"" + tag + "\"" = fromValue(fromNode) then value = "" else value = fromValue(fromNode)
or
// only have result for `func` tag if the function where `arg<n>` is used, is
// different from the function name of the call where `arg<n>` was specified as
// an argument
tag = "func" and
value = this.toFunc(toNode) and
not value = this.fromFunc(fromNode)
value = toFunc(toNode) and
not value = fromFunc(fromNode)
)
)
}
pragma[inline]
private string fromValue(DataFlow::Node fromNode) {
result = "\"" + prettyNode(fromNode).replaceAll("\"", "'") + "\""
}
pragma[inline]
private string fromFunc(DataFlow::ArgumentNode fromNode) {
result = fromNode.getCall().getNode().(CallNode).getFunction().getNode().(Name).getId()
}
pragma[inline]
private string toFunc(DataFlow::Node toNode) {
result = toNode.getEnclosingCallable().getCallableValue().getScope().getQualifiedName() // TODO: More robust pretty printing?
}
}
pragma[inline]
private string fromValue(DataFlow::Node fromNode) {
result = "\"" + prettyNode(fromNode).replaceAll("\"", "'") + "\""
}
pragma[inline]
private string fromFunc(DataFlow::ArgumentNode fromNode) {
result = fromNode.getCall().getNode().(CallNode).getFunction().getNode().(Name).getId()
}
pragma[inline]
private string toFunc(DataFlow::Node toNode) {
result = toNode.getEnclosingCallable().getQualifiedName()
}

View File

@@ -12,13 +12,10 @@ class UnresolvedCallExpectations extends InlineExpectationsTest {
override predicate hasActualResult(Location location, string element, string tag, string value) {
exists(location.getFile().getRelativePath()) and
exists(CallNode call |
not exists(DataFlowPrivate::DataFlowCall dfc | dfc.getNode() = call |
// For every `CallNode`, there is a `DataFlowCall` in the form of a `NormalCall`.
// It does not really count, as it has some abstract overrides. For instance, it does not
// define `getCallable`, so checking for the existence of this guarantees that we are in a
// properly resolved call.
exists(dfc.getCallable())
not exists(DataFlowPrivate::DataFlowCall dfc |
exists(dfc.getCallable()) and dfc.getNode() = call
) and
not DataFlowPrivate::resolveClassCall(call, _) and
not call = API::builtin(_).getACall().asCfgNode() and
location = call.getLocation() and
tag = "unresolved_call" and

View File

@@ -1,3 +1,4 @@
| file://:0:0:0:0 | parameter 0 of builtins.reversed |
| file://:0:0:0:0 | parameter position 0 of builtins.reversed |
| test.py:1:1:1:21 | SynthDictSplatParameterNode |
| test.py:1:19:1:19 | ControlFlowNode for x |
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |

View File

@@ -1 +1,2 @@
import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
import python
import experimental.dataflow.TestUtil.DataFlowConsistency

View File

@@ -1,4 +1,4 @@
| file://:0:0:0:0 | [summary] read: argument 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |

View File

@@ -1,4 +1,4 @@
| file://:0:0:0:0 | [summary] read: argument 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |

View File

@@ -1,8 +1,8 @@
| file://:0:0:0:0 | [summary] read: argument 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] read: argument 0.List element in builtins.reversed |
| file://:0:0:0:0 | [summary] read: argument 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed |
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
| file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed |
| file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
| file://:0:0:0:0 | parameter 0 of builtins.reversed | file://:0:0:0:0 | parameter 0 of builtins.reversed |
| file://:0:0:0:0 | parameter position 0 of builtins.reversed | file://:0:0:0:0 | parameter position 0 of builtins.reversed |
| test.py:0:0:0:0 | GSSA Variable __name__ | test.py:0:0:0:0 | GSSA Variable __name__ |
| test.py:0:0:0:0 | GSSA Variable __package__ | test.py:0:0:0:0 | GSSA Variable __package__ |
| test.py:0:0:0:0 | GSSA Variable b | test.py:0:0:0:0 | GSSA Variable b |
@@ -10,6 +10,7 @@
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
| test.py:1:1:1:21 | SynthDictSplatParameterNode | test.py:1:1:1:21 | SynthDictSplatParameterNode |
| test.py:1:5:1:17 | ControlFlowNode for obfuscated_id | test.py:1:5:1:17 | ControlFlowNode for obfuscated_id |
| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
@@ -52,8 +53,10 @@
| test.py:7:1:7:1 | ControlFlowNode for b | test.py:7:1:7:1 | ControlFlowNode for b |
| test.py:7:1:7:1 | GSSA Variable b | test.py:7:1:7:1 | GSSA Variable b |
| test.py:7:5:7:17 | ControlFlowNode for obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
| test.py:7:5:7:17 | [post] ControlFlowNode for obfuscated_id | test.py:7:5:7:17 | [post] ControlFlowNode for obfuscated_id |
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:1:7:1 | GSSA Variable b |
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:7:5:7:20 | GSSA Variable a | test.py:7:5:7:20 | GSSA Variable a |
| test.py:7:5:7:20 | [pre] ControlFlowNode for obfuscated_id() | test.py:7:5:7:20 | [pre] ControlFlowNode for obfuscated_id() |
| test.py:7:19:7:19 | ControlFlowNode for a | test.py:7:19:7:19 | ControlFlowNode for a |
| test.py:7:19:7:19 | [post arg] ControlFlowNode for a | test.py:7:19:7:19 | [post arg] ControlFlowNode for a |
| test.py:7:19:7:19 | [post] ControlFlowNode for a | test.py:7:19:7:19 | [post] ControlFlowNode for a |

View File

@@ -1,4 +1,4 @@
| file://:0:0:0:0 | [summary] read: argument 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:1:19:1:19 | SSA variable x |

View File

@@ -1,12 +1,13 @@
| file://:0:0:0:0 | [summary] read: argument 0.List element in builtins.reversed |
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed |
| file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed |
| file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
| file://:0:0:0:0 | parameter 0 of builtins.reversed |
| file://:0:0:0:0 | parameter position 0 of builtins.reversed |
| test.py:0:0:0:0 | GSSA Variable __name__ |
| test.py:0:0:0:0 | GSSA Variable __package__ |
| test.py:0:0:0:0 | GSSA Variable b |
| test.py:0:0:0:0 | SSA variable $ |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr |
| test.py:1:1:1:21 | SynthDictSplatParameterNode |
| test.py:1:5:1:17 | ControlFlowNode for obfuscated_id |
| test.py:1:5:1:17 | GSSA Variable obfuscated_id |
| test.py:1:19:1:19 | ControlFlowNode for x |
@@ -24,7 +25,9 @@
| test.py:7:1:7:1 | ControlFlowNode for b |
| test.py:7:1:7:1 | GSSA Variable b |
| test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
| test.py:7:5:7:17 | [post] ControlFlowNode for obfuscated_id |
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:7:5:7:20 | GSSA Variable a |
| test.py:7:5:7:20 | [pre] ControlFlowNode for obfuscated_id() |
| test.py:7:19:7:19 | ControlFlowNode for a |
| test.py:7:19:7:19 | [post arg] ControlFlowNode for a |
| test.py:7:19:7:19 | [post] ControlFlowNode for a |

View File

@@ -1,12 +1,13 @@
| file://:0:0:0:0 | [summary] read: argument 0.List element in builtins.reversed |
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed |
| file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed |
| file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
| file://:0:0:0:0 | parameter 0 of builtins.reversed |
| file://:0:0:0:0 | parameter position 0 of builtins.reversed |
| test.py:0:0:0:0 | GSSA Variable __name__ |
| test.py:0:0:0:0 | GSSA Variable __package__ |
| test.py:0:0:0:0 | GSSA Variable b |
| test.py:0:0:0:0 | SSA variable $ |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr |
| test.py:1:1:1:21 | SynthDictSplatParameterNode |
| test.py:1:5:1:17 | ControlFlowNode for obfuscated_id |
| test.py:1:5:1:17 | GSSA Variable obfuscated_id |
| test.py:1:19:1:19 | ControlFlowNode for x |
@@ -24,7 +25,9 @@
| test.py:7:1:7:1 | ControlFlowNode for b |
| test.py:7:1:7:1 | GSSA Variable b |
| test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
| test.py:7:5:7:17 | [post] ControlFlowNode for obfuscated_id |
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:7:5:7:20 | GSSA Variable a |
| test.py:7:5:7:20 | [pre] ControlFlowNode for obfuscated_id() |
| test.py:7:19:7:19 | ControlFlowNode for a |
| test.py:7:19:7:19 | [post arg] ControlFlowNode for a |
| test.py:7:19:7:19 | [post] ControlFlowNode for a |

View File

@@ -0,0 +1,13 @@
| test.py:32:8:32:23 | CrosstalkTestX() | test.py:9:5:9:23 | Function __init__ | test.py:32:8:32:23 | [pre] ControlFlowNode for CrosstalkTestX() | self |
| test.py:33:8:33:23 | CrosstalkTestY() | test.py:21:5:21:23 | Function __init__ | test.py:33:8:33:23 | [pre] ControlFlowNode for CrosstalkTestY() | self |
| test.py:43:1:43:8 | func() | test.py:13:5:13:26 | Function setx | test.py:36:12:36:15 | ControlFlowNode for objx | self |
| test.py:43:1:43:8 | func() | test.py:13:5:13:26 | Function setx | test.py:43:6:43:7 | ControlFlowNode for IntegerLiteral | position 0 |
| test.py:43:1:43:8 | func() | test.py:25:5:25:26 | Function sety | test.py:38:12:38:15 | ControlFlowNode for objy | self |
| test.py:43:1:43:8 | func() | test.py:25:5:25:26 | Function sety | test.py:43:6:43:7 | ControlFlowNode for IntegerLiteral | position 0 |
| test.py:51:1:51:8 | func() | test.py:16:5:16:30 | Function setvalue | test.py:47:12:47:15 | ControlFlowNode for objx | self |
| test.py:51:1:51:8 | func() | test.py:16:5:16:30 | Function setvalue | test.py:51:6:51:7 | ControlFlowNode for IntegerLiteral | position 0 |
| test.py:51:1:51:8 | func() | test.py:28:5:28:30 | Function setvalue | test.py:49:12:49:15 | ControlFlowNode for objy | self |
| test.py:51:1:51:8 | func() | test.py:28:5:28:30 | Function setvalue | test.py:51:6:51:7 | ControlFlowNode for IntegerLiteral | position 0 |
| test.py:70:1:70:8 | func() | test.py:58:5:58:33 | Function foo | test.py:63:12:63:12 | ControlFlowNode for a | self |
| test.py:70:1:70:8 | func() | test.py:58:5:58:33 | Function foo | test.py:70:6:70:7 | ControlFlowNode for IntegerLiteral | position 0 |
| test.py:70:1:70:8 | func() | test.py:58:5:58:33 | Function foo | test.py:70:6:70:7 | ControlFlowNode for IntegerLiteral | self |

View File

@@ -0,0 +1,9 @@
// Test query: for every data-flow call that has a callable, lists each argument node of
// the call together with the argument position it is passed at.
private import python
private import semmle.python.dataflow.new.internal.DataFlowPrivate
private import semmle.python.dataflow.new.internal.DataFlowPublic
from DataFlowCall call, DataFlowCallable callable, ArgumentNode arg, ArgumentPosition apos
where
// only calls for which `getCallable()` has a result contribute rows
callable = call.getCallable() and
arg = call.getArgument(apos)
select call, callable, arg, apos

View File

@@ -0,0 +1,24 @@
uniqueEnclosingCallable
uniqueType
uniqueNodeLocation
missingLocation
uniqueNodeToString
missingToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes
postIsNotPre
postHasUniquePre
uniquePostUpdate
postIsInSameCallable
reverseRead
argHasPostUpdate
postWithInFlow
viableImplInCallContextTooLarge
uniqueParameterNodeAtPosition
uniqueParameterNodePosition
uniqueContentApprox

View File

@@ -0,0 +1,2 @@
import python
import experimental.dataflow.TestUtil.DataFlowConsistency

View File

@@ -0,0 +1 @@
semmle-extractor-options: --max-import-depth=0

View File

@@ -0,0 +1,70 @@
import random
cond = random.randint(0,1) == 1
# ------------------------------------------------------------------------------
# Calling different bound-methods based on conditional
# ------------------------------------------------------------------------------
class CrosstalkTestX:
def __init__(self):
self.x = None
self.y = None
def setx(self, value):
self.x = value
def setvalue(self, value):
self.x = value
class CrosstalkTestY:
def __init__(self):
self.x = None
self.y = None
def sety(self ,value):
self.y = value
def setvalue(self, value):
self.y = value
objx = CrosstalkTestX()
objy = CrosstalkTestY()
if cond:
func = objx.setx
else:
func = objy.sety
# What we're testing for is whether both objects are passed as self to both methods,
# which is wrong.
func(42)
if cond:
func = objx.setvalue
else:
func = objy.setvalue
func(43)
# ------------------------------------------------------------------------------
# Calling methods in different ways
# ------------------------------------------------------------------------------
class A(object):
def foo(self, arg="Default"):
print("A.foo", self, arg)
a = A()
if cond:
func = a.foo # `44` is passed as arg
else:
func = A.foo # `44` is passed as self
# What we're testing for is whether a single call ends up having both `a` and `44`
# passed as self to `A.foo`, which is wrong.
func(44)

View File

@@ -1,6 +1,6 @@
import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.internal.DataFlowPrivate
import semmle.python.dataflow.new.internal.DataFlowDispatch as DataFlowDispatch
import TestUtilities.InlineExpectationsTest
private import semmle.python.dataflow.new.internal.PrintNode
@@ -8,26 +8,29 @@ class DataFlowCallTest extends InlineExpectationsTest {
DataFlowCallTest() { this = "DataFlowCallTest" }
override string getARelevantTag() {
result in ["call", "qlclass"]
result in ["call", "callType"]
or
result = "arg_" + [0 .. 10]
result = "arg[" + any(DataFlowDispatch::ArgumentPosition pos).toString() + "]"
}
override predicate hasActualResult(Location location, string element, string tag, string value) {
exists(location.getFile().getRelativePath()) and
exists(DataFlowCall call |
exists(DataFlowDispatch::DataFlowCall call |
location = call.getLocation() and
element = call.toString()
element = call.toString() and
exists(call.getCallable())
|
value = prettyExpr(call.getNode().getNode()) and
tag = "call"
or
value = call.getAQlClass() and
tag = "qlclass"
value = call.(DataFlowDispatch::NormalCall).getCallType().toString() and
tag = "callType"
or
exists(int n, DataFlow::Node arg | arg = call.getArg(n) |
exists(DataFlowDispatch::ArgumentPosition pos, DataFlow::Node arg |
arg = call.getArgument(pos)
|
value = prettyNodeForInlineTest(arg) and
tag = "arg_" + n
tag = "arg[" + pos + "]"
)
)
}

View File

@@ -1 +1,2 @@
import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
import python
import experimental.dataflow.TestUtil.DataFlowConsistency

View File

@@ -0,0 +1,16 @@
# We want to ensure that the __new__ method is considered a classmethod even though it
# doesn't have a decorator. This means that the `cls` parameter should be considered a
# reference to the class (or subclass), and not an instance of the class. We can detect
# this from looking at the arguments passed in the `cls.foo` call. If we see a `self`
# argument, this means it has correct behavior (because we're targeting a classmethod).
# If there is no `self` argument, this means we've only considered `cls` to be a class
# instance, since we don't want to pass that to the `cls` parameter of the classmethod `WithNewImpl.foo`.
class WithNewImpl(object):
    # `__new__` carries no decorator. The annotated call below checks that `cls` is
    # passed as the implicit `self` argument of the classmethod `foo`.
    def __new__(cls):
        # Report the method that is actually running; this message used to be a copy of
        # the one printed by `foo`, which made the two indistinguishable in the output.
        print("WithNewImpl.__new__")
        cls.foo() # $ call=cls.foo() callType=CallTypeClassMethod arg[self]=cls

    @classmethod
    def foo(cls):
        print("WithNewImpl.foo")

View File

@@ -14,24 +14,69 @@ class MyClass(object):
def my_method(self, arg):
pass
def other_method(self):
self.my_method(42) # $ arg[self]=self call=self.my_method(..) callType=CallTypeNormalMethod arg[position 0]=42
self.sm(42) # $ call=self.sm(..) callType=CallTypeStaticMethod arg[position 0]=42
@staticmethod
def sm(arg):
pass
@classmethod
def cm(cls, arg):
pass
@classmethod
def other_classmethod(cls):
cls.cm(42) # $ call=cls.cm(..) callType=CallTypeClassMethod arg[position 0]=42 arg[self]=cls
cls.sm(42) # $ call=cls.sm(..) callType=CallTypeStaticMethod arg[position 0]=42
def __getitem__(self, key):
pass
func(0) # $ call=func(..) arg[position 0]=0 callType=CallTypePlainFunction
func("foo") # $ call=func(..) qlclass=FunctionCall arg_0="foo"
x = MyClass(1) # $ call=MyClass(..) qlclass=ClassCall arg_0=[pre]MyClass(..) arg_1=1
x.my_method(2) # $ call=x.my_method(..) qlclass=MethodCall arg_0=x arg_1=2
x = MyClass(1) # $ call=MyClass(..) arg[self]=[pre]MyClass(..) arg[position 0]=1 callType=CallTypeClass
x.my_method(2) # $ call=x.my_method(..) arg[self]=x arg[position 0]=2 callType=CallTypeNormalMethod
mm = x.my_method
mm(2) # $ call=mm(..) qlclass=MethodCall arg_1=2 MISSING: arg_0=x
x[3] # $ call=x[3] qlclass=SpecialCall arg_0=x arg_1=3
mm(2) # $ call=mm(..) arg[self]=x arg[position 0]=2 callType=CallTypeNormalMethod
MyClass.my_method(x, 2) # $ call=MyClass.my_method(..) arg[position 0]=2 arg[self]=x callType=CallTypeMethodAsPlainFunction
x.sm(3) # $ call=x.sm(..) arg[position 0]=3 callType=CallTypeStaticMethod
MyClass.sm(3) # $ call=MyClass.sm(..) arg[position 0]=3 callType=CallTypeStaticMethod
x.cm(4) # $ call=x.cm(..) arg[position 0]=4 callType=CallTypeClassMethod
MyClass.cm(4) # $ call=MyClass.cm(..) arg[position 0]=4 arg[self]=MyClass callType=CallTypeClassMethod
x[5] # $ MISSING: call=x[5] arg[self]=x arg[position 0]=5
class Subclass(MyClass):
pass
y = Subclass(1) # $ call=Subclass(..) arg[self]=[pre]Subclass(..) arg[position 0]=1 callType=CallTypeClass
y.my_method(2) # $ call=y.my_method(..) arg[self]=y arg[position 0]=2 callType=CallTypeNormalMethod
mm = y.my_method
mm(2) # $ call=mm(..) arg[self]=y arg[position 0]=2 callType=CallTypeNormalMethod
Subclass.my_method(y, 2) # $ call=Subclass.my_method(..) arg[self]=y arg[position 0]=2 callType=CallTypeMethodAsPlainFunction
y.sm(3) # $ call=y.sm(..) arg[position 0]=3 callType=CallTypeStaticMethod
Subclass.sm(3) # $ call=Subclass.sm(..) arg[position 0]=3 callType=CallTypeStaticMethod
y.cm(4) # $ call=y.cm(..) arg[position 0]=4 callType=CallTypeClassMethod
Subclass.cm(4) # $ call=Subclass.cm(..) arg[self]=Subclass arg[position 0]=4 callType=CallTypeClassMethod
y[5] # $ MISSING: call=y[5] arg[self]=y arg[position 0]=5
try:
# These are included to show how we handle absent things with points-to where
# `mypkg.foo` is a `missing module variable`, but `mypkg.subpkg.bar` is compeltely
# ignored.
# These are included to show whether we have a DataFlowCall for things we can't
# resolve. Both are interesting since with points-to we used to have a DataFlowCall
# for _one_ but not the other
import mypkg
mypkg.foo(42) # $ call=mypkg.foo(..) qlclass=NormalCall
mypkg.subpkg.bar(43) # $ call=mypkg.subpkg.bar(..) qlclass=LibraryCall arg_0=43
mypkg.foo(42)
mypkg.subpkg.bar(43)
except:
pass

View File

@@ -1 +1,2 @@
import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
import python
import experimental.dataflow.TestUtil.DataFlowConsistency

View File

@@ -1 +0,0 @@
| test.py:239:27:239:27 | Parameter | There is no `ParameterNode` associated with this parameter. |

View File

@@ -38,6 +38,14 @@ SINK5 = functools.partial(SINK, expected=arg5)
SINK6 = functools.partial(SINK, expected=arg6)
SINK7 = functools.partial(SINK, expected=arg7)
SINK1_F = functools.partial(SINK_F, unexpected=arg1)
SINK2_F = functools.partial(SINK_F, unexpected=arg2)
SINK3_F = functools.partial(SINK_F, unexpected=arg3)
SINK4_F = functools.partial(SINK_F, unexpected=arg4)
SINK5_F = functools.partial(SINK_F, unexpected=arg5)
SINK6_F = functools.partial(SINK_F, unexpected=arg6)
SINK7_F = functools.partial(SINK_F, unexpected=arg7)
def argument_passing(
a,
@@ -64,12 +72,12 @@ def argument_passing(
@expects(7)
def test_argument_passing1():
argument_passing(arg1, *(arg2, arg3, arg4), e=arg5, **{"f": arg6, "g": arg7}) #$ arg1 arg7 func=argument_passing MISSING: arg2 arg3="arg3 arg4 arg5 arg6
argument_passing(arg1, *(arg2, arg3, arg4), e=arg5, **{"f": arg6, "g": arg7}) #$ arg1 arg5 arg6 arg7 func=argument_passing MISSING: arg2 arg3 arg4
@expects(7)
def test_argument_passing2():
argument_passing(arg1, arg2, arg3, f=arg6) #$ arg1 arg2 arg3
argument_passing(arg1, arg2, arg3, f=arg6) #$ arg1 arg2 arg3 arg6
def with_pos_only(a, /, b):
@@ -94,7 +102,7 @@ def with_multiple_kw_args(a, b, c):
def test_multiple_kw_args():
with_multiple_kw_args(b=arg2, c=arg3, a=arg1) #$ arg1 arg2 arg3
with_multiple_kw_args(arg1, *(arg2,), arg3) #$ arg1 MISSING: arg2 arg3
with_multiple_kw_args(arg1, **{"c": arg3}, b=arg2) #$ arg1 arg2 arg3 func=with_multiple_kw_args MISSING:
with_multiple_kw_args(arg1, **{"c": arg3}, b=arg2) #$ arg1 arg2 arg3 func=with_multiple_kw_args
with_multiple_kw_args(**{"b": arg2}, **{"c": arg3}, **{"a": arg1}) #$ arg1 arg2 arg3 func=with_multiple_kw_args
@@ -112,32 +120,6 @@ def test_default_arguments():
with_default_arguments(**{"c": arg3}) #$ arg3 func=with_default_arguments
# Nested constructor pattern
def grab_foo_bar_baz(foo, **kwargs):
SINK1(foo)
grab_bar_baz(**kwargs)
# It is not possible to pass `bar` into `kwargs`,
# since `bar` is a valid keyword argument.
def grab_bar_baz(bar, **kwargs):
SINK2(bar)
try:
SINK2_F(kwargs["bar"])
except:
print("OK")
grab_baz(**kwargs)
def grab_baz(baz):
SINK3(baz)
@expects(4)
def test_grab():
grab_foo_bar_baz(baz=arg3, bar=arg2, foo=arg1) #$ arg1 arg2 arg3 func=grab_bar_baz func=grab_baz
# All combinations
def test_pos_pos():
def with_pos(a):
@@ -183,7 +165,95 @@ def test_kw_kw():
def test_kw_doublestar():
def with_doublestar(**a):
SINK1(a["a"])
def with_doublestar(**kwargs):
SINK1(kwargs["a"])
with_doublestar(a=arg1) #$ arg1 func=test_kw_doublestar.with_doublestar
def only_kwargs(**kwargs):
SINK1(kwargs["a"])
SINK2(kwargs["b"])
# testing precise content tracking, that content from `a` or `b` does not end up here.
SINK3_F(kwargs["c"])
@expects(3)
def test_kwargs():
args = {"a": arg1, "b": arg2, "c": "safe"} # $ arg1 arg2 func=only_kwargs
only_kwargs(**args)
def mixed(a, **kwargs):
SINK1(a)
try:
SINK1_F(kwargs["a"]) # since 'a' is a keyword argument, it cannot be part of **kwargs
except KeyError:
print("OK")
SINK2(kwargs["b"])
# testing precise content tracking, that content from `a` or `b` does not end up here.
SINK3_F(kwargs["c"])
@expects(4*3)
def test_mixed():
mixed(a=arg1, b=arg2, c="safe") # $ arg1 arg2
args = {"b": arg2, "c": "safe"} # $ arg2 func=mixed
mixed(a=arg1, **args) # $ arg1
args = {"a": arg1, "b": arg2, "c": "safe"} # $ arg1 arg2 func=mixed
mixed(**args)
def starargs_only(*args):
SINK1(args[0])
SINK2(args[1])
SINK3_F(args[2])
@expects(5*3)
def test_only_starargs():
starargs_only(arg1, arg2, "safe") # $ arg1 arg2 SPURIOUS: bad2,bad3="arg1" bad1,bad3="arg2"
args = (arg2, "safe") # $ MISSING: arg2
starargs_only(arg1, *args) # $ arg1 SPURIOUS: bad2,bad3="arg1"
args = (arg1, arg2, "safe") # $ arg1 arg2 func=starargs_only
starargs_only(*args)
empty_args = ()
args = (arg1, arg2, "safe") # $ arg1 arg2 func=starargs_only
starargs_only(*args, *empty_args)
args = (arg1, arg2, "safe") # $ MISSING: arg1 arg2 func=starargs_only
starargs_only(*empty_args, *args)
def starargs_mixed(a, *args):
SINK1(a)
SINK2(args[0])
SINK3_F(args[1])
@expects(3*8)
def test_stararg_mixed():
starargs_mixed(arg1, arg2, "safe") # $ arg1 arg2 SPURIOUS: bad3="arg2"
args = (arg2, "safe") # $ arg2 func=starargs_mixed
starargs_mixed(arg1, *args) # $ arg1
args = (arg1, arg2, "safe")
starargs_mixed(*args) # $ MISSING: arg1 arg2
args = (arg1, arg2, "safe")
more_args = ("foo", "bar")
starargs_mixed(*args, *more_args) # $ MISSING: arg1 arg2
empty_args = ()
# adding first/last
starargs_mixed(arg1, arg2, "safe", *empty_args) # $ arg1 arg2 SPURIOUS: bad3="arg2"
starargs_mixed(*empty_args, arg1, arg2, "safe") # $ MISSING: arg1 arg2
# adding before/after *args
args = (arg2, "safe") # $ arg2 func=starargs_mixed
starargs_mixed(arg1, *args, *empty_args) # $ arg1
args = (arg2, "safe")
starargs_mixed(arg1, *empty_args, *args) # $ arg1 MISSING: arg2

View File

@@ -0,0 +1,63 @@
import sys
import os
import functools
sys.path.append(os.path.dirname(os.path.dirname((__file__))))
from testlib import expects
arg = "source"
arg1 = "source1"
arg2 = "source2"
arg3 = "source3"
arg4 = "source4"
arg5 = "source5"
arg6 = "source6"
arg7 = "source7"
def SINK_TEST(x, test):
    """Apply the predicate `test` to `x` and print the outcome.

    Prints "OK" when `test(x)` is truthy; otherwise prints
    "Unexpected flow" followed by the offending value.
    """
    if not test(x):
        print("Unexpected flow", x)
        return
    print("OK")
def SINK(x, expected=arg):
    """Check, via `SINK_TEST`, that `x` is equal to `expected`."""
    def equals_expected(value):
        return value == expected

    SINK_TEST(x, test=equals_expected)
def SINK_F(x, unexpected=arg):
    """Check, via `SINK_TEST`, that `x` is NOT equal to `unexpected`."""
    def differs_from_unexpected(value):
        return value != unexpected

    SINK_TEST(x, test=differs_from_unexpected)
SINK1 = functools.partial(SINK, expected=arg1)
SINK2 = functools.partial(SINK, expected=arg2)
SINK3 = functools.partial(SINK, expected=arg3)
SINK4 = functools.partial(SINK, expected=arg4)
SINK5 = functools.partial(SINK, expected=arg5)
SINK6 = functools.partial(SINK, expected=arg6)
SINK7 = functools.partial(SINK, expected=arg7)
SINK1_F = functools.partial(SINK_F, unexpected=arg1)
SINK2_F = functools.partial(SINK_F, unexpected=arg2)
SINK3_F = functools.partial(SINK_F, unexpected=arg3)
SINK4_F = functools.partial(SINK_F, unexpected=arg4)
SINK5_F = functools.partial(SINK_F, unexpected=arg5)
SINK6_F = functools.partial(SINK_F, unexpected=arg6)
SINK7_F = functools.partial(SINK_F, unexpected=arg7)
# Passes `arg` to `SINK1_F`, the sink that reports a value equal to `arg1` as unexpected flow.
def bad_argument_flow_func(arg):
SINK1_F(arg)
# Passes `arg` to `SINK2`, the sink that expects a value equal to `arg2`.
def bad_argument_flow_func2(arg):
SINK2(arg)
def test_bad_argument_flow():
# this is just a test to show that the testing setup works
# in the first one, we pretend we expected no flow for arg1
bad_argument_flow_func(arg1) # $ bad1="arg1"
# in the second one, we pretend we wanted flow for arg2 instead
bad_argument_flow_func2(arg1) # $ bad2="arg1"

View File

@@ -9,23 +9,64 @@ class Argument1RoutingTest extends RoutingTest {
override string flowTag() { result = "arg1" }
override predicate relevantFlow(DataFlow::Node source, DataFlow::Node sink) {
exists(Argument1RoutingConfig cfg | cfg.hasFlow(source, sink))
exists(Argument1ExtraRoutingConfig cfg | cfg.hasFlow(source, sink))
or
exists(ArgumentRoutingConfig cfg |
cfg.hasFlow(source, sink) and
cfg.isArgSource(source, 1) and
cfg.isGoodSink(sink, 1)
)
}
}
/**
* A configuration to check routing of arguments through magic methods.
*/
class Argument1RoutingConfig extends DataFlow::Configuration {
Argument1RoutingConfig() { this = "Argument1RoutingConfig" }
/** An argument number in the range 1 to 7 (inclusive), used to form names such as `arg<n>` and `SINK<n>`. */
class ArgNumber extends int {
ArgNumber() { this in [1 .. 7] }
}
/**
 * A single data-flow configuration covering all argument numbers: sources are
 * names `arg<n>`, and sinks are arguments to calls to `SINK<n>` or `SINK<n>_F`.
 */
class ArgumentRoutingConfig extends DataFlow::Configuration {
ArgumentRoutingConfig() { this = "ArgumentRoutingConfig" }
/** Holds if `node` is a name node whose identifier is `arg<argNumber>`. */
predicate isArgSource(DataFlow::Node node, ArgNumber argNumber) {
node.(DataFlow::CfgNode).getNode().(NameNode).getId() = "arg" + argNumber
}
override predicate isSource(DataFlow::Node node) { this.isArgSource(node, _) }
/** Holds if `node` is an argument of a call to a function named `SINK<argNumber>`. */
predicate isGoodSink(DataFlow::Node node, ArgNumber argNumber) {
exists(CallNode call |
call.getFunction().(NameNode).getId() = "SINK" + argNumber and
node.(DataFlow::CfgNode).getNode() = call.getAnArg()
)
}
/** Holds if `node` is an argument of a call to a function named `SINK<argNumber>_F`. */
predicate isBadSink(DataFlow::Node node, ArgNumber argNumber) {
exists(CallNode call |
call.getFunction().(NameNode).getId() = "SINK" + argNumber + "_F" and
node.(DataFlow::CfgNode).getNode() = call.getAnArg()
)
}
/** Holds if `node` is a good or a bad sink for any argument number. */
override predicate isSink(DataFlow::Node node) {
this.isGoodSink(node, _) or this.isBadSink(node, _)
}
/**
* We want to be able to use `arg` in a sequence of calls such as `func(kw=arg); ... ; func(arg)`.
* Use-use flow lets the argument to the first call reach the sink inside the second call,
* making it seem like we handle all cases even if we only handle the last one.
* We make the test honest by preventing flow into source nodes.
*/
override predicate isBarrierIn(DataFlow::Node node) { this.isSource(node) }
}
class Argument1ExtraRoutingConfig extends DataFlow::Configuration {
Argument1ExtraRoutingConfig() { this = "Argument1ExtraRoutingConfig" }
override predicate isSource(DataFlow::Node node) {
node.(DataFlow::CfgNode).getNode().(NameNode).getId() = "arg1"
or
exists(AssignmentDefinition def, DataFlowPrivate::DataFlowCall call |
exists(AssignmentDefinition def, DataFlow::CallCfgNode call |
def.getVariable() = node.(DataFlow::EssaNode).getVar() and
def.getValue() = call.getNode() and
call.getNode().(CallNode).getFunction().(NameNode).getId().matches("With\\_%")
call.getFunction().asCfgNode().(NameNode).getId().matches("With\\_%")
) and
node.(DataFlow::EssaNode).getVar().getName().matches("with\\_%")
}
@@ -46,57 +87,59 @@ class Argument1RoutingConfig extends DataFlow::Configuration {
override predicate isBarrierIn(DataFlow::Node node) { this.isSource(node) }
}
// for argument 2 and up, we use a generic approach. Change `maxNumArgs` below if we
// need to increase the maximum number of arguments.
private int maxNumArgs() { result = 7 }
class RestArgumentRoutingTest extends RoutingTest {
int argNumber;
ArgNumber argNumber;
RestArgumentRoutingTest() {
argNumber in [2 .. maxNumArgs()] and
argNumber > 1 and
this = "Argument" + argNumber + "RoutingTest"
}
override string flowTag() { result = "arg" + argNumber }
override predicate relevantFlow(DataFlow::Node source, DataFlow::Node sink) {
exists(RestArgumentRoutingConfig cfg | cfg.getArgNumber() = argNumber |
cfg.hasFlow(source, sink)
exists(ArgumentRoutingConfig cfg |
cfg.hasFlow(source, sink) and
cfg.isArgSource(source, argNumber) and
cfg.isGoodSink(sink, argNumber)
)
}
}
/**
* A configuration to check routing of arguments through magic methods.
*/
class RestArgumentRoutingConfig extends DataFlow::Configuration {
int argNumber;
/** Bad flow from `arg<n>` to `SINK<n>_F`. */
class BadArgumentRoutingTestSinkF extends RoutingTest {
ArgNumber argNumber;
RestArgumentRoutingConfig() {
argNumber in [2 .. maxNumArgs()] and
this = "Argument" + argNumber + "RoutingConfig"
}
BadArgumentRoutingTestSinkF() { this = "BadArgumentRoutingTestSinkF" + argNumber }
/** Gets the argument number this configuration is for. */
int getArgNumber() { result = argNumber }
override string flowTag() { result = "bad" + argNumber }
override predicate isSource(DataFlow::Node node) {
node.(DataFlow::CfgNode).getNode().(NameNode).getId() = "arg" + argNumber
}
override predicate isSink(DataFlow::Node node) {
exists(CallNode call |
call.getFunction().(NameNode).getId() = "SINK" + argNumber and
node.(DataFlow::CfgNode).getNode() = call.getAnArg()
override predicate relevantFlow(DataFlow::Node source, DataFlow::Node sink) {
exists(ArgumentRoutingConfig cfg |
cfg.hasFlow(source, sink) and
cfg.isArgSource(source, argNumber) and
cfg.isBadSink(sink, argNumber)
)
}
}
/** Bad flow from `arg<n>` to `SINK<m>` or `SINK<m>_F`, where `n != m`. */
class BadArgumentRoutingTestWrongSink extends RoutingTest {
ArgNumber argNumber;
BadArgumentRoutingTestWrongSink() { this = "BadArgumentRoutingTestWrongSink" + argNumber }
override string flowTag() { result = "bad" + argNumber }
override predicate relevantFlow(DataFlow::Node source, DataFlow::Node sink) {
exists(ArgumentRoutingConfig cfg |
cfg.hasFlow(source, sink) and
cfg.isArgSource(source, any(ArgNumber i | not i = argNumber)) and
(
cfg.isGoodSink(sink, argNumber)
or
cfg.isBadSink(sink, argNumber)
)
)
}
/**
* We want to be able to use `arg` in a sequence of calls such as `func(kw=arg); ... ; func(arg)`.
* Use-use flow lets the argument to the first call reach the sink inside the second call,
* making it seem like we handle all cases even if we only handle the last one.
* We make the test honest by preventing flow into source nodes.
*/
override predicate isBarrierIn(DataFlow::Node node) { this.isSource(node) }
}

View File

@@ -506,7 +506,7 @@ class With_call:
def test_call():
with_call = With_call() #$ MISSING: arg1="SSA variable with_call" func=With_call.__call__
with_call = With_call() #$ arg1="SSA variable with_call" func=With_call.__call__
with_call()
@@ -560,9 +560,9 @@ class With_getitem:
def test_getitem():
with_getitem = With_getitem() #$ arg1="SSA variable with_getitem" func=With_getitem.__getitem__
with_getitem = With_getitem() #$ MISSING: arg1="SSA variable with_getitem" func=With_getitem.__getitem__
arg2 = 0
with_getitem[arg2] #$ arg2 func=With_getitem.__getitem__
with_getitem[arg2] #$ MISSING: arg2 func=With_getitem.__getitem__
# object.__setitem__(self, key, value)
@@ -575,10 +575,10 @@ class With_setitem:
def test_setitem():
with_setitem = With_setitem() #$ arg1="SSA variable with_setitem" func=With_setitem.__setitem__
with_setitem = With_setitem() #$ MISSING: arg1="SSA variable with_setitem" func=With_setitem.__setitem__
arg2 = 0
arg3 = ""
with_setitem[arg2] = arg3 #$ arg2 arg3 func=With_setitem.__setitem__
with_setitem[arg2] = arg3 #$ MISSING: arg2 arg3 func=With_setitem.__setitem__
# object.__delitem__(self, key)
@@ -590,9 +590,9 @@ class With_delitem:
def test_delitem():
with_delitem = With_delitem() #$ arg1="SSA variable with_delitem" func=With_delitem.__delitem__
with_delitem = With_delitem() #$ MISSING: arg1="SSA variable with_delitem" func=With_delitem.__delitem__
arg2 = 0
del with_delitem[arg2] #$ arg2 func=With_delitem.__delitem__
del with_delitem[arg2] #$ MISSING: arg2 func=With_delitem.__delitem__
# object.__missing__(self, key)
@@ -662,9 +662,9 @@ class With_add:
def test_add():
with_add = With_add() #$ arg1="SSA variable with_add" func=With_add.__add__
with_add = With_add() #$ MISSING: arg1="SSA variable with_add" func=With_add.__add__
arg2 = with_add
with_add + arg2 #$ arg2 func=With_add.__add__
with_add + arg2 #$ MISSING: arg2 func=With_add.__add__
# object.__sub__(self, other)
@@ -677,9 +677,9 @@ class With_sub:
def test_sub():
with_sub = With_sub() #$ arg1="SSA variable with_sub" func=With_sub.__sub__
with_sub = With_sub() #$ MISSING: arg1="SSA variable with_sub" func=With_sub.__sub__
arg2 = with_sub
with_sub - arg2 #$ arg2 func=With_sub.__sub__
with_sub - arg2 #$ MISSING: arg2 func=With_sub.__sub__
# object.__mul__(self, other)
@@ -692,9 +692,9 @@ class With_mul:
def test_mul():
with_mul = With_mul() #$ arg1="SSA variable with_mul" func=With_mul.__mul__
with_mul = With_mul() #$ MISSING: arg1="SSA variable with_mul" func=With_mul.__mul__
arg2 = with_mul
with_mul * arg2 #$ arg2 func=With_mul.__mul__
with_mul * arg2 #$ MISSING: arg2 func=With_mul.__mul__
# object.__matmul__(self, other)
@@ -707,9 +707,9 @@ class With_matmul:
def test_matmul():
with_matmul = With_matmul() #$ arg1="SSA variable with_matmul" func=With_matmul.__matmul__
with_matmul = With_matmul() #$ MISSING: arg1="SSA variable with_matmul" func=With_matmul.__matmul__
arg2 = with_matmul
with_matmul @ arg2 #$ arg2 func=With_matmul.__matmul__
with_matmul @ arg2 #$ MISSING: arg2 func=With_matmul.__matmul__
# object.__truediv__(self, other)
@@ -722,9 +722,9 @@ class With_truediv:
def test_truediv():
with_truediv = With_truediv() #$ arg1="SSA variable with_truediv" func=With_truediv.__truediv__
with_truediv = With_truediv() #$ MISSING: arg1="SSA variable with_truediv" func=With_truediv.__truediv__
arg2 = with_truediv
with_truediv / arg2 #$ arg2 func=With_truediv.__truediv__
with_truediv / arg2 #$ MISSING: arg2 func=With_truediv.__truediv__
# object.__floordiv__(self, other)
@@ -737,9 +737,9 @@ class With_floordiv:
def test_floordiv():
with_floordiv = With_floordiv() #$ arg1="SSA variable with_floordiv" func=With_floordiv.__floordiv__
with_floordiv = With_floordiv() #$ MISSING: arg1="SSA variable with_floordiv" func=With_floordiv.__floordiv__
arg2 = with_floordiv
with_floordiv // arg2 #$ arg2 func=With_floordiv.__floordiv__
with_floordiv // arg2 #$ MISSING: arg2 func=With_floordiv.__floordiv__
# object.__mod__(self, other)
@@ -752,9 +752,9 @@ class With_mod:
def test_mod():
with_mod = With_mod() #$ arg1="SSA variable with_mod" func=With_mod.__mod__
with_mod = With_mod() #$ MISSING: arg1="SSA variable with_mod" func=With_mod.__mod__
arg2 = with_mod
with_mod % arg2 #$ arg2 func=With_mod.__mod__
with_mod % arg2 #$ MISSING: arg2 func=With_mod.__mod__
# object.__divmod__(self, other)
@@ -788,9 +788,9 @@ def test_pow():
def test_pow_op():
with_pow = With_pow() #$ arg1="SSA variable with_pow" func=With_pow.__pow__
with_pow = With_pow() #$ MISSING: arg1="SSA variable with_pow" func=With_pow.__pow__
arg2 = with_pow
with_pow ** arg2 #$ arg2 func=With_pow.__pow__
with_pow ** arg2 #$ MISSING: arg2 func=With_pow.__pow__
# object.__lshift__(self, other)
@@ -803,9 +803,9 @@ class With_lshift:
def test_lshift():
with_lshift = With_lshift() #$ arg1="SSA variable with_lshift" func=With_lshift.__lshift__
with_lshift = With_lshift() #$ MISSING: arg1="SSA variable with_lshift" func=With_lshift.__lshift__
arg2 = with_lshift
with_lshift << arg2 #$ arg2 func=With_lshift.__lshift__
with_lshift << arg2 #$ MISSING: arg2 func=With_lshift.__lshift__
# object.__rshift__(self, other)
@@ -818,9 +818,9 @@ class With_rshift:
def test_rshift():
with_rshift = With_rshift() #$ arg1="SSA variable with_rshift" func=With_rshift.__rshift__
with_rshift = With_rshift() #$ MISSING: arg1="SSA variable with_rshift" func=With_rshift.__rshift__
arg2 = with_rshift
with_rshift >> arg2 #$ arg2 func=With_rshift.__rshift__
with_rshift >> arg2 #$ MISSING: arg2 func=With_rshift.__rshift__
# object.__and__(self, other)
@@ -833,9 +833,9 @@ class With_and:
def test_and():
with_and = With_and() #$ arg1="SSA variable with_and" func=With_and.__and__
with_and = With_and() #$ MISSING: arg1="SSA variable with_and" func=With_and.__and__
arg2 = with_and
with_and & arg2 #$ arg2 func=With_and.__and__
with_and & arg2 #$ MISSING: arg2 func=With_and.__and__
# object.__xor__(self, other)
@@ -848,9 +848,9 @@ class With_xor:
def test_xor():
with_xor = With_xor() #$ arg1="SSA variable with_xor" func=With_xor.__xor__
with_xor = With_xor() #$ MISSING: arg1="SSA variable with_xor" func=With_xor.__xor__
arg2 = with_xor
with_xor ^ arg2 #$ arg2 func=With_xor.__xor__
with_xor ^ arg2 #$ MISSING: arg2 func=With_xor.__xor__
# object.__or__(self, other)
@@ -863,9 +863,9 @@ class With_or:
def test_or():
with_or = With_or() #$ arg1="SSA variable with_or" func=With_or.__or__
with_or = With_or() #$ MISSING: arg1="SSA variable with_or" func=With_or.__or__
arg2 = with_or
with_or | arg2 #$ arg2 func=With_or.__or__
with_or | arg2 #$ MISSING: arg2 func=With_or.__or__
# object.__radd__(self, other)

View File

@@ -1 +1,2 @@
import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
import python
import experimental.dataflow.TestUtil.DataFlowConsistency

View File

@@ -124,6 +124,40 @@ def test_staticmethod_call():
C.staticmethod(arg1, arg2) # $ func=C.staticmethod arg1 arg2
# subclass
class SC(C):
pass
sc = SC()
@expects(6)
def test_subclass_method_call():
func_obj = sc.method.__func__
sc.method(arg1, arg2) # $ func=C.method arg1 arg2
SC.method(sc, arg1, arg2) # $ func=C.method arg1 arg2
func_obj(sc, arg1, arg2) # $ MISSING: func=C.method arg1 arg2
@expects(6)
def test_subclass_classmethod_call():
c_func_obj = SC.classmethod.__func__
sc.classmethod(arg1, arg2) # $ func=C.classmethod arg1 arg2
SC.classmethod(arg1, arg2) # $ func=C.classmethod arg1 arg2
c_func_obj(SC, arg1, arg2) # $ MISSING: func=C.classmethod arg1 arg2
@expects(5)
def test_subclass_staticmethod_call():
try:
SC.staticmethod.__func__
except AttributeError:
print("OK")
sc.staticmethod(arg1, arg2) # $ func=C.staticmethod arg1 arg2
SC.staticmethod(arg1, arg2) # $ func=C.staticmethod arg1 arg2
# Generator functions
# A function or method which uses the yield statement (see section The yield statement) is called a generator function. Such a function, when called, always returns an iterator object which can be used to execute the body of the function: calling the iterator's iterator.__next__() method will cause the function to execute until it provides a value using the yield statement. When the function executes a return statement or falls off the end, a StopIteration exception is raised and the iterator will have reached the end of the set of values to be returned.
def gen(x, count):
@@ -198,5 +232,16 @@ class Customized:
customized = Customized()
SINK(Customized.a) #$ MISSING:flow="SOURCE, l:-8 -> customized.a"
SINK_F(Customized.b)
SINK(customized.a) #$ MISSING:flow="SOURCE, l:-10 -> customized.a"
SINK(customized.a) #$ MISSING: flow="SOURCE, l:-10 -> customized.a"
SINK(customized.b) #$ flow="SOURCE, l:-7 -> customized.b"
class Test2:
def __init__(self, arg):
self.x = SOURCE
self.y = arg
t = Test2(SOURCE)
SINK(t.x) # $ flow="SOURCE, l:-4 -> t.x"
SINK(t.y) # $ flow="SOURCE, l:-2 -> t.y"

View File

@@ -4,5 +4,5 @@ import semmle.python.dataflow.new.DataFlow
from DataFlow::Node nodeFrom, DataFlow::Node nodeTo
where
DataFlow::localFlowStep(nodeFrom, nodeTo) and
nodeFrom.getEnclosingCallable().getName().matches("%\\_with\\_local\\_flow")
nodeFrom.getEnclosingCallable().getQualifiedName().matches("%\\_with\\_local\\_flow")
select nodeFrom, nodeTo

View File

@@ -697,9 +697,16 @@ def test_overflow_iteration():
s = SOURCE
iterate_star_args(NONSOURCE, NONSOURCE, SOURCE, s)
@expects(6)
def test_deep_callgraph():
# port of python/ql/test/library-tests/taint/general/deep.py
# based on the fact that `test_deep_callgraph_defined_in_module` works the problem
# seems to be that we're defining these functions inside another function and that
# the flow of these function definitions DOESN'T flow into the body of the `f<n>`
# functions (they DO flow into the body of `test_deep_callgraph`, otherwise the
# `f1` call wouldn't work).
def f1(arg):
return arg
@@ -720,8 +727,51 @@ def test_deep_callgraph():
x = f6(SOURCE)
SINK(x) #$ MISSING:flow="SOURCE, l:-1 -> x"
x = f5(SOURCE)
SINK(x) #$ MISSING:flow="SOURCE, l:-1 -> x"
x = f4(SOURCE)
SINK(x) #$ MISSING:flow="SOURCE, l:-1 -> x"
x = f3(SOURCE)
SINK(x) #$ MISSING:flow="SOURCE, l:-1 -> x"
x = f2(SOURCE)
SINK(x) #$ MISSING:flow="SOURCE, l:-1 -> x"
x = f1(SOURCE)
SINK(x) #$ flow="SOURCE, l:-1 -> x"
def wat_f1(arg):
return arg
def wat_f2(arg):
return wat_f1(arg)
def wat_f3(arg):
return wat_f2(arg)
def wat_f4(arg):
return wat_f3(arg)
def wat_f5(arg):
return wat_f4(arg)
def wat_f6(arg):
return wat_f5(arg)
@expects(6)
def test_deep_callgraph_defined_in_module():
x = wat_f6(SOURCE)
SINK(x) #$ flow="SOURCE, l:-1 -> x"
x = wat_f5(SOURCE)
SINK(x) #$ flow="SOURCE, l:-1 -> x"
x = wat_f4(SOURCE)
SINK(x) #$ flow="SOURCE, l:-1 -> x"
x = wat_f3(SOURCE)
SINK(x) #$ flow="SOURCE, l:-1 -> x"
x = wat_f2(SOURCE)
SINK(x) #$ flow="SOURCE, l:-1 -> x"
x = wat_f1(SOURCE)
SINK(x) #$ flow="SOURCE, l:-1 -> x"
@expects(2)
def test_dynamic_tuple_creation_1():
tup = tuple()

View File

@@ -1,24 +1,24 @@
| file://:0:0:0:0 | Function generator_func | generator.py:1:20:1:21 | ControlFlowNode for xs |
| file://:0:0:0:0 | Function generator_func | generator.py:2:12:2:26 | ControlFlowNode for .0 |
| file://:0:0:0:0 | Function generator_func | generator.py:2:12:2:26 | ControlFlowNode for .0 |
| file://:0:0:0:0 | Function generator_func | generator.py:2:12:2:26 | ControlFlowNode for ListComp |
| file://:0:0:0:0 | Function generator_func | generator.py:2:13:2:13 | ControlFlowNode for Yield |
| file://:0:0:0:0 | Function generator_func | generator.py:2:13:2:13 | ControlFlowNode for x |
| file://:0:0:0:0 | Function generator_func | generator.py:2:19:2:19 | ControlFlowNode for x |
| file://:0:0:0:0 | Function generator_func | generator.py:2:24:2:25 | ControlFlowNode for xs |
| file://:0:0:0:0 | Module class_example | class_example.py:1:1:1:3 | ControlFlowNode for wat |
| file://:0:0:0:0 | Module class_example | class_example.py:1:7:1:7 | ControlFlowNode for IntegerLiteral |
| file://:0:0:0:0 | Module class_example | class_example.py:3:1:3:10 | ControlFlowNode for ClassExpr |
| file://:0:0:0:0 | Module class_example | class_example.py:3:7:3:9 | ControlFlowNode for Wat |
| file://:0:0:0:0 | Module class_example | class_example.py:4:5:4:7 | ControlFlowNode for wat |
| file://:0:0:0:0 | Module class_example | class_example.py:4:11:4:11 | ControlFlowNode for IntegerLiteral |
| file://:0:0:0:0 | Module class_example | class_example.py:5:5:5:9 | ControlFlowNode for print |
| file://:0:0:0:0 | Module class_example | class_example.py:5:5:5:26 | ControlFlowNode for print() |
| file://:0:0:0:0 | Module class_example | class_example.py:5:11:5:20 | ControlFlowNode for Str |
| file://:0:0:0:0 | Module class_example | class_example.py:5:23:5:25 | ControlFlowNode for wat |
| file://:0:0:0:0 | Module class_example | class_example.py:7:1:7:5 | ControlFlowNode for print |
| file://:0:0:0:0 | Module class_example | class_example.py:7:1:7:23 | ControlFlowNode for print() |
| file://:0:0:0:0 | Module class_example | class_example.py:7:7:7:17 | ControlFlowNode for Str |
| file://:0:0:0:0 | Module class_example | class_example.py:7:20:7:22 | ControlFlowNode for wat |
| file://:0:0:0:0 | Module generator | generator.py:1:1:1:23 | ControlFlowNode for FunctionExpr |
| file://:0:0:0:0 | Module generator | generator.py:1:5:1:18 | ControlFlowNode for generator_func |
| class_example.py:0:0:0:0 | Module class_example | class_example.py:1:1:1:3 | ControlFlowNode for wat |
| class_example.py:0:0:0:0 | Module class_example | class_example.py:1:7:1:7 | ControlFlowNode for IntegerLiteral |
| class_example.py:0:0:0:0 | Module class_example | class_example.py:3:1:3:10 | ControlFlowNode for ClassExpr |
| class_example.py:0:0:0:0 | Module class_example | class_example.py:3:7:3:9 | ControlFlowNode for Wat |
| class_example.py:0:0:0:0 | Module class_example | class_example.py:4:5:4:7 | ControlFlowNode for wat |
| class_example.py:0:0:0:0 | Module class_example | class_example.py:4:11:4:11 | ControlFlowNode for IntegerLiteral |
| class_example.py:0:0:0:0 | Module class_example | class_example.py:5:5:5:9 | ControlFlowNode for print |
| class_example.py:0:0:0:0 | Module class_example | class_example.py:5:5:5:26 | ControlFlowNode for print() |
| class_example.py:0:0:0:0 | Module class_example | class_example.py:5:11:5:20 | ControlFlowNode for Str |
| class_example.py:0:0:0:0 | Module class_example | class_example.py:5:23:5:25 | ControlFlowNode for wat |
| class_example.py:0:0:0:0 | Module class_example | class_example.py:7:1:7:5 | ControlFlowNode for print |
| class_example.py:0:0:0:0 | Module class_example | class_example.py:7:1:7:23 | ControlFlowNode for print() |
| class_example.py:0:0:0:0 | Module class_example | class_example.py:7:7:7:17 | ControlFlowNode for Str |
| class_example.py:0:0:0:0 | Module class_example | class_example.py:7:20:7:22 | ControlFlowNode for wat |
| generator.py:0:0:0:0 | Module generator | generator.py:1:1:1:23 | ControlFlowNode for FunctionExpr |
| generator.py:0:0:0:0 | Module generator | generator.py:1:5:1:18 | ControlFlowNode for generator_func |
| generator.py:1:1:1:23 | Function generator_func | generator.py:1:20:1:21 | ControlFlowNode for xs |
| generator.py:1:1:1:23 | Function generator_func | generator.py:2:12:2:26 | ControlFlowNode for .0 |
| generator.py:1:1:1:23 | Function generator_func | generator.py:2:12:2:26 | ControlFlowNode for .0 |
| generator.py:1:1:1:23 | Function generator_func | generator.py:2:12:2:26 | ControlFlowNode for ListComp |
| generator.py:1:1:1:23 | Function generator_func | generator.py:2:13:2:13 | ControlFlowNode for Yield |
| generator.py:1:1:1:23 | Function generator_func | generator.py:2:13:2:13 | ControlFlowNode for x |
| generator.py:1:1:1:23 | Function generator_func | generator.py:2:19:2:19 | ControlFlowNode for x |
| generator.py:1:1:1:23 | Function generator_func | generator.py:2:24:2:25 | ControlFlowNode for xs |

View File

@@ -1 +1,2 @@
import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
import python
import experimental.dataflow.TestUtil.DataFlowConsistency

View File

@@ -1 +1,2 @@
import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
import python
import experimental.dataflow.TestUtil.DataFlowConsistency

View File

@@ -84,10 +84,10 @@ def test_indirect_assign_bound_method():
sf = myobj.setFoo
sf(SOURCE)
SINK(myobj.foo) # $ MISSING: flow="SOURCE, l:-1 -> myobj.foo"
SINK(myobj.foo) # $ flow="SOURCE, l:-1 -> myobj.foo"
sf(NONSOURCE)
SINK_F(myobj.foo)
SINK_F(myobj.foo) # $ SPURIOUS: flow="SOURCE, l:-4 -> myobj.foo"
@expects(3) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
@@ -167,6 +167,17 @@ def fields_with_local_flow(x):
def test_fields():
SINK(fields_with_local_flow(SOURCE)) # $ flow="SOURCE -> fields_with_local_flow(..)"
def call_with_source(func):
func(SOURCE)
def test_bound_method_passed_as_arg():
myobj = MyObj(NONSOURCE)
call_with_source(myobj.setFoo)
SINK(myobj.foo) # $ MISSING: flow="SOURCE, l:-5 -> foo.x"
# ------------------------------------------------------------------------------
# Nested Object
# ------------------------------------------------------------------------------
@@ -244,6 +255,9 @@ class CrosstalkTestX:
def setvalue(self, value):
self.x = value
def do_nothing(self, value):
pass
class CrosstalkTestY:
def __init__(self):
@@ -295,10 +309,10 @@ def test_potential_crosstalk_different_name(cond=True):
func(SOURCE)
SINK(objx.x) # $ MISSING: flow="SOURCE, l:-2 -> objx.x"
SINK(objx.x) # $ flow="SOURCE, l:-2 -> objx.x"
SINK_F(objx.y)
SINK_F(objy.x)
SINK(objy.y, not_present_at_runtime=True) # $ MISSING: flow="SOURCE, l:-5 -> objy.y"
SINK(objy.y, not_present_at_runtime=True) # $ flow="SOURCE, l:-5 -> objy.y"
@expects(8) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
@@ -318,10 +332,10 @@ def test_potential_crosstalk_same_name(cond=True):
func(SOURCE)
SINK(objx.x) # $ MISSING: flow="SOURCE, l:-2 -> objx.x"
SINK(objx.x) # $ flow="SOURCE, l:-2 -> objx.x"
SINK_F(objx.y)
SINK_F(objy.x)
SINK(objy.y, not_present_at_runtime=True) # $ MISSING: flow="SOURCE, l:-5 -> objy.y"
SINK(objy.y, not_present_at_runtime=True) # $ flow="SOURCE, l:-5 -> objy.y"
@expects(10) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
@@ -350,6 +364,53 @@ def test_potential_crosstalk_same_name_object_reference(cond=True):
SINK(obj.y, not_present_at_runtime=True) # $ flow="SOURCE, l:-8 -> obj.y"
@expects(4) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
def test_potential_crosstalk_same_class(cond=True):
objx1 = CrosstalkTestX()
SINK_F(objx1.x)
objx2 = CrosstalkTestX()
SINK_F(objx2.x)
if cond:
func = objx1.setvalue
else:
func = objx2.do_nothing
# We want to ensure that objx2.x does not end up getting tainted, since that would
# be cross-talk between the self arguments and their functions.
func(SOURCE)
SINK(objx1.x) # $ flow="SOURCE, l:-2 -> objx1.x"
SINK_F(objx2.x)
class NewTest(object):
def __new__(cls, arg):
cls.foo = arg
return super().__new__(cls) # $ unresolved_call=super().__new__(..)
@expects(4) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
def test__new__():
# we want to make sure that we DON'T pass the synthetic pre-update node for
# the class instance to __new__, like we do for __init__.
nt = NewTest(SOURCE)
# the __new__ implementation sets the foo attribute on THE CLASS itself. The
# attribute lookup on the class instance will go to the class itself when the
# attribute isn't defined on the class instance, so we will actually see `nt.foo`
# contain the source, but the point of this test is that we should see identical
# behavior between NewTest.foo and nt.foo, which we don't!
#
# Also note that we currently (October 2022) don't model writes to classes very
# well.
SINK(NewTest.foo) # $ MISSING: flow="SOURCE, l:-10 -> NewTest.foo"
SINK(nt.foo) # $ MISSING: flow="SOURCE, l:-11 -> nt.foo"
NewTest.foo = NONSOURCE
SINK_F(NewTest.foo)
SINK_F(nt.foo)
# ------------------------------------------------------------------------------
# Global scope
# ------------------------------------------------------------------------------
@@ -400,7 +461,7 @@ SINK(obj2.foo) # $ flow="SOURCE, l:-1 -> obj2.foo"
# apparently these if statements below makes a difference :O
# but one is not enough
cond = os.urandom(1)[0] > 128
cond = os.urandom(1)[0] > 128 # $ unresolved_call=os.urandom(..)
if cond:
pass

View File

@@ -1 +1,2 @@
import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
import python
import experimental.dataflow.TestUtil.DataFlowConsistency

View File

@@ -1 +1,2 @@
import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
import python
import experimental.dataflow.TestUtil.DataFlowConsistency

View File

@@ -1 +1,2 @@
import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
import python
import experimental.dataflow.TestUtil.DataFlowConsistency

View File

@@ -1 +1,2 @@
import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
import python
import experimental.dataflow.TestUtil.DataFlowConsistency

View File

@@ -1 +1,2 @@
import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
import python
import experimental.dataflow.TestUtil.DataFlowConsistency

View File

@@ -1,96 +0,0 @@
While working on the field-flow tests, I encountered some very strange behavior. By moving some tests into a new file, they suddenly started working :O
This folder contains the artifacts from investigating this problem, so we can recall the facts (but besides that, they don't have much value in themselves).
The test files can be found in `src/`, and I have set up a bunch of different tests with different extractor options in the `test-*` folders.
The core of the problem is that in _some_ configuration of extractor options, after seeing the code below, points-to gives up trying to resolve calls :flushed:
```py
import os
cond = os.urandom(1)[0] > 128
if cond:
pass
if cond:
pass
```
This seems to have been caused by not allowing enough imports to be resolved. There is also some interaction with splitting, since turning that off also removes the problem.
But allowing our test to see more imports is more representative of what happens when analyzing real code, so that's the better approach :+1: (and going above 3 does not seem to change anything in this case).
I've thought about whether we can write a query to reliably detect cases such as this, but I don't see any solutions. However, we can easily try running all our tests with `--max-import-depth=100` and see if anything changes from this.
# Seeing the solutions work
Doing `diff -u -r test-1-normal/ test-5-max-import-depth-3/` shows that all the calls we should be able to resolve are now resolved properly. Critically, this line is added:
```diff
+| ../src/urandom_problem.py:43:6:43:8 | ControlFlowNode for foo | Fixed missing result:flow="SOURCE, l:-15 -> foo" |
```
<details>
<summary>full diff</summary>
```diff
diff '--color=auto' -u -r test-1-normal/NormalDataflowTest.expected test-5-max-import-depth-3/NormalDataflowTest.expected
--- test-1-normal/NormalDataflowTest.expected 2022-02-27 10:33:00.603882599 +0100
+++ test-5-max-import-depth-3/NormalDataflowTest.expected 2022-02-28 10:10:08.930743800 +0100
@@ -1,2 +1,3 @@
missingAnnotationOnSink
failures
+| ../src/urandom_problem.py:43:6:43:8 | ControlFlowNode for foo | Fixed missing result:flow="SOURCE, l:-15 -> foo" |
diff '--color=auto' -u -r test-1-normal/options test-5-max-import-depth-3/options
--- test-1-normal/options 2022-02-27 10:36:51.124793909 +0100
+++ test-5-max-import-depth-3/options 2022-02-27 11:01:43.908098372 +0100
@@ -1 +1 @@
-semmle-extractor-options: --max-import-depth=1 -R ../src
+semmle-extractor-options: --max-import-depth=3 -R ../src
diff '--color=auto' -u -r test-1-normal/UnresolvedCalls.expected test-5-max-import-depth-3/UnresolvedCalls.expected
--- test-1-normal/UnresolvedCalls.expected 2022-02-28 10:09:19.213742437 +0100
+++ test-5-max-import-depth-3/UnresolvedCalls.expected 2022-02-28 10:10:08.638737921 +0100
@@ -0,0 +1,5 @@
+| ../src/isfile_no_problem.py:34:33:34:70 | Comment # $ unresolved_call=os.path.isfile(..) | Missing result:unresolved_call=os.path.isfile(..) |
+| ../src/urandom_no_if_no_problem.py:34:31:34:64 | Comment # $ unresolved_call=os.urandom(..) | Missing result:unresolved_call=os.urandom(..) |
+| ../src/urandom_problem.py:34:31:34:64 | Comment # $ unresolved_call=os.urandom(..) | Missing result:unresolved_call=os.urandom(..) |
+| ../src/urandom_problem.py:42:18:42:47 | Comment # $ unresolved_call=give_src() | Missing result:unresolved_call=give_src() |
+| ../src/urandom_problem.py:43:11:43:75 | Comment # $ unresolved_call=SINK(..) MISSING: flow="SOURCE, l:-15 -> foo" | Missing result:unresolved_call=SINK(..) |
diff '--color=auto' -u -r test-1-normal/UnresolvedPointsToCalls.expected test-5-max-import-depth-3/UnresolvedPointsToCalls.expected
--- test-1-normal/UnresolvedPointsToCalls.expected 2022-02-28 10:09:19.033738812 +0100
+++ test-5-max-import-depth-3/UnresolvedPointsToCalls.expected 2022-02-28 10:12:48.572752108 +0100
@@ -1,5 +1 @@
-| ../src/urandom_no_if_no_problem.py:34:8:34:20 | ../src/urandom_no_if_no_problem.py:34 | os.urandom(..) |
| ../src/urandom_no_import_no_problem.py:34:8:34:20 | ../src/urandom_no_import_no_problem.py:34 | os.urandom(..) |
-| ../src/urandom_problem.py:34:8:34:20 | ../src/urandom_problem.py:34 | os.urandom(..) |
-| ../src/urandom_problem.py:42:7:42:16 | ../src/urandom_problem.py:42 | give_src() |
-| ../src/urandom_problem.py:43:1:43:9 | ../src/urandom_problem.py:43 | SINK(..) |
```
</details>
There is no benefit in increasing import depth above 3 for this test-example:
```diff
$ diff -u -r test-4-max-import-depth-100/ test-5-max-import-depth-3/
--- test-4-max-import-depth-100/options 2022-02-28 10:02:09.269071781 +0100
+++ test-5-max-import-depth-3/options 2022-02-27 11:01:43.908098372 +0100
@@ -1 +1 @@
-semmle-extractor-options: --max-import-depth=100 -R ../src
+semmle-extractor-options: --max-import-depth=3 -R ../src
```
Also notice that using import depth 2 actually makes things worse, as we no longer handle the `isfile_no_problem.py` file properly :facepalm: :sweat_smile: NOTE: This was only for Python 3, for Python 2 there was no change :flushed:
```diff
diff '--color=auto' -u -r test-4-max-import-depth-100/NormalDataflowTest.expected test-6-max-import-depth-2/NormalDataflowTest.expected
--- test-4-max-import-depth-100/NormalDataflowTest.expected 2022-02-28 10:10:02.206608379 +0100
+++ test-6-max-import-depth-2/NormalDataflowTest.expected 2022-02-28 10:10:13.882716665 +0100
@@ -1,3 +1,5 @@
missingAnnotationOnSink
+| ../src/isfile_no_problem.py:43:6:43:8 | ../src/isfile_no_problem.py:43 | ERROR, you should add `# $ MISSING: flow` annotation | foo |
failures
+| ../src/isfile_no_problem.py:43:11:43:41 | Comment # $ flow="SOURCE, l:-15 -> foo" | Missing result:flow="SOURCE, l:-15 -> foo" |
| ../src/urandom_problem.py:43:6:43:8 | ControlFlowNode for foo | Fixed missing result:flow="SOURCE, l:-15 -> foo" |
```

View File

@@ -1,43 +0,0 @@
# These are defined so that we can evaluate the test code.
NONSOURCE = "not a source"
SOURCE = "source"

# Return whether x equals one of the marker values that count as a source.
def is_source(x):
    return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j

# Sink that source data is expected to reach: prints "OK" for a source value.
def SINK(x):
    if is_source(x):
        print("OK")
    else:
        print("Unexpected flow", x)

# Sink that source data is NOT expected to reach: prints "OK" for a non-source value.
def SINK_F(x):
    if is_source(x):
        print("Unexpected flow", x)
    else:
        print("OK")
# NOTE: the `l:-N` offsets below are line-relative; keep the spacing down to the SINK calls unchanged.
# ------------------------------------------------------------------------------
# Actual tests
# ------------------------------------------------------------------------------
# Hand out the SOURCE marker value.
def give_src():
    return SOURCE

foo = give_src()
SINK(foo) # $ flow="SOURCE, l:-3 -> foo"
# Variant: cond is computed with eval(...) on a constant string rather than through the os module.
import os
cond = eval("False")

if cond:
    pass

if cond:
    pass
# Same call and sink again, now after the two branches on cond.
foo = give_src()
SINK(foo) # $ flow="SOURCE, l:-15 -> foo"

View File

@@ -1,43 +0,0 @@
# These are defined so that we can evaluate the test code.
NONSOURCE = "not a source"
SOURCE = "source"

# Return whether x equals one of the marker values that count as a source.
def is_source(x):
    return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j

# Sink that source data is expected to reach: prints "OK" for a source value.
def SINK(x):
    if is_source(x):
        print("OK")
    else:
        print("Unexpected flow", x)

# Sink that source data is NOT expected to reach: prints "OK" for a non-source value.
def SINK_F(x):
    if is_source(x):
        print("Unexpected flow", x)
    else:
        print("OK")
# NOTE: expected results refer to lines 34 and 43 of this file; keep it at 43 lines.
# ------------------------------------------------------------------------------
# Actual tests
# ------------------------------------------------------------------------------
# Hand out the SOURCE marker value.
def give_src():
    return SOURCE

foo = give_src()
SINK(foo) # $ flow="SOURCE, l:-3 -> foo"
# Variant: cond comes from os.path.isfile(...).
import os
cond = os.path.isfile(__file__) # $ unresolved_call=os.path.isfile(..)

if cond:
    pass

if cond:
    pass
# Same call and sink again, now after the two branches on cond.
foo = give_src()
SINK(foo) # $ flow="SOURCE, l:-15 -> foo"

View File

@@ -1,43 +0,0 @@
# These are defined so that we can evaluate the test code.
NONSOURCE = "not a source"
SOURCE = "source"

# Return whether x equals one of the marker values that count as a source.
def is_source(x):
    return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j

# Sink that source data is expected to reach: prints "OK" for a source value.
def SINK(x):
    if is_source(x):
        print("OK")
    else:
        print("Unexpected flow", x)

# Sink that source data is NOT expected to reach: prints "OK" for a non-source value.
def SINK_F(x):
    if is_source(x):
        print("Unexpected flow", x)
    else:
        print("OK")
# NOTE: the `l:-N` offsets below are line-relative; keep the spacing down to the SINK calls unchanged.
# ------------------------------------------------------------------------------
# Actual tests
# ------------------------------------------------------------------------------
# Hand out the SOURCE marker value.
def give_src():
    return SOURCE

foo = give_src()
SINK(foo) # $ flow="SOURCE, l:-3 -> foo"
# Variant: cond is a plain arithmetic comparison; os is imported but not used.
import os
cond = 1 + 1 == 2

if cond:
    pass

if cond:
    pass
# Same call and sink again, now after the two branches on cond.
foo = give_src()
SINK(foo) # $ flow="SOURCE, l:-15 -> foo"

View File

@@ -1,43 +0,0 @@
# These are defined so that we can evaluate the test code.
NONSOURCE = "not a source"
SOURCE = "source"

# Return whether x equals one of the marker values that count as a source.
def is_source(x):
    return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j

# Sink that source data is expected to reach: prints "OK" for a source value.
def SINK(x):
    if is_source(x):
        print("OK")
    else:
        print("Unexpected flow", x)

# Sink that source data is NOT expected to reach: prints "OK" for a non-source value.
def SINK_F(x):
    if is_source(x):
        print("Unexpected flow", x)
    else:
        print("OK")
# NOTE: expected results refer to line 34 of this file; keep it at 43 lines.
# ------------------------------------------------------------------------------
# Actual tests
# ------------------------------------------------------------------------------
# Hand out the SOURCE marker value.
def give_src():
    return SOURCE

foo = give_src()
SINK(foo) # $ flow="SOURCE, l:-3 -> foo"
# Variant: cond comes from os.urandom(...), but the two branches on cond are commented out.
import os
cond = os.urandom(1)[0] > 128 # $ unresolved_call=os.urandom(..)

# if cond:
#     pass
#
# if cond:
#     pass
# Same call and sink again.
foo = give_src()
SINK(foo) # $ flow="SOURCE, l:-15 -> foo"

View File

@@ -1,43 +0,0 @@
# These are defined so that we can evaluate the test code.
NONSOURCE = "not a source"
SOURCE = "source"

# Return whether x equals one of the marker values that count as a source.
def is_source(x):
    return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j

# Sink that source data is expected to reach: prints "OK" for a source value.
def SINK(x):
    if is_source(x):
        print("OK")
    else:
        print("Unexpected flow", x)

# Sink that source data is NOT expected to reach: prints "OK" for a non-source value.
def SINK_F(x):
    if is_source(x):
        print("Unexpected flow", x)
    else:
        print("OK")
# NOTE: expected results refer to line 34 of this file; keep it at 43 lines.
# ------------------------------------------------------------------------------
# Actual tests
# ------------------------------------------------------------------------------
# Hand out the SOURCE marker value.
def give_src():
    return SOURCE

foo = give_src()
SINK(foo) # $ flow="SOURCE, l:-3 -> foo"
# Variant: the os import and both branches on cond are commented out (os is unbound when run as-is).
# import os
cond = os.urandom(1)[0] > 128 # $ unresolved_call=os.urandom(..)

# if cond:
#     pass
#
# if cond:
#     pass
# Same call and sink again.
foo = give_src()
SINK(foo) # $ flow="SOURCE, l:-15 -> foo"

View File

@@ -1,43 +0,0 @@
# These are defined so that we can evaluate the test code.
NONSOURCE = "not a source"
SOURCE = "source"

# Return whether x equals one of the marker values that count as a source.
def is_source(x):
    return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j

# Sink that source data is expected to reach: prints "OK" for a source value.
def SINK(x):
    if is_source(x):
        print("OK")
    else:
        print("Unexpected flow", x)

# Sink that source data is NOT expected to reach: prints "OK" for a non-source value.
def SINK_F(x):
    if is_source(x):
        print("Unexpected flow", x)
    else:
        print("OK")
# NOTE: expected results refer to lines 34, 42 and 43 of this file; keep it at 43 lines.
# ------------------------------------------------------------------------------
# Actual tests
# ------------------------------------------------------------------------------
# Hand out the SOURCE marker value.
def give_src():
    return SOURCE

foo = give_src()
SINK(foo) # $ flow="SOURCE, l:-3 -> foo"
# Variant: os.urandom(...) followed by two branches on cond, the pattern the README reports as the problem.
import os
cond = os.urandom(1)[0] > 128 # $ unresolved_call=os.urandom(..)

if cond:
    pass

if cond:
    pass
# Same call and sink again, now after the two branches on cond.
foo = give_src() # $ unresolved_call=give_src()
SINK(foo) # $ unresolved_call=SINK(..) MISSING: flow="SOURCE, l:-15 -> foo"

View File

@@ -1,6 +0,0 @@
| ../src/eval_no_problem.py | has splitting |
| ../src/isfile_no_problem.py | has splitting |
| ../src/simple_no_problem.py | has splitting |
| ../src/urandom_no_if_no_problem.py | does not have splitting |
| ../src/urandom_no_import_no_problem.py | does not have splitting |
| ../src/urandom_problem.py | has splitting |

View File

@@ -1,16 +0,0 @@
import python
/**
 * Holds if `e` lies in a file that has a relative path and more than one
 * `ControlFlowNode` has `e` as its node.
 *
 * Quick-evaluate this predicate to see which expressions have splitting; per the
 * original note, that is basically just anything from line 39 and further.
 */
predicate exprWithSplitting(Expr e) {
  count(ControlFlowNode cfn | cfn.getNode() = e) > 1 and
  exists(e.getLocation().getFile().getRelativePath())
}
// For every file with a relative path, report whether it contains at least one
// expression for which `exprWithSplitting` holds.
from File f, string msg
where
  exists(f.getRelativePath()) and
  (
    exists(Expr e | exprWithSplitting(e) and e.getLocation().getFile() = f) and
    msg = "has splitting"
    or
    not exists(Expr e | exprWithSplitting(e) and e.getLocation().getFile() = f) and
    msg = "does not have splitting"
  )
select f.toString(), msg

View File

@@ -1,2 +0,0 @@
import python
import experimental.dataflow.TestUtil.UnresolvedCalls

View File

@@ -1,5 +0,0 @@
| ../src/urandom_no_if_no_problem.py:34:8:34:20 | ../src/urandom_no_if_no_problem.py:34 | os.urandom(..) |
| ../src/urandom_no_import_no_problem.py:34:8:34:20 | ../src/urandom_no_import_no_problem.py:34 | os.urandom(..) |
| ../src/urandom_problem.py:34:8:34:20 | ../src/urandom_problem.py:34 | os.urandom(..) |
| ../src/urandom_problem.py:42:7:42:16 | ../src/urandom_problem.py:42 | give_src() |
| ../src/urandom_problem.py:43:1:43:9 | ../src/urandom_problem.py:43 | SINK(..) |

View File

@@ -1,10 +0,0 @@
import python
private import semmle.python.dataflow.new.internal.PrintNode
// Lists calls, in files with a relative path, that no `Value` reports as one of its calls.
from CallNode call
where
  // somehow print is not resolved, but that is not the focus right now
  not call.getFunction().(NameNode).getId() = "print" and
  not call = any(Value v).getACall() and
  exists(call.getLocation().getFile().getRelativePath())
select call.getLocation(), prettyExpr(call.getNode())

View File

@@ -1 +0,0 @@
semmle-extractor-options: --lang=3 --max-import-depth=1 -R ../src

View File

@@ -1,3 +0,0 @@
missingAnnotationOnSink
failures
| ../src/urandom_problem.py:43:6:43:8 | ControlFlowNode for foo | Fixed missing result:flow="SOURCE, l:-15 -> foo" |

View File

@@ -1,2 +0,0 @@
import python
import experimental.dataflow.TestUtil.NormalDataflowTest

View File

@@ -1,6 +0,0 @@
| ../src/eval_no_problem.py | does not have splitting |
| ../src/isfile_no_problem.py | does not have splitting |
| ../src/simple_no_problem.py | does not have splitting |
| ../src/urandom_no_if_no_problem.py | does not have splitting |
| ../src/urandom_no_import_no_problem.py | does not have splitting |
| ../src/urandom_problem.py | does not have splitting |

View File

@@ -1,16 +0,0 @@
import python
/**
 * Holds if `e` lies in a file that has a relative path and more than one
 * `ControlFlowNode` has `e` as its node.
 *
 * Quick-evaluate this predicate to see which expressions have splitting; per the
 * original note, that is basically just anything from line 39 and further.
 */
predicate exprWithSplitting(Expr e) {
  count(ControlFlowNode cfn | cfn.getNode() = e) > 1 and
  exists(e.getLocation().getFile().getRelativePath())
}
// For every file with a relative path, report whether it contains at least one
// expression for which `exprWithSplitting` holds.
from File f, string msg
where
  exists(f.getRelativePath()) and
  (
    exists(Expr e | exprWithSplitting(e) and e.getLocation().getFile() = f) and
    msg = "has splitting"
    or
    not exists(Expr e | exprWithSplitting(e) and e.getLocation().getFile() = f) and
    msg = "does not have splitting"
  )
select f.toString(), msg

View File

@@ -1,2 +0,0 @@
| ../src/urandom_problem.py:42:18:42:47 | Comment # $ unresolved_call=give_src() | Missing result:unresolved_call=give_src() |
| ../src/urandom_problem.py:43:11:43:75 | Comment # $ unresolved_call=SINK(..) MISSING: flow="SOURCE, l:-15 -> foo" | Missing result:unresolved_call=SINK(..) |

View File

@@ -1,2 +0,0 @@
import python
import experimental.dataflow.TestUtil.UnresolvedCalls

View File

@@ -1,3 +0,0 @@
| ../src/urandom_no_if_no_problem.py:34:8:34:20 | ../src/urandom_no_if_no_problem.py:34 | os.urandom(..) |
| ../src/urandom_no_import_no_problem.py:34:8:34:20 | ../src/urandom_no_import_no_problem.py:34 | os.urandom(..) |
| ../src/urandom_problem.py:34:8:34:20 | ../src/urandom_problem.py:34 | os.urandom(..) |

View File

@@ -1,10 +0,0 @@
import python
private import semmle.python.dataflow.new.internal.PrintNode
// Lists calls, in files with a relative path, that no `Value` reports as one of its calls.
from CallNode call
where
  // somehow print is not resolved, but that is not the focus right now
  not call.getFunction().(NameNode).getId() = "print" and
  not call = any(Value v).getACall() and
  exists(call.getLocation().getFile().getRelativePath())
select call.getLocation(), prettyExpr(call.getNode())

View File

@@ -1 +0,0 @@
semmle-extractor-options: --lang=3 --dont-split-graph --max-import-depth=1 -R ../src

View File

@@ -1,3 +0,0 @@
missingAnnotationOnSink
failures
| ../src/urandom_problem.py:43:6:43:8 | ControlFlowNode for foo | Fixed missing result:flow="SOURCE, l:-15 -> foo" |

View File

@@ -1,2 +0,0 @@
import python
import experimental.dataflow.TestUtil.NormalDataflowTest

View File

@@ -1,6 +0,0 @@
| ../src/eval_no_problem.py | has splitting |
| ../src/isfile_no_problem.py | has splitting |
| ../src/simple_no_problem.py | has splitting |
| ../src/urandom_no_if_no_problem.py | does not have splitting |
| ../src/urandom_no_import_no_problem.py | does not have splitting |
| ../src/urandom_problem.py | has splitting |

View File

@@ -1,16 +0,0 @@
import python
/**
 * Holds if `e` lies in a file that has a relative path and more than one
 * `ControlFlowNode` has `e` as its node.
 *
 * Quick-evaluate this predicate to see which expressions have splitting; per the
 * original note, that is basically just anything from line 39 and further.
 */
predicate exprWithSplitting(Expr e) {
  count(ControlFlowNode cfn | cfn.getNode() = e) > 1 and
  exists(e.getLocation().getFile().getRelativePath())
}
// For every file with a relative path, report whether it contains at least one
// expression for which `exprWithSplitting` holds.
from File f, string msg
where
  exists(f.getRelativePath()) and
  (
    exists(Expr e | exprWithSplitting(e) and e.getLocation().getFile() = f) and
    msg = "has splitting"
    or
    not exists(Expr e | exprWithSplitting(e) and e.getLocation().getFile() = f) and
    msg = "does not have splitting"
  )
select f.toString(), msg

View File

@@ -1,2 +0,0 @@
| ../src/urandom_problem.py:42:18:42:47 | Comment # $ unresolved_call=give_src() | Missing result:unresolved_call=give_src() |
| ../src/urandom_problem.py:43:11:43:75 | Comment # $ unresolved_call=SINK(..) MISSING: flow="SOURCE, l:-15 -> foo" | Missing result:unresolved_call=SINK(..) |

View File

@@ -1,2 +0,0 @@
import python
import experimental.dataflow.TestUtil.UnresolvedCalls

Some files were not shown because too many files have changed in this diff Show More