Python: Support flow to keyword param from **kwargs arg

When resolving merge conflict after flow-summaries was merged, this is
the original commit where I introduced ParameterNodeImpl, so this is the
commit where differences in that implementation was committed...

I removed TParameterNode, since I could not see we we gain anything from
having it.
This commit is contained in:
Rasmus Wriedt Larsen
2022-09-09 17:05:08 +02:00
parent 215a03d948
commit c687df4ddc
10 changed files with 101 additions and 17 deletions

View File

@@ -199,6 +199,8 @@ abstract class DataFlowFunction extends DataFlowCallable, TFunction {
exists(string name | ppos.isKeyword(name) | result.getParameter() = func.getArgByName(name))
or
ppos.isDictSplat() and result.getParameter() = func.getKwarg()
or
ppos.isDictSplat() and result = TSynthDictSplatParameterNode(this)
}
}
@@ -1194,7 +1196,9 @@ abstract class ParameterNodeImpl extends Node {
* Holds if this node is the parameter of callable `c` at the
* position `ppos`.
*/
abstract predicate isParameterOf(DataFlowCallable c, ParameterPosition ppos);
predicate isParameterOf(DataFlowCallable c, ParameterPosition ppos) {
this = c.getParameter(ppos)
}
}
/** A parameter for a library callable with a flow summary. */

View File

@@ -39,6 +39,9 @@ predicate isArgumentNode(ArgumentNode arg, DataFlowCall c, ArgumentPosition pos)
//--------
predicate isExpressionNode(ControlFlowNode node) { node.getNode() instanceof Expr }
// =============================================================================
// SyntheticPreUpdateNode
// =============================================================================
class SyntheticPreUpdateNode extends Node, TSyntheticPreUpdateNode {
CallNode node;
@@ -54,6 +57,9 @@ class SyntheticPreUpdateNode extends Node, TSyntheticPreUpdateNode {
override Location getLocation() { result = node.getLocation() }
}
// =============================================================================
// **kwargs (DictSplat) related
// =============================================================================
/**
* A (synthetic) data-flow node that represents all keyword arguments, as if they had
* been passed in a `**kwargs` argument.
@@ -98,11 +104,70 @@ private predicate dictSplatParameterNodeClearStep(ParameterNode n, DictionaryEle
exists(DataFlowCallable callable, ParameterPosition dictSplatPos, ParameterPosition keywordPos |
dictSplatPos.isDictSplat() and
n = callable.getParameter(dictSplatPos) and
not n instanceof SynthDictSplatParameterNode and
exists(callable.getParameter(keywordPos)) and
keywordPos.isKeyword(c.getKey())
)
}
/**
* A synthetic data-flow node to allow flow to keyword parameters from a `**kwargs` argument.
*
* Take the code snippet below as an example. Since the call only has a `**kwargs` argument,
* with a `**` argument position, we add this synthetic parameter node with `**` parameter position,
* and a read step to the `p1` parameter.
*
* ```py
* def foo(p1): ...
*
* kwargs = {"p1": 42}
* foo(**kwargs)
* ```
*
*
* Note that this will introduce a bit of redundancy in cases like
*
* ```py
* foo(p1=taint(1), p2=taint(2))
* ```
*
* where direct keyword matching is possible, since we construct a synthesized dict
* splat argument (`SynthDictSplatArgumentNode`) at the call site, which means that
* `taint(1)` will flow into `p1` both via normal keyword matching and via the synthesized
* nodes (and similarly for `p2`). However, this redundancy is OK since
* (a) it means that type-tracking through keyword arguments also works in most cases,
* (b) read/store steps can be avoided when direct keyword matching is possible, and
* hence access path limits are not a concern, and
* (c) since the synthesized nodes are hidden, the reported data-flow paths will be
* collapsed anyway.
*/
class SynthDictSplatParameterNode extends ParameterNodeImpl, TSynthDictSplatParameterNode {
DataFlowCallable callable;
SynthDictSplatParameterNode() { this = TSynthDictSplatParameterNode(callable) }
override string toString() { result = "SynthDictSplatParameterNode" }
override Scope getScope() { result = callable.getScope() }
override Location getLocation() { result = callable.getLocation() }
override Parameter getParameter() { none() }
}
predicate synthDictSplatParameterNodeReadStep(
SynthDictSplatParameterNode nodeFrom, DictionaryElementContent c, ParameterNode nodeTo
) {
exists(DataFlowCallable callable, ParameterPosition ppos |
nodeFrom = TSynthDictSplatParameterNode(callable) and
nodeTo = callable.getParameter(ppos) and
ppos.isKeyword(c.getKey())
)
}
// =============================================================================
// PostUpdateNode
// =============================================================================
abstract class PostUpdateNodeImpl extends Node {
/** Gets the node before the state update. */
abstract Node getPreUpdateNode();
@@ -624,6 +689,8 @@ predicate readStep(Node nodeFrom, Content c, Node nodeTo) {
attributeReadStep(nodeFrom, c, nodeTo)
or
FlowSummaryImpl::Private::Steps::summaryReadStep(nodeFrom, c, nodeTo)
or
synthDictSplatParameterNodeReadStep(nodeFrom, c, nodeTo)
}
/** Data flows from a sequence to a subscript of the sequence. */
@@ -783,6 +850,8 @@ predicate nodeIsHidden(Node n) {
n instanceof SummaryParameterNode
or
n instanceof SynthDictSplatArgumentNode
or
n instanceof SynthDictSplatParameterNode
}
class LambdaCallKind = Unit;

View File

@@ -64,7 +64,7 @@ newtype TNode =
exists(Class cls, Function func, ParameterDefinition def |
func = cls.getAMethod() and
not hasStaticmethodDecorator(func) and
// this matches what we do in ParameterNode
// this matches what we do in ExtractedParameterNode
def.getDefiningNode() = node and
def.getParameter() = func.getArg(0)
)
@@ -113,9 +113,12 @@ newtype TNode =
TSummaryParameterNode(FlowSummaryImpl::Public::SummarizedCallable c, ParameterPosition pos) {
FlowSummaryImpl::Private::summaryParameterNodeRange(c, pos)
} or
TSynthDictSplatArgumentNode(CallNode call) { exists(call.getArgByName(_)) }
class TParameterNode = TCfgNode or TSummaryParameterNode;
/** A synthetic node to capture keyword arguments that are passed to a `**kwargs` parameter. */
TSynthDictSplatArgumentNode(CallNode call) { exists(call.getArgByName(_)) } or
/** A synthetic node to allow flow to keyword parameters from a `**kwargs` argument. */
TSynthDictSplatParameterNode(DataFlowCallable callable) {
exists(ParameterPosition ppos | ppos.isKeyword(_) | exists(callable.getParameter(ppos)))
}
/** Helper for `Node::getEnclosingCallable`. */
private DataFlowCallable getCallableScope(Scope s) {
@@ -292,7 +295,7 @@ ExprNode exprNode(DataFlowExpr e) { result.getNode().getNode() = e }
* The value of a parameter at function entry, viewed as a node in a data
* flow graph.
*/
class ParameterNode extends Node, TParameterNode instanceof ParameterNodeImpl {
class ParameterNode extends Node instanceof ParameterNodeImpl {
/** Gets the parameter corresponding to this node, if any. */
final Parameter getParameter() { result = super.getParameter() }
}
@@ -304,10 +307,6 @@ class ExtractedParameterNode extends ParameterNodeImpl, CfgNode {
ExtractedParameterNode() { node = def.getDefiningNode() }
override predicate isParameterOf(DataFlowCallable c, ParameterPosition ppos) {
this = c.getParameter(ppos)
}
override Parameter getParameter() { result = def.getParameter() }
}

View File

@@ -8,4 +8,12 @@ private class MyConsistencyConfiguration extends ConsistencyConfiguration {
override predicate argHasPostUpdateExclude(ArgumentNode n) {
exists(ArgumentPosition apos | n.argumentOf(_, apos) and apos.isDictSplat())
}
override predicate reverseReadExclude(Node n) {
// since `self`/`cls` parameters can be marked as implicit argument to `super()`,
// they will have PostUpdateNodes. We have a read-step from the synthetic `**kwargs`
// parameter, but dataflow-consistency queries should _not_ complain about there not
// being a post-update node for the synthetic `**kwargs` parameter.
n instanceof SynthDictSplatParameterNode
}
}

View File

@@ -1,2 +1,3 @@
| test.py:1:1:1:21 | SynthDictSplatParameterNode |
| test.py:1:19:1:19 | ControlFlowNode for x |
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |

View File

@@ -5,6 +5,7 @@
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
| test.py:1:1:1:21 | SynthDictSplatParameterNode | test.py:1:1:1:21 | SynthDictSplatParameterNode |
| test.py:1:5:1:17 | ControlFlowNode for obfuscated_id | test.py:1:5:1:17 | ControlFlowNode for obfuscated_id |
| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |

View File

@@ -3,6 +3,7 @@
| test.py:0:0:0:0 | GSSA Variable b |
| test.py:0:0:0:0 | SSA variable $ |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr |
| test.py:1:1:1:21 | SynthDictSplatParameterNode |
| test.py:1:5:1:17 | ControlFlowNode for obfuscated_id |
| test.py:1:5:1:17 | GSSA Variable obfuscated_id |
| test.py:1:19:1:19 | ControlFlowNode for x |

View File

@@ -3,6 +3,7 @@
| test.py:0:0:0:0 | GSSA Variable b |
| test.py:0:0:0:0 | SSA variable $ |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr |
| test.py:1:1:1:21 | SynthDictSplatParameterNode |
| test.py:1:5:1:17 | ControlFlowNode for obfuscated_id |
| test.py:1:5:1:17 | GSSA Variable obfuscated_id |
| test.py:1:19:1:19 | ControlFlowNode for x |

View File

@@ -72,7 +72,7 @@ def argument_passing(
@expects(7)
def test_argument_passing1():
argument_passing(arg1, *(arg2, arg3, arg4), e=arg5, **{"f": arg6, "g": arg7}) #$ arg1 arg5 arg7 func=argument_passing MISSING: arg2 arg3 arg4 arg6
argument_passing(arg1, *(arg2, arg3, arg4), e=arg5, **{"f": arg6, "g": arg7}) #$ arg1 arg5 arg6 arg7 func=argument_passing MISSING: arg2 arg3 arg4
@expects(7)
@@ -102,8 +102,8 @@ def with_multiple_kw_args(a, b, c):
def test_multiple_kw_args():
with_multiple_kw_args(b=arg2, c=arg3, a=arg1) #$ arg1 arg2 arg3
with_multiple_kw_args(arg1, *(arg2,), arg3) #$ arg1 MISSING: arg2 arg3
with_multiple_kw_args(arg1, **{"c": arg3}, b=arg2) #$ arg1 arg2 MISSING: arg3
with_multiple_kw_args(**{"b": arg2}, **{"c": arg3}, **{"a": arg1}) #$ MISSING: arg1 arg2 arg3
with_multiple_kw_args(arg1, **{"c": arg3}, b=arg2) #$ arg1 arg2 arg3 func=with_multiple_kw_args
with_multiple_kw_args(**{"b": arg2}, **{"c": arg3}, **{"a": arg1}) #$ arg1 arg2 arg3 func=with_multiple_kw_args
def with_default_arguments(a=arg1, b=arg2, c=arg3): #$ arg1 arg2 arg3 func=with_default_arguments
@@ -117,7 +117,7 @@ def test_default_arguments():
with_default_arguments()
with_default_arguments(arg1) #$ arg1
with_default_arguments(b=arg2) #$ arg2
with_default_arguments(**{"c": arg3}) #$ MISSING: arg3
with_default_arguments(**{"c": arg3}) #$ arg3 func=with_default_arguments
# All combinations
@@ -198,5 +198,5 @@ def test_mixed():
args = {"b": arg2, "c": "safe"} # $ arg2 func=mixed
mixed(a=arg1, **args) # $ arg1
args = {"a": arg1, "b": arg2, "c": "safe"} # $ arg2 func=mixed MISSING: arg1
args = {"a": arg1, "b": arg2, "c": "safe"} # $ arg1 arg2 func=mixed
mixed(**args)

View File

@@ -393,7 +393,7 @@ def test_call_unpack_iterable():
def test_call_unpack_mapping():
SINK(second(NONSOURCE, **{"b": SOURCE})) #$ MISSING: flow="SOURCE -> second(..)"
SINK(second(NONSOURCE, **{"b": SOURCE})) #$ flow="SOURCE -> second(..)"
def f_extra_pos(a, *b):
@@ -509,7 +509,7 @@ def test_lambda_unpack_mapping():
def second(a, b):
return b
SINK(second(NONSOURCE, **{"b": SOURCE})) #$ MISSING: flow="SOURCE -> second(..)"
SINK(second(NONSOURCE, **{"b": SOURCE})) #$ flow="SOURCE -> second(..)"
def test_lambda_extra_pos():