mirror of
https://github.com/github/codeql.git
synced 2025-12-24 04:36:35 +01:00
Python: Support flow to keyword param from **kwargs arg
When resolving merge conflict after flow-summaries was merged, this is the original commit where I introduced ParameterNodeImpl, so this is the commit where differences in that implementation was committed... I removed TParameterNode, since I could not see we we gain anything from having it.
This commit is contained in:
@@ -199,6 +199,8 @@ abstract class DataFlowFunction extends DataFlowCallable, TFunction {
|
||||
exists(string name | ppos.isKeyword(name) | result.getParameter() = func.getArgByName(name))
|
||||
or
|
||||
ppos.isDictSplat() and result.getParameter() = func.getKwarg()
|
||||
or
|
||||
ppos.isDictSplat() and result = TSynthDictSplatParameterNode(this)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1194,7 +1196,9 @@ abstract class ParameterNodeImpl extends Node {
|
||||
* Holds if this node is the parameter of callable `c` at the
|
||||
* position `ppos`.
|
||||
*/
|
||||
abstract predicate isParameterOf(DataFlowCallable c, ParameterPosition ppos);
|
||||
predicate isParameterOf(DataFlowCallable c, ParameterPosition ppos) {
|
||||
this = c.getParameter(ppos)
|
||||
}
|
||||
}
|
||||
|
||||
/** A parameter for a library callable with a flow summary. */
|
||||
|
||||
@@ -39,6 +39,9 @@ predicate isArgumentNode(ArgumentNode arg, DataFlowCall c, ArgumentPosition pos)
|
||||
//--------
|
||||
predicate isExpressionNode(ControlFlowNode node) { node.getNode() instanceof Expr }
|
||||
|
||||
// =============================================================================
|
||||
// SyntheticPreUpdateNode
|
||||
// =============================================================================
|
||||
class SyntheticPreUpdateNode extends Node, TSyntheticPreUpdateNode {
|
||||
CallNode node;
|
||||
|
||||
@@ -54,6 +57,9 @@ class SyntheticPreUpdateNode extends Node, TSyntheticPreUpdateNode {
|
||||
override Location getLocation() { result = node.getLocation() }
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// **kwargs (DictSplat) related
|
||||
// =============================================================================
|
||||
/**
|
||||
* A (synthetic) data-flow node that represents all keyword arguments, as if they had
|
||||
* been passed in a `**kwargs` argument.
|
||||
@@ -98,11 +104,70 @@ private predicate dictSplatParameterNodeClearStep(ParameterNode n, DictionaryEle
|
||||
exists(DataFlowCallable callable, ParameterPosition dictSplatPos, ParameterPosition keywordPos |
|
||||
dictSplatPos.isDictSplat() and
|
||||
n = callable.getParameter(dictSplatPos) and
|
||||
not n instanceof SynthDictSplatParameterNode and
|
||||
exists(callable.getParameter(keywordPos)) and
|
||||
keywordPos.isKeyword(c.getKey())
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* A synthetic data-flow node to allow flow to keyword parameters from a `**kwargs` argument.
|
||||
*
|
||||
* Take the code snippet below as an example. Since the call only has a `**kwargs` argument,
|
||||
* with a `**` argument position, we add this synthetic parameter node with `**` parameter position,
|
||||
* and a read step to the `p1` parameter.
|
||||
*
|
||||
* ```py
|
||||
* def foo(p1): ...
|
||||
*
|
||||
* kwargs = {"p1": 42}
|
||||
* foo(**kwargs)
|
||||
* ```
|
||||
*
|
||||
*
|
||||
* Note that this will introduce a bit of redundancy in cases like
|
||||
*
|
||||
* ```py
|
||||
* foo(p1=taint(1), p2=taint(2))
|
||||
* ```
|
||||
*
|
||||
* where direct keyword matching is possible, since we construct a synthesized dict
|
||||
* splat argument (`SynthDictSplatArgumentNode`) at the call site, which means that
|
||||
* `taint(1)` will flow into `p1` both via normal keyword matching and via the synthesized
|
||||
* nodes (and similarly for `p2`). However, this redundancy is OK since
|
||||
* (a) it means that type-tracking through keyword arguments also works in most cases,
|
||||
* (b) read/store steps can be avoided when direct keyword matching is possible, and
|
||||
* hence access path limits are not a concern, and
|
||||
* (c) since the synthesized nodes are hidden, the reported data-flow paths will be
|
||||
* collapsed anyway.
|
||||
*/
|
||||
class SynthDictSplatParameterNode extends ParameterNodeImpl, TSynthDictSplatParameterNode {
|
||||
DataFlowCallable callable;
|
||||
|
||||
SynthDictSplatParameterNode() { this = TSynthDictSplatParameterNode(callable) }
|
||||
|
||||
override string toString() { result = "SynthDictSplatParameterNode" }
|
||||
|
||||
override Scope getScope() { result = callable.getScope() }
|
||||
|
||||
override Location getLocation() { result = callable.getLocation() }
|
||||
|
||||
override Parameter getParameter() { none() }
|
||||
}
|
||||
|
||||
predicate synthDictSplatParameterNodeReadStep(
|
||||
SynthDictSplatParameterNode nodeFrom, DictionaryElementContent c, ParameterNode nodeTo
|
||||
) {
|
||||
exists(DataFlowCallable callable, ParameterPosition ppos |
|
||||
nodeFrom = TSynthDictSplatParameterNode(callable) and
|
||||
nodeTo = callable.getParameter(ppos) and
|
||||
ppos.isKeyword(c.getKey())
|
||||
)
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// PostUpdateNode
|
||||
// =============================================================================
|
||||
abstract class PostUpdateNodeImpl extends Node {
|
||||
/** Gets the node before the state update. */
|
||||
abstract Node getPreUpdateNode();
|
||||
@@ -624,6 +689,8 @@ predicate readStep(Node nodeFrom, Content c, Node nodeTo) {
|
||||
attributeReadStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
FlowSummaryImpl::Private::Steps::summaryReadStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
synthDictSplatParameterNodeReadStep(nodeFrom, c, nodeTo)
|
||||
}
|
||||
|
||||
/** Data flows from a sequence to a subscript of the sequence. */
|
||||
@@ -783,6 +850,8 @@ predicate nodeIsHidden(Node n) {
|
||||
n instanceof SummaryParameterNode
|
||||
or
|
||||
n instanceof SynthDictSplatArgumentNode
|
||||
or
|
||||
n instanceof SynthDictSplatParameterNode
|
||||
}
|
||||
|
||||
class LambdaCallKind = Unit;
|
||||
|
||||
@@ -64,7 +64,7 @@ newtype TNode =
|
||||
exists(Class cls, Function func, ParameterDefinition def |
|
||||
func = cls.getAMethod() and
|
||||
not hasStaticmethodDecorator(func) and
|
||||
// this matches what we do in ParameterNode
|
||||
// this matches what we do in ExtractedParameterNode
|
||||
def.getDefiningNode() = node and
|
||||
def.getParameter() = func.getArg(0)
|
||||
)
|
||||
@@ -113,9 +113,12 @@ newtype TNode =
|
||||
TSummaryParameterNode(FlowSummaryImpl::Public::SummarizedCallable c, ParameterPosition pos) {
|
||||
FlowSummaryImpl::Private::summaryParameterNodeRange(c, pos)
|
||||
} or
|
||||
TSynthDictSplatArgumentNode(CallNode call) { exists(call.getArgByName(_)) }
|
||||
|
||||
class TParameterNode = TCfgNode or TSummaryParameterNode;
|
||||
/** A synthetic node to capture keyword arguments that are passed to a `**kwargs` parameter. */
|
||||
TSynthDictSplatArgumentNode(CallNode call) { exists(call.getArgByName(_)) } or
|
||||
/** A synthetic node to allow flow to keyword parameters from a `**kwargs` argument. */
|
||||
TSynthDictSplatParameterNode(DataFlowCallable callable) {
|
||||
exists(ParameterPosition ppos | ppos.isKeyword(_) | exists(callable.getParameter(ppos)))
|
||||
}
|
||||
|
||||
/** Helper for `Node::getEnclosingCallable`. */
|
||||
private DataFlowCallable getCallableScope(Scope s) {
|
||||
@@ -292,7 +295,7 @@ ExprNode exprNode(DataFlowExpr e) { result.getNode().getNode() = e }
|
||||
* The value of a parameter at function entry, viewed as a node in a data
|
||||
* flow graph.
|
||||
*/
|
||||
class ParameterNode extends Node, TParameterNode instanceof ParameterNodeImpl {
|
||||
class ParameterNode extends Node instanceof ParameterNodeImpl {
|
||||
/** Gets the parameter corresponding to this node, if any. */
|
||||
final Parameter getParameter() { result = super.getParameter() }
|
||||
}
|
||||
@@ -304,10 +307,6 @@ class ExtractedParameterNode extends ParameterNodeImpl, CfgNode {
|
||||
|
||||
ExtractedParameterNode() { node = def.getDefiningNode() }
|
||||
|
||||
override predicate isParameterOf(DataFlowCallable c, ParameterPosition ppos) {
|
||||
this = c.getParameter(ppos)
|
||||
}
|
||||
|
||||
override Parameter getParameter() { result = def.getParameter() }
|
||||
}
|
||||
|
||||
|
||||
@@ -8,4 +8,12 @@ private class MyConsistencyConfiguration extends ConsistencyConfiguration {
|
||||
override predicate argHasPostUpdateExclude(ArgumentNode n) {
|
||||
exists(ArgumentPosition apos | n.argumentOf(_, apos) and apos.isDictSplat())
|
||||
}
|
||||
|
||||
override predicate reverseReadExclude(Node n) {
|
||||
// since `self`/`cls` parameters can be marked as implicit argument to `super()`,
|
||||
// they will have PostUpdateNodes. We have a read-step from the synthetic `**kwargs`
|
||||
// parameter, but dataflow-consistency queries should _not_ complain about there not
|
||||
// being a post-update node for the synthetic `**kwargs` parameter.
|
||||
n instanceof SynthDictSplatParameterNode
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,2 +1,3 @@
|
||||
| test.py:1:1:1:21 | SynthDictSplatParameterNode |
|
||||
| test.py:1:19:1:19 | ControlFlowNode for x |
|
||||
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr |
|
||||
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
|
||||
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
|
||||
| test.py:1:1:1:21 | SynthDictSplatParameterNode | test.py:1:1:1:21 | SynthDictSplatParameterNode |
|
||||
| test.py:1:5:1:17 | ControlFlowNode for obfuscated_id | test.py:1:5:1:17 | ControlFlowNode for obfuscated_id |
|
||||
| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
|
||||
| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
| test.py:0:0:0:0 | GSSA Variable b |
|
||||
| test.py:0:0:0:0 | SSA variable $ |
|
||||
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr |
|
||||
| test.py:1:1:1:21 | SynthDictSplatParameterNode |
|
||||
| test.py:1:5:1:17 | ControlFlowNode for obfuscated_id |
|
||||
| test.py:1:5:1:17 | GSSA Variable obfuscated_id |
|
||||
| test.py:1:19:1:19 | ControlFlowNode for x |
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
| test.py:0:0:0:0 | GSSA Variable b |
|
||||
| test.py:0:0:0:0 | SSA variable $ |
|
||||
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr |
|
||||
| test.py:1:1:1:21 | SynthDictSplatParameterNode |
|
||||
| test.py:1:5:1:17 | ControlFlowNode for obfuscated_id |
|
||||
| test.py:1:5:1:17 | GSSA Variable obfuscated_id |
|
||||
| test.py:1:19:1:19 | ControlFlowNode for x |
|
||||
|
||||
@@ -72,7 +72,7 @@ def argument_passing(
|
||||
|
||||
@expects(7)
|
||||
def test_argument_passing1():
|
||||
argument_passing(arg1, *(arg2, arg3, arg4), e=arg5, **{"f": arg6, "g": arg7}) #$ arg1 arg5 arg7 func=argument_passing MISSING: arg2 arg3 arg4 arg6
|
||||
argument_passing(arg1, *(arg2, arg3, arg4), e=arg5, **{"f": arg6, "g": arg7}) #$ arg1 arg5 arg6 arg7 func=argument_passing MISSING: arg2 arg3 arg4
|
||||
|
||||
|
||||
@expects(7)
|
||||
@@ -102,8 +102,8 @@ def with_multiple_kw_args(a, b, c):
|
||||
def test_multiple_kw_args():
|
||||
with_multiple_kw_args(b=arg2, c=arg3, a=arg1) #$ arg1 arg2 arg3
|
||||
with_multiple_kw_args(arg1, *(arg2,), arg3) #$ arg1 MISSING: arg2 arg3
|
||||
with_multiple_kw_args(arg1, **{"c": arg3}, b=arg2) #$ arg1 arg2 MISSING: arg3
|
||||
with_multiple_kw_args(**{"b": arg2}, **{"c": arg3}, **{"a": arg1}) #$ MISSING: arg1 arg2 arg3
|
||||
with_multiple_kw_args(arg1, **{"c": arg3}, b=arg2) #$ arg1 arg2 arg3 func=with_multiple_kw_args
|
||||
with_multiple_kw_args(**{"b": arg2}, **{"c": arg3}, **{"a": arg1}) #$ arg1 arg2 arg3 func=with_multiple_kw_args
|
||||
|
||||
|
||||
def with_default_arguments(a=arg1, b=arg2, c=arg3): #$ arg1 arg2 arg3 func=with_default_arguments
|
||||
@@ -117,7 +117,7 @@ def test_default_arguments():
|
||||
with_default_arguments()
|
||||
with_default_arguments(arg1) #$ arg1
|
||||
with_default_arguments(b=arg2) #$ arg2
|
||||
with_default_arguments(**{"c": arg3}) #$ MISSING: arg3
|
||||
with_default_arguments(**{"c": arg3}) #$ arg3 func=with_default_arguments
|
||||
|
||||
|
||||
# All combinations
|
||||
@@ -198,5 +198,5 @@ def test_mixed():
|
||||
args = {"b": arg2, "c": "safe"} # $ arg2 func=mixed
|
||||
mixed(a=arg1, **args) # $ arg1
|
||||
|
||||
args = {"a": arg1, "b": arg2, "c": "safe"} # $ arg2 func=mixed MISSING: arg1
|
||||
args = {"a": arg1, "b": arg2, "c": "safe"} # $ arg1 arg2 func=mixed
|
||||
mixed(**args)
|
||||
|
||||
@@ -393,7 +393,7 @@ def test_call_unpack_iterable():
|
||||
|
||||
|
||||
def test_call_unpack_mapping():
|
||||
SINK(second(NONSOURCE, **{"b": SOURCE})) #$ MISSING: flow="SOURCE -> second(..)"
|
||||
SINK(second(NONSOURCE, **{"b": SOURCE})) #$ flow="SOURCE -> second(..)"
|
||||
|
||||
|
||||
def f_extra_pos(a, *b):
|
||||
@@ -509,7 +509,7 @@ def test_lambda_unpack_mapping():
|
||||
def second(a, b):
|
||||
return b
|
||||
|
||||
SINK(second(NONSOURCE, **{"b": SOURCE})) #$ MISSING: flow="SOURCE -> second(..)"
|
||||
SINK(second(NONSOURCE, **{"b": SOURCE})) #$ flow="SOURCE -> second(..)"
|
||||
|
||||
|
||||
def test_lambda_extra_pos():
|
||||
|
||||
Reference in New Issue
Block a user