From 3f718123a687bf43d6bdc1c108a23e30f5503548 Mon Sep 17 00:00:00 2001 From: Taus Date: Thu, 29 Jan 2026 16:09:43 +0000 Subject: [PATCH] Python: Make capturing closure arguments synthetic and non-global Uses the same trick as for `ExtractedArgumentNode`, wherein we postpone the global restriction on the charpred to instead be in the `argumentOf` predicate (which is global anyway). In addition to this, we also converted `CapturedVariablesArgumentNode` into a proper synthetic node, and added an explicit post-update node for it. These nodes just act as wrappers for the function part of call nodes. Thus, to make them work with the variable capture machinery, we simply map them to the closure node for the corresponding control-flow or post-update node. --- .../new/internal/DataFlowDispatch.qll | 74 ++++++++++++++----- .../dataflow/new/internal/DataFlowPrivate.qll | 8 ++ .../dataflow/new/internal/DataFlowPublic.qll | 14 ++++ .../dataflow/new/internal/VariableCapture.qll | 6 ++ 4 files changed, 82 insertions(+), 20 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll index c717cd7bc97..b04b83be83e 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll @@ -1714,36 +1714,66 @@ private class SummaryPostUpdateNode extends FlowSummaryNode, PostUpdateNodeImpl * This is also known as the environment part of a closure. * * This is used for tracking flow through captured variables. - * - * TODO: - * We might want a synthetic node here, but currently that incurs problems - * with non-monotonic recursion, because of the use of `resolveCall` in the - * char pred. This may be solvable by using - * `CallGraphConstruction::Make` in stead of - * `CallGraphConstruction::Simple::Make` appropriately. */ -class CapturedVariablesArgumentNode extends CfgNode, ArgumentNode { - CallNode callNode; +class SynthCapturedVariablesArgumentNode extends Node, TSynthCapturedVariablesArgumentNode { + ControlFlowNode callable; - CapturedVariablesArgumentNode() { - node = callNode.getFunction() and - exists(Function target | resolveCall(callNode, target, _) | - target = any(VariableCapture::CapturedVariable v).getACapturingScope() - ) - } + SynthCapturedVariablesArgumentNode() { this = TSynthCapturedVariablesArgumentNode(callable) } + + /** Gets the `CallNode` corresponding to this captured variables argument node. */ + CallNode getCallNode() { result.getFunction() = callable } + + /** Gets the `CfgNode` that corresponds to this synthetic node. */ + CfgNode getUnderlyingNode() { result.asCfgNode() = callable } + + override Scope getScope() { result = callable.getScope() } + + override Location getLocation() { result = callable.getLocation() } override string toString() { result = "Capturing closure argument" } +} +/** A captured variables argument node viewed as an argument node. Needed because `argumentOf` is a global predicate. */ +class CapturedVariablesArgumentNodeAsArgumentNode extends ArgumentNode, + SynthCapturedVariablesArgumentNode +{ override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) { - callNode = call.getNode() and - pos.isLambdaSelf() + exists(CallNode callNode | callNode = this.getCallNode() | + callNode = call.getNode() and + exists(Function target | resolveCall(callNode, target, _) | + target = any(VariableCapture::CapturedVariable v).getACapturingScope() + ) and + pos.isLambdaSelf() + ) + } +} + +/** A synthetic node representing the values of captured variables after the output has been computed. */ +class SynthCapturedVariablesArgumentPostUpdateNode extends PostUpdateNodeImpl, + TSynthCapturedVariablesArgumentPostUpdateNode +{ + ControlFlowNode callable; + + SynthCapturedVariablesArgumentPostUpdateNode() { + this = TSynthCapturedVariablesArgumentPostUpdateNode(callable) + } + + /** Gets the `PostUpdateNode` (for a `CfgNode`) that corresponds to this synthetic node. */ + PostUpdateNode getUnderlyingNode() { result.getPreUpdateNode().asCfgNode() = callable } + + override string toString() { result = "[post] Capturing closure argument" } + + override Scope getScope() { result = callable.getScope() } + + override Location getLocation() { result = callable.getLocation() } + + override SynthCapturedVariablesArgumentNode getPreUpdateNode() { + result = TSynthCapturedVariablesArgumentNode(callable) } } /** A synthetic node representing the values of variables captured by a comprehension. */ -class SynthCompCapturedVariablesArgumentNode extends Node, TSynthCompCapturedVariablesArgumentNode, - ArgumentNode -{ +class SynthCompCapturedVariablesArgumentNode extends Node, TSynthCompCapturedVariablesArgumentNode { Comp comp; SynthCompCapturedVariablesArgumentNode() { this = TSynthCompCapturedVariablesArgumentNode(comp) } @@ -1755,7 +1785,11 @@ class SynthCompCapturedVariablesArgumentNode extends Node, TSynthCompCapturedVar override Location getLocation() { result = comp.getLocation() } Comp getComprehension() { result = comp } +} +class SynthCompCapturedVariablesArgumentNodeAsArgumentNode extends SynthCompCapturedVariablesArgumentNode, + ArgumentNode +{ override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) { call.(ComprehensionCall).getComprehension() = comp and pos.isLambdaSelf() diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll index 2322539995b..9866bd00964 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll @@ -1128,6 +1128,14 @@ predicate nodeIsHidden(Node n) { n instanceof SynthCaptureNode or n instanceof SynthCapturedVariablesParameterNode + or + n instanceof SynthCapturedVariablesArgumentNode + or + n instanceof SynthCapturedVariablesArgumentPostUpdateNode + or + n instanceof SynthCompCapturedVariablesArgumentNode + or + n instanceof SynthCompCapturedVariablesArgumentPostUpdateNode } class LambdaCallKind = Unit; diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll index d14cac5a4cd..532f7b23e4c 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll @@ -121,6 +121,20 @@ newtype TNode = f = any(VariableCapture::CapturedVariable v).getACapturingScope() and exists(TFunction(f)) } or + /** + * A synthetic node representing the values of the variables captured + * by the callable being called. + */ + TSynthCapturedVariablesArgumentNode(ControlFlowNode callable) { + callable = any(CallNode c).getFunction() + } or + /** + * A synthetic node representing the values of the variables captured + * by the callable being called, after the output has been computed. + */ + TSynthCapturedVariablesArgumentPostUpdateNode(ControlFlowNode callable) { + callable = any(CallNode c).getFunction() + } or /** A synthetic node representing the values of variables captured by a comprehension. */ TSynthCompCapturedVariablesArgumentNode(Comp comp) { comp.getFunction() = any(VariableCapture::CapturedVariable v).getACapturingScope() diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/VariableCapture.qll b/python/ql/lib/semmle/python/dataflow/new/internal/VariableCapture.qll index a7b3b9ceaeb..5ed365a8e56 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/VariableCapture.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/VariableCapture.qll @@ -114,6 +114,12 @@ private Flow::ClosureNode asClosureNode(Node n) { result.(Flow::ExprNode).getExpr().getNode() = comp ) or + // For captured variable argument nodes (and their post-update variants), we use the closure node + // for the underlying node. + result = asClosureNode(n.(SynthCapturedVariablesArgumentNode).getUnderlyingNode()) + or + result = asClosureNode(n.(SynthCapturedVariablesArgumentPostUpdateNode).getUnderlyingNode()) + or // TODO: Should the `Comp`s above be excluded here? result.(Flow::ExprNode).getExpr() = n.(CfgNode).getNode() or