Python: migrate src queries to new shared CFG types + reformat

Migrate 27 queries under python/ql/src/ from legacy CFG types
(CallNode/AttrNode/NameNode/etc.) to the shared-CFG-based 'Cfg::'
namespace, matching the dataflow API surface introduced earlier on
this branch. ModificationOfParameterWithDefaultCustomizations.qll
is rewritten on top of BarrierGuard, removing the last legacy ESSA
dependency in that file. UnguardedNextInGenerator.ql still uses
ESSA and bridges to the new CFG via Cfg::CallNode.getNode().

Also reformat 14 library and query files that had drifted from
the formatter.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
yoff
2026-05-26 21:35:39 +00:00
parent cd703dcf0c
commit 0a9946121b
40 changed files with 172 additions and 138 deletions

View File

@@ -28,7 +28,9 @@ class Expr extends Expr_, AstNode {
/** Whether this expression may have a side effect (as determined purely from its syntax) */
predicate hasSideEffects() {
/* If an exception raised by this expression handled, count that as a side effect */
exists(ControlFlowNode n | n.getNode() = this | n.getASuccessor().getNode() instanceof ExceptStmt)
exists(ControlFlowNode n | n.getNode() = this |
n.getASuccessor().getNode() instanceof ExceptStmt
)
or
this.getASubExpression().hasSideEffects()
}
@@ -94,7 +96,6 @@ class Subscript extends Subscript_ {
}
Expr getObject() { result = Subscript_.super.getValue() }
}
/** A call expression, such as `func(...)` */
@@ -110,7 +111,6 @@ class Call extends Call_ {
override string toString() { result = this.getFunc().toString() + "()" }
/** Gets a tuple (*) argument of this call. */
Expr getStarargs() { result = this.getAPositionalArg().(Starred).getValue() }
@@ -196,7 +196,6 @@ class IfExp extends IfExp_ {
override Expr getASubExpression() {
result = this.getTest() or result = this.getBody() or result = this.getOrelse()
}
}
/** A starred expression, such as the `*rest` in the assignment `first, *rest = seq` */
@@ -405,7 +404,6 @@ class PlaceHolder extends PlaceHolder_ {
override Expr getASubExpression() { none() }
override string toString() { result = "$" + this.getId() }
}
/** A tuple expression such as `( 1, 3, 5, 7, 9 )` */
@@ -472,7 +470,6 @@ class Name extends Name_ {
override string toString() { result = this.getId() }
override predicate isArtificial() {
/* Artificial variable names in comprehensions all start with "." */
this.getId().charAt(0) = "."
@@ -578,7 +575,6 @@ abstract class NameConstant extends Name, ImmutableLiteral {
override predicate isConstant() { any() }
override predicate isArtificial() { none() }
}

View File

@@ -726,7 +726,9 @@ private Py::AstNode assigned_value(Py::Expr lhs) {
exists(Py::Alias a | a.getAsname() = lhs and result = a.getValue())
or
/* lhs += x => result = (lhs + x) */
exists(Py::AugAssign a, Py::BinaryExpr b | b = a.getOperation() and result = b and lhs = b.getLeft())
exists(Py::AugAssign a, Py::BinaryExpr b |
b = a.getOperation() and result = b and lhs = b.getLeft()
)
or
/*
* ..., lhs, ... = ..., result, ...
@@ -868,7 +870,9 @@ class NameNode extends ControlFlowNode {
/** Whether this is a use of a global (including builtin) variable. */
predicate isGlobal() { Scopes::use_of_global_variable(this, _, _) }
predicate isSelf() { exists(Py::SsaVariable selfvar | selfvar.isSelf() and selfvar.getAUse() = this) }
predicate isSelf() {
exists(Py::SsaVariable selfvar | selfvar.isSelf() and selfvar.getAUse() = this)
}
}
/** A control flow node corresponding to a named constant, one of `None`, `True` or `False`. */

View File

@@ -162,7 +162,6 @@ class ImportMember extends ImportMember_ {
string getImportedModuleName() {
result = this.getModule().(ImportExpr).getImportedModuleName() + "." + this.getName()
}
}
/** An import statement */

View File

@@ -287,9 +287,7 @@ predicate isClassmethod(Function func) {
/** Holds if the function `func` has a `property` decorator. */
overlay[local]
predicate hasPropertyDecorator(Function func) {
func.getADecorator().(Name).getId() = "property"
}
predicate hasPropertyDecorator(Function func) { func.getADecorator().(Name).getId() = "property" }
/**
* Holds if the function `func` has a `contextlib.contextmanager`.
@@ -297,9 +295,11 @@ predicate hasPropertyDecorator(Function func) {
overlay[local]
predicate hasContextmanagerDecorator(Function func) {
exists(Cfg::ControlFlowNode contextmanager |
contextmanager.(Cfg::NameNode).getId() = "contextmanager" and contextmanager.(Cfg::NameNode).isGlobal()
contextmanager.(Cfg::NameNode).getId() = "contextmanager" and
contextmanager.(Cfg::NameNode).isGlobal()
or
contextmanager.(Cfg::AttrNode).getObject("contextmanager").(Cfg::NameNode).getId() = "contextlib"
contextmanager.(Cfg::AttrNode).getObject("contextmanager").(Cfg::NameNode).getId() =
"contextlib"
|
func.getADecorator() = contextmanager.getNode()
)
@@ -1348,7 +1348,9 @@ predicate normalCallArg(Cfg::CallNode call, Node arg, ArgumentPosition apos) {
* translated into `l.clear()`, and we can still have use-use flow.
*/
cached
predicate getCallArg(Cfg::CallNode call, Function target, CallType type, Node arg, ArgumentPosition apos) {
predicate getCallArg(
Cfg::CallNode call, Function target, CallType type, Node arg, ArgumentPosition apos
) {
Stages::DataFlow::ref() and
resolveCall(call, target, type) and
(
@@ -1441,7 +1443,9 @@ private predicate sameEnclosingCallable(Node node1, Node node2) {
// DataFlowCall
// =============================================================================
newtype TDataFlowCall =
TNormalCall(Cfg::CallNode call, Function target, CallType type) { resolveCall(call, target, type) } or
TNormalCall(Cfg::CallNode call, Function target, CallType type) {
resolveCall(call, target, type)
} or
/** A call to the generated function inside a comprehension */
TComprehensionCall(Comp c) or
TPotentialLibraryCall(Cfg::CallNode call) or

View File

@@ -37,7 +37,9 @@ newtype TNode =
* A node corresponding to a scope entry definition. That is, the value of a variable
* as it enters a scope.
*/
TScopeEntryDefinitionNode(SsaImpl::ScopeEntryDefinition def) { not def.getScope() instanceof Module } or
TScopeEntryDefinitionNode(SsaImpl::ScopeEntryDefinition def) {
not def.getScope() instanceof Module
} or
/**
* A synthetic node representing the value of an object before a state change.
*
@@ -656,11 +658,15 @@ private predicate outcomeOfGuard(
)
or
// Recursive: comparisons against a boolean literal.
exists(Cfg::CompareNode cmpNode, Cmpop op, Cfg::ControlFlowNode otherOperand,
Cfg::ControlFlowNode guardOperand, boolean polarity, boolean cmpBranch
exists(
Cfg::CompareNode cmpNode, Cmpop op, Cfg::ControlFlowNode otherOperand,
Cfg::ControlFlowNode guardOperand, boolean polarity, boolean cmpBranch
|
guardOperand.getNode() = guard.getNode() and
(cmpNode.operands(guardOperand, op, otherOperand) or cmpNode.operands(otherOperand, op, guardOperand)) and
(
cmpNode.operands(guardOperand, op, otherOperand) or
cmpNode.operands(otherOperand, op, guardOperand)
) and
not guard.getNode() instanceof BooleanLiteral and
(
(op instanceof Eq or op instanceof Is) and
@@ -692,7 +698,9 @@ module BarrierGuard<guardChecksSig/3 guardChecks> {
result = ParameterizedBarrierGuard<Unit, extendedGuardChecks/4>::getABarrierNode(_)
}
private predicate extendedGuardChecks(GuardNode g, Cfg::ControlFlowNode node, boolean branch, Unit u) {
private predicate extendedGuardChecks(
GuardNode g, Cfg::ControlFlowNode node, boolean branch, Unit u
) {
guardChecks(g, node, branch) and
u = u
}
@@ -790,7 +798,11 @@ newtype TContent =
or
// d["key"] = ...
key =
any(Cfg::SubscriptNode sub | sub.isStore() | sub.getIndex().getNode().(StringLiteral).getText())
any(Cfg::SubscriptNode sub |
sub.isStore()
|
sub.getIndex().getNode().(StringLiteral).getText()
)
or
// d.setdefault("key", ...)
exists(Cfg::CallNode call | call.getFunction().(Cfg::AttrNode).getName() = "setdefault" |

View File

@@ -233,7 +233,8 @@ class UnpackingAssignmentTarget extends Cfg::ControlFlowNode {
}
/** A (possibly recursive) target of an unpacking assignment which is also a sequence. */
class UnpackingAssignmentSequenceTarget extends UnpackingAssignmentTarget instanceof Cfg::SequenceNode {
class UnpackingAssignmentSequenceTarget extends UnpackingAssignmentTarget instanceof Cfg::SequenceNode
{
Cfg::ControlFlowNode getElement(int i) { result = super.getElement(i) }
Cfg::ControlFlowNode getAnElement() { result = this.getElement(_) }

View File

@@ -97,8 +97,7 @@ private module SummaryTypeTrackerInput implements SummaryTypeTracker::Input {
return = FlowSummaryImpl::Private::SummaryComponent::return() and
// `result` should be the return value of a callable expression (lambda or function) referenced by `callable`
exists(Return ret |
ret.getScope() =
callable.getALocalSource().asExpr().(CallableExpr).getInnerScope() and
ret.getScope() = callable.getALocalSource().asExpr().(CallableExpr).getInnerScope() and
result.asCfgNode().getNode() = ret.getValue()
)
}
@@ -311,7 +310,9 @@ module TypeTrackingInput implements Shared::TypeTrackingInput<Location> {
//
// nodeFrom is `expr`
// nodeTo is entry node for `f`
exists(SsaImpl::ScopeEntryDefinition e, SsaImpl::SsaSourceVariable var, Cfg::DefinitionNode def |
exists(
SsaImpl::ScopeEntryDefinition e, SsaImpl::SsaSourceVariable var, Cfg::DefinitionNode def
|
e.getSourceVariable() = var and
def.getNode() = var.getVariable().getAStore()
|

View File

@@ -448,8 +448,7 @@ class TaintTrackingImplementation extends string instanceof TaintTracking::Confi
context = TNoParam() and
src = TTaintTrackingNode_(retval, TNoParam(), path, kind, this) and
node.asCfgNode() = call and
retval.asCfgNode().getNode() =
any(Return ret | ret.getScope() = pyfunc.getScope()).getValue()
retval.asCfgNode().getNode() = any(Return ret | ret.getScope() = pyfunc.getScope()).getValue()
) and
edgeLabel = "return"
}
@@ -471,8 +470,7 @@ class TaintTrackingImplementation extends string instanceof TaintTracking::Confi
this.callContexts(call, src, pyfunc, context, callee) and
retnode = TTaintTrackingNode_(retval, callee, path, kind, this) and
node.asCfgNode() = call and
retval.asCfgNode().getNode() =
any(Return ret | ret.getScope() = pyfunc.getScope()).getValue()
retval.asCfgNode().getNode() = any(Return ret | ret.getScope() = pyfunc.getScope()).getValue()
) and
edgeLabel = "call"
}

View File

@@ -12,6 +12,8 @@
import python
private import semmle.python.ApiGraphs
private import semmle.python.controlflow.internal.Cfg as Cfg
private import semmle.python.Flow as Flow
API::Node iter() { result = API::builtin("iter") }
@@ -19,17 +21,17 @@ API::Node next() { result = API::builtin("next") }
API::Node stopIteration() { result = API::builtin("StopIteration") }
predicate call_to_iter(CallNode call, EssaVariable sequence) {
call = iter().getACall().asCfgNode() and
predicate call_to_iter(Flow::CallNode call, EssaVariable sequence) {
call.getNode() = iter().getACall().asCfgNode().(Cfg::CallNode).getNode() and
call.getArg(0) = sequence.getAUse()
}
predicate call_to_next(CallNode call, ControlFlowNode iter) {
call = next().getACall().asCfgNode() and
predicate call_to_next(Flow::CallNode call, Flow::ControlFlowNode iter) {
call.getNode() = next().getACall().asCfgNode().(Cfg::CallNode).getNode() and
call.getArg(0) = iter
}
predicate call_to_next_has_default(CallNode call) {
predicate call_to_next_has_default(Flow::CallNode call) {
exists(call.getArg(1)) or exists(call.getArgByName("default"))
}
@@ -49,14 +51,14 @@ predicate iter_not_exhausted(EssaVariable iterator) {
)
}
predicate stop_iteration_handled(CallNode call) {
predicate stop_iteration_handled(Flow::CallNode call) {
exists(Try t |
t.containsInScope(call.getNode()) and
t.getAHandler().getType() = stopIteration().getAValueReachableFromSource().asExpr()
)
}
from CallNode call
from Flow::CallNode call
where
call_to_next(call, _) and
not call_to_next_has_default(call) and

View File

@@ -116,9 +116,7 @@ FunctionValue get_function_or_initializer(Value func_or_cls) {
predicate illegally_named_parameter_objectapi(Call call, Object func, string name) {
not func.isC() and
name = call.getANamedArgumentName() and
exists(ControlFlowNode callCfg | callCfg.getNode() = call |
callCfg = get_a_call_objectapi(func)
) and
exists(ControlFlowNode callCfg | callCfg.getNode() = call | callCfg = get_a_call_objectapi(func)) and
not get_function_or_initializer_objectapi(func).isLegalArgumentName(name)
}

View File

@@ -41,7 +41,9 @@ where
i1 < i2
)
or
exists(ControlFlowNode k1Cfg, ControlFlowNode k2Cfg | k1Cfg.getNode() = k1 and k2Cfg.getNode() = k2 |
exists(ControlFlowNode k1Cfg, ControlFlowNode k2Cfg |
k1Cfg.getNode() = k1 and k2Cfg.getNode() = k2
|
k1Cfg.getBasicBlock().strictlyDominates(k2Cfg.getBasicBlock())
)
)

View File

@@ -98,7 +98,9 @@ private predicate brace_pair(PossibleAdvancedFormatString fmt, int start, int en
}
private predicate advanced_format_call(Call format_expr, PossibleAdvancedFormatString fmt, int args) {
exists(CallNode call, ControlFlowNode fmtCfg | call.getNode() = format_expr and fmtCfg.getNode() = fmt |
exists(CallNode call, ControlFlowNode fmtCfg |
call.getNode() = format_expr and fmtCfg.getNode() = fmt
|
call.getFunction().(ControlFlowNodeWithPointsTo).pointsTo(Value::named("format")) and
call.getArg(0).(ControlFlowNodeWithPointsTo).pointsTo(_, fmtCfg) and
args = count(format_expr.getAnArg()) - 1

View File

@@ -11,8 +11,9 @@
import python
private import semmle.python.ApiGraphs
private import semmle.python.controlflow.internal.Cfg as Cfg
from CallNode call
from Cfg::CallNode call
where
major_version() = 2 and
call = API::builtin("apply").getACall().asCfgNode()

View File

@@ -15,6 +15,7 @@
import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.internal.DataFlowDispatch
private import semmle.python.controlflow.internal.Cfg as Cfg
import codeql.util.Option
/** Holds if `base` is overridden by `sub` */
@@ -143,7 +144,7 @@ predicate ignore(Function f) {
/** Gets a function that `call` may resolve to. */
Function resolveCall(Call call) {
exists(DataFlowCall dfc | call = dfc.getNode().(CallNode).getNode() |
exists(DataFlowCall dfc | call = dfc.getNode().(Cfg::CallNode).getNode() |
result = viableCallable(dfc).(DataFlowFunction).getScope()
)
}

View File

@@ -3,6 +3,7 @@
import python
import semmle.python.dataflow.new.internal.DataFlowDispatch
import semmle.python.ApiGraphs
private import semmle.python.controlflow.internal.Cfg as Cfg
/** A CFG node where a file is opened. */
abstract class FileOpenSource extends DataFlow::CfgNode { }
@@ -64,12 +65,14 @@ abstract class FileClose extends DataFlow::CfgNode {
}
}
private predicate bbSuccessor(BasicBlock src, BasicBlock sink) { sink = src.getASuccessor() }
private predicate bbSuccessor(Cfg::BasicBlock src, Cfg::BasicBlock sink) {
sink = src.getASuccessor()
}
private predicate bbReachableStrict(BasicBlock src, BasicBlock sink) =
private predicate bbReachableStrict(Cfg::BasicBlock src, Cfg::BasicBlock sink) =
fastTC(bbSuccessor/2)(src, sink)
private predicate bbReachableRefl(BasicBlock src, BasicBlock sink) {
private predicate bbReachableRefl(Cfg::BasicBlock src, Cfg::BasicBlock sink) {
bbReachableStrict(src, sink) or src = sink
}

View File

@@ -10,6 +10,7 @@ private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.ApiGraphs
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
private import semmle.python.dataflow.new.internal.TaintTrackingPrivate as TaintTrackingPrivate
private import semmle.python.controlflow.internal.Cfg as Cfg
/**
* An external API that is considered "safe" from a security perspective.
@@ -71,7 +72,7 @@ string apiNodeToStringRepr(API::Node node) {
)
}
predicate resolvedCall(CallNode call) {
predicate resolvedCall(Cfg::CallNode call) {
DataFlowPrivate::resolveCall(call, _, _) or
DataFlowPrivate::resolveClassCall(call, _)
}

View File

@@ -14,6 +14,7 @@
import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.ApiGraphs
private import semmle.python.controlflow.internal.Cfg as Cfg
/*
* Jinja 2 Docs:
@@ -36,8 +37,8 @@ private API::Node jinja2EnvironmentOrTemplate() {
from API::CallNode call
where
call = jinja2EnvironmentOrTemplate().getACall() and
not exists(call.asCfgNode().(CallNode).getNode().getStarargs()) and
not exists(call.asCfgNode().(CallNode).getNode().getKwargs()) and
not exists(call.asCfgNode().(Cfg::CallNode).getNode().getStarargs()) and
not exists(call.asCfgNode().(Cfg::CallNode).getNode().getKwargs()) and
(
not exists(call.getArgByName("autoescape"))
or

View File

@@ -5,6 +5,7 @@
private import python
private import semmle.python.ApiGraphs
private import semmle.python.controlflow.internal.Cfg as Cfg
import TlsLibraryModel
class PyOpenSslContextCreation extends ContextCreation, DataFlow::CallCfgNode {
@@ -37,10 +38,10 @@ class ConnectionCall extends ConnectionCreation, DataFlow::CallCfgNode {
// This cannot be used to unrestrict,
// see https://www.pyopenssl.org/en/stable/api/ssl.html#OpenSSL.SSL.Context.set_options
class SetOptionsCall extends ProtocolRestriction, DataFlow::CallCfgNode {
SetOptionsCall() { node.getFunction().(AttrNode).getName() = "set_options" }
SetOptionsCall() { node.getFunction().(Cfg::AttrNode).getName() = "set_options" }
override DataFlow::CfgNode getContext() {
result.getNode() = node.getFunction().(AttrNode).getObject()
result.getNode() = node.getFunction().(Cfg::AttrNode).getObject()
}
override ProtocolVersion getRestriction() {

View File

@@ -5,6 +5,7 @@
private import python
private import semmle.python.ApiGraphs
private import semmle.python.controlflow.internal.Cfg as Cfg
import TlsLibraryModel
class SslContextCreation extends ContextCreation, DataFlow::CallCfgNode {
@@ -53,7 +54,7 @@ class OptionsAugOr extends ProtocolRestriction, DataFlow::CfgNode {
ProtocolVersion restriction;
OptionsAugOr() {
exists(AugAssign aa, AttrNode attr, Expr flag |
exists(AugAssign aa, Cfg::AttrNode attr, Expr flag |
aa.getOperation().getOp() instanceof BitOr and
aa.getTarget() = attr.getNode() and
attr.getName() = "options" and
@@ -80,7 +81,7 @@ class OptionsAugAndNot extends ProtocolUnrestriction, DataFlow::CfgNode {
ProtocolVersion restriction;
OptionsAugAndNot() {
exists(AugAssign aa, AttrNode attr, Expr flag, UnaryExpr notFlag |
exists(AugAssign aa, Cfg::AttrNode attr, Expr flag, UnaryExpr notFlag |
aa.getOperation().getOp() instanceof BitAnd and
aa.getTarget() = attr.getNode() and
attr.getName() = "options" and

View File

@@ -19,6 +19,7 @@ import semmle.python.filters.Tests
private import semmle.python.dataflow.new.internal.DataFlowDispatch as DataFlowDispatch
private import semmle.python.dataflow.new.internal.Builtins::Builtins as Builtins
private import semmle.python.frameworks.data.ModelsAsData
private import semmle.python.controlflow.internal.Cfg as Cfg
bindingset[char, fraction]
predicate fewer_characters_than(StringLiteral str, string char, float fraction) {
@@ -48,7 +49,7 @@ predicate capitalized_word(StringLiteral str) { str.getText().regexpMatch("[A-Z]
predicate format_string(StringLiteral str) { str.getText().matches("%{%}%") }
predicate maybeCredential(ControlFlowNode f) {
predicate maybeCredential(Cfg::ControlFlowNode f) {
/* A string that is not too short and unlikely to be text or an identifier. */
exists(StringLiteral str | str = f.getNode() |
/* At least 10 characters */
@@ -94,9 +95,9 @@ class CredentialSink extends DataFlow::Node {
this.(DataFlow::ArgumentNode).argumentOf(_, pos)
)
or
exists(Keyword k | k.getArg() = name and this.getNode() = k.getValue().asCfgNode())
exists(Keyword k | k.getArg() = name and this.asCfgNode().getNode() = k.getValue())
or
exists(CompareNode cmp, NameNode n | n.getId() = name |
exists(Cfg::CompareNode cmp, Cfg::NameNode n | n.getId() = name |
cmp.operands(this.asCfgNode(), any(Eq eq), n)
or
cmp.operands(n, any(Eq eq), this.asCfgNode())

View File

@@ -13,24 +13,25 @@
import python
private import semmle.python.ApiGraphs
private import semmle.python.controlflow.internal.Cfg as Cfg
predicate originIsLocals(ControlFlowNode n) {
predicate originIsLocals(Cfg::ControlFlowNode n) {
API::builtin("locals").getReturn().getAValueReachableFromSource().asCfgNode() = n
}
predicate modification_of_locals(ControlFlowNode f) {
originIsLocals(f.(SubscriptNode).getObject()) and
predicate modification_of_locals(Cfg::ControlFlowNode f) {
originIsLocals(f.(Cfg::SubscriptNode).getObject()) and
(f.isStore() or f.isDelete())
or
exists(string mname, AttrNode attr |
attr = f.(CallNode).getFunction() and
exists(string mname, Cfg::AttrNode attr |
attr = f.(Cfg::CallNode).getFunction() and
originIsLocals(attr.getObject(mname))
|
mname in ["pop", "popitem", "update", "clear"]
)
}
from AstNode a, ControlFlowNode f
from AstNode a, Cfg::ControlFlowNode f
where
modification_of_locals(f) and
a = f.getNode() and

View File

@@ -14,6 +14,7 @@
import python
private import semmle.python.ApiGraphs
private import semmle.python.controlflow.internal.Cfg as Cfg
predicate func_with_side_effects(Expr e) {
exists(string name | name = e.(Attribute).getName() or name = e.(Name).getId() |
@@ -24,7 +25,7 @@ predicate func_with_side_effects(Expr e) {
}
predicate call_with_side_effect(Call e) {
exists(ControlFlowNode eCfg | eCfg.getNode() = e |
exists(Cfg::ControlFlowNode eCfg | eCfg.getNode() = e |
eCfg =
API::moduleImport("subprocess")
.getMember(["call", "check_call", "check_output"])

View File

@@ -13,8 +13,9 @@
import python
private import semmle.python.ApiGraphs
private import semmle.python.controlflow.internal.Cfg as Cfg
from CallNode call, string name
from Cfg::CallNode call, string name
where
name = ["exit", "quit"] and
call = API::builtin(name).getACall().asCfgNode()

View File

@@ -13,7 +13,8 @@
import python
import Definition
from ListComprehensionDeclaration l, Name use, Name defn, ControlFlowNode lCfg, ControlFlowNode useCfg
from
ListComprehensionDeclaration l, Name use, Name defn, ControlFlowNode lCfg, ControlFlowNode useCfg
where
use = l.getALeakedVariableUse() and
defn = l.getDefinition() and

View File

@@ -21,6 +21,7 @@ import semmle.python.ApiGraphs
import semmle.python.dataflow.new.internal.Attributes
import semmle.python.dataflow.new.BarrierGuards
import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.controlflow.internal.Cfg as Cfg
/**
* Handle those three cases of Tarfile opens:
@@ -75,8 +76,8 @@ private module TarSlipImprovConfig implements DataFlow::ConfigSig {
call = atfo.getReturn().getMember("extractall").getACall() and
arg = call.getArgByName("members") and
if
arg.asCfgNode() instanceof NameConstantNode or
arg.asCfgNode() instanceof ListNode
arg.asCfgNode() instanceof Cfg::NameConstantNode or
arg.asCfgNode() instanceof Cfg::ListNode
then sink = call.getObject()
else
if arg.(MethodCallNode).getMethodName() = "getmembers"

View File

@@ -16,6 +16,7 @@ import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.ApiGraphs
import semmle.python.dataflow.new.TaintTracking
private import semmle.python.controlflow.internal.Cfg as Cfg
class PredictableResultSource extends DataFlow::Node {
PredictableResultSource() {
@@ -32,7 +33,9 @@ class PredictableResultSource extends DataFlow::Node {
class TokenAssignmentValueSink extends DataFlow::Node {
TokenAssignmentValueSink() {
exists(string name | name.toLowerCase().matches(["%token", "%code"]) |
exists(DefinitionNode n | n.getValue() = this.asCfgNode() | name = n.(NameNode).getId())
exists(Cfg::DefinitionNode n | n.getValue() = this.asCfgNode() |
name = n.(Cfg::NameNode).getId()
)
or
exists(DataFlow::AttrWrite aw | aw.getValue() = this | name = aw.getAttributeName())
)

View File

@@ -11,25 +11,25 @@
import python
import semmle.python.ApiGraphs
import semmle.python.dataflow.new.TaintTracking
import semmle.python.Flow
import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.controlflow.internal.Cfg as Cfg
/**
* Returns true if the control flow node may be useful in the current context.
*
* Ideally for more completeness, we should alert on every `startswith` call and every remote flow source which gets partailly checked. But, as this can lead to lots of FPs, we apply heuristics to filter some calls. This predicate provides logic for this filteration.
*/
private predicate maybeInteresting(ControlFlowNode c) {
private predicate maybeInteresting(Cfg::ControlFlowNode c) {
// Check if the name of the variable which calls the function matches the heuristic.
// This would typically occur at the sink.
// This should deal with cases like
// `origin.startswith("bla")`
heuristics(c.(CallNode).getFunction().(AttrNode).getObject().(NameNode).getId())
heuristics(c.(Cfg::CallNode).getFunction().(Cfg::AttrNode).getObject().(Cfg::NameNode).getId())
or
// Check if the name of the variable passed as an argument to the functions matches the heuristic. This would typically occur at the sink.
// This should deal with cases like
// `bla.startswith(origin)`
heuristics(c.(CallNode).getArg(0).(NameNode).getId())
heuristics(c.(Cfg::CallNode).getArg(0).(Cfg::NameNode).getId())
or
// Check if the value gets written to any interesting variable. This would typically occur at the source.
// This should deal with cases like
@@ -37,8 +37,10 @@ private predicate maybeInteresting(ControlFlowNode c) {
exists(Variable v | heuristics(v.getId()) | c.getASuccessor*().getNode() = v.getAStore())
}
private class StringStartswithCall extends ControlFlowNode {
StringStartswithCall() { this.(CallNode).getFunction().(AttrNode).getName() = "startswith" }
private class StringStartswithCall extends Cfg::ControlFlowNode {
StringStartswithCall() {
this.(Cfg::CallNode).getFunction().(Cfg::AttrNode).getName() = "startswith"
}
}
bindingset[s]
@@ -66,8 +68,8 @@ module CorsBypassConfig implements DataFlow::ConfigSig {
predicate isSink(DataFlow::Node node) {
exists(StringStartswithCall s |
node.asCfgNode() = s.(CallNode).getArg(0) or
node.asCfgNode() = s.(CallNode).getFunction().(AttrNode).getObject()
node.asCfgNode() = s.(Cfg::CallNode).getArg(0) or
node.asCfgNode() = s.(Cfg::CallNode).getFunction().(Cfg::AttrNode).getObject()
)
}

View File

@@ -16,6 +16,7 @@ import semmle.python.Concepts
import semmle.python.dataflow.new.TaintTracking
import semmle.python.dataflow.new.internal.DataFlowPublic
import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.controlflow.internal.Cfg as Cfg
// The Unicode compatibility normalization calls from unicodedata, unidecode, pyunormalize
// and textnorm modules. The use of argIdx is to constraint the argument being normalized.
@@ -52,8 +53,8 @@ class UnicodeCompatibilityNormalize extends API::CallNode {
DataFlow::Node getPathArg() { result = this.getArg(argIdx) }
}
predicate underAValue(DataFlow::GuardNode g, ControlFlowNode node, boolean branch) {
exists(CompareNode cn | cn = g |
predicate underAValue(DataFlow::GuardNode g, Cfg::ControlFlowNode node, boolean branch) {
exists(Cfg::CompareNode cn | cn = g |
exists(API::CallNode lenCall, Cmpop op, Node n |
lenCall = n.getALocalSource() and
(

View File

@@ -9,6 +9,7 @@ import semmle.python.ApiGraphs
import semmle.python.dataflow.new.TaintTracking
import semmle.python.frameworks.Stdlib
import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.controlflow.internal.Cfg as Cfg
/**
* Handle those three cases of Tarfile opens:
@@ -111,8 +112,8 @@ module UnsafeUnpackConfig implements DataFlow::ConfigSig {
call = atfo.getReturn().getMember("extractall").getACall() and
arg = call.getArgByName("members") and
if
arg.asCfgNode() instanceof NameConstantNode or
arg.asCfgNode() instanceof ListNode
arg.asCfgNode() instanceof Cfg::NameConstantNode or
arg.asCfgNode() instanceof Cfg::ListNode
then sink = call.getObject()
else
if arg.(MethodCallNode).getMethodName() = "getmembers"

View File

@@ -4,6 +4,7 @@ import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.dataflow.new.BarrierGuards
import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.controlflow.internal.Cfg as Cfg
/**
* A taint-tracking configuration for tracking untrusted user input used in file read.
@@ -21,7 +22,7 @@ private module CsvInjectionConfig implements DataFlow::ConfigSig {
predicate observeDiffInformedIncrementalMode() { any() }
}
private predicate startsWithCheck(DataFlow::GuardNode g, ControlFlowNode node, boolean branch) {
private predicate startsWithCheck(DataFlow::GuardNode g, Cfg::ControlFlowNode node, boolean branch) {
exists(DataFlow::MethodCallNode mc |
g = mc.asCfgNode() and
mc.calls(_, "startswith") and

View File

@@ -6,6 +6,7 @@
import python
import meta.MetaMetrics
private import LegacyPointsTo
private import semmle.python.controlflow.internal.Cfg as Cfg
newtype TTarget =
TFunction(Function func) or
@@ -50,7 +51,7 @@ class TargetClass extends Target, TClass {
* A call that is (possibly) relevant for analysis quality.
* See `IgnoredFile` for details on what is excluded.
*/
class RelevantCall extends CallNode {
class RelevantCall extends Cfg::CallNode {
RelevantCall() { not this.getLocation().getFile() instanceof IgnoredFile }
}
@@ -60,7 +61,7 @@ module PointsToBasedCallGraph {
class ResolvableCall extends RelevantCall {
Value targetValue;
ResolvableCall() { targetValue.getACall() = this }
ResolvableCall() { targetValue.getACall().getNode() = this.getNode() }
/** Gets a resolved target of this call. */
Target getTarget() {

View File

@@ -8,8 +8,9 @@
import python
import CallGraphQuality
private import semmle.python.controlflow.internal.Cfg as Cfg
from CallNode call, Target target
from Cfg::CallNode call, Target target
where
target.isRelevant() and
call.(TypeTrackingBasedCallGraph::ResolvableCall).getTarget() = target

View File

@@ -8,8 +8,9 @@
import python
import CallGraphQuality
private import semmle.python.controlflow.internal.Cfg as Cfg
from CallNode call, Target target
from Cfg::CallNode call, Target target
where
target.isRelevant() and
call.(PointsToBasedCallGraph::ResolvableCall).getTarget() = target and

View File

@@ -8,8 +8,9 @@
import python
import CallGraphQuality
private import semmle.python.controlflow.internal.Cfg as Cfg
from CallNode call, Target target
from Cfg::CallNode call, Target target
where
target.isRelevant() and
not call.(PointsToBasedCallGraph::ResolvableCall).getTarget() = target and

View File

@@ -8,8 +8,9 @@
import python
import CallGraphQuality
private import semmle.python.controlflow.internal.Cfg as Cfg
from CallNode call, Target target
from Cfg::CallNode call, Target target
where
target.isRelevant() and
not call.(PointsToBasedCallGraph::ResolvableCall).getTarget() = target and

View File

@@ -6,12 +6,13 @@
import python
import CallGraphQuality
private import semmle.python.controlflow.internal.Cfg as Cfg
from string tag, int c
where
tag = "SHARED" and
c =
count(CallNode call, Target target |
count(Cfg::CallNode call, Target target |
target.isRelevant() and
call.(PointsToBasedCallGraph::ResolvableCall).getTarget() = target and
call.(TypeTrackingBasedCallGraph::ResolvableCall).getTarget() = target
@@ -19,7 +20,7 @@ where
or
tag = "NEW" and
c =
count(CallNode call, Target target |
count(Cfg::CallNode call, Target target |
target.isRelevant() and
not call.(PointsToBasedCallGraph::ResolvableCall).getTarget() = target and
call.(TypeTrackingBasedCallGraph::ResolvableCall).getTarget() = target
@@ -27,7 +28,7 @@ where
or
tag = "MISSING" and
c =
count(CallNode call, Target target |
count(Cfg::CallNode call, Target target |
target.isRelevant() and
call.(PointsToBasedCallGraph::ResolvableCall).getTarget() = target and
not call.(TypeTrackingBasedCallGraph::ResolvableCall).getTarget() = target

View File

@@ -8,8 +8,9 @@
import python
import CallGraphQuality
private import semmle.python.controlflow.internal.Cfg as Cfg
from CallNode call, Target target
from Cfg::CallNode call, Target target
where
target.isRelevant() and
call.(PointsToBasedCallGraph::ResolvableCall).getTarget() = target and

View File

@@ -6,6 +6,7 @@
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.BarrierGuards
private import semmle.python.controlflow.internal.Cfg as Cfg
/**
* Provides default sources, sinks and sanitizers for detecting
@@ -76,7 +77,7 @@ module ModificationOfParameterWithDefault {
boolean nonEmpty;
MutableDefaultValue() {
nonEmpty = mutableDefaultValue(this.asCfgNode().(NameNode).getNode()) and
nonEmpty = mutableDefaultValue(this.asCfgNode().(Cfg::NameNode).getNode()) and
// Ignore sources inside the standard library. These are unlikely to be true positives.
exists(this.getLocation().getFile().getRelativePath())
}
@@ -125,10 +126,10 @@ module ModificationOfParameterWithDefault {
class Mutation extends Sink {
Mutation() {
// assignment to a subscript (includes slices)
exists(DefinitionNode d | d.(SubscriptNode).getObject() = this.asCfgNode())
exists(Cfg::DefinitionNode d | d.(Cfg::SubscriptNode).getObject() = this.asCfgNode())
or
// deletion of a subscript
exists(DeletionNode d | d.getTarget().(SubscriptNode).getObject() = this.asCfgNode())
exists(Cfg::DeletionNode d | d.(Cfg::SubscriptNode).getObject() = this.asCfgNode())
or
// augmented assignment to the value
exists(AugAssign a | this.asCfgNode().getNode() = a.getTarget())
@@ -141,54 +142,33 @@ module ModificationOfParameterWithDefault {
}
}
// This to reimplement some of the functionality of the DataFlow::BarrierGuard
private import semmle.python.essa.SsaCompute
/**
* A data-flow node that is known to be either truthy or falsey.
* Holds if `g` validates `node` as truthy when evaluating to `branch`.
*
* It handles the cases `if x` and `if not x`.
*
* For example, in the following code, `this` will be the `x` that is printed,
* which we will know is truthy:
*
* ```py
* if x:
* print(x)
* ```
* The new shared CFG's `GuardNode`/`outcomeOfGuard` already unwraps
* `not x` wrappers, so we only need the direct case: a guard `g`
* controls a block where the guarded value (also `g`) is known to
* have the matching truthiness for the taken branch.
*/
private class MustBe extends DataFlow::Node {
boolean truthy;
MustBe() {
exists(DataFlow::GuardNode guard, NameNode guarded, boolean branch |
// case: if x
guard = guarded and
branch = truthy
or
// case: if not x
guard.(UnaryExprNode).getNode().getOp() instanceof Not and
guarded = guard.(UnaryExprNode).getOperand() and
branch = truthy.booleanNot()
|
// guard controls this
guard.controlsBlock(this.asCfgNode().getBasicBlock(), branch) and
// there is a definition tying the guarded value to this
exists(EssaDefinition def |
AdjacentUses::useOfDef(def, this.asCfgNode()) and
AdjacentUses::useOfDef(def, guarded)
)
)
}
private predicate truthinessGuard(DataFlow::GuardNode g, Cfg::ControlFlowNode node, boolean branch) {
node = g and branch in [true, false]
}
/** Simple guard detecting truthy values. */
private class MustBeTruthy extends MustBe, MustBeNonEmpty {
MustBeTruthy() { truthy = true }
private class MustBeTruthy extends MustBeNonEmpty {
MustBeTruthy() {
this = DataFlow::BarrierGuard<truthinessGuard/3>::getABarrierNode() and
// truthy = true branch
exists(DataFlow::GuardNode g | g.controlsBlock(this.asCfgNode().getBasicBlock(), true))
}
}
/** Simple guard detecting falsey values. */
private class MustBeFalsey extends MustBe, MustBeEmpty {
MustBeFalsey() { truthy = false }
private class MustBeFalsey extends MustBeEmpty {
MustBeFalsey() {
this = DataFlow::BarrierGuard<truthinessGuard/3>::getABarrierNode() and
// truthy = false branch
exists(DataFlow::GuardNode g | g.controlsBlock(this.asCfgNode().getBasicBlock(), false))
}
}
}

View File

@@ -12,7 +12,9 @@ private import semmle.python.controlflow.internal.Cfg as Cfg
import semmle.python.dataflow.new.DataFlow
module CustomTestConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node node) { node.asCfgNode().(Cfg::NameNode).getId() = "CUSTOM_SOURCE" }
predicate isSource(DataFlow::Node node) {
node.asCfgNode().(Cfg::NameNode).getId() = "CUSTOM_SOURCE"
}
predicate isSink(DataFlow::Node node) {
exists(Cfg::CallNode call |

View File

@@ -11,7 +11,10 @@ where
m = v.getScope().getEnclosingModule() and
not m.getName() in ["pkg.use", "pkg.foo_def"] and
v.getName() = "foo" and
if exists(Cfg::ControlFlowNode exit | exit.isNormalExit() and exit.getScope() = m and v.getAUse() = exit)
if
exists(Cfg::ControlFlowNode exit |
exit.isNormalExit() and exit.getScope() = m and v.getAUse() = exit
)
then useToNormalExit = "use to normal exit"
else useToNormalExit = "no use to normal exit"
select m, v, useToNormalExit