From 0a9946121b273c46174270d73f7bf12e6b8c021c Mon Sep 17 00:00:00 2001 From: yoff Date: Tue, 26 May 2026 21:35:39 +0000 Subject: [PATCH] Python: migrate src queries to new shared CFG types + reformat Migrate 27 queries under python/ql/src/ from legacy CFG types (CallNode/AttrNode/NameNode/etc.) to the shared-CFG-based 'Cfg::' namespace, matching the dataflow API surface introduced earlier on this branch. ModificationOfParameterWithDefaultCustomizations.qll is rewritten on top of BarrierGuard, removing the last legacy ESSA dependency in that file. UnguardedNextInGenerator.ql still uses ESSA and bridges to the new CFG via Cfg::CallNode.getNode(). Also reformat 14 library and query files that had drifted from the formatter. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- python/ql/lib/semmle/python/Exprs.qll | 10 +-- python/ql/lib/semmle/python/Flow.qll | 8 ++- python/ql/lib/semmle/python/Import.qll | 1 - .../new/internal/DataFlowDispatch.qll | 18 +++-- .../dataflow/new/internal/DataFlowPublic.qll | 24 +++++-- .../new/internal/IterableUnpacking.qll | 3 +- .../new/internal/TypeTrackingImpl.qll | 7 +- .../python/dataflow/old/Implementation.qll | 6 +- .../Exceptions/UnguardedNextInGenerator.ql | 16 +++-- python/ql/src/Expressions/CallArgs.qll | 4 +- .../DuplicateKeyInDictionaryLiteral.ql | 4 +- .../Formatting/AdvancedFormatting.qll | 4 +- python/ql/src/Expressions/UseofApply.ql | 3 +- .../Functions/SignatureOverriddenMethod.ql | 3 +- .../Resources/FileNotAlwaysClosedQuery.qll | 9 ++- .../CWE-020-ExternalAPIs/ExternalAPIs.qll | 3 +- .../Security/CWE-079/Jinja2WithoutEscaping.ql | 5 +- python/ql/src/Security/CWE-327/PyOpenSSL.qll | 5 +- python/ql/src/Security/CWE-327/Ssl.qll | 5 +- .../Security/CWE-798/HardcodedCredentials.ql | 7 +- .../ql/src/Statements/ModificationOfLocals.ql | 13 ++-- .../ql/src/Statements/SideEffectInAssert.ql | 3 +- python/ql/src/Statements/UseOfExit.ql | 3 +- .../src/Variables/LeakingListComprehension.ql | 3 +- .../Security/CWE-022bis/TarSlipImprov.ql | 5 +- .../Security/CWE-340/TokenBuiltFromUUID.ql | 5 +- .../Security/CWE-346/CorsBypass.ql | 18 ++--- .../Security/CWE-770/UnicodeDoS.ql | 5 +- .../Security/UnsafeUnpackQuery.qll | 5 +- .../security/injection/CsvInjection.qll | 3 +- .../analysis-quality/CallGraphQuality.qll | 5 +- .../src/meta/analysis-quality/TTCallGraph.ql | 3 +- .../analysis-quality/TTCallGraphMissing.ql | 3 +- .../meta/analysis-quality/TTCallGraphNew.ql | 3 +- .../TTCallGraphNewAmbiguous.ql | 3 +- .../analysis-quality/TTCallGraphOverview.ql | 7 +- .../analysis-quality/TTCallGraphShared.ql | 3 +- ...onOfParameterWithDefaultCustomizations.qll | 66 +++++++------------ .../dataflow/regression/custom_dataflow.ql | 4 +- .../typetracking_imports/highlight_problem.ql | 5 +- 40 files changed, 172 insertions(+), 138 deletions(-) diff --git a/python/ql/lib/semmle/python/Exprs.qll b/python/ql/lib/semmle/python/Exprs.qll index 6f462f714eb..9ce6f9e6680 100644 --- a/python/ql/lib/semmle/python/Exprs.qll +++ b/python/ql/lib/semmle/python/Exprs.qll @@ -28,7 +28,9 @@ class Expr extends Expr_, AstNode { /** Whether this expression may have a side effect (as determined purely from its syntax) */ predicate hasSideEffects() { /* If an exception raised by this expression handled, count that as a side effect */ - exists(ControlFlowNode n | n.getNode() = this | n.getASuccessor().getNode() instanceof ExceptStmt) + exists(ControlFlowNode n | n.getNode() = this | + n.getASuccessor().getNode() instanceof ExceptStmt + ) or this.getASubExpression().hasSideEffects() } @@ -94,7 +96,6 @@ class Subscript extends Subscript_ { } Expr getObject() { result = Subscript_.super.getValue() } - } /** A call expression, such as `func(...)` */ @@ -110,7 +111,6 @@ class Call extends Call_ { override string toString() { result = this.getFunc().toString() + "()" } - /** Gets a tuple (*) argument of this call. */ Expr getStarargs() { result = this.getAPositionalArg().(Starred).getValue() } @@ -196,7 +196,6 @@ class IfExp extends IfExp_ { override Expr getASubExpression() { result = this.getTest() or result = this.getBody() or result = this.getOrelse() } - } /** A starred expression, such as the `*rest` in the assignment `first, *rest = seq` */ @@ -405,7 +404,6 @@ class PlaceHolder extends PlaceHolder_ { override Expr getASubExpression() { none() } override string toString() { result = "$" + this.getId() } - } /** A tuple expression such as `( 1, 3, 5, 7, 9 )` */ @@ -472,7 +470,6 @@ class Name extends Name_ { override string toString() { result = this.getId() } - override predicate isArtificial() { /* Artificial variable names in comprehensions all start with "." */ this.getId().charAt(0) = "." @@ -578,7 +575,6 @@ abstract class NameConstant extends Name, ImmutableLiteral { override predicate isConstant() { any() } - override predicate isArtificial() { none() } } diff --git a/python/ql/lib/semmle/python/Flow.qll b/python/ql/lib/semmle/python/Flow.qll index a48fcf7c3e2..0f84f3f367a 100644 --- a/python/ql/lib/semmle/python/Flow.qll +++ b/python/ql/lib/semmle/python/Flow.qll @@ -726,7 +726,9 @@ private Py::AstNode assigned_value(Py::Expr lhs) { exists(Py::Alias a | a.getAsname() = lhs and result = a.getValue()) or /* lhs += x => result = (lhs + x) */ - exists(Py::AugAssign a, Py::BinaryExpr b | b = a.getOperation() and result = b and lhs = b.getLeft()) + exists(Py::AugAssign a, Py::BinaryExpr b | + b = a.getOperation() and result = b and lhs = b.getLeft() + ) or /* * ..., lhs, ... = ..., result, ... @@ -868,7 +870,9 @@ class NameNode extends ControlFlowNode { /** Whether this is a use of a global (including builtin) variable. */ predicate isGlobal() { Scopes::use_of_global_variable(this, _, _) } - predicate isSelf() { exists(Py::SsaVariable selfvar | selfvar.isSelf() and selfvar.getAUse() = this) } + predicate isSelf() { + exists(Py::SsaVariable selfvar | selfvar.isSelf() and selfvar.getAUse() = this) + } } /** A control flow node corresponding to a named constant, one of `None`, `True` or `False`. */ diff --git a/python/ql/lib/semmle/python/Import.qll b/python/ql/lib/semmle/python/Import.qll index 5256403c8b9..d4f5109ed47 100644 --- a/python/ql/lib/semmle/python/Import.qll +++ b/python/ql/lib/semmle/python/Import.qll @@ -162,7 +162,6 @@ class ImportMember extends ImportMember_ { string getImportedModuleName() { result = this.getModule().(ImportExpr).getImportedModuleName() + "." + this.getName() } - } /** An import statement */ diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll index ba6d134f83b..06cada5a669 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll @@ -287,9 +287,7 @@ predicate isClassmethod(Function func) { /** Holds if the function `func` has a `property` decorator. */ overlay[local] -predicate hasPropertyDecorator(Function func) { - func.getADecorator().(Name).getId() = "property" -} +predicate hasPropertyDecorator(Function func) { func.getADecorator().(Name).getId() = "property" } /** * Holds if the function `func` has a `contextlib.contextmanager`. @@ -297,9 +295,11 @@ predicate hasPropertyDecorator(Function func) { overlay[local] predicate hasContextmanagerDecorator(Function func) { exists(Cfg::ControlFlowNode contextmanager | - contextmanager.(Cfg::NameNode).getId() = "contextmanager" and contextmanager.(Cfg::NameNode).isGlobal() + contextmanager.(Cfg::NameNode).getId() = "contextmanager" and + contextmanager.(Cfg::NameNode).isGlobal() or - contextmanager.(Cfg::AttrNode).getObject("contextmanager").(Cfg::NameNode).getId() = "contextlib" + contextmanager.(Cfg::AttrNode).getObject("contextmanager").(Cfg::NameNode).getId() = + "contextlib" | func.getADecorator() = contextmanager.getNode() ) @@ -1348,7 +1348,9 @@ predicate normalCallArg(Cfg::CallNode call, Node arg, ArgumentPosition apos) { * translated into `l.clear()`, and we can still have use-use flow. */ cached -predicate getCallArg(Cfg::CallNode call, Function target, CallType type, Node arg, ArgumentPosition apos) { +predicate getCallArg( + Cfg::CallNode call, Function target, CallType type, Node arg, ArgumentPosition apos +) { Stages::DataFlow::ref() and resolveCall(call, target, type) and ( @@ -1441,7 +1443,9 @@ private predicate sameEnclosingCallable(Node node1, Node node2) { // DataFlowCall // ============================================================================= newtype TDataFlowCall = - TNormalCall(Cfg::CallNode call, Function target, CallType type) { resolveCall(call, target, type) } or + TNormalCall(Cfg::CallNode call, Function target, CallType type) { + resolveCall(call, target, type) + } or /** A call to the generated function inside a comprehension */ TComprehensionCall(Comp c) or TPotentialLibraryCall(Cfg::CallNode call) or diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll index 8ed76588004..0a59b66f07f 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll @@ -37,7 +37,9 @@ newtype TNode = * A node corresponding to a scope entry definition. That is, the value of a variable * as it enters a scope. */ - TScopeEntryDefinitionNode(SsaImpl::ScopeEntryDefinition def) { not def.getScope() instanceof Module } or + TScopeEntryDefinitionNode(SsaImpl::ScopeEntryDefinition def) { + not def.getScope() instanceof Module + } or /** * A synthetic node representing the value of an object before a state change. * @@ -656,11 +658,15 @@ private predicate outcomeOfGuard( ) or // Recursive: comparisons against a boolean literal. - exists(Cfg::CompareNode cmpNode, Cmpop op, Cfg::ControlFlowNode otherOperand, - Cfg::ControlFlowNode guardOperand, boolean polarity, boolean cmpBranch + exists( + Cfg::CompareNode cmpNode, Cmpop op, Cfg::ControlFlowNode otherOperand, + Cfg::ControlFlowNode guardOperand, boolean polarity, boolean cmpBranch | guardOperand.getNode() = guard.getNode() and - (cmpNode.operands(guardOperand, op, otherOperand) or cmpNode.operands(otherOperand, op, guardOperand)) and + ( + cmpNode.operands(guardOperand, op, otherOperand) or + cmpNode.operands(otherOperand, op, guardOperand) + ) and not guard.getNode() instanceof BooleanLiteral and ( (op instanceof Eq or op instanceof Is) and @@ -692,7 +698,9 @@ module BarrierGuard { result = ParameterizedBarrierGuard::getABarrierNode(_) } - private predicate extendedGuardChecks(GuardNode g, Cfg::ControlFlowNode node, boolean branch, Unit u) { + private predicate extendedGuardChecks( + GuardNode g, Cfg::ControlFlowNode node, boolean branch, Unit u + ) { guardChecks(g, node, branch) and u = u } @@ -790,7 +798,11 @@ newtype TContent = or // d["key"] = ... key = - any(Cfg::SubscriptNode sub | sub.isStore() | sub.getIndex().getNode().(StringLiteral).getText()) + any(Cfg::SubscriptNode sub | + sub.isStore() + | + sub.getIndex().getNode().(StringLiteral).getText() + ) or // d.setdefault("key", ...) exists(Cfg::CallNode call | call.getFunction().(Cfg::AttrNode).getName() = "setdefault" | diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/IterableUnpacking.qll b/python/ql/lib/semmle/python/dataflow/new/internal/IterableUnpacking.qll index 0704763bd89..ac7200115ce 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/IterableUnpacking.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/IterableUnpacking.qll @@ -233,7 +233,8 @@ class UnpackingAssignmentTarget extends Cfg::ControlFlowNode { } /** A (possibly recursive) target of an unpacking assignment which is also a sequence. */ -class UnpackingAssignmentSequenceTarget extends UnpackingAssignmentTarget instanceof Cfg::SequenceNode { +class UnpackingAssignmentSequenceTarget extends UnpackingAssignmentTarget instanceof Cfg::SequenceNode +{ Cfg::ControlFlowNode getElement(int i) { result = super.getElement(i) } Cfg::ControlFlowNode getAnElement() { result = this.getElement(_) } diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll index 195d0c4da05..7fa1d4e573a 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll @@ -97,8 +97,7 @@ private module SummaryTypeTrackerInput implements SummaryTypeTracker::Input { return = FlowSummaryImpl::Private::SummaryComponent::return() and // `result` should be the return value of a callable expression (lambda or function) referenced by `callable` exists(Return ret | - ret.getScope() = - callable.getALocalSource().asExpr().(CallableExpr).getInnerScope() and + ret.getScope() = callable.getALocalSource().asExpr().(CallableExpr).getInnerScope() and result.asCfgNode().getNode() = ret.getValue() ) } @@ -311,7 +310,9 @@ module TypeTrackingInput implements Shared::TypeTrackingInput { // // nodeFrom is `expr` // nodeTo is entry node for `f` - exists(SsaImpl::ScopeEntryDefinition e, SsaImpl::SsaSourceVariable var, Cfg::DefinitionNode def | + exists( + SsaImpl::ScopeEntryDefinition e, SsaImpl::SsaSourceVariable var, Cfg::DefinitionNode def + | e.getSourceVariable() = var and def.getNode() = var.getVariable().getAStore() | diff --git a/python/ql/lib/semmle/python/dataflow/old/Implementation.qll b/python/ql/lib/semmle/python/dataflow/old/Implementation.qll index 00f44656372..6e1314ff9e9 100644 --- a/python/ql/lib/semmle/python/dataflow/old/Implementation.qll +++ b/python/ql/lib/semmle/python/dataflow/old/Implementation.qll @@ -448,8 +448,7 @@ class TaintTrackingImplementation extends string instanceof TaintTracking::Confi context = TNoParam() and src = TTaintTrackingNode_(retval, TNoParam(), path, kind, this) and node.asCfgNode() = call and - retval.asCfgNode().getNode() = - any(Return ret | ret.getScope() = pyfunc.getScope()).getValue() + retval.asCfgNode().getNode() = any(Return ret | ret.getScope() = pyfunc.getScope()).getValue() ) and edgeLabel = "return" } @@ -471,8 +470,7 @@ class TaintTrackingImplementation extends string instanceof TaintTracking::Confi this.callContexts(call, src, pyfunc, context, callee) and retnode = TTaintTrackingNode_(retval, callee, path, kind, this) and node.asCfgNode() = call and - retval.asCfgNode().getNode() = - any(Return ret | ret.getScope() = pyfunc.getScope()).getValue() + retval.asCfgNode().getNode() = any(Return ret | ret.getScope() = pyfunc.getScope()).getValue() ) and edgeLabel = "call" } diff --git a/python/ql/src/Exceptions/UnguardedNextInGenerator.ql b/python/ql/src/Exceptions/UnguardedNextInGenerator.ql index a6969218fdd..437a3f208df 100644 --- a/python/ql/src/Exceptions/UnguardedNextInGenerator.ql +++ b/python/ql/src/Exceptions/UnguardedNextInGenerator.ql @@ -12,6 +12,8 @@ import python private import semmle.python.ApiGraphs +private import semmle.python.controlflow.internal.Cfg as Cfg +private import semmle.python.Flow as Flow API::Node iter() { result = API::builtin("iter") } @@ -19,17 +21,17 @@ API::Node next() { result = API::builtin("next") } API::Node stopIteration() { result = API::builtin("StopIteration") } -predicate call_to_iter(CallNode call, EssaVariable sequence) { - call = iter().getACall().asCfgNode() and +predicate call_to_iter(Flow::CallNode call, EssaVariable sequence) { + call.getNode() = iter().getACall().asCfgNode().(Cfg::CallNode).getNode() and call.getArg(0) = sequence.getAUse() } -predicate call_to_next(CallNode call, ControlFlowNode iter) { - call = next().getACall().asCfgNode() and +predicate call_to_next(Flow::CallNode call, Flow::ControlFlowNode iter) { + call.getNode() = next().getACall().asCfgNode().(Cfg::CallNode).getNode() and call.getArg(0) = iter } -predicate call_to_next_has_default(CallNode call) { +predicate call_to_next_has_default(Flow::CallNode call) { exists(call.getArg(1)) or exists(call.getArgByName("default")) } @@ -49,14 +51,14 @@ predicate iter_not_exhausted(EssaVariable iterator) { ) } -predicate stop_iteration_handled(CallNode call) { +predicate stop_iteration_handled(Flow::CallNode call) { exists(Try t | t.containsInScope(call.getNode()) and t.getAHandler().getType() = stopIteration().getAValueReachableFromSource().asExpr() ) } -from CallNode call +from Flow::CallNode call where call_to_next(call, _) and not call_to_next_has_default(call) and diff --git a/python/ql/src/Expressions/CallArgs.qll b/python/ql/src/Expressions/CallArgs.qll index 1c354ad9f94..43782a903dc 100644 --- a/python/ql/src/Expressions/CallArgs.qll +++ b/python/ql/src/Expressions/CallArgs.qll @@ -116,9 +116,7 @@ FunctionValue get_function_or_initializer(Value func_or_cls) { predicate illegally_named_parameter_objectapi(Call call, Object func, string name) { not func.isC() and name = call.getANamedArgumentName() and - exists(ControlFlowNode callCfg | callCfg.getNode() = call | - callCfg = get_a_call_objectapi(func) - ) and + exists(ControlFlowNode callCfg | callCfg.getNode() = call | callCfg = get_a_call_objectapi(func)) and not get_function_or_initializer_objectapi(func).isLegalArgumentName(name) } diff --git a/python/ql/src/Expressions/DuplicateKeyInDictionaryLiteral.ql b/python/ql/src/Expressions/DuplicateKeyInDictionaryLiteral.ql index bc9fb968dbb..0c55a2ece58 100644 --- a/python/ql/src/Expressions/DuplicateKeyInDictionaryLiteral.ql +++ b/python/ql/src/Expressions/DuplicateKeyInDictionaryLiteral.ql @@ -41,7 +41,9 @@ where i1 < i2 ) or - exists(ControlFlowNode k1Cfg, ControlFlowNode k2Cfg | k1Cfg.getNode() = k1 and k2Cfg.getNode() = k2 | + exists(ControlFlowNode k1Cfg, ControlFlowNode k2Cfg | + k1Cfg.getNode() = k1 and k2Cfg.getNode() = k2 + | k1Cfg.getBasicBlock().strictlyDominates(k2Cfg.getBasicBlock()) ) ) diff --git a/python/ql/src/Expressions/Formatting/AdvancedFormatting.qll b/python/ql/src/Expressions/Formatting/AdvancedFormatting.qll index a860f96061f..a5e0379685a 100644 --- a/python/ql/src/Expressions/Formatting/AdvancedFormatting.qll +++ b/python/ql/src/Expressions/Formatting/AdvancedFormatting.qll @@ -98,7 +98,9 @@ private predicate brace_pair(PossibleAdvancedFormatString fmt, int start, int en } private predicate advanced_format_call(Call format_expr, PossibleAdvancedFormatString fmt, int args) { - exists(CallNode call, ControlFlowNode fmtCfg | call.getNode() = format_expr and fmtCfg.getNode() = fmt | + exists(CallNode call, ControlFlowNode fmtCfg | + call.getNode() = format_expr and fmtCfg.getNode() = fmt + | call.getFunction().(ControlFlowNodeWithPointsTo).pointsTo(Value::named("format")) and call.getArg(0).(ControlFlowNodeWithPointsTo).pointsTo(_, fmtCfg) and args = count(format_expr.getAnArg()) - 1 diff --git a/python/ql/src/Expressions/UseofApply.ql b/python/ql/src/Expressions/UseofApply.ql index f1068eca837..6fa5c981722 100644 --- a/python/ql/src/Expressions/UseofApply.ql +++ b/python/ql/src/Expressions/UseofApply.ql @@ -11,8 +11,9 @@ import python private import semmle.python.ApiGraphs +private import semmle.python.controlflow.internal.Cfg as Cfg -from CallNode call +from Cfg::CallNode call where major_version() = 2 and call = API::builtin("apply").getACall().asCfgNode() diff --git a/python/ql/src/Functions/SignatureOverriddenMethod.ql b/python/ql/src/Functions/SignatureOverriddenMethod.ql index 15b3fa70640..7ecced29c60 100644 --- a/python/ql/src/Functions/SignatureOverriddenMethod.ql +++ b/python/ql/src/Functions/SignatureOverriddenMethod.ql @@ -15,6 +15,7 @@ import python import semmle.python.dataflow.new.DataFlow import semmle.python.dataflow.new.internal.DataFlowDispatch +private import semmle.python.controlflow.internal.Cfg as Cfg import codeql.util.Option /** Holds if `base` is overridden by `sub` */ @@ -143,7 +144,7 @@ predicate ignore(Function f) { /** Gets a function that `call` may resolve to. */ Function resolveCall(Call call) { - exists(DataFlowCall dfc | call = dfc.getNode().(CallNode).getNode() | + exists(DataFlowCall dfc | call = dfc.getNode().(Cfg::CallNode).getNode() | result = viableCallable(dfc).(DataFlowFunction).getScope() ) } diff --git a/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll b/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll index 9d91e4f523c..c5c4795eecc 100644 --- a/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll +++ b/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll @@ -3,6 +3,7 @@ import python import semmle.python.dataflow.new.internal.DataFlowDispatch import semmle.python.ApiGraphs +private import semmle.python.controlflow.internal.Cfg as Cfg /** A CFG node where a file is opened. */ abstract class FileOpenSource extends DataFlow::CfgNode { } @@ -64,12 +65,14 @@ abstract class FileClose extends DataFlow::CfgNode { } } -private predicate bbSuccessor(BasicBlock src, BasicBlock sink) { sink = src.getASuccessor() } +private predicate bbSuccessor(Cfg::BasicBlock src, Cfg::BasicBlock sink) { + sink = src.getASuccessor() +} -private predicate bbReachableStrict(BasicBlock src, BasicBlock sink) = +private predicate bbReachableStrict(Cfg::BasicBlock src, Cfg::BasicBlock sink) = fastTC(bbSuccessor/2)(src, sink) -private predicate bbReachableRefl(BasicBlock src, BasicBlock sink) { +private predicate bbReachableRefl(Cfg::BasicBlock src, Cfg::BasicBlock sink) { bbReachableStrict(src, sink) or src = sink } diff --git a/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIs.qll b/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIs.qll index 9c12845a0ca..1aa1e74cf13 100644 --- a/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIs.qll +++ b/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIs.qll @@ -10,6 +10,7 @@ private import semmle.python.dataflow.new.RemoteFlowSources private import semmle.python.ApiGraphs private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate private import semmle.python.dataflow.new.internal.TaintTrackingPrivate as TaintTrackingPrivate +private import semmle.python.controlflow.internal.Cfg as Cfg /** * An external API that is considered "safe" from a security perspective. @@ -71,7 +72,7 @@ string apiNodeToStringRepr(API::Node node) { ) } -predicate resolvedCall(CallNode call) { +predicate resolvedCall(Cfg::CallNode call) { DataFlowPrivate::resolveCall(call, _, _) or DataFlowPrivate::resolveClassCall(call, _) } diff --git a/python/ql/src/Security/CWE-079/Jinja2WithoutEscaping.ql b/python/ql/src/Security/CWE-079/Jinja2WithoutEscaping.ql index fd03ba433a1..fb5ef283e65 100644 --- a/python/ql/src/Security/CWE-079/Jinja2WithoutEscaping.ql +++ b/python/ql/src/Security/CWE-079/Jinja2WithoutEscaping.ql @@ -14,6 +14,7 @@ import python import semmle.python.dataflow.new.DataFlow import semmle.python.ApiGraphs +private import semmle.python.controlflow.internal.Cfg as Cfg /* * Jinja 2 Docs: @@ -36,8 +37,8 @@ private API::Node jinja2EnvironmentOrTemplate() { from API::CallNode call where call = jinja2EnvironmentOrTemplate().getACall() and - not exists(call.asCfgNode().(CallNode).getNode().getStarargs()) and - not exists(call.asCfgNode().(CallNode).getNode().getKwargs()) and + not exists(call.asCfgNode().(Cfg::CallNode).getNode().getStarargs()) and + not exists(call.asCfgNode().(Cfg::CallNode).getNode().getKwargs()) and ( not exists(call.getArgByName("autoescape")) or diff --git a/python/ql/src/Security/CWE-327/PyOpenSSL.qll b/python/ql/src/Security/CWE-327/PyOpenSSL.qll index e2571195bfa..7ce3276598c 100644 --- a/python/ql/src/Security/CWE-327/PyOpenSSL.qll +++ b/python/ql/src/Security/CWE-327/PyOpenSSL.qll @@ -5,6 +5,7 @@ private import python private import semmle.python.ApiGraphs +private import semmle.python.controlflow.internal.Cfg as Cfg import TlsLibraryModel class PyOpenSslContextCreation extends ContextCreation, DataFlow::CallCfgNode { @@ -37,10 +38,10 @@ class ConnectionCall extends ConnectionCreation, DataFlow::CallCfgNode { // This cannot be used to unrestrict, // see https://www.pyopenssl.org/en/stable/api/ssl.html#OpenSSL.SSL.Context.set_options class SetOptionsCall extends ProtocolRestriction, DataFlow::CallCfgNode { - SetOptionsCall() { node.getFunction().(AttrNode).getName() = "set_options" } + SetOptionsCall() { node.getFunction().(Cfg::AttrNode).getName() = "set_options" } override DataFlow::CfgNode getContext() { - result.getNode() = node.getFunction().(AttrNode).getObject() + result.getNode() = node.getFunction().(Cfg::AttrNode).getObject() } override ProtocolVersion getRestriction() { diff --git a/python/ql/src/Security/CWE-327/Ssl.qll b/python/ql/src/Security/CWE-327/Ssl.qll index c3fd0366436..5f35590bacf 100644 --- a/python/ql/src/Security/CWE-327/Ssl.qll +++ b/python/ql/src/Security/CWE-327/Ssl.qll @@ -5,6 +5,7 @@ private import python private import semmle.python.ApiGraphs +private import semmle.python.controlflow.internal.Cfg as Cfg import TlsLibraryModel class SslContextCreation extends ContextCreation, DataFlow::CallCfgNode { @@ -53,7 +54,7 @@ class OptionsAugOr extends ProtocolRestriction, DataFlow::CfgNode { ProtocolVersion restriction; OptionsAugOr() { - exists(AugAssign aa, AttrNode attr, Expr flag | + exists(AugAssign aa, Cfg::AttrNode attr, Expr flag | aa.getOperation().getOp() instanceof BitOr and aa.getTarget() = attr.getNode() and attr.getName() = "options" and @@ -80,7 +81,7 @@ class OptionsAugAndNot extends ProtocolUnrestriction, DataFlow::CfgNode { ProtocolVersion restriction; OptionsAugAndNot() { - exists(AugAssign aa, AttrNode attr, Expr flag, UnaryExpr notFlag | + exists(AugAssign aa, Cfg::AttrNode attr, Expr flag, UnaryExpr notFlag | aa.getOperation().getOp() instanceof BitAnd and aa.getTarget() = attr.getNode() and attr.getName() = "options" and diff --git a/python/ql/src/Security/CWE-798/HardcodedCredentials.ql b/python/ql/src/Security/CWE-798/HardcodedCredentials.ql index ab21c106348..28b0e5da8c5 100644 --- a/python/ql/src/Security/CWE-798/HardcodedCredentials.ql +++ b/python/ql/src/Security/CWE-798/HardcodedCredentials.ql @@ -19,6 +19,7 @@ import semmle.python.filters.Tests private import semmle.python.dataflow.new.internal.DataFlowDispatch as DataFlowDispatch private import semmle.python.dataflow.new.internal.Builtins::Builtins as Builtins private import semmle.python.frameworks.data.ModelsAsData +private import semmle.python.controlflow.internal.Cfg as Cfg bindingset[char, fraction] predicate fewer_characters_than(StringLiteral str, string char, float fraction) { @@ -48,7 +49,7 @@ predicate capitalized_word(StringLiteral str) { str.getText().regexpMatch("[A-Z] predicate format_string(StringLiteral str) { str.getText().matches("%{%}%") } -predicate maybeCredential(ControlFlowNode f) { +predicate maybeCredential(Cfg::ControlFlowNode f) { /* A string that is not too short and unlikely to be text or an identifier. */ exists(StringLiteral str | str = f.getNode() | /* At least 10 characters */ @@ -94,9 +95,9 @@ class CredentialSink extends DataFlow::Node { this.(DataFlow::ArgumentNode).argumentOf(_, pos) ) or - exists(Keyword k | k.getArg() = name and this.getNode() = k.getValue().asCfgNode()) + exists(Keyword k | k.getArg() = name and this.asCfgNode().getNode() = k.getValue()) or - exists(CompareNode cmp, NameNode n | n.getId() = name | + exists(Cfg::CompareNode cmp, Cfg::NameNode n | n.getId() = name | cmp.operands(this.asCfgNode(), any(Eq eq), n) or cmp.operands(n, any(Eq eq), this.asCfgNode()) diff --git a/python/ql/src/Statements/ModificationOfLocals.ql b/python/ql/src/Statements/ModificationOfLocals.ql index f32ddcf7884..29e08f80776 100644 --- a/python/ql/src/Statements/ModificationOfLocals.ql +++ b/python/ql/src/Statements/ModificationOfLocals.ql @@ -13,24 +13,25 @@ import python private import semmle.python.ApiGraphs +private import semmle.python.controlflow.internal.Cfg as Cfg -predicate originIsLocals(ControlFlowNode n) { +predicate originIsLocals(Cfg::ControlFlowNode n) { API::builtin("locals").getReturn().getAValueReachableFromSource().asCfgNode() = n } -predicate modification_of_locals(ControlFlowNode f) { - originIsLocals(f.(SubscriptNode).getObject()) and +predicate modification_of_locals(Cfg::ControlFlowNode f) { + originIsLocals(f.(Cfg::SubscriptNode).getObject()) and (f.isStore() or f.isDelete()) or - exists(string mname, AttrNode attr | - attr = f.(CallNode).getFunction() and + exists(string mname, Cfg::AttrNode attr | + attr = f.(Cfg::CallNode).getFunction() and originIsLocals(attr.getObject(mname)) | mname in ["pop", "popitem", "update", "clear"] ) } -from AstNode a, ControlFlowNode f +from AstNode a, Cfg::ControlFlowNode f where modification_of_locals(f) and a = f.getNode() and diff --git a/python/ql/src/Statements/SideEffectInAssert.ql b/python/ql/src/Statements/SideEffectInAssert.ql index 55c34144dce..c58d3947424 100644 --- a/python/ql/src/Statements/SideEffectInAssert.ql +++ b/python/ql/src/Statements/SideEffectInAssert.ql @@ -14,6 +14,7 @@ import python private import semmle.python.ApiGraphs +private import semmle.python.controlflow.internal.Cfg as Cfg predicate func_with_side_effects(Expr e) { exists(string name | name = e.(Attribute).getName() or name = e.(Name).getId() | @@ -24,7 +25,7 @@ predicate func_with_side_effects(Expr e) { } predicate call_with_side_effect(Call e) { - exists(ControlFlowNode eCfg | eCfg.getNode() = e | + exists(Cfg::ControlFlowNode eCfg | eCfg.getNode() = e | eCfg = API::moduleImport("subprocess") .getMember(["call", "check_call", "check_output"]) diff --git a/python/ql/src/Statements/UseOfExit.ql b/python/ql/src/Statements/UseOfExit.ql index 2310a839f67..88a4f1ff777 100644 --- a/python/ql/src/Statements/UseOfExit.ql +++ b/python/ql/src/Statements/UseOfExit.ql @@ -13,8 +13,9 @@ import python private import semmle.python.ApiGraphs +private import semmle.python.controlflow.internal.Cfg as Cfg -from CallNode call, string name +from Cfg::CallNode call, string name where name = ["exit", "quit"] and call = API::builtin(name).getACall().asCfgNode() diff --git a/python/ql/src/Variables/LeakingListComprehension.ql b/python/ql/src/Variables/LeakingListComprehension.ql index 34bf26a3555..a9baa21661d 100644 --- a/python/ql/src/Variables/LeakingListComprehension.ql +++ b/python/ql/src/Variables/LeakingListComprehension.ql @@ -13,7 +13,8 @@ import python import Definition -from ListComprehensionDeclaration l, Name use, Name defn, ControlFlowNode lCfg, ControlFlowNode useCfg +from + ListComprehensionDeclaration l, Name use, Name defn, ControlFlowNode lCfg, ControlFlowNode useCfg where use = l.getALeakedVariableUse() and defn = l.getDefinition() and diff --git a/python/ql/src/experimental/Security/CWE-022bis/TarSlipImprov.ql b/python/ql/src/experimental/Security/CWE-022bis/TarSlipImprov.ql index 42c0bc170fd..5f49cb21880 100755 --- a/python/ql/src/experimental/Security/CWE-022bis/TarSlipImprov.ql +++ b/python/ql/src/experimental/Security/CWE-022bis/TarSlipImprov.ql @@ -21,6 +21,7 @@ import semmle.python.ApiGraphs import semmle.python.dataflow.new.internal.Attributes import semmle.python.dataflow.new.BarrierGuards import semmle.python.dataflow.new.RemoteFlowSources +private import semmle.python.controlflow.internal.Cfg as Cfg /** * Handle those three cases of Tarfile opens: @@ -75,8 +76,8 @@ private module TarSlipImprovConfig implements DataFlow::ConfigSig { call = atfo.getReturn().getMember("extractall").getACall() and arg = call.getArgByName("members") and if - arg.asCfgNode() instanceof NameConstantNode or - arg.asCfgNode() instanceof ListNode + arg.asCfgNode() instanceof Cfg::NameConstantNode or + arg.asCfgNode() instanceof Cfg::ListNode then sink = call.getObject() else if arg.(MethodCallNode).getMethodName() = "getmembers" diff --git a/python/ql/src/experimental/Security/CWE-340/TokenBuiltFromUUID.ql b/python/ql/src/experimental/Security/CWE-340/TokenBuiltFromUUID.ql index ab5a4243a74..f43b718289d 100644 --- a/python/ql/src/experimental/Security/CWE-340/TokenBuiltFromUUID.ql +++ b/python/ql/src/experimental/Security/CWE-340/TokenBuiltFromUUID.ql @@ -16,6 +16,7 @@ import python import semmle.python.dataflow.new.DataFlow import semmle.python.ApiGraphs import semmle.python.dataflow.new.TaintTracking +private import semmle.python.controlflow.internal.Cfg as Cfg class PredictableResultSource extends DataFlow::Node { PredictableResultSource() { @@ -32,7 +33,9 @@ class PredictableResultSource extends DataFlow::Node { class TokenAssignmentValueSink extends DataFlow::Node { TokenAssignmentValueSink() { exists(string name | name.toLowerCase().matches(["%token", "%code"]) | - exists(DefinitionNode n | n.getValue() = this.asCfgNode() | name = n.(NameNode).getId()) + exists(Cfg::DefinitionNode n | n.getValue() = this.asCfgNode() | + name = n.(Cfg::NameNode).getId() + ) or exists(DataFlow::AttrWrite aw | aw.getValue() = this | name = aw.getAttributeName()) ) diff --git a/python/ql/src/experimental/Security/CWE-346/CorsBypass.ql b/python/ql/src/experimental/Security/CWE-346/CorsBypass.ql index 01e661cb0bb..5ce58869a89 100644 --- a/python/ql/src/experimental/Security/CWE-346/CorsBypass.ql +++ b/python/ql/src/experimental/Security/CWE-346/CorsBypass.ql @@ -11,25 +11,25 @@ import python import semmle.python.ApiGraphs import semmle.python.dataflow.new.TaintTracking -import semmle.python.Flow import semmle.python.dataflow.new.RemoteFlowSources +private import semmle.python.controlflow.internal.Cfg as Cfg /** * Returns true if the control flow node may be useful in the current context. * * Ideally for more completeness, we should alert on every `startswith` call and every remote flow source which gets partailly checked. But, as this can lead to lots of FPs, we apply heuristics to filter some calls. This predicate provides logic for this filteration. */ -private predicate maybeInteresting(ControlFlowNode c) { +private predicate maybeInteresting(Cfg::ControlFlowNode c) { // Check if the name of the variable which calls the function matches the heuristic. // This would typically occur at the sink. // This should deal with cases like // `origin.startswith("bla")` - heuristics(c.(CallNode).getFunction().(AttrNode).getObject().(NameNode).getId()) + heuristics(c.(Cfg::CallNode).getFunction().(Cfg::AttrNode).getObject().(Cfg::NameNode).getId()) or // Check if the name of the variable passed as an argument to the functions matches the heuristic. This would typically occur at the sink. // This should deal with cases like // `bla.startswith(origin)` - heuristics(c.(CallNode).getArg(0).(NameNode).getId()) + heuristics(c.(Cfg::CallNode).getArg(0).(Cfg::NameNode).getId()) or // Check if the value gets written to any interesting variable. This would typically occur at the source. // This should deal with cases like @@ -37,8 +37,10 @@ private predicate maybeInteresting(ControlFlowNode c) { exists(Variable v | heuristics(v.getId()) | c.getASuccessor*().getNode() = v.getAStore()) } -private class StringStartswithCall extends ControlFlowNode { - StringStartswithCall() { this.(CallNode).getFunction().(AttrNode).getName() = "startswith" } +private class StringStartswithCall extends Cfg::ControlFlowNode { + StringStartswithCall() { + this.(Cfg::CallNode).getFunction().(Cfg::AttrNode).getName() = "startswith" + } } bindingset[s] @@ -66,8 +68,8 @@ module CorsBypassConfig implements DataFlow::ConfigSig { predicate isSink(DataFlow::Node node) { exists(StringStartswithCall s | - node.asCfgNode() = s.(CallNode).getArg(0) or - node.asCfgNode() = s.(CallNode).getFunction().(AttrNode).getObject() + node.asCfgNode() = s.(Cfg::CallNode).getArg(0) or + node.asCfgNode() = s.(Cfg::CallNode).getFunction().(Cfg::AttrNode).getObject() ) } diff --git a/python/ql/src/experimental/Security/CWE-770/UnicodeDoS.ql b/python/ql/src/experimental/Security/CWE-770/UnicodeDoS.ql index 61cdd34920d..84627456310 100644 --- a/python/ql/src/experimental/Security/CWE-770/UnicodeDoS.ql +++ b/python/ql/src/experimental/Security/CWE-770/UnicodeDoS.ql @@ -16,6 +16,7 @@ import semmle.python.Concepts import semmle.python.dataflow.new.TaintTracking import semmle.python.dataflow.new.internal.DataFlowPublic import semmle.python.dataflow.new.RemoteFlowSources +private import semmle.python.controlflow.internal.Cfg as Cfg // The Unicode compatibility normalization calls from unicodedata, unidecode, pyunormalize // and textnorm modules. The use of argIdx is to constraint the argument being normalized. @@ -52,8 +53,8 @@ class UnicodeCompatibilityNormalize extends API::CallNode { DataFlow::Node getPathArg() { result = this.getArg(argIdx) } } -predicate underAValue(DataFlow::GuardNode g, ControlFlowNode node, boolean branch) { - exists(CompareNode cn | cn = g | +predicate underAValue(DataFlow::GuardNode g, Cfg::ControlFlowNode node, boolean branch) { + exists(Cfg::CompareNode cn | cn = g | exists(API::CallNode lenCall, Cmpop op, Node n | lenCall = n.getALocalSource() and ( diff --git a/python/ql/src/experimental/Security/UnsafeUnpackQuery.qll b/python/ql/src/experimental/Security/UnsafeUnpackQuery.qll index 64da6b8d799..ecc029ca8a7 100644 --- a/python/ql/src/experimental/Security/UnsafeUnpackQuery.qll +++ b/python/ql/src/experimental/Security/UnsafeUnpackQuery.qll @@ -9,6 +9,7 @@ import semmle.python.ApiGraphs import semmle.python.dataflow.new.TaintTracking import semmle.python.frameworks.Stdlib import semmle.python.dataflow.new.RemoteFlowSources +private import semmle.python.controlflow.internal.Cfg as Cfg /** * Handle those three cases of Tarfile opens: @@ -111,8 +112,8 @@ module UnsafeUnpackConfig implements DataFlow::ConfigSig { call = atfo.getReturn().getMember("extractall").getACall() and arg = call.getArgByName("members") and if - arg.asCfgNode() instanceof NameConstantNode or - arg.asCfgNode() instanceof ListNode + arg.asCfgNode() instanceof Cfg::NameConstantNode or + arg.asCfgNode() instanceof Cfg::ListNode then sink = call.getObject() else if arg.(MethodCallNode).getMethodName() = "getmembers" diff --git a/python/ql/src/experimental/semmle/python/security/injection/CsvInjection.qll b/python/ql/src/experimental/semmle/python/security/injection/CsvInjection.qll index 859f6d1e5e8..d789007b8d5 100644 --- a/python/ql/src/experimental/semmle/python/security/injection/CsvInjection.qll +++ b/python/ql/src/experimental/semmle/python/security/injection/CsvInjection.qll @@ -4,6 +4,7 @@ import semmle.python.dataflow.new.DataFlow import semmle.python.dataflow.new.TaintTracking import semmle.python.dataflow.new.BarrierGuards import semmle.python.dataflow.new.RemoteFlowSources +private import semmle.python.controlflow.internal.Cfg as Cfg /** * A taint-tracking configuration for tracking untrusted user input used in file read. @@ -21,7 +22,7 @@ private module CsvInjectionConfig implements DataFlow::ConfigSig { predicate observeDiffInformedIncrementalMode() { any() } } -private predicate startsWithCheck(DataFlow::GuardNode g, ControlFlowNode node, boolean branch) { +private predicate startsWithCheck(DataFlow::GuardNode g, Cfg::ControlFlowNode node, boolean branch) { exists(DataFlow::MethodCallNode mc | g = mc.asCfgNode() and mc.calls(_, "startswith") and diff --git a/python/ql/src/meta/analysis-quality/CallGraphQuality.qll b/python/ql/src/meta/analysis-quality/CallGraphQuality.qll index 679b39ddc8d..1ac0896064b 100644 --- a/python/ql/src/meta/analysis-quality/CallGraphQuality.qll +++ b/python/ql/src/meta/analysis-quality/CallGraphQuality.qll @@ -6,6 +6,7 @@ import python import meta.MetaMetrics private import LegacyPointsTo +private import semmle.python.controlflow.internal.Cfg as Cfg newtype TTarget = TFunction(Function func) or @@ -50,7 +51,7 @@ class TargetClass extends Target, TClass { * A call that is (possibly) relevant for analysis quality. * See `IgnoredFile` for details on what is excluded. */ -class RelevantCall extends CallNode { +class RelevantCall extends Cfg::CallNode { RelevantCall() { not this.getLocation().getFile() instanceof IgnoredFile } } @@ -60,7 +61,7 @@ module PointsToBasedCallGraph { class ResolvableCall extends RelevantCall { Value targetValue; - ResolvableCall() { targetValue.getACall() = this } + ResolvableCall() { targetValue.getACall().getNode() = this.getNode() } /** Gets a resolved target of this call. */ Target getTarget() { diff --git a/python/ql/src/meta/analysis-quality/TTCallGraph.ql b/python/ql/src/meta/analysis-quality/TTCallGraph.ql index bdd63495191..4487095a6fb 100644 --- a/python/ql/src/meta/analysis-quality/TTCallGraph.ql +++ b/python/ql/src/meta/analysis-quality/TTCallGraph.ql @@ -8,8 +8,9 @@ import python import CallGraphQuality +private import semmle.python.controlflow.internal.Cfg as Cfg -from CallNode call, Target target +from Cfg::CallNode call, Target target where target.isRelevant() and call.(TypeTrackingBasedCallGraph::ResolvableCall).getTarget() = target diff --git a/python/ql/src/meta/analysis-quality/TTCallGraphMissing.ql b/python/ql/src/meta/analysis-quality/TTCallGraphMissing.ql index bb28c5bf804..188f483689f 100644 --- a/python/ql/src/meta/analysis-quality/TTCallGraphMissing.ql +++ b/python/ql/src/meta/analysis-quality/TTCallGraphMissing.ql @@ -8,8 +8,9 @@ import python import CallGraphQuality +private import semmle.python.controlflow.internal.Cfg as Cfg -from CallNode call, Target target +from Cfg::CallNode call, Target target where target.isRelevant() and call.(PointsToBasedCallGraph::ResolvableCall).getTarget() = target and diff --git a/python/ql/src/meta/analysis-quality/TTCallGraphNew.ql b/python/ql/src/meta/analysis-quality/TTCallGraphNew.ql index b9f1df54b3b..198007d0775 100644 --- a/python/ql/src/meta/analysis-quality/TTCallGraphNew.ql +++ b/python/ql/src/meta/analysis-quality/TTCallGraphNew.ql @@ -8,8 +8,9 @@ import python import CallGraphQuality +private import semmle.python.controlflow.internal.Cfg as Cfg -from CallNode call, Target target +from Cfg::CallNode call, Target target where target.isRelevant() and not call.(PointsToBasedCallGraph::ResolvableCall).getTarget() = target and diff --git a/python/ql/src/meta/analysis-quality/TTCallGraphNewAmbiguous.ql b/python/ql/src/meta/analysis-quality/TTCallGraphNewAmbiguous.ql index 702541ca16d..b14644f6585 100644 --- a/python/ql/src/meta/analysis-quality/TTCallGraphNewAmbiguous.ql +++ b/python/ql/src/meta/analysis-quality/TTCallGraphNewAmbiguous.ql @@ -8,8 +8,9 @@ import python import CallGraphQuality +private import semmle.python.controlflow.internal.Cfg as Cfg -from CallNode call, Target target +from Cfg::CallNode call, Target target where target.isRelevant() and not call.(PointsToBasedCallGraph::ResolvableCall).getTarget() = target and diff --git a/python/ql/src/meta/analysis-quality/TTCallGraphOverview.ql b/python/ql/src/meta/analysis-quality/TTCallGraphOverview.ql index 5a789d1be90..19660f92ec5 100644 --- a/python/ql/src/meta/analysis-quality/TTCallGraphOverview.ql +++ b/python/ql/src/meta/analysis-quality/TTCallGraphOverview.ql @@ -6,12 +6,13 @@ import python import CallGraphQuality +private import semmle.python.controlflow.internal.Cfg as Cfg from string tag, int c where tag = "SHARED" and c = - count(CallNode call, Target target | + count(Cfg::CallNode call, Target target | target.isRelevant() and call.(PointsToBasedCallGraph::ResolvableCall).getTarget() = target and call.(TypeTrackingBasedCallGraph::ResolvableCall).getTarget() = target @@ -19,7 +20,7 @@ where or tag = "NEW" and c = - count(CallNode call, Target target | + count(Cfg::CallNode call, Target target | target.isRelevant() and not call.(PointsToBasedCallGraph::ResolvableCall).getTarget() = target and call.(TypeTrackingBasedCallGraph::ResolvableCall).getTarget() = target @@ -27,7 +28,7 @@ where or tag = "MISSING" and c = - count(CallNode call, Target target | + count(Cfg::CallNode call, Target target | target.isRelevant() and call.(PointsToBasedCallGraph::ResolvableCall).getTarget() = target and not call.(TypeTrackingBasedCallGraph::ResolvableCall).getTarget() = target diff --git a/python/ql/src/meta/analysis-quality/TTCallGraphShared.ql b/python/ql/src/meta/analysis-quality/TTCallGraphShared.ql index d44d1ac497f..f8275b5a965 100644 --- a/python/ql/src/meta/analysis-quality/TTCallGraphShared.ql +++ b/python/ql/src/meta/analysis-quality/TTCallGraphShared.ql @@ -8,8 +8,9 @@ import python import CallGraphQuality +private import semmle.python.controlflow.internal.Cfg as Cfg -from CallNode call, Target target +from Cfg::CallNode call, Target target where target.isRelevant() and call.(PointsToBasedCallGraph::ResolvableCall).getTarget() = target and diff --git a/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll b/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll index 83ba4df4e29..d4cdc388b14 100644 --- a/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll +++ b/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll @@ -6,6 +6,7 @@ private import python private import semmle.python.dataflow.new.DataFlow private import semmle.python.dataflow.new.BarrierGuards +private import semmle.python.controlflow.internal.Cfg as Cfg /** * Provides default sources, sinks and sanitizers for detecting @@ -76,7 +77,7 @@ module ModificationOfParameterWithDefault { boolean nonEmpty; MutableDefaultValue() { - nonEmpty = mutableDefaultValue(this.asCfgNode().(NameNode).getNode()) and + nonEmpty = mutableDefaultValue(this.asCfgNode().(Cfg::NameNode).getNode()) and // Ignore sources inside the standard library. These are unlikely to be true positives. exists(this.getLocation().getFile().getRelativePath()) } @@ -125,10 +126,10 @@ module ModificationOfParameterWithDefault { class Mutation extends Sink { Mutation() { // assignment to a subscript (includes slices) - exists(DefinitionNode d | d.(SubscriptNode).getObject() = this.asCfgNode()) + exists(Cfg::DefinitionNode d | d.(Cfg::SubscriptNode).getObject() = this.asCfgNode()) or // deletion of a subscript - exists(DeletionNode d | d.getTarget().(SubscriptNode).getObject() = this.asCfgNode()) + exists(Cfg::DeletionNode d | d.(Cfg::SubscriptNode).getObject() = this.asCfgNode()) or // augmented assignment to the value exists(AugAssign a | this.asCfgNode().getNode() = a.getTarget()) @@ -141,54 +142,33 @@ module ModificationOfParameterWithDefault { } } - // This to reimplement some of the functionality of the DataFlow::BarrierGuard - private import semmle.python.essa.SsaCompute - /** - * A data-flow node that is known to be either truthy or falsey. + * Holds if `g` validates `node` as truthy when evaluating to `branch`. * - * It handles the cases `if x` and `if not x`. - * - * For example, in the following code, `this` will be the `x` that is printed, - * which we will know is truthy: - * - * ```py - * if x: - * print(x) - * ``` + * The new shared CFG's `GuardNode`/`outcomeOfGuard` already unwraps + * `not x` wrappers, so we only need the direct case: a guard `g` + * controls a block where the guarded value (also `g`) is known to + * have the matching truthiness for the taken branch. */ - private class MustBe extends DataFlow::Node { - boolean truthy; - - MustBe() { - exists(DataFlow::GuardNode guard, NameNode guarded, boolean branch | - // case: if x - guard = guarded and - branch = truthy - or - // case: if not x - guard.(UnaryExprNode).getNode().getOp() instanceof Not and - guarded = guard.(UnaryExprNode).getOperand() and - branch = truthy.booleanNot() - | - // guard controls this - guard.controlsBlock(this.asCfgNode().getBasicBlock(), branch) and - // there is a definition tying the guarded value to this - exists(EssaDefinition def | - AdjacentUses::useOfDef(def, this.asCfgNode()) and - AdjacentUses::useOfDef(def, guarded) - ) - ) - } + private predicate truthinessGuard(DataFlow::GuardNode g, Cfg::ControlFlowNode node, boolean branch) { + node = g and branch in [true, false] } /** Simple guard detecting truthy values. */ - private class MustBeTruthy extends MustBe, MustBeNonEmpty { - MustBeTruthy() { truthy = true } + private class MustBeTruthy extends MustBeNonEmpty { + MustBeTruthy() { + this = DataFlow::BarrierGuard::getABarrierNode() and + // truthy = true branch + exists(DataFlow::GuardNode g | g.controlsBlock(this.asCfgNode().getBasicBlock(), true)) + } } /** Simple guard detecting falsey values. */ - private class MustBeFalsey extends MustBe, MustBeEmpty { - MustBeFalsey() { truthy = false } + private class MustBeFalsey extends MustBeEmpty { + MustBeFalsey() { + this = DataFlow::BarrierGuard::getABarrierNode() and + // truthy = false branch + exists(DataFlow::GuardNode g | g.controlsBlock(this.asCfgNode().getBasicBlock(), false)) + } } } diff --git a/python/ql/test/library-tests/dataflow/regression/custom_dataflow.ql b/python/ql/test/library-tests/dataflow/regression/custom_dataflow.ql index 44350378b75..b89d2ddad23 100644 --- a/python/ql/test/library-tests/dataflow/regression/custom_dataflow.ql +++ b/python/ql/test/library-tests/dataflow/regression/custom_dataflow.ql @@ -12,7 +12,9 @@ private import semmle.python.controlflow.internal.Cfg as Cfg import semmle.python.dataflow.new.DataFlow module CustomTestConfig implements DataFlow::ConfigSig { - predicate isSource(DataFlow::Node node) { node.asCfgNode().(Cfg::NameNode).getId() = "CUSTOM_SOURCE" } + predicate isSource(DataFlow::Node node) { + node.asCfgNode().(Cfg::NameNode).getId() = "CUSTOM_SOURCE" + } predicate isSink(DataFlow::Node node) { exists(Cfg::CallNode call | diff --git a/python/ql/test/library-tests/dataflow/typetracking_imports/highlight_problem.ql b/python/ql/test/library-tests/dataflow/typetracking_imports/highlight_problem.ql index 68cc84998c2..cc8049e00d0 100644 --- a/python/ql/test/library-tests/dataflow/typetracking_imports/highlight_problem.ql +++ b/python/ql/test/library-tests/dataflow/typetracking_imports/highlight_problem.ql @@ -11,7 +11,10 @@ where m = v.getScope().getEnclosingModule() and not m.getName() in ["pkg.use", "pkg.foo_def"] and v.getName() = "foo" and - if exists(Cfg::ControlFlowNode exit | exit.isNormalExit() and exit.getScope() = m and v.getAUse() = exit) + if + exists(Cfg::ControlFlowNode exit | + exit.isNormalExit() and exit.getScope() = m and v.getAUse() = exit + ) then useToNormalExit = "use to normal exit" else useToNormalExit = "no use to normal exit" select m, v, useToNormalExit