Merge branch 'main' into azure_python_sdk_url_summary_upstream

This commit is contained in:
Ben Rodes
2026-02-02 09:00:35 -05:00
committed by GitHub
5287 changed files with 495117 additions and 207887 deletions

View File

@@ -0,0 +1 @@
import semmle.python.internal.OverlayDiscardConsistencyQuery

View File

@@ -8,7 +8,8 @@
*/
import python
private import LegacyPointsTo
from Expr e, string name
from ExprWithPointsTo e, string name
where e.pointsTo(Value::named(name)) and not name.charAt(_) = "."
select e

View File

@@ -7,6 +7,7 @@
*/
import python
private import LegacyPointsTo
from Value len, CallNode call
where len.getName() = "len" and len.getACall() = call

View File

@@ -8,9 +8,10 @@
*/
import python
private import LegacyPointsTo
from ExceptStmt ex, ClassValue cls
where
cls.getName() = "MyExceptionClass" and
ex.getType().pointsTo(cls)
ex.getType().(ExprWithPointsTo).pointsTo(cls)
select ex

View File

@@ -9,10 +9,11 @@
*/
import python
private import LegacyPointsTo
from IfExp e, ClassObject cls1, ClassObject cls2
where
e.getBody().refersTo(_, cls1, _) and
e.getOrelse().refersTo(_, cls2, _) and
e.getBody().(ExprWithPointsTo).refersTo(_, cls1, _) and
e.getOrelse().(ExprWithPointsTo).refersTo(_, cls2, _) and
cls1 != cls2
select e

View File

@@ -11,6 +11,7 @@
*/
import python
private import LegacyPointsTo
from ClassObject sub, ClassObject base
where

View File

@@ -7,6 +7,7 @@
*/
import python
private import LegacyPointsTo
from AstNode call, PythonFunctionValue method
where

View File

@@ -7,6 +7,7 @@
*/
import python
private import LegacyPointsTo
from FunctionObject m, FunctionObject n
where m != n and m.getACallee() = n and n.getACallee() = m

View File

@@ -8,9 +8,10 @@
*/
import python
private import LegacyPointsTo
from Call new, ClassValue cls
where
cls.getName() = "MyClass" and
new.getFunc().pointsTo(cls)
new.getFunc().(ExprWithPointsTo).pointsTo(cls)
select new

View File

@@ -7,6 +7,7 @@
*/
import python
private import LegacyPointsTo
from FunctionObject override, FunctionObject base
where

View File

@@ -6,6 +6,7 @@
*/
import python
private import LegacyPointsTo
from AstNode print
where
@@ -13,5 +14,5 @@ where
print instanceof Print
or
/* Python 3 or with `from __future__ import print_function` */
print.(Call).getFunc().pointsTo(Value::named("print"))
print.(Call).getFunc().(ExprWithPointsTo).pointsTo(Value::named("print"))
select print

View File

@@ -8,9 +8,10 @@
*/
import python
private import LegacyPointsTo
from Raise raise, ClassValue ex
where
ex.getName() = "AnException" and
raise.getException().pointsTo(ex.getASuperType())
raise.getException().(ExprWithPointsTo).pointsTo(ex.getASuperType())
select raise, "Don't raise instances of 'AnException'"

View File

@@ -7,6 +7,7 @@
*/
import python
private import LegacyPointsTo
from PythonFunctionValue f
where f.getACall().getScope() = f.getScope()

View File

@@ -10,9 +10,10 @@
*/
import python
private import LegacyPointsTo
from SubscriptNode store
where
store.isStore() and
store.getIndex().pointsTo(Value::named("None"))
store.getIndex().(ControlFlowNodeWithPointsTo).pointsTo(Value::named("None"))
select store

View File

@@ -87,6 +87,7 @@ ql/python/ql/src/experimental/Security/CWE-079/EmailXss.ql
ql/python/ql/src/experimental/Security/CWE-091/XsltInjection.ql
ql/python/ql/src/experimental/Security/CWE-094/Js2Py.ql
ql/python/ql/src/experimental/Security/CWE-1236/CsvInjection.ql
ql/python/ql/src/experimental/Security/CWE-1427/PromptInjection.ql
ql/python/ql/src/experimental/Security/CWE-176/UnicodeBypassValidation.ql
ql/python/ql/src/experimental/Security/CWE-208/TimingAttackAgainstHash/PossibleTimingAttackAgainstHash.ql
ql/python/ql/src/experimental/Security/CWE-208/TimingAttackAgainstHash/TimingAttackAgainstHash.ql

View File

@@ -13,8 +13,10 @@ ql/python/ql/src/Security/CWE-079/ReflectedXss.ql
ql/python/ql/src/Security/CWE-089/SqlInjection.ql
ql/python/ql/src/Security/CWE-090/LdapInjection.ql
ql/python/ql/src/Security/CWE-094/CodeInjection.ql
ql/python/ql/src/Security/CWE-1004/NonHttpOnlyCookie.ql
ql/python/ql/src/Security/CWE-113/HeaderInjection.ql
ql/python/ql/src/Security/CWE-116/BadTagFilter.ql
ql/python/ql/src/Security/CWE-1275/SameSiteNoneCookie.ql
ql/python/ql/src/Security/CWE-209/StackTraceExposure.ql
ql/python/ql/src/Security/CWE-215/FlaskDebug.ql
ql/python/ql/src/Security/CWE-285/PamAuthorization.ql

View File

@@ -106,9 +106,11 @@ ql/python/ql/src/Security/CWE-079/ReflectedXss.ql
ql/python/ql/src/Security/CWE-089/SqlInjection.ql
ql/python/ql/src/Security/CWE-090/LdapInjection.ql
ql/python/ql/src/Security/CWE-094/CodeInjection.ql
ql/python/ql/src/Security/CWE-1004/NonHttpOnlyCookie.ql
ql/python/ql/src/Security/CWE-113/HeaderInjection.ql
ql/python/ql/src/Security/CWE-116/BadTagFilter.ql
ql/python/ql/src/Security/CWE-117/LogInjection.ql
ql/python/ql/src/Security/CWE-1275/SameSiteNoneCookie.ql
ql/python/ql/src/Security/CWE-209/StackTraceExposure.ql
ql/python/ql/src/Security/CWE-215/FlaskDebug.ql
ql/python/ql/src/Security/CWE-285/PamAuthorization.ql

View File

@@ -16,9 +16,11 @@ ql/python/ql/src/Security/CWE-079/ReflectedXss.ql
ql/python/ql/src/Security/CWE-089/SqlInjection.ql
ql/python/ql/src/Security/CWE-090/LdapInjection.ql
ql/python/ql/src/Security/CWE-094/CodeInjection.ql
ql/python/ql/src/Security/CWE-1004/NonHttpOnlyCookie.ql
ql/python/ql/src/Security/CWE-113/HeaderInjection.ql
ql/python/ql/src/Security/CWE-116/BadTagFilter.ql
ql/python/ql/src/Security/CWE-117/LogInjection.ql
ql/python/ql/src/Security/CWE-1275/SameSiteNoneCookie.ql
ql/python/ql/src/Security/CWE-209/StackTraceExposure.ql
ql/python/ql/src/Security/CWE-215/FlaskDebug.ql
ql/python/ql/src/Security/CWE-285/PamAuthorization.ql

View File

@@ -1,3 +1,60 @@
## 6.0.0
### Breaking Changes
* All modules that depend on the points-to analysis have now been removed from the top level `python.qll` module. To access the points-to functionality, import the new `LegacyPointsTo` module. This also means that some predicates have been removed from various classes, for instance `Function.getFunctionObject()`. To access these predicates, import the `LegacyPointsTo` module and use the `FunctionWithPointsTo` class instead. Most cases follow this pattern, but there are a few exceptions:
* The `getLiteralObject` method on `ImmutableLiteral` subclasses has been replaced with a predicate `getLiteralObject(ImmutableLiteral l)` in the `LegacyPointsTo` module.
* The `getMetrics` method on `Function`, `Class`, and `Module` has been removed. To access metrics, import `LegacyPointsTo` and use the classes `FunctionMetrics`, etc. instead.
### New Features
* The extractor now supports the new, relaxed syntax `except A, B, C: ...` (which would previously have to be written as `except (A, B, C): ...`) as defined in [PEP-758](https://peps.python.org/pep-0758/). This may cause changes in results for code that uses Python 2-style exception binding (`except Foo, e: ...`). The more modern format, `except Foo as e: ...` (available since Python 2.6) is unaffected.
* The Python extractor now supports template strings as defined in [PEP-750](https://peps.python.org/pep-0750/), through the classes `TemplateString` and `JoinedTemplateString`.
### Minor Analysis Improvements
* When a code-scanning configuration specifies the `paths:` and/or `paths-ignore:` settings, these are now taken into account by the Python extractor's search for YAML files.
* The `compression.zstd` library (added in Python 3.14) is now supported by the `py/decompression-bomb` query.
* Added taint flow model and type model for `urllib.parse`.
* Remote flow sources for the `python-socketio` package have been modeled.
* Additional models for remote flow sources for `tornado.websocket.WebSocketHandler` have been added.
## 5.0.4
No user-facing changes.
## 5.0.3
No user-facing changes.
## 5.0.2
No user-facing changes.
## 5.0.1
### Bug Fixes
- Fixed a bug in the Python extractor's import handling where failing to find an import in `find_module` would cause a `KeyError` to be raised. (Contributed by @akoeplinger.)
## 5.0.0
### Breaking Changes
- The classes `ControlFlowNode`, `Expr`, and `Module` no longer expose predicates that invoke the points-to analysis. To access these predicates, import the module `LegacyPointsTo` and follow the instructions given therein.
## 4.1.0
### New Features
* Initial support for incremental Python databases via `codeql database create --overlay-base`/`--overlay-changes`.
## 4.0.17
### Bug Fixes
* The Python extractor no longer crashes with an `ImportError` when run using Python 3.14.
## 4.0.16
### Minor Analysis Improvements

View File

@@ -0,0 +1,432 @@
/**
* DEPRECATED: Using the methods in this module may lead to a degradation of performance. Use at
* your own peril.
*
* This module contains legacy points-to predicates and methods for various classes in the
* points-to analysis.
*
* Existing code that depends on, say, points-to predicates on `ControlFlowNode` should be modified
* to use `ControlFlowNodeWithPointsTo` instead. In particular, if inside a method call chain such
* as
*
* `someCallNode.getFunction().pointsTo(...)`
*
* an explicit cast should be added as follows
*
* `someCallNode.getFunction().(ControlFlowNodeWithPointsTo).pointsTo(...)`
*
* Similarly, if a bound variable has type `ControlFlowNode`, and a points-to method is called on
* it, the type should be changed to `ControlFlowNodeWithPointsTo`.
*/
private import python
import semmle.python.pointsto.Base
import semmle.python.pointsto.Context
import semmle.python.pointsto.PointsTo
import semmle.python.pointsto.PointsToContext
import semmle.python.objects.ObjectAPI
import semmle.python.objects.ObjectInternal
import semmle.python.types.Object
import semmle.python.types.ClassObject
import semmle.python.types.FunctionObject
import semmle.python.types.ModuleObject
import semmle.python.types.Exceptions
import semmle.python.types.Properties
import semmle.python.types.Descriptors
import semmle.python.SelfAttribute
import semmle.python.Metrics
/**
* An extension of `ControlFlowNode` that provides points-to predicates.
*
* All predicates here are thin wrappers around `PointsTo::pointsTo` and
* `PointsTo::points_to`, or around the reachability helpers defined in this module.
*/
class ControlFlowNodeWithPointsTo extends ControlFlowNode {
/** Holds if this control-flow node points to `value`, for some context and some origin. */
predicate pointsTo(Value value) { this.pointsTo(_, value, _) }
/** Gets a value that this control-flow node points to, for some context and some origin. */
Value pointsTo() { this.pointsTo(_, result, _) }
/** Gets a value that this control-flow node may point to. Defined identically to `pointsTo()`. */
Value inferredValue() { this.pointsTo(_, result, _) }
/** Holds if this control-flow node points to `value`, which comes from `origin`, for some context. */
predicate pointsTo(Value value, ControlFlowNode origin) { this.pointsTo(_, value, origin) }
/**
* Holds if this control-flow node points to `value`, which comes from `origin`, in the given
* `context`. Delegates directly to `PointsTo::pointsTo`.
*/
predicate pointsTo(Context context, Value value, ControlFlowNode origin) {
PointsTo::pointsTo(this, context, value, origin)
}
/**
* Holds if this flow node might "refer-to" `obj`, of class `cls`, which comes from `origin`.
* Performs a combination of localized (intra-procedural) points-to
* analysis and global module-level analysis. This points-to analysis favours precision over recall. It is highly
* precise, but may not provide information for a significant number of flow-nodes.
* If the class is unimportant then use `refersTo(obj)` or `refersTo(obj, origin)` instead.
*/
pragma[nomagic]
predicate refersTo(Object obj, ClassObject cls, ControlFlowNode origin) {
this.refersTo(_, obj, cls, origin)
}
/**
* Holds if this flow node might "refer-to" `obj`, of class `cls`, which comes from `origin`,
* in the given `context`. Results where `obj` is the unknown value or `cls` is the unknown
* type are excluded.
*/
pragma[nomagic]
predicate refersTo(Context context, Object obj, ClassObject cls, ControlFlowNode origin) {
not obj = unknownValue() and
not cls = theUnknownType() and
PointsTo::points_to(this, context, obj, cls, origin)
}
/**
* Holds if this flow node might "refer-to" `obj`, which comes from `origin`.
* Unlike `this.refersTo(obj, _, origin)` this predicate includes results
* where the class cannot be inferred.
*/
pragma[nomagic]
predicate refersTo(Object obj, ControlFlowNode origin) {
not obj = unknownValue() and
PointsTo::points_to(this, _, obj, _, origin)
}
/** Holds if this flow node might "refer-to" `obj`. Equivalent to `this.refersTo(obj, _)`. */
predicate refersTo(Object obj) { this.refersTo(obj, _) }
/**
* Holds if this control-flow node has complete points-to information.
* This would mean that the analysis managed to infer an overapproximation
* of possible values at runtime.
*/
predicate hasCompletePointsToSet() {
// If the tracking failed, then `this` will be its own "origin". In that
// case, we want to exclude nodes for which there is also a different
// origin, as that would indicate that some paths failed and some did not.
this.refersTo(_, _, this) and
not exists(ControlFlowNode other | other != this and this.refersTo(_, _, other))
or
// If `this` is a use of a variable, then we must have complete points-to
// for that variable.
exists(SsaVariable v | v.getAUse() = this | varHasCompletePointsToSet(v))
}
/** Holds if it is unlikely that this control-flow node can be reached. */
predicate unlikelyReachable() {
// The start of the enclosing basic block is itself not likely to be reachable.
not start_bb_likely_reachable(this.getBasicBlock())
or
exists(BasicBlock b |
start_bb_likely_reachable(b) and
not end_bb_likely_reachable(b) and
// If there is an unlikely successor edge earlier in the BB
// than this node, then this node must be unreachable.
exists(ControlFlowNode p, int i, int j |
p.(RaisingNode).unlikelySuccessor(_) and
p = b.getNode(i) and
this = b.getNode(j) and
i < j
)
)
}
}
/**
 * Holds if the points-to information for the SSA variable `var` is complete,
 * that is, the analysis managed to infer an overapproximation of the values
 * that `var` may hold at runtime.
 */
private predicate varHasCompletePointsToSet(SsaVariable var) {
  // Global variables never qualify: they may be modified non-locally or concurrently.
  not var.getVariable() instanceof GlobalVariable and
  (
    // Complete information on the value assigned by the definition carries
    // over to the variable itself.
    exists(ControlFlowNodeWithPointsTo definedValue |
      definedValue = var.getDefinition().(DefinitionNode).getValue()
    |
      definedValue.hasCompletePointsToSet()
    )
    or
    // A phi output is complete when it has at least one phi input and every
    // one of its phi inputs is complete.
    forex(SsaVariable input | input = var.getAPhiInput() | varHasCompletePointsToSet(input))
  )
}
/**
 * Holds if the start of basic block `b` is likely to be reachable: either `b`
 * contains the entry node of some scope, or `b` has a predecessor whose end is
 * likely reachable and whose last node does not have `b` as an unlikely successor.
 */
private predicate start_bb_likely_reachable(BasicBlock b) {
  b.getNode(_) = any(Scope s).getEntryNode()
  or
  exists(BasicBlock pred | pred = b.getAPredecessor() |
    not pred.getLastNode().(RaisingNode).unlikelySuccessor(b) and
    end_bb_likely_reachable(pred)
  )
}
/**
 * Holds if the end of basic block `b` is likely to be reachable: its start is
 * likely reachable, and no node of `b` other than the last one has an unlikely
 * successor that is also a node of `b`.
 */
private predicate end_bb_likely_reachable(BasicBlock b) {
  not exists(RaisingNode raiser, ControlFlowNode next |
    raiser = b.getNode(_) and
    next = b.getNode(_) and
    not raiser = b.getLastNode() and
    raiser.unlikelySuccessor(next)
  ) and
  start_bb_likely_reachable(b)
}
/**
 * A `BasicBlock` extended with predicates that rely on points-to related
 * (type inference) results.
 */
class BasicBlockWithPointsTo extends BasicBlock {
  /**
   * Holds if, as inferred by type inference, it is highly unlikely (or
   * impossible) for control to flow from this basic block to `succ`.
   */
  predicate unlikelySuccessor(BasicBlockWithPointsTo succ) {
    // The end of this block is not likely reachable, so no successor edge is likely.
    succ = this.getASuccessor() and
    not end_bb_likely_reachable(this)
    or
    // The last node of this block marks the first node of `succ` as an unlikely successor.
    exists(RaisingNode last | last = this.getLastNode() |
      last.unlikelySuccessor(succ.firstNode())
    )
  }

  /** Holds if, as inferred by type inference, this basic block is likely to be reachable. */
  predicate likelyReachable() { start_bb_likely_reachable(this) }
}
/**
* An extension of `Expr` that provides points-to predicates.
*
* Every predicate here is answered by delegating to the corresponding predicate on
* `ControlFlowNodeWithPointsTo`, applied to a flow node of this expression.
*/
class ExprWithPointsTo extends Expr {
/**
* NOTE: `refersTo` was slated for deprecation in 2019. Use `pointsTo` instead.
* Holds if this expression might "refer-to" `obj`, of class `cls`, which comes from `origin`.
* Performs a combination of localized (intra-procedural) points-to
* analysis and global module-level analysis. This points-to analysis favours precision over recall. It is highly
* precise, but may not provide information for a significant number of flow-nodes.
* If the class is unimportant then use `refersTo(obj)` or `refersTo(obj, origin)` instead.
* NOTE: For complex dataflow, involving multiple stages of points-to analysis, it may be more precise to use
* `ControlFlowNodeWithPointsTo.refersTo(...)` instead.
*/
predicate refersTo(Object obj, ClassObject cls, AstNode origin) {
this.refersTo(_, obj, cls, origin)
}
/**
* NOTE: `refersTo` was slated for deprecation in 2019. Use `pointsTo` instead.
* Holds if this expression might "refer-to" `obj`, of class `cls`, which comes from `origin`,
* in the given `context`. Holds when some flow node of this expression refers to `obj`
* with some flow node of `origin` as its origin.
*/
predicate refersTo(Context context, Object obj, ClassObject cls, AstNode origin) {
this.getAFlowNode()
.(ControlFlowNodeWithPointsTo)
.refersTo(context, obj, cls, origin.getAFlowNode())
}
/**
* NOTE: `refersTo` was slated for deprecation in 2019. Use `pointsTo` instead.
* Holds if this expression might "refer-to" `obj`, which comes from `origin`.
* Unlike `this.refersTo(obj, _, origin)`, this predicate includes results
* where the class cannot be inferred.
*/
pragma[nomagic]
predicate refersTo(Object obj, AstNode origin) {
this.getAFlowNode().(ControlFlowNodeWithPointsTo).refersTo(obj, origin.getAFlowNode())
}
/**
* NOTE: `refersTo` was slated for deprecation in 2019. Use `pointsTo` instead.
* Holds if this expression might "refer-to" `obj`. Equivalent to `this.refersTo(obj, _)`.
*/
predicate refersTo(Object obj) { this.refersTo(obj, _) }
/**
* Holds if this expression might "point-to" `value`, which comes from `origin`,
* in the given `context`.
*/
predicate pointsTo(Context context, Value value, AstNode origin) {
this.getAFlowNode()
.(ControlFlowNodeWithPointsTo)
.pointsTo(context, value, origin.getAFlowNode())
}
/**
* Holds if this expression might "point-to" `value`, which comes from `origin`.
*/
predicate pointsTo(Value value, AstNode origin) {
this.getAFlowNode().(ControlFlowNodeWithPointsTo).pointsTo(value, origin.getAFlowNode())
}
/**
* Holds if this expression might "point-to" `value`, from some origin.
*/
predicate pointsTo(Value value) { this.pointsTo(value, _) }
/** Gets a value that this expression might "point-to". */
Value pointsTo() { this.pointsTo(result) }
// Overridden to have no result.
// NOTE(review): presumably this keeps the helper class out of `getAQlClass` output - confirm.
override string getAQlClass() { none() }
}
/**
 * A `Module` extended with predicates that rely on points-to related results.
 */
class ModuleWithPointsTo extends Module {
  /**
   * Gets a name exported by this module, that is, a name that will be added
   * to a namespace by 'from this-module import *'.
   */
  string getAnExport() {
    // Exports found through the module object whose source is this module's entry node.
    exists(ModuleObjectInternal mod |
      mod.getSource() = this.getEntryNode() and
      mod.(ModuleValue).exports(result)
    )
    or
    // Exports recorded in the `py_exports` relation.
    py_exports(this, result)
  }

  // Overridden to have no result.
  override string getAQlClass() { none() }
}
/**
 * A `Function` extended with predicates that rely on points-to related results.
 */
class FunctionWithPointsTo extends Function {
  /** Gets the `FunctionObject` whose origin is the definition of this function. */
  FunctionObject getFunctionObject() { this.getDefinition() = result.getOrigin() }

  // Overridden to have no result.
  override string getAQlClass() { none() }
}
/**
 * A `Class` extended with predicates that rely on points-to related results.
 */
class ClassWithPointsTo extends Class {
  /** Gets the `ClassObject` whose origin is the parent node of this class. */
  ClassObject getClassObject() { this.getParent() = result.getOrigin() }

  // Overridden to have no result.
  override string getAQlClass() { none() }
}
/** Gets the `Object` that corresponds to the immutable literal `l`. */
Object getLiteralObject(ImmutableLiteral l) {
  // Integer literals: the object may be typed as either `int` or `long`.
  l instanceof IntegerLiteral and
  (py_cobjecttypes(result, theIntType()) or py_cobjecttypes(result, theLongType())) and
  py_cobjectnames(result, l.(Num).getN())
  or
  // Float literals.
  l instanceof FloatLiteral and
  py_cobjectnames(result, l.(Num).getN()) and
  py_cobjecttypes(result, theFloatType())
  or
  // Imaginary literals are typed as `complex`.
  l instanceof ImaginaryLiteral and
  py_cobjectnames(result, l.(Num).getN()) and
  py_cobjecttypes(result, theComplexType())
  or
  // Negative integer literals: the object name is "-" followed by the operand's text.
  l instanceof NegativeIntegerLiteral and
  py_cobjectnames(result, "-" + l.(UnaryExpr).getOperand().(IntegerLiteral).getN()) and
  (py_cobjecttypes(result, theIntType()) or py_cobjecttypes(result, theLongType()))
  or
  // Byte-string literals are matched on their quoted form.
  exists(Bytes bytes | bytes = l |
    py_cobjecttypes(result, theBytesType()) and
    py_cobjectnames(result, bytes.quotedString())
  )
  or
  // Unicode string literals are matched on their quoted form.
  exists(Unicode unicode | unicode = l |
    py_cobjecttypes(result, theUnicodeType()) and
    py_cobjectnames(result, unicode.quotedString())
  )
  or
  // The three named constants map onto their singleton objects.
  l instanceof True and
  result = theTrueObject() and
  name_consts(l, "True")
  or
  l instanceof False and
  result = theFalseObject() and
  name_consts(l, "False")
  or
  l instanceof None and
  result = theNoneObject() and
  name_consts(l, "None")
}
/** Holds if some module named `gettext` exists. */
private predicate gettext_installed() {
  // A cheap approximation: only the presence of a module with this name is checked.
  any(Module m).getName() = "gettext"
}
/**
 * Holds if `name` is treated as a builtin constant: it is `WindowsError`,
 * it is `_` while `gettext_installed()` holds, or `Object::builtin(name)` exists.
 */
private predicate builtin_constant(string name) {
  name = "WindowsError"
  or
  gettext_installed() and name = "_"
  or
  exists(Object::builtin(name))
}
/**
 * Holds if `name` is (almost) always defined, that is, it is a builtin or a
 * VM-defined name.
 */
predicate globallyDefinedName(string name) {
  auto_name(name)
  or
  builtin_constant(name)
}
/** Holds if `name` is one of `__file__`, `__builtins__` or `__name__`. */
private predicate auto_name(string name) { name = ["__file__", "__builtins__", "__name__"] }
/** An extension of `SsaVariable` that provides points-to related methods. */
class SsaVariableWithPointsTo extends SsaVariable {
/** Gets an argument of the phi function defining this variable, pruned of unlikely edges. */
SsaVariable getAPrunedPhiInput() {
result = this.getAPhiInput() and
// Keep the input only if the last node of its incoming block does not mark
// this variable's definition as an unlikely successor.
exists(BasicBlock incoming | incoming = this.getPredecessorBlockForPhiArgument(result) |
not incoming.getLastNode().(RaisingNode).unlikelySuccessor(this.getDefinition())
)
}
/** Gets the incoming edges for a Phi node, pruned of unlikely edges. */
private BasicBlockWithPointsTo getAPrunedPredecessorBlockForPhi() {
result = this.getAPredecessorBlockForPhi() and
not result.unlikelySuccessor(this.getDefinition().getBasicBlock())
}
/**
* Holds if this variable has no definition and is not a phi node, but belongs to a
* global variable whose name satisfies `globallyDefinedName`, or which is named
* `__path__` and whose scope is a module for which `isPackageInit()` holds.
*/
private predicate implicitlyDefined() {
not exists(this.getDefinition()) and
not py_ssa_phi(this, _) and
exists(GlobalVariable var | this.getVariable() = var |
globallyDefinedName(var.getId())
or
var.getId() = "__path__" and var.getScope().(Module).isPackageInit()
)
}
/** Holds if this variable may be undefined. */
predicate maybeUndefined() {
// No definition, not a phi node, and not implicitly defined.
not exists(this.getDefinition()) and not py_ssa_phi(this, _) and not this.implicitlyDefined()
or
// The definition is a deletion.
this.getDefinition().isDelete()
or
// Some pruned phi input may itself be undefined.
exists(SsaVariableWithPointsTo var | var = this.getAPrunedPhiInput() | var.maybeUndefined())
or
/*
* For phi-nodes, there must be a corresponding phi-input for each control-flow
* predecessor. Otherwise, the variable will be undefined on that incoming edge.
* WARNING: the same phi-input may cover multiple predecessors, so this check
* cannot be done by counting.
*/
exists(BasicBlock incoming |
reaches_end(incoming) and
incoming = this.getAPrunedPredecessorBlockForPhi() and
not this.getAPhiInput().getDefinition().getBasicBlock().dominates(incoming)
)
}
// Overridden to have no result.
// NOTE(review): presumably this keeps the helper class out of `getAQlClass` output - confirm.
override string getAQlClass() { none() }
}
/**
 * Holds if basic block `b` does not exit early and either has no predecessor
 * block or has some predecessor block for which `reaches_end` holds.
 */
private predicate reaches_end(BasicBlock b) {
  not exits_early(b) and
  (
    // `b` has no predecessor, so it is an entry point.
    not exists(BasicBlock pred | b = pred.getASuccessor())
    or
    // Some predecessor of `b` itself reaches its end.
    exists(BasicBlock pred | b = pred.getASuccessor() and reaches_end(pred))
  )
}
/** Holds if basic block `b` contains a call to a function that never returns. */
private predicate exits_early(BasicBlock b) {
  exists(FunctionObject callee | callee.neverReturns() | b = callee.getACall().getBasicBlock())
}

View File

@@ -3,7 +3,8 @@
*/
import python
import semmle.python.pointsto.PointsTo
private import LegacyPointsTo
private import semmle.python.types.ImportTime
import IDEContextual
private newtype TDefinition =
@@ -36,22 +37,22 @@ private predicate jump_to_defn(ControlFlowNode use, Definition defn) {
)
or
exists(PythonModuleObject mod |
use.(ImportExprNode).refersTo(mod) and
use.(ImportExprNode).(ControlFlowNodeWithPointsTo).refersTo(mod) and
defn.getAstNode() = mod.getModule()
)
or
exists(PythonModuleObject mod, string name |
use.(ImportMemberNode).getModule(name).refersTo(mod) and
use.(ImportMemberNode).getModule(name).(ControlFlowNodeWithPointsTo).refersTo(mod) and
scope_jump_to_defn_attribute(mod.getModule(), name, defn)
)
or
exists(PackageObject package |
use.(ImportExprNode).refersTo(package) and
use.(ImportExprNode).(ControlFlowNodeWithPointsTo).refersTo(package) and
defn.getAstNode() = package.getInitModule().getModule()
)
or
exists(PackageObject package, string name |
use.(ImportMemberNode).getModule(name).refersTo(package) and
use.(ImportMemberNode).getModule(name).(ControlFlowNodeWithPointsTo).refersTo(package) and
scope_jump_to_defn_attribute(package.getInitModule().getModule(), name, defn)
)
or
@@ -230,7 +231,7 @@ private predicate module_and_name_for_import_star_helper(
ModuleObject mod, string name, ImportStarNode im_star, ImportStarRefinement def
) {
im_star = def.getDefiningNode() and
im_star.getModule().refersTo(mod) and
im_star.getModule().(ControlFlowNodeWithPointsTo).refersTo(mod) and
name = def.getSourceVariable().getName()
}
@@ -239,7 +240,7 @@ pragma[noinline]
private predicate variable_not_redefined_by_import_star(EssaVariable var, ImportStarRefinement def) {
var = def.getInput() and
exists(ModuleObject mod |
def.getDefiningNode().(ImportStarNode).getModule().refersTo(mod) and
def.getDefiningNode().(ImportStarNode).getModule().(ControlFlowNodeWithPointsTo).refersTo(mod) and
not mod.exports(var.getSourceVariable().getName())
)
}
@@ -352,7 +353,9 @@ private predicate scope_jump_to_defn_attribute(ImportTimeScope s, string name, D
)
}
private predicate jump_to_defn_attribute(ControlFlowNode use, string name, Definition defn) {
private predicate jump_to_defn_attribute(
ControlFlowNodeWithPointsTo use, string name, Definition defn
) {
/* Local attribute */
exists(EssaVariable var |
use = var.getASourceUse() and
@@ -367,7 +370,7 @@ private predicate jump_to_defn_attribute(ControlFlowNode use, string name, Defin
/* Super attributes */
exists(AttrNode f, SuperBoundMethod sbm, Object function |
use = f.getObject(name) and
f.refersTo(sbm) and
f.(ControlFlowNodeWithPointsTo).refersTo(sbm) and
function = sbm.getFunction(_) and
function.getOrigin() = defn.getAstNode()
)
@@ -408,7 +411,7 @@ private predicate attribute_assignment_jump_to_defn_attribute(
private predicate sets_attribute(ArgumentRefinement def, string name) {
exists(CallNode call |
call = def.getDefiningNode() and
call.getFunction().refersTo(Object::builtin("setattr")) and
call.getFunction().(ControlFlowNodeWithPointsTo).refersTo(Object::builtin("setattr")) and
def.getInput().getAUse() = call.getArg(0) and
call.getArg(1).getNode().(StringLiteral).getText() = name
)

View File

@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* Remote flow sources for the `websockets` package have been modeled.

View File

@@ -0,0 +1,5 @@
---
category: minorAnalysis
---
* Added experimental query `py/prompt-injection` to detect potential prompt injection vulnerabilities in code using LLMs.
* Added taint flow model and type model for `agents` and `openai` modules.

View File

@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* The predicate `SummarizedCallable.propagatesFlow` has been extended with the columns `Provenance p` and `boolean isExact`, and as a consequence the predicates `SummarizedCallable.hasProvenance` and `SummarizedCallable.hasExactModel` have been removed.

View File

@@ -0,0 +1,4 @@
---
category: feature
---
* It is now possible to refer to list elements in the Python models-as-data language, via the `ListElement` path.

View File

@@ -0,0 +1,5 @@
## 4.0.17
### Bug Fixes
* The Python extractor no longer crashes with an `ImportError` when run using Python 3.14.

View File

@@ -0,0 +1,5 @@
## 4.1.0
### New Features
* Initial support for incremental Python databases via `codeql database create --overlay-base`/`--overlay-changes`.

View File

@@ -0,0 +1,5 @@
## 5.0.0
### Breaking Changes
- The classes `ControlFlowNode`, `Expr`, and `Module` no longer expose predicates that invoke the points-to analysis. To access these predicates, import the module `LegacyPointsTo` and follow the instructions given therein.

View File

@@ -0,0 +1,5 @@
## 5.0.1
### Bug Fixes
- Fixed a bug in the Python extractor's import handling where failing to find an import in `find_module` would cause a `KeyError` to be raised. (Contributed by @akoeplinger.)

View File

@@ -0,0 +1,3 @@
## 5.0.2
No user-facing changes.

View File

@@ -0,0 +1,3 @@
## 5.0.3
No user-facing changes.

View File

@@ -0,0 +1,3 @@
## 5.0.4
No user-facing changes.

View File

@@ -0,0 +1,20 @@
## 6.0.0
### Breaking Changes
* All modules that depend on the points-to analysis have now been removed from the top level `python.qll` module. To access the points-to functionality, import the new `LegacyPointsTo` module. This also means that some predicates have been removed from various classes, for instance `Function.getFunctionObject()`. To access these predicates, import the `LegacyPointsTo` module and use the `FunctionWithPointsTo` class instead. Most cases follow this pattern, but there are a few exceptions:
* The `getLiteralObject` method on `ImmutableLiteral` subclasses has been replaced with a predicate `getLiteralObject(ImmutableLiteral l)` in the `LegacyPointsTo` module.
* The `getMetrics` method on `Function`, `Class`, and `Module` has been removed. To access metrics, import `LegacyPointsTo` and use the classes `FunctionMetrics`, etc. instead.
### New Features
* The extractor now supports the new, relaxed syntax `except A, B, C: ...` (which would previously have to be written as `except (A, B, C): ...`) as defined in [PEP-758](https://peps.python.org/pep-0758/). This may cause changes in results for code that uses Python 2-style exception binding (`except Foo, e: ...`). The more modern format, `except Foo as e: ...` (available since Python 2.6) is unaffected.
* The Python extractor now supports template strings as defined in [PEP-750](https://peps.python.org/pep-0750/), through the classes `TemplateString` and `JoinedTemplateString`.
### Minor Analysis Improvements
* When a code-scanning configuration specifies the `paths:` and/or `paths-ignore:` settings, these are now taken into account by the Python extractor's search for YAML files.
* The `compression.zstd` library (added in Python 3.14) is now supported by the `py/decompression-bomb` query.
* Added taint flow model and type model for `urllib.parse`.
* Remote flow sources for the `python-socketio` package have been modeled.
* Additional models for remote flow sources for `tornado.websocket.WebSocketHandler` have been added.

View File

@@ -1,2 +1,2 @@
---
lastReleaseVersion: 4.0.16
lastReleaseVersion: 6.0.0

View File

@@ -14,29 +14,30 @@ import semmle.python.Patterns
import semmle.python.Keywords
import semmle.python.Comprehensions
import semmle.python.Flow
import semmle.python.Metrics
private import semmle.python.Metrics
import semmle.python.Constants
import semmle.python.Scope
import semmle.python.Comment
import semmle.python.GuardedControlFlow
import semmle.python.types.ImportTime
import semmle.python.types.Object
import semmle.python.types.ClassObject
import semmle.python.types.FunctionObject
import semmle.python.types.ModuleObject
import semmle.python.types.Version
import semmle.python.types.Descriptors
private import semmle.python.types.ImportTime
private import semmle.python.types.Object
private import semmle.python.types.ClassObject
private import semmle.python.types.FunctionObject
private import semmle.python.types.ModuleObject
private import semmle.python.types.Version
private import semmle.python.types.Descriptors
import semmle.python.SSA
import semmle.python.SelfAttribute
import semmle.python.types.Properties
private import semmle.python.SelfAttribute
private import semmle.python.types.Properties
import semmle.python.xml.XML
import semmle.python.essa.Essa
import semmle.python.pointsto.Base
import semmle.python.pointsto.Context
import semmle.python.pointsto.CallGraph
import semmle.python.objects.ObjectAPI
private import semmle.python.pointsto.Base
private import semmle.python.pointsto.Context
private import semmle.python.pointsto.CallGraph
private import semmle.python.objects.ObjectAPI
import semmle.python.Unit
import site
private import semmle.python.Overlay
// Removing this import perturbs the compilation process enough that the points-to analysis gets
// compiled -- and cached -- differently depending on whether the data flow library is imported. By
// importing it privately here, we ensure that the points-to analysis is compiled the same way.

View File

@@ -1,5 +1,5 @@
name: codeql/python-all
version: 4.0.17-dev
version: 6.0.1-dev
groups: python
dbscheme: semmlecode.python.dbscheme
extractor: python
@@ -19,3 +19,4 @@ dataExtensions:
- semmle/python/frameworks/**/*.model.yml
- ext/*.model.yml
warnOnImplicitThis: true
compileForOverlayEval: true

View File

@@ -218,6 +218,9 @@ class DictItemListParent extends DictItemListParent_ { }
/** A list of strings (the primitive type string not Bytes or Unicode) */
class StringList extends StringList_ { }
/** A list of template strings. */
class TemplateStringList extends TemplateStringList_ { }
/** A list of aliases in an import statement */
class AliasList extends AliasList_ { }
@@ -273,3 +276,9 @@ class ParamSpec extends ParamSpec_, TypeParameter {
override Expr getAChildNode() { result = this.getName() }
}
/** A template string literal. */
class TemplateString extends TemplateString_, Expr { }
/** An (implicitly) concatenated list of template strings. */
class JoinedTemplateString extends JoinedTemplateString_, Expr { }

View File

@@ -768,6 +768,20 @@ class Fstring_ extends @py_Fstring, Expr {
override string toString() { result = "Fstring" }
}
/**
 * INTERNAL: See the class `JoinedTemplateString` for further information.
 *
 * Raw database view of an (implicitly) concatenated sequence of template strings.
 */
class JoinedTemplateString_ extends @py_JoinedTemplateString, Expr {
/** Gets the list holding the strings of this joined template string, as recorded in `py_TemplateString_lists`. */
TemplateStringList getStrings() { py_TemplateString_lists(result, this) }
/** Gets the string at position `index` in the list returned by `getStrings()`. */
TemplateString getString(int index) { result = this.getStrings().getItem(index) }
/** Gets any string in the list returned by `getStrings()`. */
TemplateString getAString() { result = this.getStrings().getAnItem() }
/** Gets the fixed textual representation `"JoinedTemplateString"`. */
override string toString() { result = "JoinedTemplateString" }
}
/** INTERNAL: See the class `KeyValuePair` for further information. */
class KeyValuePair_ extends @py_KeyValuePair, DictItem {
/** Gets the location of this key-value pair. */
@@ -1373,6 +1387,48 @@ class TemplateDottedNotation_ extends @py_TemplateDottedNotation, Expr {
override string toString() { result = "TemplateDottedNotation" }
}
/**
 * INTERNAL: See the class `TemplateString` for further information.
 *
 * Raw database view of a template string literal.
 */
class TemplateString_ extends @py_TemplateString, Expr {
/** Gets the prefix of this template string literal (the `py_strs` entry at index 2). */
string getPrefix() { py_strs(result, this, 2) }
/** Gets the list of values of this template string literal (the `py_expr_lists` entry at index 3). */
ExprList getValues() { py_expr_lists(result, this, 3) }
/** Gets the value at position `index` in the list returned by `getValues()`. */
Expr getValue(int index) { result = this.getValues().getItem(index) }
/** Gets any value in the list returned by `getValues()`. */
Expr getAValue() { result = this.getValues().getAnItem() }
/** Gets the parent of this template string literal, as recorded in `py_exprs`. */
override ExprParent getParent() { py_exprs(this, _, result, _) }
/** Gets the fixed textual representation `"TemplateString"`. */
override string toString() { result = "TemplateString" }
}
/**
 * INTERNAL: See the class `TemplateStringPart` for further information.
 *
 * Raw database view of a string part of a template string.
 */
class TemplateStringPart_ extends @py_TemplateStringPart, Expr {
/** Gets the text of this string part of a template string (the `py_strs` entry at index 2). */
string getText() { py_strs(result, this, 2) }
/** Gets the fixed textual representation `"TemplateStringPart"`. */
override string toString() { result = "TemplateStringPart" }
}
/**
 * INTERNAL: See the class `TemplateStringList` for further information.
 *
 * Raw database view of a list of template string literals.
 */
class TemplateStringList_ extends @py_TemplateString_list {
/** Gets a parent of this template string literal list, as recorded in `py_TemplateString_lists`. */
JoinedTemplateString getParent() { py_TemplateString_lists(this, result) }
/** Gets an item of this template string literal list, that is, any expression whose `py_exprs` parent is this list. */
Expr getAnItem() { py_exprs(result, _, this, _) }
/** Gets the nth item of this template string literal list, where `index` is the position recorded in `py_exprs`. */
Expr getItem(int index) { py_exprs(result, _, this, index) }
/** Gets a textual representation of this element; always `"TemplateStringList"`. */
string toString() { result = "TemplateStringList" }
}
/** INTERNAL: See the class `TemplateWrite` for further information. */
class TemplateWrite_ extends @py_TemplateWrite, Stmt {
/** Gets the value of this template write statement. */

View File

@@ -141,18 +141,12 @@ class Class extends Class_, Scope, AstNode {
/** Gets the metaclass expression */
Expr getMetaClass() { result = this.getParent().getMetaClass() }
/** Gets the ClassObject corresponding to this class */
ClassObject getClassObject() { result.getOrigin() = this.getParent() }
/** Gets the nth base of this class definition. */
Expr getBase(int index) { result = this.getParent().getBase(index) }
/** Gets a base of this class definition. */
Expr getABase() { result = this.getParent().getABase() }
/** Gets the metrics for this class */
ClassMetrics getMetrics() { result = this }
/**
* Gets the qualified name for this class.
* Should return the same name as the `__qualname__` attribute on classes in Python 3.

View File

@@ -12,6 +12,7 @@ private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Files
private import semmle.python.Frameworks
private import semmle.python.security.internal.EncryptionKeySizes
private import semmle.python.dataflow.new.SensitiveDataSources
private import codeql.threatmodels.ThreatModels
private import codeql.concepts.ConceptsShared
@@ -115,6 +116,16 @@ module SystemCommandExecution {
class FileSystemAccess extends DataFlow::Node instanceof FileSystemAccess::Range {
/**
 * Gets an argument to this file system access that is interpreted as a path.
 *
 * Delegates to the underlying `FileSystemAccess::Range`.
 */
DataFlow::Node getAPathArgument() { result = super.getAPathArgument() }
/**
 * Gets an argument to this file system access that is interpreted as a path
 * which is vulnerable to path injection.
 *
 * By default all path arguments are considered vulnerable, but this can be overridden to
 * exclude certain arguments that are known to be safe, for example because they are
 * restricted to a specific directory.
 *
 * Delegates to the underlying `FileSystemAccess::Range`.
 */
DataFlow::Node getAVulnerablePathArgument() { result = super.getAVulnerablePathArgument() }
}
/** Provides a class for modeling new file system access APIs. */
@@ -129,6 +140,16 @@ module FileSystemAccess {
abstract class Range extends DataFlow::Node {
/**
 * Gets an argument to this file system access that is interpreted as a path.
 *
 * Every concrete subclass must provide this predicate.
 */
abstract DataFlow::Node getAPathArgument();
/**
 * Gets an argument to this file system access that is interpreted as a path
 * which is vulnerable to path injection.
 *
 * By default all path arguments are considered vulnerable, but this can be overridden to
 * exclude certain arguments that are known to be safe, for example because they are
 * restricted to a specific directory.
 *
 * The default implementation returns exactly the results of `getAPathArgument()`.
 */
DataFlow::Node getAVulnerablePathArgument() { result = this.getAPathArgument() }
}
}
@@ -1290,6 +1311,18 @@ module Http {
*/
DataFlow::Node getValueArg() { result = super.getValueArg() }
/**
 * Holds if the name of this cookie indicates it may contain sensitive information.
 *
 * The name is taken from either the name argument or the header argument. It is
 * considered sensitive when it is a result of `sensitiveLookupStringConst`, or when
 * some `SensitiveDataSource` flows to it locally.
 */
predicate isSensitive() {
  exists(DataFlow::Node name |
    name = this.getNameArg()
    or
    name = this.getHeaderArg()
  |
    name = sensitiveLookupStringConst(_)
    or
    exists(SensitiveDataSource src | DataFlow::localFlow(src, name))
  )
}
/**
* Holds if the `Secure` flag of the cookie is known to have a value of `b`.
*/

View File

@@ -1,6 +1,4 @@
import python
private import semmle.python.pointsto.PointsTo
private import semmle.python.objects.ObjectInternal
private import python
private import semmle.python.internal.CachedStages
/** An expression */
@@ -52,67 +50,6 @@ class Expr extends Expr_, AstNode {
Expr getASubExpression() { none() }
override AstNode getAChildNode() { result = this.getASubExpression() }
/**
* NOTE: `refersTo` will be deprecated in 2019. Use `pointsTo` instead.
* Gets what this expression might "refer-to". Performs a combination of localized (intra-procedural) points-to
* analysis and global module-level analysis. This points-to analysis favours precision over recall. It is highly
* precise, but may not provide information for a significant number of flow-nodes.
* If the class is unimportant then use `refersTo(value)` or `refersTo(value, origin)` instead.
* NOTE: For complex dataflow, involving multiple stages of points-to analysis, it may be more precise to use
* `ControlFlowNode.refersTo(...)` instead.
*/
predicate refersTo(Object obj, ClassObject cls, AstNode origin) {
this.refersTo(_, obj, cls, origin)
}
/**
* NOTE: `refersTo` will be deprecated in 2019. Use `pointsTo` instead.
* Gets what this expression might "refer-to" in the given `context`.
*/
predicate refersTo(Context context, Object obj, ClassObject cls, AstNode origin) {
this.getAFlowNode().refersTo(context, obj, cls, origin.getAFlowNode())
}
/**
* NOTE: `refersTo` will be deprecated in 2019. Use `pointsTo` instead.
* Holds if this expression might "refer-to" to `value` which is from `origin`
* Unlike `this.refersTo(value, _, origin)`, this predicate includes results
* where the class cannot be inferred.
*/
pragma[nomagic]
predicate refersTo(Object obj, AstNode origin) {
this.getAFlowNode().refersTo(obj, origin.getAFlowNode())
}
/**
* NOTE: `refersTo` will be deprecated in 2019. Use `pointsTo` instead.
* Equivalent to `this.refersTo(value, _)`
*/
predicate refersTo(Object obj) { this.refersTo(obj, _) }
/**
* Holds if this expression might "point-to" to `value` which is from `origin`
* in the given `context`.
*/
predicate pointsTo(Context context, Value value, AstNode origin) {
this.getAFlowNode().pointsTo(context, value, origin.getAFlowNode())
}
/**
* Holds if this expression might "point-to" to `value` which is from `origin`.
*/
predicate pointsTo(Value value, AstNode origin) {
this.getAFlowNode().pointsTo(value, origin.getAFlowNode())
}
/**
* Holds if this expression might "point-to" to `value`.
*/
predicate pointsTo(Value value) { this.pointsTo(value, _) }
/** Gets a value that this expression might "point-to". */
Value pointsTo() { this.pointsTo(result) }
}
/** An assignment expression, such as `x := y` */
@@ -303,17 +240,12 @@ class Bytes extends StringLiteral {
/* syntax: b"hello" */
Bytes() { not this.isUnicode() }
override Object getLiteralObject() {
py_cobjecttypes(result, theBytesType()) and
py_cobjectnames(result, this.quotedString())
}
/**
* The extractor puts quotes into the name of each string (to prevent "0" clashing with 0).
* The following predicate helps us match up string/byte literals in the source
* with the equivalent object.
*/
private string quotedString() {
string quotedString() {
exists(string b_unquoted | b_unquoted = this.getS() | result = "b'" + b_unquoted + "'")
}
}
@@ -329,11 +261,7 @@ class Ellipsis extends Ellipsis_ {
* Consists of string (both unicode and byte) literals and numeric literals.
*/
abstract class ImmutableLiteral extends Expr {
abstract Object getLiteralObject();
abstract boolean booleanValue();
final Value getLiteralValue() { result.(ConstantObjectInternal).getLiteral() = this }
}
/** A numerical constant expression, such as `7` or `4.2` */
@@ -357,12 +285,6 @@ class IntegerLiteral extends Num {
override string toString() { result = "IntegerLiteral" }
override Object getLiteralObject() {
py_cobjecttypes(result, theIntType()) and py_cobjectnames(result, this.getN())
or
py_cobjecttypes(result, theLongType()) and py_cobjectnames(result, this.getN())
}
override boolean booleanValue() {
this.getValue() = 0 and result = false
or
@@ -382,10 +304,6 @@ class FloatLiteral extends Num {
override string toString() { result = "FloatLiteral" }
override Object getLiteralObject() {
py_cobjecttypes(result, theFloatType()) and py_cobjectnames(result, this.getN())
}
override boolean booleanValue() {
this.getValue() = 0.0 and result = false
or
@@ -408,10 +326,6 @@ class ImaginaryLiteral extends Num {
override string toString() { result = "ImaginaryLiteral" }
override Object getLiteralObject() {
py_cobjecttypes(result, theComplexType()) and py_cobjectnames(result, this.getN())
}
override boolean booleanValue() {
this.getValue() = 0.0 and result = false
or
@@ -430,11 +344,6 @@ class NegativeIntegerLiteral extends ImmutableLiteral, UnaryExpr {
override boolean booleanValue() { result = this.getOperand().(IntegerLiteral).booleanValue() }
override Object getLiteralObject() {
(py_cobjecttypes(result, theIntType()) or py_cobjecttypes(result, theLongType())) and
py_cobjectnames(result, "-" + this.getOperand().(IntegerLiteral).getN())
}
/**
* Gets the (integer) value of this constant. Will not return a result if the value does not fit into
* a 32 bit signed value
@@ -450,11 +359,6 @@ class Unicode extends StringLiteral {
/* syntax: "hello" */
Unicode() { this.isUnicode() }
override Object getLiteralObject() {
py_cobjecttypes(result, theUnicodeType()) and
py_cobjectnames(result, this.quotedString())
}
/**
* Gets the quoted representation of this string.
*
@@ -658,12 +562,11 @@ class StringLiteral extends Str_, ImmutableLiteral {
this.getText() != "" and result = true
}
override Object getLiteralObject() { none() }
override string toString() { result = "StringLiteral" }
}
private predicate name_consts(Name_ n, string id) {
/** Holds if `n` is a named constant (`True`, `False`, or `None`) with name `id`. */
predicate name_consts(Name_ n, string id) {
exists(Variable v | py_variables(v, n) and id = v.getId() |
id = "True" or id = "False" or id = "None"
)
@@ -692,8 +595,6 @@ class True extends BooleanLiteral {
/* syntax: True */
True() { name_consts(this, "True") }
override Object getLiteralObject() { name_consts(this, "True") and result = theTrueObject() }
override boolean booleanValue() { result = true }
}
@@ -702,8 +603,6 @@ class False extends BooleanLiteral {
/* syntax: False */
False() { name_consts(this, "False") }
override Object getLiteralObject() { name_consts(this, "False") and result = theFalseObject() }
override boolean booleanValue() { result = false }
}
@@ -712,8 +611,6 @@ class None extends NameConstant {
/* syntax: None */
None() { name_consts(this, "None") }
override Object getLiteralObject() { name_consts(this, "None") and result = theNoneObject() }
override boolean booleanValue() { result = false }
}

View File

@@ -1,5 +1,4 @@
import python
private import semmle.python.pointsto.PointsTo
private import semmle.python.internal.CachedStages
private import codeql.controlflow.BasicBlock as BB
@@ -144,56 +143,6 @@ class ControlFlowNode extends @py_flow_node {
/** Whether this flow node is the first in its scope */
predicate isEntryNode() { py_scope_flow(this, _, -1) }
/** Gets the value that this ControlFlowNode points-to. */
predicate pointsTo(Value value) { this.pointsTo(_, value, _) }
/** Gets the value that this ControlFlowNode points-to. */
Value pointsTo() { this.pointsTo(_, result, _) }
/** Gets a value that this ControlFlowNode may points-to. */
Value inferredValue() { this.pointsTo(_, result, _) }
/** Gets the value and origin that this ControlFlowNode points-to. */
predicate pointsTo(Value value, ControlFlowNode origin) { this.pointsTo(_, value, origin) }
/** Gets the value and origin that this ControlFlowNode points-to, given the context. */
predicate pointsTo(Context context, Value value, ControlFlowNode origin) {
PointsTo::pointsTo(this, context, value, origin)
}
/**
* Gets what this flow node might "refer-to". Performs a combination of localized (intra-procedural) points-to
* analysis and global module-level analysis. This points-to analysis favours precision over recall. It is highly
* precise, but may not provide information for a significant number of flow-nodes.
* If the class is unimportant then use `refersTo(value)` or `refersTo(value, origin)` instead.
*/
pragma[nomagic]
predicate refersTo(Object obj, ClassObject cls, ControlFlowNode origin) {
this.refersTo(_, obj, cls, origin)
}
/** Gets what this expression might "refer-to" in the given `context`. */
pragma[nomagic]
predicate refersTo(Context context, Object obj, ClassObject cls, ControlFlowNode origin) {
not obj = unknownValue() and
not cls = theUnknownType() and
PointsTo::points_to(this, context, obj, cls, origin)
}
/**
* Whether this flow node might "refer-to" to `value` which is from `origin`
* Unlike `this.refersTo(value, _, origin)` this predicate includes results
* where the class cannot be inferred.
*/
pragma[nomagic]
predicate refersTo(Object obj, ControlFlowNode origin) {
not obj = unknownValue() and
PointsTo::points_to(this, _, obj, _, origin)
}
/** Equivalent to `this.refersTo(value, _)` */
predicate refersTo(Object obj) { this.refersTo(obj, _) }
/** Gets the basic block containing this flow node */
BasicBlock getBasicBlock() { result.contains(this) }
@@ -241,41 +190,6 @@ class ControlFlowNode extends @py_flow_node {
/** Whether this node is a normal (non-exceptional) exit */
predicate isNormalExit() { py_scope_flow(this, _, 0) or py_scope_flow(this, _, 2) }
/**
 * Holds if it is unlikely that this control-flow node can be reached.
 *
 * That is the case when the start of its basic block is not likely reachable, or
 * when some basic block is likely reachable at its start but not at its end and
 * this node comes after a node with an unlikely successor edge in that block.
 */
predicate unlikelyReachable() {
  exists(BasicBlock block, ControlFlowNode raiser, int raiserIndex, int thisIndex |
    start_bb_likely_reachable(block) and
    not end_bb_likely_reachable(block)
  |
    // A node after an unlikely successor edge within the same block
    // cannot be reached either.
    raiser = block.getNode(raiserIndex) and
    raiser.(RaisingNode).unlikelySuccessor(_) and
    this = block.getNode(thisIndex) and
    raiserIndex < thisIndex
  )
  or
  not start_bb_likely_reachable(this.getBasicBlock())
}
/**
* Check whether this control-flow node has complete points-to information.
* This would mean that the analysis managed to infer an over approximation
* of possible values at runtime.
*/
predicate hasCompletePointsToSet() {
// If the tracking failed, then `this` will be its own "origin". In that
// case, we want to exclude nodes for which there is also a different
// origin, as that would indicate that some paths failed and some did not.
this.refersTo(_, _, this) and
not exists(ControlFlowNode other | other != this and this.refersTo(_, _, other))
or
// If `this` is a use of a variable, then we must have complete points-to
// for that variable.
exists(SsaVariable v | v.getAUse() = this | varHasCompletePointsToSet(v))
}
/** Whether this strictly dominates other. */
pragma[inline]
predicate strictlyDominates(ControlFlowNode other) {
@@ -332,28 +246,6 @@ private class AnyNode extends ControlFlowNode {
override AstNode getNode() { result = super.getNode() }
}
/**
* Check whether a SSA variable has complete points-to information.
* This would mean that the analysis managed to infer an overapproximation
* of possible values at runtime.
*/
private predicate varHasCompletePointsToSet(SsaVariable var) {
// Global variables may be modified non-locally or concurrently.
not var.getVariable() instanceof GlobalVariable and
(
// If we have complete points-to information on the definition of
// this variable, then the variable has complete information.
var.getDefinition().(DefinitionNode).getValue().hasCompletePointsToSet()
or
// If this variable is a phi output, then we have complete
// points-to information about it if all phi inputs had complete
// information.
forex(SsaVariable phiInput | phiInput = var.getAPhiInput() |
varHasCompletePointsToSet(phiInput)
)
)
}
/** A control flow node corresponding to a call expression, such as `func(...)` */
class CallNode extends ControlFlowNode {
CallNode() { toAst(this) instanceof Call }
@@ -991,6 +883,58 @@ class StarredNode extends ControlFlowNode {
ControlFlowNode getValue() { toAst(result) = toAst(this).(Starred).getValue() }
}
/** A control-flow node whose AST node is an `except` statement. */
class ExceptFlowNode extends ControlFlowNode {
  ExceptFlowNode() { this.getNode() instanceof ExceptStmt }

  /**
   * Gets a control-flow node for the type handled by this exception handler,
   * that is, `ExceptionType` in `except ExceptionType as e:`.
   *
   * Only nodes in basic blocks dominated by this node's basic block are included.
   */
  ControlFlowNode getType() {
    result = this.getNode().(ExceptStmt).getType().getAFlowNode() and
    this.getBasicBlock().dominates(result.getBasicBlock())
  }

  /**
   * Gets a control-flow node for the name assigned to the handled exception, if any,
   * that is, `e` in `except ExceptionType as e:`.
   *
   * Only nodes in basic blocks dominated by this node's basic block are included.
   */
  ControlFlowNode getName() {
    result = this.getNode().(ExceptStmt).getName().getAFlowNode() and
    this.getBasicBlock().dominates(result.getBasicBlock())
  }
}
/** A control-flow node whose AST node is an `except*` statement. */
class ExceptGroupFlowNode extends ControlFlowNode {
  ExceptGroupFlowNode() { this.getNode() instanceof ExceptGroupStmt }

  /**
   * Gets a control-flow node for the type handled by this exception handler,
   * that is, `ExceptionType` in `except* ExceptionType as e:`.
   *
   * Only nodes in basic blocks dominated by this node's basic block are included.
   */
  ControlFlowNode getType() {
    exists(ExceptGroupStmt handler | handler = this.getNode() |
      result = handler.getType().getAFlowNode() and
      this.getBasicBlock().dominates(result.getBasicBlock())
    )
  }

  /**
   * Gets a control-flow node for the name assigned to the handled exception, if any,
   * that is, `e` in `except* ExceptionType as e:`.
   *
   * Only nodes in basic blocks dominated by this node's basic block are included.
   */
  ControlFlowNode getName() {
    exists(ExceptGroupStmt handler | handler = this.getNode() |
      result = handler.getName().getAFlowNode() and
      this.getBasicBlock().dominates(result.getBasicBlock())
    )
  }
}
private module Scopes {
private predicate fast_local(NameNode n) {
exists(FastLocalVariable v |
@@ -1094,7 +1038,8 @@ class BasicBlock extends @py_flow_node {
)
}
private ControlFlowNode firstNode() { result = this }
/** Gets the first node in this basic block */
ControlFlowNode firstNode() { result = this }
/** Gets the last node in this basic block */
ControlFlowNode getLastNode() {
@@ -1183,15 +1128,6 @@ class BasicBlock extends @py_flow_node {
)
}
/**
* Whether (as inferred by type inference) it is highly unlikely (or impossible) for control to flow from this to succ.
*/
predicate unlikelySuccessor(BasicBlock succ) {
this.getLastNode().(RaisingNode).unlikelySuccessor(succ.firstNode())
or
not end_bb_likely_reachable(this) and succ = this.getASuccessor()
}
/** Holds if this basic block strictly reaches the other. Is the start of other reachable from the end of this. */
cached
predicate strictlyReaches(BasicBlock other) {
@@ -1202,11 +1138,6 @@ class BasicBlock extends @py_flow_node {
/** Holds if this basic block reaches the other. Is the start of other reachable from the end of this. */
predicate reaches(BasicBlock other) { this = other or this.strictlyReaches(other) }
/**
* Whether (as inferred by type inference) this basic block is likely to be reachable.
*/
predicate likelyReachable() { start_bb_likely_reachable(this) }
/**
* Gets the `ConditionBlock`, if any, that controls this block and
* does not control any other `ConditionBlock`s that control this block.
@@ -1234,26 +1165,6 @@ class BasicBlock extends @py_flow_node {
}
}
/**
 * Holds if the start of basic block `b` is likely to be reachable: either `b`
 * contains the entry node of some scope, or it has a predecessor whose end is
 * likely reachable and whose last node does not have `b` as an unlikely successor.
 */
private predicate start_bb_likely_reachable(BasicBlock b) {
  exists(BasicBlock pred | pred = b.getAPredecessor() |
    not pred.getLastNode().(RaisingNode).unlikelySuccessor(b) and
    end_bb_likely_reachable(pred)
  )
  or
  b.getNode(_) = any(Scope s).getEntryNode()
}
/**
 * Holds if the end of basic block `b` is likely to be reachable: its start is
 * likely reachable, and every node of `b` that has an unlikely successor inside
 * `b` is the last node of `b`.
 */
private predicate end_bb_likely_reachable(BasicBlock b) {
  forall(ControlFlowNode raiser, ControlFlowNode succ |
    raiser = b.getNode(_) and
    succ = b.getNode(_) and
    raiser.(RaisingNode).unlikelySuccessor(succ)
  |
    raiser = b.getLastNode()
  ) and
  start_bb_likely_reachable(b)
}
private class ControlFlowNodeAlias = ControlFlowNode;
final private class FinalBasicBlock = BasicBlock;

View File

@@ -78,6 +78,7 @@ private import semmle.python.frameworks.Sanic
private import semmle.python.frameworks.ServerLess
private import semmle.python.frameworks.Setuptools
private import semmle.python.frameworks.Simplejson
private import semmle.python.frameworks.Socketio
private import semmle.python.frameworks.SqlAlchemy
private import semmle.python.frameworks.SSRFSink
private import semmle.python.frameworks.Starlette
@@ -90,6 +91,7 @@ private import semmle.python.frameworks.TRender
private import semmle.python.frameworks.Twisted
private import semmle.python.frameworks.Ujson
private import semmle.python.frameworks.Urllib3
private import semmle.python.frameworks.Websockets
private import semmle.python.frameworks.Xmltodict
private import semmle.python.frameworks.Yaml
private import semmle.python.frameworks.Yarl

View File

@@ -84,12 +84,6 @@ class Function extends Function_, Scope, AstNode {
/** Gets the name used to define this function */
override string getName() { result = Function_.super.getName() }
/** Gets the metrics for this function */
FunctionMetrics getMetrics() { result = this }
/** Gets the FunctionObject corresponding to this function */
FunctionObject getFunctionObject() { result.getOrigin() = this.getDefinition() }
/**
* Whether this function is a procedure, that is, it has no explicit return statement and always returns None.
* Note that generator and async functions are not procedures as they return generators and coroutines respectively.

View File

@@ -1,4 +1,5 @@
import python
private import LegacyPointsTo
/** The metrics for a function */
class FunctionMetrics extends Function {
@@ -28,9 +29,9 @@ class FunctionMetrics extends Function {
*/
int getCyclomaticComplexity() {
exists(int e, int n |
n = count(BasicBlock b | b = this.getABasicBlock() and b.likelyReachable()) and
n = count(BasicBlockWithPointsTo b | b = this.getABasicBlock() and b.likelyReachable()) and
e =
count(BasicBlock b1, BasicBlock b2 |
count(BasicBlockWithPointsTo b1, BasicBlockWithPointsTo b2 |
b1 = this.getABasicBlock() and
b1.likelyReachable() and
b2 = this.getABasicBlock() and
@@ -59,7 +60,7 @@ class FunctionMetrics extends Function {
not non_coupling_method(result) and
exists(Call call | call.getScope() = this |
exists(FunctionObject callee | callee.getFunction() = result |
call.getAFlowNode().getFunction().refersTo(callee)
call.getAFlowNode().getFunction().(ControlFlowNodeWithPointsTo).refersTo(callee)
)
or
exists(Attribute a | call.getFunc() = a |
@@ -123,7 +124,7 @@ class ClassMetrics extends Class {
)
or
exists(Function f, Call c, ClassObject cls | c.getScope() = f and f.getScope() = this |
c.getFunc().refersTo(cls) and
c.getFunc().(ExprWithPointsTo).refersTo(cls) and
cls.getPyClass() = other
)
)
@@ -292,7 +293,7 @@ class ModuleMetrics extends Module {
)
or
exists(Function f, Call c, ClassObject cls | c.getScope() = f and f.getScope() = this |
c.getFunc().refersTo(cls) and
c.getFunc().(ExprWithPointsTo).refersTo(cls) and
cls.getPyClass().getEnclosingModule() = other
)
)

View File

@@ -1,5 +1,4 @@
import python
private import semmle.python.objects.Modules
private import semmle.python.internal.CachedStages
/**
@@ -66,15 +65,6 @@ class Module extends Module_, Scope, AstNode {
/** Whether this module is a package initializer */
predicate isPackageInit() { this.getName().matches("%\\_\\_init\\_\\_") and not this.isPackage() }
/** Gets a name exported by this module, that is the names that will be added to a namespace by 'from this-module import *' */
string getAnExport() {
py_exports(this, result)
or
exists(ModuleObjectInternal mod | mod.getSource() = this.getEntryNode() |
mod.(ModuleValue).exports(result)
)
}
/** Gets the source file for this module */
File getFile() { py_module_path(this, result) }
@@ -96,9 +86,6 @@ class Module extends Module_, Scope, AstNode {
result = this.getName().regexpReplaceAll("\\.[^.]*$", "")
}
/** Gets the metrics for this module */
ModuleMetrics getMetrics() { result = this }
string getAnImportedModuleName() {
exists(Import i | i.getEnclosingModule() = this | result = i.getAnImportedModuleName())
or

View File

@@ -0,0 +1,330 @@
/**
* Defines entity discard predicates for Python overlay analysis.
*/
private import internal.OverlayXml
/*- Predicates -*/
/**
 * Holds always for the overlay variant and never for the base variant.
 * This local predicate is used to define local predicates that behave
 * differently for the base and overlay variant.
 *
 * The variant is determined by looking for the `("isOverlay", "true")`
 * entry in the `databaseMetadata` relation.
 */
overlay[local]
predicate isOverlay() { databaseMetadata("isOverlay", "true") }
/**
 * Gets the path associated with location `loc`: the path of its file for
 * `locations_default` entries, or the path of its module for `locations_ast` entries.
 */
overlay[local]
private string getPathForLocation(@location loc) {
  exists(@py_Module mod |
    locations_ast(loc, mod, _, _, _, _) and
    result = getPathForModule(mod)
  )
  or
  exists(@file file |
    locations_default(loc, file, _, _, _, _) and
    files(file, result)
  )
}
/** Gets the path of the file or folder that `py_module_path` records for module `mod`. */
overlay[local]
private string getPathForModule(@py_Module mod) {
  result = getPathForContainer(any(@container fileOrFolder | py_module_path(mod, fileOrFolder)))
}
/** Gets the path of `fileOrFolder`, looked up in the `folders` and `files` relations. */
overlay[local]
private string getPathForContainer(@container fileOrFolder) {
  folders(fileOrFolder, result)
  or
  files(fileOrFolder, result)
}
/*- Discardable entities and their discard predicates -*/
/**
 * Python database entities that use named TRAP IDs; the rest use *-ids.
 *
 * This is the union of modules, containers (files and folders), and C objects.
 */
overlay[local]
private class NamedEntity = @py_Module or @container or @py_cobject;
/**
 * Holds if the named entity `el` should be discarded: its path is among the
 * files changed in the overlay and it does not exist in the overlay variant.
 */
overlay[discard_entity]
private predicate discardNamedEntity(@top el) {
  // Entities with named IDs can exist in the base, in the overlay, or in both.
  exists(Discardable d |
    d = el and
    el instanceof NamedEntity
  |
    not d.existsInOverlay() and
    overlayChangedFiles(d.getPath())
  )
}
/**
 * Holds if the *-id entity `el` should be discarded: its path is among the
 * files changed in the overlay and it exists in the base variant.
 */
overlay[discard_entity]
private predicate discardStarEntity(@top el) {
  // Entities with *-ids can exist either in base or overlay, but not both.
  exists(Discardable d |
    d = el and
    not el instanceof NamedEntity
  |
    d.existsInBase() and
    overlayChangedFiles(d.getPath())
  )
}
/**
 * An element that can be discarded from the base.
 *
 * This is the abstract root of all discardable elements; each subclass states
 * how to find the path of the file an element belongs to.
 */
overlay[local]
abstract class Discardable extends @top {
  /** Gets the path to the file in which this element occurs. */
  abstract string getPath();

  /** Holds if this element exists in the overlay variant. */
  predicate existsInOverlay() { exists(this) and isOverlay() }

  /** Holds if this element exists in the base variant. */
  predicate existsInBase() { exists(this) and not isOverlay() }

  /** Gets a textual representation of this discardable element (there is none). */
  string toString() { none() }
}
/**
 * A discardable locatable AST node (`@py_location_parent`).
 *
 * Its path is the path of a location that `py_locations` attaches to it.
 */
overlay[local]
final private class DiscardableLocatable extends Discardable instanceof @py_location_parent {
  override string getPath() {
    exists(@location pos |
      py_locations(pos, this) and
      result = getPathForLocation(pos)
    )
  }
}
/**
 * A discardable scope (class, function or module).
 *
 * Its path is either the module path of the scope itself, or the path of a
 * location that `py_scope_location` attaches to it.
 */
overlay[local]
final private class DiscardableScope extends Discardable instanceof @py_scope {
  override string getPath() {
    result = getPathForModule(this)
    or
    exists(@location pos |
      py_scope_location(pos, this) and
      result = getPathForLocation(pos)
    )
  }
}
/**
* Discardable files and folders.
*
* The path of a container is the path recorded for it in `files` or `folders`
* (see `getPathForContainer`).
*/
overlay[local]
final private class DiscardableContainer extends Discardable instanceof @container {
override string getPath() { result = getPathForContainer(this) }
}
/**
 * A discardable control flow node.
 *
 * Its path is the path of the AST node that `py_flow_bb_node` relates it to.
 */
overlay[local]
final private class DiscardableCfgNode extends Discardable instanceof @py_flow_node {
  override string getPath() {
    exists(Discardable astNode |
      py_flow_bb_node(this, astNode.(@py_ast_node), _, _) and
      result = astNode.getPath()
    )
  }
}
/**
 * A discardable Python variable.
 *
 * Its path is the path of the scope that the `variable` relation gives for it.
 */
overlay[local]
final private class DiscardableVar extends Discardable instanceof @py_variable {
  override string getPath() {
    exists(Discardable scope |
      variable(this, scope.(@py_scope), _) and
      result = scope.getPath()
    )
  }
}
/**
 * A discardable SSA variable.
 *
 * Its path is the path of the variable that `py_ssa_var` relates it to.
 */
overlay[local]
final private class DiscardableSsaVar extends Discardable instanceof @py_ssa_var {
  override string getPath() {
    exists(DiscardableVar underlying |
      py_ssa_var(this, underlying) and
      result = underlying.getPath()
    )
  }
}
/**
* Discardable locations.
*
* The path of a location is computed by `getPathForLocation`.
*/
overlay[local]
final private class DiscardableLocation extends Discardable instanceof @location {
override string getPath() { result = getPathForLocation(this) }
}
/**
 * A discardable line.
 *
 * Its path is the path of the module that `py_line_lengths` relates it to.
 */
overlay[local]
final private class DiscardableLine extends Discardable instanceof @py_line {
  override string getPath() {
    exists(Discardable mod |
      py_line_lengths(this, mod.(@py_Module), _, _) and
      result = mod.getPath()
    )
  }
}
/**
 * A discardable string part list.
 *
 * Its path is the path of the `@py_Bytes_or_Str` node that
 * `py_StringPart_lists` relates it to.
 */
overlay[local]
final private class DiscardableStringPartList extends Discardable instanceof @py_StringPart_list {
  override string getPath() {
    exists(Discardable literal |
      py_StringPart_lists(this, literal.(@py_Bytes_or_Str)) and
      result = literal.getPath()
    )
  }
}
/**
 * A discardable alias.
 *
 * Its path is the path of the alias list that `py_aliases` relates it to.
 */
overlay[local]
final private class DiscardableAlias extends Discardable instanceof @py_alias {
  override string getPath() {
    exists(DiscardableAliasList aliases |
      py_aliases(this, aliases, _) and
      result = aliases.getPath()
    )
  }
}
/**
 * A discardable alias list.
 *
 * Its path is the path of the `@py_Import` node that `py_alias_lists`
 * relates it to.
 */
overlay[local]
final private class DiscardableAliasList extends Discardable instanceof @py_alias_list {
  override string getPath() {
    exists(Discardable imp |
      py_alias_lists(this, imp.(@py_Import)) and
      result = imp.getPath()
    )
  }
}
/**
 * A discardable `arguments` entity.
 *
 * Its path is the path of the `@py_arguments_parent` that `py_arguments`
 * relates it to.
 */
overlay[local]
final private class DiscardableArguments extends Discardable instanceof @py_arguments {
  override string getPath() {
    exists(Discardable parent |
      py_arguments(this, parent.(@py_arguments_parent)) and
      result = parent.getPath()
    )
  }
}
/**
 * A discardable boolean operator.
 *
 * Its path is the path of the `@py_BoolExpr` that `py_boolops` relates it to.
 */
overlay[local]
final private class DiscardableBoolOp extends Discardable instanceof @py_boolop {
  override string getPath() {
    exists(Discardable boolExpr |
      py_boolops(this, _, boolExpr.(@py_BoolExpr)) and
      result = boolExpr.getPath()
    )
  }
}
/**
 * A discardable comparison operator.
 *
 * Its path is the path of the comparison operator list that `py_cmpops`
 * relates it to.
 */
overlay[local]
final private class DiscardableCmpOp extends Discardable instanceof @py_cmpop {
  override string getPath() {
    exists(DiscardableCmpOpList ops |
      py_cmpops(this, _, ops, _) and
      result = ops.getPath()
    )
  }
}
/**
 * A discardable comparison operator list.
 *
 * Its path is the path of the `@py_Compare` node that `py_cmpop_lists`
 * relates it to.
 */
overlay[local]
final private class DiscardableCmpOpList extends Discardable instanceof @py_cmpop_list {
  override string getPath() {
    exists(Discardable compare |
      py_cmpop_lists(this, compare.(@py_Compare)) and
      result = compare.getPath()
    )
  }
}
/**
 * A discardable comprehension list.
 *
 * Its path is the path of the `@py_ListComp` node that
 * `py_comprehension_lists` relates it to.
 */
overlay[local]
final private class DiscardableComprehensionList extends Discardable instanceof @py_comprehension_list
{
  override string getPath() {
    exists(Discardable listComp |
      py_comprehension_lists(this, listComp.(@py_ListComp)) and
      result = listComp.getPath()
    )
  }
}
/**
 * A discardable dict item list.
 *
 * Its path is the path of the `@py_dict_item_list_parent` that
 * `py_dict_item_lists` relates it to.
 */
overlay[local]
final private class DiscardableDictItemList extends Discardable instanceof @py_dict_item_list {
  override string getPath() {
    exists(Discardable parent |
      py_dict_item_lists(this, parent.(@py_dict_item_list_parent)) and
      result = parent.getPath()
    )
  }
}
/**
 * A discardable expression context.
 *
 * Its path is the path of the `@py_expr_context_parent` that
 * `py_expr_contexts` relates it to.
 */
overlay[local]
final private class DiscardableExprContext extends Discardable instanceof @py_expr_context {
  override string getPath() {
    exists(Discardable parent |
      py_expr_contexts(this, _, parent.(@py_expr_context_parent)) and
      result = parent.getPath()
    )
  }
}
/**
 * A discardable expression list.
 *
 * Its path is the path of the `@py_expr_list_parent` that `py_expr_lists`
 * relates it to.
 */
overlay[local]
final private class DiscardableExprList extends Discardable instanceof @py_expr_list {
  override string getPath() {
    exists(Discardable parent |
      py_expr_lists(this, parent.(@py_expr_list_parent), _) and
      result = parent.getPath()
    )
  }
}
/**
 * A discardable binary operator.
 *
 * Its path is the path of the `@py_BinaryExpr` that `py_operators` relates it to.
 */
overlay[local]
final private class DiscardableOperator extends Discardable instanceof @py_operator {
  override string getPath() {
    exists(Discardable binaryExpr |
      py_operators(this, _, binaryExpr.(@py_BinaryExpr)) and
      result = binaryExpr.getPath()
    )
  }
}
/**
 * A discardable parameter list.
 *
 * Its path is the path of the `@py_Function` that `py_parameter_lists`
 * relates it to.
 */
overlay[local]
final private class DiscardableParameterList extends Discardable instanceof @py_parameter_list {
  override string getPath() {
    exists(Discardable func |
      py_parameter_lists(this, func.(@py_Function)) and
      result = func.getPath()
    )
  }
}
/**
 * A discardable pattern list.
 *
 * Its path is the path of the `@py_pattern_list_parent` that
 * `py_pattern_lists` relates it to.
 */
overlay[local]
final private class DiscardablePatternList extends Discardable instanceof @py_pattern_list {
  override string getPath() {
    exists(Discardable parent |
      py_pattern_lists(this, parent.(@py_pattern_list_parent), _) and
      result = parent.getPath()
    )
  }
}
/**
 * A discardable statement list.
 *
 * Its path is the path of the `@py_stmt_list_parent` that `py_stmt_lists`
 * relates it to.
 */
overlay[local]
final private class DiscardableStmtList extends Discardable instanceof @py_stmt_list {
  override string getPath() {
    exists(Discardable parent |
      py_stmt_lists(this, parent.(@py_stmt_list_parent), _) and
      result = parent.getPath()
    )
  }
}
/**
 * A discardable string list.
 *
 * Its path is the path of the `@py_str_list_parent` that `py_str_lists`
 * relates it to.
 */
overlay[local]
final private class DiscardableStrList extends Discardable instanceof @py_str_list {
  override string getPath() {
    exists(Discardable parent |
      py_str_lists(this, parent.(@py_str_list_parent)) and
      result = parent.getPath()
    )
  }
}
/**
 * A discardable type parameter list.
 *
 * Its path is the path of the `@py_type_parameter_list_parent` that
 * `py_type_parameter_lists` relates it to.
 */
overlay[local]
final private class DiscardableTypeParameterList extends Discardable instanceof @py_type_parameter_list
{
  override string getPath() {
    exists(Discardable parent |
      py_type_parameter_lists(this, parent.(@py_type_parameter_list_parent)) and
      result = parent.getPath()
    )
  }
}
/**
 * A discardable unary operator.
 *
 * Its path is the path of the `@py_UnaryExpr` that `py_unaryops` relates it to.
 */
overlay[local]
final private class DiscardableUnaryOp extends Discardable instanceof @py_unaryop {
  override string getPath() {
    exists(Discardable unaryExpr |
      py_unaryops(this, _, unaryExpr.(@py_UnaryExpr)) and
      result = unaryExpr.getPath()
    )
  }
}
/**
 * A discardable comment.
 *
 * Its path is the path of the location that `py_comments` records for it.
 */
overlay[local]
final private class DiscardableComment extends Discardable instanceof @py_comment {
  override string getPath() {
    exists(DiscardableLocation pos |
      py_comments(this, _, pos) and
      result = pos.getPath()
    )
  }
}
/*- YAML -*/
/**
 * A discardable YAML locatable.
 *
 * Its path is the path of a location that `yaml_locations` attaches to it.
 */
overlay[local]
final private class DiscardableYamlLocatable extends Discardable instanceof @yaml_locatable {
  override string getPath() {
    exists(@location pos |
      yaml_locations(this, pos) and
      result = getPathForLocation(pos)
    )
  }
}
/** Holds if `path` is the path of some YAML locatable that exists in the overlay variant. */
overlay[local]
private predicate overlayYamlExtracted(string path) {
  exists(DiscardableYamlLocatable yaml |
    yaml.existsInOverlay() and
    path = yaml.getPath()
  )
}
/**
 * Holds if the YAML locatable `el` exists in the base and belongs to a file
 * for which the overlay also contains YAML entities.
 */
overlay[discard_entity]
private predicate discardBaseYamlLocatable(@yaml_locatable el) {
  exists(DiscardableYamlLocatable yaml |
    yaml = el and
    // The Yaml extractor is currently not incremental and may extract more
    // Yaml files than those included in `overlayChangedFiles`, so this discard predicate
    // handles those files alongside the normal `discardStarEntity` logic.
    overlayYamlExtracted(yaml.getPath()) and
    yaml.existsInBase()
  )
}

View File

@@ -61,14 +61,6 @@ class SsaVariable extends @py_ssa_var {
)
}
/** Gets an argument of the phi function defining this variable, pruned of unlikely edges. */
SsaVariable getAPrunedPhiInput() {
result = this.getAPhiInput() and
exists(BasicBlock incoming | incoming = this.getPredecessorBlockForPhiArgument(result) |
not incoming.getLastNode().(RaisingNode).unlikelySuccessor(this.getDefinition())
)
}
/** Gets a variable that ultimately defines this variable and is not itself defined by another variable */
SsaVariable getAnUltimateDefinition() {
result = this and not exists(this.getAPhiInput())
@@ -85,17 +77,11 @@ class SsaVariable extends @py_ssa_var {
string getId() { result = this.getVariable().getId() }
/** Gets the incoming edges for a Phi node. */
private BasicBlock getAPredecessorBlockForPhi() {
BasicBlock getAPredecessorBlockForPhi() {
exists(this.getAPhiInput()) and
result.getASuccessor() = this.getDefinition().getBasicBlock()
}
/** Gets the incoming edges for a Phi node, pruned of unlikely edges. */
private BasicBlock getAPrunedPredecessorBlockForPhi() {
result = this.getAPredecessorBlockForPhi() and
not result.unlikelySuccessor(this.getDefinition().getBasicBlock())
}
/** Whether it is possible to reach a use of this variable without passing a definition */
predicate reachableWithoutDefinition() {
not exists(this.getDefinition()) and not py_ssa_phi(this, _)
@@ -115,38 +101,6 @@ class SsaVariable extends @py_ssa_var {
)
}
/** Whether this variable may be undefined */
predicate maybeUndefined() {
not exists(this.getDefinition()) and not py_ssa_phi(this, _) and not this.implicitlyDefined()
or
this.getDefinition().isDelete()
or
exists(SsaVariable var | var = this.getAPrunedPhiInput() | var.maybeUndefined())
or
/*
* For phi-nodes, there must be a corresponding phi-input for each control-flow
* predecessor. Otherwise, the variable will be undefined on that incoming edge.
* WARNING: the same phi-input may cover multiple predecessors, so this check
* cannot be done by counting.
*/
exists(BasicBlock incoming |
reaches_end(incoming) and
incoming = this.getAPrunedPredecessorBlockForPhi() and
not this.getAPhiInput().getDefinition().getBasicBlock().dominates(incoming)
)
}
private predicate implicitlyDefined() {
not exists(this.getDefinition()) and
not py_ssa_phi(this, _) and
exists(GlobalVariable var | this.getVariable() = var |
globallyDefinedName(var.getId())
or
var.getId() = "__path__" and var.getScope().(Module).isPackageInit()
)
}
/**
* Gets the global variable that is accessed if this local is undefined.
* Only applies to local variables in class scopes.
@@ -173,43 +127,6 @@ class SsaVariable extends @py_ssa_var {
}
}
private predicate reaches_end(BasicBlock b) {
not exits_early(b) and
(
/* Entry point */
not exists(BasicBlock prev | prev.getASuccessor() = b)
or
exists(BasicBlock prev | prev.getASuccessor() = b | reaches_end(prev))
)
}
private predicate exits_early(BasicBlock b) {
exists(FunctionObject f |
f.neverReturns() and
f.getACall().getBasicBlock() = b
)
}
private predicate gettext_installed() {
// Good enough (and fast) approximation
exists(Module m | m.getName() = "gettext")
}
private predicate builtin_constant(string name) {
exists(Object::builtin(name))
or
name = "WindowsError"
or
name = "_" and gettext_installed()
}
private predicate auto_name(string name) {
name = "__file__" or name = "__builtins__" or name = "__name__"
}
/** Whether this name is (almost) always defined, ie. it is a builtin or VM defined name */
predicate globallyDefinedName(string name) { builtin_constant(name) or auto_name(name) }
/** An SSA variable that is backed by a global variable */
class GlobalSsaVariable extends EssaVariable {
GlobalSsaVariable() { this.getSourceVariable() instanceof GlobalVariable }

View File

@@ -5,6 +5,7 @@
import python
private import semmle.python.pointsto.Filters
private import LegacyPointsTo
/**
* An attribute access where the left hand side of the attribute expression

View File

@@ -9,6 +9,7 @@
*/
private import python
private import LegacyPointsTo
/** A control flow node which might correspond to a special method call. */
class PotentialSpecialMethodCallNode extends ControlFlowNode instanceof SpecialMethod::Potential { }
@@ -106,7 +107,11 @@ class SpecialMethodCallNode extends PotentialSpecialMethodCallNode {
SpecialMethodCallNode() {
exists(SpecialMethod::Potential pot |
this = pot and
pot.getSelf().pointsTo().getClass().lookup(pot.getSpecialMethodName()) = resolvedSpecialMethod
pot.getSelf()
.(ControlFlowNodeWithPointsTo)
.pointsTo()
.getClass()
.lookup(pot.getSpecialMethodName()) = resolvedSpecialMethod
)
}

View File

@@ -22,30 +22,39 @@ deprecated class SummaryComponentStack = Impl::Private::SummaryComponentStack;
deprecated module SummaryComponentStack = Impl::Private::SummaryComponentStack;
/** A callable with a flow summary, identified by a unique string. */
abstract class SummarizedCallable extends LibraryCallable, Impl::Public::SummarizedCallable {
bindingset[this]
SummarizedCallable() { any() }
class Provenance = Impl::Public::Provenance;
/**
* DEPRECATED: Use `propagatesFlow` instead.
*/
deprecated predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
this.propagatesFlow(input, output, preservesValue, _)
/** Provides the `Range` class used to define the extent of `SummarizedCallable`. */
module SummarizedCallable {
/** A callable with a flow summary, identified by a unique string. */
abstract class Range extends LibraryCallable, Impl::Public::SummarizedCallable {
bindingset[this]
Range() { any() }
override predicate propagatesFlow(
string input, string output, boolean preservesValue, Provenance p, boolean isExact,
string model
) {
this.propagatesFlow(input, output, preservesValue) and
p = "manual" and
isExact = true and
model = this
}
/**
* Holds if data may flow from `input` to `output` through this callable.
*
* `preservesValue` indicates whether this is a value-preserving step or a taint-step.
*/
predicate propagatesFlow(string input, string output, boolean preservesValue) { none() }
}
override predicate propagatesFlow(
string input, string output, boolean preservesValue, string model
) {
this.propagatesFlow(input, output, preservesValue) and model = this
}
/**
* Holds if data may flow from `input` to `output` through this callable.
*
* `preservesValue` indicates whether this is a value-preserving step or a taint-step.
*/
predicate propagatesFlow(string input, string output, boolean preservesValue) { none() }
}
final private class SummarizedCallableFinal = SummarizedCallable::Range;
/** A callable with a flow summary, identified by a unique string. */
final class SummarizedCallable extends SummarizedCallableFinal,
Impl::Public::RelevantSummarizedCallable
{ }
deprecated class RequiredSummaryComponentStack = Impl::Private::RequiredSummaryComponentStack;

View File

@@ -334,3 +334,5 @@ private module SensitiveDataModeling {
}
predicate sensitiveDataExtraStepForCalls = SensitiveDataModeling::extraStepForCalls/2;
predicate sensitiveLookupStringConst = SensitiveDataModeling::sensitiveLookupStringConst/1;

View File

@@ -1,9 +1,10 @@
/** This module provides an API for attribute reads and writes. */
private import python
import DataFlowUtil
import DataFlowPublic
private import DataFlowPrivate
private import semmle.python.types.Builtins
private import semmle.python.dataflow.new.internal.Builtins
/**
* A data flow node that reads or writes an attribute of an object.
@@ -134,8 +135,12 @@ private class BuiltInCallNode extends CallNode {
BuiltInCallNode() {
// TODO disallow instances where the name of the built-in may refer to an in-scope variable of that name.
exists(NameNode id | this.getFunction() = id and id.getId() = name and id.isGlobal()) and
name = any(Builtin b).getName()
exists(NameNode id |
name = Builtins::getBuiltinName() and
this.getFunction() = id and
id.getId() = name and
id.isGlobal()
)
}
/** Gets the name of the built-in function that is called at this `CallNode` */

View File

@@ -584,10 +584,6 @@ class GuardNode extends ControlFlowNode {
/**
* Holds if the guard `g` validates `node` upon evaluating to `branch`.
*
* The expression `e` is expected to be a syntactic part of the guard `g`.
* For example, the guard `g` might be a call `isSafe(x)` and the expression `e`
* the argument `x`.
*/
signature predicate guardChecksSig(GuardNode g, ControlFlowNode node, boolean branch);
@@ -600,15 +596,72 @@ signature predicate guardChecksSig(GuardNode g, ControlFlowNode node, boolean br
module BarrierGuard<guardChecksSig/3 guardChecks> {
/** Gets a node that is safely guarded by the given guard check. */
ExprNode getABarrierNode() {
// Delegates to the parameterized implementation, using `Unit` as a dummy parameter type.
result = ParameterizedBarrierGuard<Unit, extendedGuardChecks/4>::getABarrierNode(_)
}
/**
* Holds if `guardChecks(g, node, branch)` holds.
*
* This adapts the three-argument `guardChecks` to the four-argument form
* passed to `ParameterizedBarrierGuard`; the extra parameter `u` is not
* otherwise constrained.
*/
private predicate extendedGuardChecks(GuardNode g, ControlFlowNode node, boolean branch, Unit u) {
guardChecks(g, node, branch) and
// NOTE(review): `u = u` presumably only exists to mention the otherwise unused parameter.
u = u
}
}
/**
* A type usable as the extra parameter of a parameterized guard check
* (see `ParameterizedBarrierGuard`).
*/
bindingset[this]
private signature class ParamSig;
/**
* Provides the signature of a guard check that carries an extra parameter of type `P`.
*/
private module WithParam<ParamSig P> {
/**
* Holds if the guard `g` validates `node` upon evaluating to `branch`,
* for the parameter value `param`.
*/
signature predicate guardChecksSig(GuardNode g, ControlFlowNode node, boolean branch, P param);
}
/**
* Provides a set of barrier nodes for a guard that validates a node.
*
* This is expected to be used in `isBarrier`/`isSanitizer` definitions
* in data flow and taint tracking.
*/
module ParameterizedBarrierGuard<ParamSig P, WithParam<P>::guardChecksSig/4 guardChecks> {
/** Gets a node that is safely guarded by the given guard check with parameter `param`. */
ExprNode getABarrierNode(P param) {
exists(GuardNode g, EssaDefinition def, ControlFlowNode node, boolean branch |
AdjacentUses::useOfDef(def, node) and
guardChecks(g, node, branch) and
guardChecks(g, node, branch, param) and
AdjacentUses::useOfDef(def, result.asCfgNode()) and
g.controlsBlock(result.asCfgNode().getBasicBlock(), branch)
)
}
}
/**
 * Provides a set of barrier nodes for a guard that validates a node as described by an external predicate.
 *
 * This is expected to be used in `isBarrier`/`isSanitizer` definitions
 * in data flow and taint tracking.
 */
module ExternalBarrierGuard {
  private import semmle.python.ApiGraphs

  /**
   * Holds if `g` is a call with a parameter that
   * `ModelOutput::getABarrierGuardNode(kind, branch)` identifies as guarded,
   * and `node` is the control flow node of that parameter's sink.
   */
  private predicate guardCheck(GuardNode g, ControlFlowNode node, boolean branch, string kind) {
    exists(API::CallNode guardCall, API::Node guarded |
      g = guardCall.asCfgNode() and
      guarded = guardCall.getAParameter() and
      guarded = ModelOutput::getABarrierGuardNode(kind, branch) and
      node = guarded.asSink().asCfgNode()
    )
  }

  /**
   * Gets a node that is an external barrier of the given kind.
   *
   * This only provides external barrier nodes defined as guards. To get all externally defined barrier nodes,
   * use `ModelOutput::barrierNode(node, kind)`.
   *
   * INTERNAL: Do not use.
   */
  ExprNode getAnExternalBarrierNode(string kind) {
    result = ParameterizedBarrierGuard<string, guardCheck/4>::getABarrierNode(kind)
  }
}
/**
* Algebraic datatype for tracking data content associated with values.
* Content can be collection elements or object attributes.

View File

@@ -18,6 +18,8 @@ module Input implements InputSig<Location, DataFlowImplSpecific::PythonDataFlow>
class SinkBase = Void;
predicate callableFromSource(SummarizedCallableBase c) { none() }
ArgumentPosition callbackSelfParameterPosition() { result.isLambdaSelf() }
ReturnKind getStandardReturnValueKind() { any() }

View File

@@ -9,6 +9,7 @@ private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.internal.ImportStar
private import semmle.python.dataflow.new.TypeTracking
private import semmle.python.dataflow.new.internal.DataFlowPrivate
private import semmle.python.essa.SsaDefinitions
/**
* Python modules and the way imports are resolved are... complicated. Here's a crash course in how

View File

@@ -30,7 +30,7 @@ private module SummaryTypeTrackerInput implements SummaryTypeTracker::Input {
predicate propagatesFlow(
SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
) {
super.propagatesFlow(input, output, preservesValue, _)
super.propagatesFlow(input, output, preservesValue, _, _, _)
}
}

View File

@@ -1,6 +1,6 @@
import python
import semmle.python.dataflow.TaintTracking
private import semmle.python.objects.ObjectInternal
private import LegacyPointsTo
private import semmle.python.dataflow.Implementation
module TaintTracking {

View File

@@ -1,4 +1,5 @@
import python
private import LegacyPointsTo
import semmle.python.dataflow.TaintTracking
class OpenFile extends TaintKind {

View File

@@ -1,6 +1,6 @@
import python
private import LegacyPointsTo
import semmle.python.dataflow.TaintTracking
private import semmle.python.objects.ObjectInternal
private import semmle.python.pointsto.Filters as Filters
import semmle.python.dataflow.Legacy
@@ -256,7 +256,7 @@ class TaintTrackingImplementation extends string instanceof TaintTracking::Confi
TaintKind kind, string edgeLabel
) {
this.unprunedStep(src, node, context, path, kind, edgeLabel) and
node.getBasicBlock().likelyReachable() and
node.getBasicBlock().(BasicBlockWithPointsTo).likelyReachable() and
not super.isBarrier(node) and
(
not path = TNoAttribute()
@@ -374,7 +374,7 @@ class TaintTrackingImplementation extends string instanceof TaintTracking::Confi
exists(ModuleValue m, string name |
src = TTaintTrackingNode_(_, context, path, kind, this) and
this.moduleAttributeTainted(m, name, src) and
node.asCfgNode().(ImportMemberNode).getModule(name).pointsTo(m)
node.asCfgNode().(ImportMemberNode).getModule(name).(ControlFlowNodeWithPointsTo).pointsTo(m)
)
}
@@ -408,7 +408,9 @@ class TaintTrackingImplementation extends string instanceof TaintTracking::Confi
src = TTaintTrackingNode_(srcnode, context, srcpath, srckind, this) and
exists(CallNode call, ControlFlowNode arg |
call = node.asCfgNode() and
call.getFunction().pointsTo(ObjectInternal::builtin("getattr")) and
call.getFunction()
.(ControlFlowNodeWithPointsTo)
.pointsTo(ObjectInternal::builtin("getattr")) and
arg = call.getArg(0) and
attrname = call.getArg(1).getNode().(StringLiteral).getText() and
arg = srcnode.asCfgNode()
@@ -515,7 +517,7 @@ class TaintTrackingImplementation extends string instanceof TaintTracking::Confi
TaintTrackingContext caller, TaintTrackingContext callee
) {
exists(ClassValue cls |
call.getFunction().pointsTo(cls) and
call.getFunction().(ControlFlowNodeWithPointsTo).pointsTo(cls) and
cls.lookup("__init__") = init
|
exists(int arg, TaintKind callerKind, AttributePath callerPath, DataFlow::Node argument |
@@ -682,7 +684,9 @@ private class EssaTaintTracking extends string instanceof TaintTracking::Configu
TaintTrackingNode src, PhiFunction defn, TaintTrackingContext context, AttributePath path,
TaintKind kind
) {
exists(DataFlow::Node srcnode, BasicBlock pred, EssaVariable predvar, DataFlow::Node phi |
exists(
DataFlow::Node srcnode, BasicBlockWithPointsTo pred, EssaVariable predvar, DataFlow::Node phi
|
src = TTaintTrackingNode_(srcnode, context, path, kind, this) and
defn = phi.asVariable().getDefinition() and
predvar = defn.getInput(pred) and
@@ -878,7 +882,7 @@ private class EssaTaintTracking extends string instanceof TaintTracking::Configu
const.getNode() instanceof ImmutableLiteral
)
or
exists(ControlFlowNode c, ClassValue cls |
exists(ControlFlowNodeWithPointsTo c, ClassValue cls |
Filters::isinstance(test, c, use) and
c.pointsTo(cls)
|
@@ -978,7 +982,7 @@ module Implementation {
tonode.getArg(0) = fromnode
)
or
tonode.getFunction().pointsTo(ObjectInternal::builtin("reversed")) and
tonode.getFunction().(ControlFlowNodeWithPointsTo).pointsTo(ObjectInternal::builtin("reversed")) and
tonode.getArg(0) = fromnode
}
}

View File

@@ -9,9 +9,7 @@
*/
import python
private import semmle.python.pointsto.PointsTo
private import semmle.python.pointsto.PointsToContext
private import semmle.python.objects.ObjectInternal
private import LegacyPointsTo
/** A state that should be tracked. */
abstract class TrackableState extends string {

View File

@@ -87,8 +87,8 @@
*/
import python
private import LegacyPointsTo
private import semmle.python.pointsto.Filters as Filters
private import semmle.python.objects.ObjectInternal
private import semmle.python.dataflow.Implementation
import semmle.python.dataflow.Configuration
@@ -267,7 +267,11 @@ module DictKind {
Implementation::copyCall(fromnode, tonode) and
edgeLabel = "dict copy"
or
tonode.(CallNode).getFunction().pointsTo(ObjectInternal::builtin("dict")) and
tonode
.(CallNode)
.getFunction()
.(ControlFlowNodeWithPointsTo)
.pointsTo(ObjectInternal::builtin("dict")) and
tonode.(CallNode).getArg(0) = fromnode and
edgeLabel = "dict() call"
}
@@ -615,7 +619,7 @@ module DataFlow {
TCfgNode(ControlFlowNode node)
abstract class Node extends TDataFlowNode {
abstract ControlFlowNode asCfgNode();
abstract ControlFlowNodeWithPointsTo asCfgNode();
abstract EssaVariable asVariable();
@@ -632,7 +636,7 @@ module DataFlow {
}
class CfgNode extends Node, TCfgNode {
override ControlFlowNode asCfgNode() { this = TCfgNode(result) }
override ControlFlowNodeWithPointsTo asCfgNode() { this = TCfgNode(result) }
override EssaVariable asVariable() { none() }
@@ -647,7 +651,7 @@ module DataFlow {
}
class EssaNode extends Node, TEssaNode {
override ControlFlowNode asCfgNode() { none() }
override ControlFlowNodeWithPointsTo asCfgNode() { none() }
override EssaVariable asVariable() { this = TEssaNode(result) }
@@ -668,7 +672,11 @@ pragma[noinline]
private predicate dict_construct(ControlFlowNode itemnode, ControlFlowNode dictnode) {
dictnode.(DictNode).getAValue() = itemnode
or
dictnode.(CallNode).getFunction().pointsTo(ObjectInternal::builtin("dict")) and
dictnode
.(CallNode)
.getFunction()
.(ControlFlowNodeWithPointsTo)
.pointsTo(ObjectInternal::builtin("dict")) and
dictnode.(CallNode).getArgByName(_) = itemnode
}
@@ -688,7 +696,7 @@ private predicate sequence_construct(ControlFlowNode itemnode, ControlFlowNode s
pragma[noinline]
private predicate sequence_call(ControlFlowNode fromnode, CallNode tonode) {
tonode.getArg(0) = fromnode and
exists(ControlFlowNode cls | cls = tonode.getFunction() |
exists(ControlFlowNodeWithPointsTo cls | cls = tonode.getFunction() |
cls.pointsTo(ObjectInternal::builtin("list"))
or
cls.pointsTo(ObjectInternal::builtin("tuple"))

View File

@@ -1,4 +1,5 @@
import python
private import LegacyPointsTo
import semmle.python.dependencies.DependencyKind
private predicate importDependency(Object target, AstNode source) {
@@ -59,7 +60,7 @@ class PythonUse extends DependencyKind {
interesting(target) and
this = this and
source != target.(ControlFlowNode).getNode() and
exists(ControlFlowNode use, Object obj |
exists(ControlFlowNodeWithPointsTo use, Object obj |
use.getNode() = source and
use.refersTo(obj) and
use.isLoad()
@@ -114,12 +115,14 @@ private predicate attribute_access_dependency(Object target, AstNode source) {
private predicate use_of_attribute(Attribute attr, Scope s, string name) {
exists(AttrNode cfg | cfg.isLoad() and cfg.getNode() = attr |
exists(Object obj | cfg.getObject(name).refersTo(obj) |
exists(Object obj | cfg.getObject(name).(ControlFlowNodeWithPointsTo).refersTo(obj) |
s = obj.(PythonModuleObject).getModule() or
s = obj.(ClassObject).getPyClass()
)
or
exists(ClassObject cls | cfg.getObject(name).refersTo(_, cls, _) | s = cls.getPyClass())
exists(ClassObject cls | cfg.getObject(name).(ControlFlowNodeWithPointsTo).refersTo(_, cls, _) |
s = cls.getPyClass()
)
)
or
exists(SelfAttributeRead sar | sar = attr |

View File

@@ -1,4 +1,5 @@
import semmle.python.dependencies.Dependencies
private import LegacyPointsTo
/**
* A library describing an abstract mechanism for representing dependency categories.

View File

@@ -1,6 +1,7 @@
import python
import semmle.python.dependencies.Dependencies
import semmle.python.dependencies.DependencyKind
private import LegacyPointsTo
/**
* Combine the source-file and package into a single string:

View File

@@ -1,5 +1,4 @@
import python
/*
* Classification of variables. These should be non-overlapping and complete.
*
@@ -12,6 +11,9 @@ import python
* Escaping globals -- Global variables that have definitions and at least one of those definitions is in another scope.
*/
private import semmle.python.types.ImportTime
private import semmle.python.essa.SsaDefinitions
/** A source language variable, to be converted into a set of SSA variables. */
abstract class SsaSourceVariable extends @py_variable {
SsaSourceVariable() {
@@ -274,6 +276,17 @@ class ModuleVariable extends SsaSourceVariable instanceof GlobalVariable {
override CallNode redefinedAtCallSite() { none() }
}
/**
 * Holds if `f` is an import of the form `from .[...] import ...` and the
 * enclosing scope is an `__init__` module.
 */
private predicate import_from_dot_in_init(ImportExprNode f) {
  exists(Module pkg | f.getScope() = pkg.getInitModule()) and
  (
    f.getNode().getImportedModuleName() = f.getEnclosingModule().getPackage().getName()
    or
    not exists(f.getNode().getName()) and
    f.getNode().getLevel() = 1
  )
}
class NonEscapingGlobalVariable extends ModuleVariable {
NonEscapingGlobalVariable() {
this instanceof GlobalVariable and

View File

@@ -6,6 +6,7 @@ import python
private import SsaCompute
import semmle.python.essa.Definitions
private import semmle.python.internal.CachedStages
private import semmle.python.essa.SsaDefinitions
/** An (enhanced) SSA variable derived from `SsaSourceVariable`. */
class EssaVariable extends TEssaDefinition {

View File

@@ -6,6 +6,13 @@
import python
private import semmle.python.internal.CachedStages
/**
 * Holds if `expr` is a test (a branch) and `use` is within that test,
 * that is, `use` is `expr` itself or one of its transitive children.
 */
predicate test_contains(ControlFlowNode expr, ControlFlowNode use) {
  expr.isBranch() and
  expr.getNode() instanceof Expr and
  use = expr.getAChild*()
}
cached
module SsaSource {
/** Holds if `v` is used as the receiver in a method call. */

View File

@@ -0,0 +1,6 @@
# Barrier-guard model for Django: a call to
# `django.utils.http.url_has_allowed_host_and_scheme` that evaluates to true
# guards its first positional argument / `url` keyword argument against
# `url-redirection`.
extensions:
- addsTo:
pack: codeql/python-all
extensible: barrierGuardModel
data:
- ['django', 'Member[utils].Member[http].Member[url_has_allowed_host_and_scheme].Argument[0,url:]', "true", 'url-redirection']

View File

@@ -2965,38 +2965,6 @@ module PrivateDjango {
override predicate csrfEnabled() { decoratorName in ["csrf_protect", "requires_csrf_token"] }
}
private predicate djangoUrlHasAllowedHostAndScheme(
DataFlow::GuardNode g, ControlFlowNode node, boolean branch
) {
exists(API::CallNode call |
call =
API::moduleImport("django")
.getMember("utils")
.getMember("http")
.getMember("url_has_allowed_host_and_scheme")
.getACall() and
g = call.asCfgNode() and
node = call.getParameter(0, "url").asSink().asCfgNode() and
branch = true
)
}
/**
* A call to `django.utils.http.url_has_allowed_host_and_scheme`, considered as a sanitizer-guard for URL redirection.
*
* See https://docs.djangoproject.com/en/4.2/_modules/django/utils/http/
*/
private class DjangoAllowedUrl extends UrlRedirect::Sanitizer {
DjangoAllowedUrl() {
this = DataFlow::BarrierGuard<djangoUrlHasAllowedHostAndScheme/3>::getABarrierNode()
}
override predicate sanitizes(UrlRedirect::FlowState state) {
// sanitize all flow states
any()
}
}
// ---------------------------------------------------------------------------
// Templates
// ---------------------------------------------------------------------------

View File

@@ -621,24 +621,15 @@ module Flask {
}
override DataFlow::Node getAPathArgument() {
result in [
this.getArg(0), this.getArgByName("directory"),
// as described in the docs, the `filename` argument is restrained to be within
// the provided directory, so is not exposed to path-injection. (but is still a
// path-argument).
this.getArg(1), this.getArgByName("filename")
]
result = this.getArg([0, 1]) or
result = this.getArgByName(["directory", "filename"])
}
}
/**
* To exclude `filename` argument to `flask.send_from_directory` as a path-injection sink.
*/
private class FlaskSendFromDirectoryCallFilenameSanitizer extends PathInjection::Sanitizer {
FlaskSendFromDirectoryCallFilenameSanitizer() {
this = any(FlaskSendFromDirectoryCall c).getArg(1)
or
this = any(FlaskSendFromDirectoryCall c).getArgByName("filename")
override DataFlow::Node getAVulnerablePathArgument() {
result = this.getAPathArgument() and
// as described in the docs, the `filename` argument is restricted to be within
// the provided directory, so is not exposed to path-injection.
not result in [this.getArg(1), this.getArgByName("filename")]
}
}
@@ -674,7 +665,7 @@ module Flask {
*
* see https://flask.palletsprojects.com/en/2.3.x/api/#flask.render_template_string
*/
private class RenderTemplateStringSummary extends SummarizedCallable {
private class RenderTemplateStringSummary extends SummarizedCallable::Range {
RenderTemplateStringSummary() { this = "flask.render_template_string" }
override DataFlow::CallCfgNode getACall() {
@@ -700,7 +691,7 @@ module Flask {
*
* see https://flask.palletsprojects.com/en/2.3.x/api/#flask.stream_template_string
*/
private class StreamTemplateStringSummary extends SummarizedCallable {
private class StreamTemplateStringSummary extends SummarizedCallable::Range {
StreamTemplateStringSummary() { this = "flask.stream_template_string" }
override DataFlow::CallCfgNode getACall() {

View File

@@ -0,0 +1,119 @@
/**
* Provides definitions and modeling for the `python-socketio` PyPI package.
* See https://python-socketio.readthedocs.io/en/stable/.
*/
private import python
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.frameworks.internal.PoorMansFunctionResolution
/**
* Provides models for the `python-socketio` PyPI package.
* See https://python-socketio.readthedocs.io/en/stable/.
*/
module SocketIO {
/** Provides models for socketio `Server` and `AsyncServer` classes. */
module Server {
/** Gets an instance of a socketio `Server` or `AsyncServer`. */
API::Node server() {
result = API::moduleImport("socketio").getMember(["Server", "AsyncServer"]).getAnInstance()
}
/** Gets a decorator that indicates a socketio event handler. */
private API::Node serverEventAnnotation() {
// Bare decorator form: the `event` member of a server instance.
result = server().getMember("event")
or
// Decorator-factory form: the value returned by calling the `on` member.
result = server().getMember("on").getReturn()
}
/**
* A function registered as a socketio event handler, either by being decorated
* with a server event annotation or by being passed as the second positional
* argument or the `handler` keyword argument of a call to `on`.
*/
private class EventHandler extends Http::Server::RequestHandler::Range {
EventHandler() {
serverEventAnnotation().getAValueReachableFromSource().asExpr() = this.getADecorator()
or
exists(DataFlow::CallCfgNode c, DataFlow::Node arg |
c = server().getMember("on").getACall()
|
(
arg = c.getArg(1)
or
arg = c.getArgByName("handler")
) and
poorMansFunctionTracker(this) = arg
)
}
override Parameter getARoutedParameter() {
result = this.getAnArg() and
not result = this.getArg(0) // First parameter is `sid`, which is not a remote flow source as it cannot be controlled by the client.
}
override string getFramework() { result = "socketio" }
}
/**
* A value passed as the `callback` keyword argument in a call to `emit` or `send`
* on a server instance or a namespace instance.
*/
private class CallbackArgument extends DataFlow::Node {
CallbackArgument() {
exists(DataFlow::CallCfgNode c |
c = [server(), Namespace::instance()].getMember(["emit", "send"]).getACall()
|
this = c.getArgByName("callback")
)
}
}
/**
* A function that is passed as a `CallbackArgument`.
*
* Unlike `EventHandler`, no parameter is excluded: every parameter is a routed parameter.
*/
private class CallbackHandler extends Http::Server::RequestHandler::Range {
CallbackHandler() { any(CallbackArgument ca) = poorMansFunctionTracker(this) }
override Parameter getARoutedParameter() { result = this.getAnArg() }
override string getFramework() { result = "socketio" }
}
/**
* A call to the `call` member of a server instance or a namespace instance,
* modeled as a remote flow source.
*/
private class SocketIOCall extends RemoteFlowSource::Range {
SocketIOCall() { this = [server(), Namespace::instance()].getMember("call").getACall() }
override string getSourceType() { result = "socketio call" }
}
}
/** Provides modeling for socketio server Namespace/AsyncNamespace classes. */
module Namespace {
/** Gets a reference to the `socketio.Namespace` or `socketio.AsyncNamespace` classes or any subclass. */
API::Node subclassRef() {
result =
API::moduleImport("socketio").getMember(["Namespace", "AsyncNamespace"]).getASubclass*()
}
/** Gets a reference to an instance of a subclass of `socketio.Namespace` or `socketio.AsyncNamespace`. */
API::Node instance() {
result = subclassRef().getAnInstance()
or
// The `self` parameter of any member of such a class is also an instance.
result = subclassRef().getAMember().getSelfParameter()
}
/** A socketio Namespace class. */
class NamespaceClass extends Class {
NamespaceClass() { this.getABase() = subclassRef().asSource().asExpr() }
/** Gets a handler for socketio events, that is, a method whose name starts with `on_`. */
Function getAnEventHandler() {
result = this.getAMethod() and
// In `matches` patterns `_` is a single-character wildcard, so it is escaped here.
// Unescaped, `"on_%"` would also accept names such as `once` or `online`.
result.getName().matches("on\\_%")
}
}
/**
* An event-handler method of a socketio namespace class, as given by
* `NamespaceClass::getAnEventHandler`.
*/
private class NamespaceEventHandler extends Http::Server::RequestHandler::Range {
NamespaceEventHandler() { this = any(NamespaceClass nc).getAnEventHandler() }
override Parameter getARoutedParameter() {
result = this.getAnArg() and
not result = this.getArg(0) and
not result = this.getArg(1) // First 2 parameters are `self` and `sid`.
}
override string getFramework() { result = "socketio" }
}
}
}

View File

@@ -142,6 +142,8 @@ extensions:
- ["typing", "Member[cast]", "Argument[1,val:]", "ReturnValue", "value"]
# See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.parse_qs
- ["urllib", "Member[parse].Member[parse_qs]", "Argument[0,qs:]", "ReturnValue", "taint"]
# See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlparse
- ["urllib", "Member[parse].Member[urlparse]", "Argument[0,urlstring:]", "ReturnValue", "taint"]
# See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.quote
- ["urllib", "Member[parse].Member[quote]", "Argument[0,string:]", "ReturnValue", "taint"]
# See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.quote_plus
@@ -181,7 +183,9 @@ extensions:
- addsTo:
pack: codeql/python-all
extensible: typeModel
data: []
data:
# See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlparse
- ["urllib.parse.ParseResult~Subclass", 'urllib', 'Member[parse].Member[urlparse]']
- addsTo:
pack: codeql/python-all

View File

@@ -245,6 +245,67 @@ module Stdlib {
}
}
/**
* Provides models for the `urllib.parse.ParseResult` class
*
* See https://docs.python.org/3.9/library/urllib.parse.html#urllib.parse.ParseResult.
*/
module ParseResult {
/** Gets a reference to the `urllib.parse.ParseResult` class. */
API::Node classRef() {
result = API::moduleImport("urllib").getMember("parse").getMember("ParseResult")
or
// Also include anything that type models declare as `urllib.parse.ParseResult~Subclass`.
result = ModelOutput::getATypeNode("urllib.parse.ParseResult~Subclass").getASubclass*()
}
/**
* A source of instances of `urllib.parse.ParseResult`, extend this class to model new instances.
*
* This can include instantiations of the class, return values from function
* calls, or a special parameter that will be set when functions are called by an external
* library.
*
* Use the predicate `ParseResult::instance()` to get references to instances of `urllib.parse.ParseResult`.
*/
abstract class InstanceSource extends DataFlow::LocalSourceNode { }
/** A direct instantiation of `urllib.parse.ParseResult`. */
private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode {
ClassInstantiation() { this = classRef().getACall() }
}
/** Gets a reference to an instance of `urllib.parse.ParseResult`. */
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
// Base case: tracking starts at every instance source.
t.start() and
result instanceof InstanceSource
or
// Recursive case: extend an already tracked reference by one type-tracking step.
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
}
/** Gets a reference to an instance of `urllib.parse.ParseResult`. */
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
/**
* Taint propagation for `urllib.parse.ParseResult`.
*/
private class InstanceTaintSteps extends InstanceTaintStepsHelper {
InstanceTaintSteps() { this = "urllib.parse.ParseResult" }
override DataFlow::Node getInstance() { result = instance() }
// NOTE(review): `scheme` is not in this list; presumably deliberate, confirm with the author.
override string getAttributeName() {
result in [
"netloc", "path", "params", "query", "fragment", "username", "password", "hostname",
"port"
]
}
// No method names (plain or async) are modeled for this class.
override string getMethodName() { none() }
override string getAsyncMethodName() { none() }
}
}
// ---------------------------------------------------------------------------
// logging
// ---------------------------------------------------------------------------
@@ -3126,7 +3187,7 @@ module StdlibPrivate {
*
* See https://docs.python.org/3.11/library/re.html#re-objects
*/
class RePatternSummary extends SummarizedCallable {
class RePatternSummary extends SummarizedCallable::Range {
RePatternSummary() { this = "re.Pattern" }
override DataFlow::CallCfgNode getACall() {
@@ -3166,7 +3227,7 @@ module StdlibPrivate {
*
* See https://docs.python.org/3/library/re.html#re.Match
*/
class ReMatchSummary extends SummarizedCallable {
class ReMatchSummary extends SummarizedCallable::Range {
ReMatchSummary() { this = ["re.Match", "compiled re.Match"] }
override DataFlow::CallCfgNode getACall() {
@@ -3230,7 +3291,7 @@ module StdlibPrivate {
*
* See https://docs.python.org/3/library/re.html#re.Match
*/
class ReMatchMethodsSummary extends SummarizedCallable {
class ReMatchMethodsSummary extends SummarizedCallable::Range {
string methodName;
ReMatchMethodsSummary() {
@@ -3274,7 +3335,7 @@ module StdlibPrivate {
*
* See https://docs.python.org/3/library/re.html#functions
*/
class ReFunctionsSummary extends SummarizedCallable {
class ReFunctionsSummary extends SummarizedCallable::Range {
string methodName;
ReFunctionsSummary() {
@@ -4122,7 +4183,7 @@ module StdlibPrivate {
*
* see https://docs.python.org/3/library/stdtypes.html#dict
*/
class DictSummary extends SummarizedCallable {
class DictSummary extends SummarizedCallable::Range {
DictSummary() { this = "builtins.dict" }
override DataFlow::CallCfgNode getACall() { result = API::builtin("dict").getACall() }
@@ -4161,7 +4222,7 @@ module StdlibPrivate {
}
/** A flow summary for `list`. */
class ListSummary extends SummarizedCallable {
class ListSummary extends SummarizedCallable::Range {
ListSummary() { this = "builtins.list" }
override DataFlow::CallCfgNode getACall() { result = API::builtin("list").getACall() }
@@ -4191,7 +4252,7 @@ module StdlibPrivate {
}
/** A flow summary for tuple */
class TupleSummary extends SummarizedCallable {
class TupleSummary extends SummarizedCallable::Range {
TupleSummary() { this = "builtins.tuple" }
override DataFlow::CallCfgNode getACall() { result = API::builtin("tuple").getACall() }
@@ -4216,7 +4277,7 @@ module StdlibPrivate {
}
/** A flow summary for set */
class SetSummary extends SummarizedCallable {
class SetSummary extends SummarizedCallable::Range {
SetSummary() { this = "builtins.set" }
override DataFlow::CallCfgNode getACall() { result = API::builtin("set").getACall() }
@@ -4246,7 +4307,7 @@ module StdlibPrivate {
}
/** A flow summary for frozenset */
class FrozensetSummary extends SummarizedCallable {
class FrozensetSummary extends SummarizedCallable::Range {
FrozensetSummary() { this = "builtins.frozenset" }
override DataFlow::CallCfgNode getACall() { result = API::builtin("frozenset").getACall() }
@@ -4264,7 +4325,7 @@ module StdlibPrivate {
// Flow summaries for functions operating on containers
// ---------------------------------------------------------------------------
/** A flow summary for `reversed`. */
class ReversedSummary extends SummarizedCallable {
class ReversedSummary extends SummarizedCallable::Range {
ReversedSummary() { this = "builtins.reversed" }
override DataFlow::CallCfgNode getACall() { result = API::builtin("reversed").getACall() }
@@ -4294,7 +4355,7 @@ module StdlibPrivate {
}
/** A flow summary for `sorted`. */
class SortedSummary extends SummarizedCallable {
class SortedSummary extends SummarizedCallable::Range {
SortedSummary() { this = "builtins.sorted" }
override DataFlow::CallCfgNode getACall() { result = API::builtin("sorted").getACall() }
@@ -4326,7 +4387,7 @@ module StdlibPrivate {
}
/** A flow summary for `iter`. */
class IterSummary extends SummarizedCallable {
class IterSummary extends SummarizedCallable::Range {
IterSummary() { this = "builtins.iter" }
override DataFlow::CallCfgNode getACall() { result = API::builtin("iter").getACall() }
@@ -4356,7 +4417,7 @@ module StdlibPrivate {
}
/** A flow summary for `next`. */
class NextSummary extends SummarizedCallable {
class NextSummary extends SummarizedCallable::Range {
NextSummary() { this = "builtins.next" }
override DataFlow::CallCfgNode getACall() { result = API::builtin("next").getACall() }
@@ -4386,7 +4447,7 @@ module StdlibPrivate {
}
/** A flow summary for `map`. */
class MapSummary extends SummarizedCallable {
class MapSummary extends SummarizedCallable::Range {
MapSummary() { this = "builtins.map" }
override DataFlow::CallCfgNode getACall() { result = API::builtin("map").getACall() }
@@ -4421,7 +4482,7 @@ module StdlibPrivate {
}
/** A flow summary for `filter`. */
class FilterSummary extends SummarizedCallable {
class FilterSummary extends SummarizedCallable::Range {
FilterSummary() { this = "builtins.filter" }
override DataFlow::CallCfgNode getACall() { result = API::builtin("filter").getACall() }
@@ -4447,7 +4508,7 @@ module StdlibPrivate {
}
/** A flow summary for `enumerate`. */
class EnumerateSummary extends SummarizedCallable {
class EnumerateSummary extends SummarizedCallable::Range {
EnumerateSummary() { this = "builtins.enumerate" }
override DataFlow::CallCfgNode getACall() { result = API::builtin("enumerate").getACall() }
@@ -4473,7 +4534,7 @@ module StdlibPrivate {
}
/** A flow summary for `zip`. */
class ZipSummary extends SummarizedCallable {
class ZipSummary extends SummarizedCallable::Range {
ZipSummary() { this = "builtins.zip" }
override DataFlow::CallCfgNode getACall() { result = API::builtin("zip").getACall() }
@@ -4507,7 +4568,7 @@ module StdlibPrivate {
// Flow summaries for container methods
// ---------------------------------------------------------------------------
/** A flow summary for `copy`. */
class CopySummary extends SummarizedCallable {
class CopySummary extends SummarizedCallable::Range {
CopySummary() { this = "collection.copy" }
override DataFlow::CallCfgNode getACall() {
@@ -4530,7 +4591,7 @@ module StdlibPrivate {
}
/** A flow summary for `copy.replace`. */
class ReplaceSummary extends SummarizedCallable {
class ReplaceSummary extends SummarizedCallable::Range {
ReplaceSummary() { this = "copy.replace" }
override DataFlow::CallCfgNode getACall() {
@@ -4563,7 +4624,7 @@ module StdlibPrivate {
* It also handles the default value when `pop` is called
* on a dictionary, since that also does not depend on the key.
*/
class PopSummary extends SummarizedCallable {
class PopSummary extends SummarizedCallable::Range {
PopSummary() { this = "collection.pop" }
override DataFlow::CallCfgNode getACall() {
@@ -4594,7 +4655,7 @@ module StdlibPrivate {
}
/** A flow summary for `dict.pop` */
class DictPopSummary extends SummarizedCallable {
class DictPopSummary extends SummarizedCallable::Range {
string key;
DictPopSummary() {
@@ -4617,7 +4678,7 @@ module StdlibPrivate {
}
/** A flow summary for `dict.get` at specific content. */
class DictGetSummary extends SummarizedCallable {
class DictGetSummary extends SummarizedCallable::Range {
string key;
DictGetSummary() {
@@ -4645,7 +4706,7 @@ module StdlibPrivate {
}
/** A flow summary for `dict.get` disregarding content. */
class DictGetAnySummary extends SummarizedCallable {
class DictGetAnySummary extends SummarizedCallable::Range {
DictGetAnySummary() { this = "dict.get" }
override DataFlow::CallCfgNode getACall() {
@@ -4668,7 +4729,7 @@ module StdlibPrivate {
}
/** A flow summary for `dict.popitem` */
class DictPopitemSummary extends SummarizedCallable {
class DictPopitemSummary extends SummarizedCallable::Range {
DictPopitemSummary() { this = "dict.popitem" }
override DataFlow::CallCfgNode getACall() {
@@ -4692,7 +4753,7 @@ module StdlibPrivate {
*
* See https://docs.python.org/3.10/library/stdtypes.html#dict.setdefault
*/
class DictSetdefaultSummary extends SummarizedCallable {
class DictSetdefaultSummary extends SummarizedCallable::Range {
DictSetdefaultSummary() { this = "dict.setdefault" }
override DataFlow::CallCfgNode getACall() {
@@ -4717,7 +4778,7 @@ module StdlibPrivate {
* This summary handles read and store steps. See `DictSetdefaultSummary`
* for the dataflow steps.
*/
class DictSetdefaultKeySummary extends SummarizedCallable {
class DictSetdefaultKeySummary extends SummarizedCallable::Range {
string key;
DictSetdefaultKeySummary() {
@@ -4750,7 +4811,7 @@ module StdlibPrivate {
*
* See https://docs.python.org/3.10/library/stdtypes.html#dict.values
*/
class DictValues extends SummarizedCallable {
class DictValues extends SummarizedCallable::Range {
DictValues() { this = "dict.values" }
override DataFlow::CallCfgNode getACall() {
@@ -4779,7 +4840,7 @@ module StdlibPrivate {
*
* See https://docs.python.org/3.10/library/stdtypes.html#dict.keys
*/
class DictKeys extends SummarizedCallable {
class DictKeys extends SummarizedCallable::Range {
DictKeys() { this = "dict.keys" }
override DataFlow::CallCfgNode getACall() { result.(DataFlow::MethodCallNode).calls(_, "keys") }
@@ -4801,7 +4862,7 @@ module StdlibPrivate {
*
* See https://docs.python.org/3.10/library/stdtypes.html#dict.items
*/
class DictItems extends SummarizedCallable {
class DictItems extends SummarizedCallable::Range {
DictItems() { this = "dict.items" }
override DataFlow::CallCfgNode getACall() {
@@ -4831,7 +4892,7 @@ module StdlibPrivate {
*
* See https://docs.python.org/3.10/library/stdtypes.html#typesseq-mutable
*/
class ListAppend extends SummarizedCallable {
class ListAppend extends SummarizedCallable::Range {
ListAppend() { this = "list.append" }
override DataFlow::CallCfgNode getACall() {
@@ -4860,7 +4921,7 @@ module StdlibPrivate {
*
* See https://docs.python.org/3.10/library/stdtypes.html#frozenset.add
*/
class SetAdd extends SummarizedCallable {
class SetAdd extends SummarizedCallable::Range {
SetAdd() { this = "set.add" }
override DataFlow::CallCfgNode getACall() { result.(DataFlow::MethodCallNode).calls(_, "add") }
@@ -4887,7 +4948,7 @@ module StdlibPrivate {
*
* See https://devdocs.io/python~3.11/library/os#os.getenv
*/
class OsGetEnv extends SummarizedCallable {
class OsGetEnv extends SummarizedCallable::Range {
OsGetEnv() { this = "os.getenv" }
override DataFlow::CallCfgNode getACall() {

View File

@@ -135,6 +135,8 @@ module Tornado {
API::Node subclassRef() {
result = web().getMember("RequestHandler").getASubclass*()
or
result = WebSocket::WebSocketHandler::subclassRef()
or
result = ModelOutput::getATypeNode("tornado.web.RequestHandler~Subclass").getASubclass*()
}
@@ -428,6 +430,49 @@ module Tornado {
}
}
}
// ---------------------------------------------------------------------------
// tornado.websocket
// ---------------------------------------------------------------------------
/** Gets a reference to the `tornado.websocket` module. */
API::Node websocket() { result = Tornado::tornado().getMember("websocket") }
/** Provides models for the `tornado.websocket` module */
module WebSocket {
/**
* Provides models for the `tornado.websocket.WebSocketHandler` class and subclasses.
*
* See https://www.tornadoweb.org/en/stable/websocket.html#tornado.websocket.WebSocketHandler.
*/
module WebSocketHandler {
/** Gets a reference to the `tornado.websocket.WebSocketHandler` class or any subclass. */
API::Node subclassRef() {
result = websocket().getMember("WebSocketHandler").getASubclass*()
or
// Also include anything that type models declare as a `WebSocketHandler` subclass.
result =
ModelOutput::getATypeNode("tornado.websocket.WebSocketHandler~Subclass").getASubclass*()
}
/** A subclass of `tornado.websocket.WebSocketHandler`. */
class WebSocketHandlerClass extends Web::RequestHandler::RequestHandlerClass {
WebSocketHandlerClass() { this.getParent() = subclassRef().asSource().asExpr() }
/**
* Gets a request handler of this class: anything the superclass reports,
* plus any method named `open`.
*/
override Function getARequestHandler() {
result = super.getARequestHandler()
or
result = this.getAMethod() and
result.getName() = "open"
}
/** Gets a function that could handle incoming WebSocket events, if any. */
Function getAWebSocketEventHandler() {
result = this.getAMethod() and
result.getName() =
["on_message", "on_close", "on_ping", "on_pong", "select_subprotocol", "check_origin"]
}
}
}
}
}
// ---------------------------------------------------------------------------
@@ -542,6 +587,27 @@ module Tornado {
override string getFramework() { result = "Tornado" }
}
/** A request handler for WebSocket events. */
private class TornadoWebSocketEventHandler extends Http::Server::RequestHandler::Range {
TornadoWebSocketEventHandler() {
exists(TornadoModule::WebSocket::WebSocketHandler::WebSocketHandlerClass cls |
cls.getAWebSocketEventHandler() = this
)
}
override Parameter getARoutedParameter() {
// The `open` method is handled as a normal request handler in `TornadoRouteSetup` or `TornadoRequestHandlerWithoutKnownRoute`.
// For other event handlers (such as `on_message`), all parameters should be remote flow sources, as they are not affected by routing.
result in [
this.getArg(_), this.getArgByName(_), this.getVararg().(Parameter),
this.getKwarg().(Parameter)
] and
// Exclude the first positional parameter (presumably `self`, since these handlers are methods).
not result = this.getArg(0)
}
override string getFramework() { result = "Tornado" }
}
// ---------------------------------------------------------------------------
// Response modeling
// ---------------------------------------------------------------------------

View File

@@ -0,0 +1,84 @@
/**
* Provides definitions and modeling for the `websockets` PyPI package.
*
* See https://websockets.readthedocs.io/en/stable/
*/
private import python
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.frameworks.internal.PoorMansFunctionResolution
private import semmle.python.frameworks.internal.InstanceTaintStepsHelper
/**
* Provides models for the `websockets` PyPI package.
* See https://websockets.readthedocs.io/en/stable/
*/
module Websockets {
/**
* A value passed as the handler to `serve` or `unix_serve` from
* `websockets.asyncio.server` or `websockets.sync.server`, either as the first
* positional argument or as the `handler` keyword argument.
*/
private class HandlerArg extends DataFlow::Node {
HandlerArg() {
exists(DataFlow::CallCfgNode c |
c =
API::moduleImport("websockets")
.getMember(["asyncio", "sync"])
.getMember("server")
.getMember(["serve", "unix_serve"])
.getACall()
|
(this = c.getArg(0) or this = c.getArgByName("handler"))
)
}
}
/** A websocket handler that is passed to `serve`. */
// TODO: handlers defined via route maps, e.g. through `websockets.asyncio.router.route`, are more complex to handle.
class WebSocketHandler extends Http::Server::RequestHandler::Range {
WebSocketHandler() { poorMansFunctionTracker(this) = any(HandlerArg a) }
override Parameter getARoutedParameter() { result = this.getAnArg() }
override string getFramework() { result = "websockets" }
}
/** Provides taint models for instances of `ServerConnection` objects passed to websocket handlers. */
module ServerConnection {
/**
* A source of instances of `websockets.asyncio.ServerConnection` and `websockets.sync.ServerConnection`, extend this class to model new instances.
*
* This can include instantiations of the class, return values from function
* calls, or a special parameter that will be set when functions are called by an external
* library.
*
* Use the predicate `ServerConnection::instance()` to get references to instances of `websockets.asyncio.ServerConnection` and `websockets.sync.ServerConnection`.
*/
abstract class InstanceSource extends DataFlow::LocalSourceNode { }
/** Gets a reference to an instance of `websockets.asyncio.ServerConnection` or `websockets.sync.ServerConnection`. */
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
// Base case: tracking starts at every instance source.
t.start() and
result instanceof InstanceSource
or
// Recursive case: extend an already tracked reference by one type-tracking step.
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
}
/** Gets a reference to an instance of `websockets.asyncio.ServerConnection` or `websockets.sync.ServerConnection`. */
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
/** The first parameter of a websocket handler, treated as a `ServerConnection` instance. */
private class HandlerParam extends DataFlow::Node, InstanceSource {
HandlerParam() { exists(WebSocketHandler h | this = DataFlow::parameterNode(h.getArg(0))) }
}
/** Taint propagation for `ServerConnection` instances. */
private class InstanceTaintSteps extends InstanceTaintStepsHelper {
InstanceTaintSteps() { this = "websockets.asyncio.ServerConnection" }
override DataFlow::Node getInstance() { result = instance() }
override string getAttributeName() { none() }
// The same method names are listed as both async and plain methods, presumably to cover
// the `asyncio` and `sync` connection variants with one model.
override string getAsyncMethodName() { result = ["recv", "recv_streaming"] }
override string getMethodName() { result = ["recv", "recv_streaming"] }
}
}
}

View File

@@ -0,0 +1,6 @@
extensions:
- addsTo:
pack: codeql/python-all
extensible: sinkModel
data:
- ['agents', 'Member[Agent].Argument[instructions:]', 'prompt-injection']

View File

@@ -24,21 +24,25 @@ private import semmle.python.Concepts
* A threat-model flow source originating from a data extension.
*/
private class ThreatModelSourceFromDataExtension extends ThreatModelSource::Range {
ThreatModelSourceFromDataExtension() { this = ModelOutput::getASourceNode(_).asSource() }
ThreatModelSourceFromDataExtension() { ModelOutput::sourceNode(this, _) }
override string getThreatModel() { this = ModelOutput::getASourceNode(result).asSource() }
override string getThreatModel() { ModelOutput::sourceNode(this, result) }
override string getSourceType() {
result = "Source node (" + this.getThreatModel() + ") [from data-extension]"
}
}
private class SummarizedCallableFromModel extends SummarizedCallable {
private class SummarizedCallableFromModel extends SummarizedCallable::Range {
string type;
string path;
string input_;
string output_;
string kind;
string model_;
SummarizedCallableFromModel() {
ModelOutput::relevantSummaryModel(type, path, _, _, _, _) and
ModelOutput::relevantSummaryModel(type, path, input_, output_, kind, model_) and
this = type + ";" + path
}
@@ -52,14 +56,13 @@ private class SummarizedCallableFromModel extends SummarizedCallable {
}
override predicate propagatesFlow(
string input, string output, boolean preservesValue, string model
string input, string output, boolean preservesValue, Provenance p, boolean isExact, string model
) {
exists(string kind | ModelOutput::relevantSummaryModel(type, path, input, output, kind, model) |
kind = "value" and
preservesValue = true
or
kind = "taint" and
preservesValue = false
)
input = input_ and
output = output_ and
(if kind = "value" then preservesValue = true else preservesValue = false) and
p = "manual" and
isExact = true and
model = model_
}
}

View File

@@ -62,6 +62,8 @@
* should be prefixed with a tilde character (`~`). For example, `~Bar` can be used to indicate that
* the type is not intended to match a static type.
*/
overlay[local?]
module;
private import codeql.util.Unit
private import ApiGraphModelsSpecific as Specific
@@ -342,6 +344,26 @@ private predicate sinkModel(string type, string path, string kind, string model)
)
}
/** Holds if a barrier model exists for the given parameters. */
private predicate barrierModel(string type, string path, string kind, string model) {
// No deprecation adapter for barrier models, they were not around back then.
exists(QlBuiltins::ExtensionId madId |
Extensions::barrierModel(type, path, kind, madId) and
// `model` is the extension row id rendered as a string with a `MaD:` prefix.
model = "MaD:" + madId.toString()
)
}
/** Holds if a barrier guard model exists for the given parameters. */
private predicate barrierGuardModel(
string type, string path, string branch, string kind, string model
) {
// No deprecation adapter for barrier models, they were not around back then.
exists(QlBuiltins::ExtensionId madId |
// `branch` is passed through unchanged as the string stored in the extension row.
Extensions::barrierGuardModel(type, path, branch, kind, madId) and
// `model` is the extension row id rendered as a string with a `MaD:` prefix.
model = "MaD:" + madId.toString()
)
}
/** Holds if a summary model `row` exists for the given parameters. */
private predicate summaryModel(
string type, string path, string input, string output, string kind, string model
@@ -398,6 +420,8 @@ predicate isRelevantType(string type) {
(
sourceModel(type, _, _, _) or
sinkModel(type, _, _, _) or
barrierModel(type, _, _, _) or
barrierGuardModel(type, _, _, _, _) or
summaryModel(type, _, _, _, _, _) or
typeModel(_, type, _)
) and
@@ -425,6 +449,8 @@ predicate isRelevantFullPath(string type, string path) {
(
sourceModel(type, path, _, _) or
sinkModel(type, path, _, _) or
barrierModel(type, path, _, _) or
barrierGuardModel(type, path, _, _, _) or
summaryModel(type, path, _, _, _, _) or
typeModel(_, type, path)
)
@@ -490,6 +516,7 @@ private predicate invocationMatchesCallSiteFilter(
Specific::invocationMatchesExtraCallSiteFilter(invoke, token)
}
overlay[local?]
private class TypeModelUseEntry extends API::EntryPoint {
private string type;
@@ -503,6 +530,7 @@ private class TypeModelUseEntry extends API::EntryPoint {
API::Node getNodeForType(string type_) { type = type_ and result = this.getANode() }
}
overlay[local?]
private class TypeModelDefEntry extends API::EntryPoint {
private string type;
@@ -743,6 +771,32 @@ module ModelOutput {
)
}
/**
* Gets an API node that a barrier model contributed with the given `kind`.
*
* `model` identifies the barrier model row that contributed the node.
*/
cached
API::Node getABarrierNode(string kind, string model) {
exists(string type, string path |
barrierModel(type, path, kind, model) and
result = getNodeFromPath(type, path)
)
}
/**
* Gets an API node that a barrier guard model contributed with the given `kind`
* for the given `branch`.
*
* `model` identifies the barrier guard model row that contributed the node.
*/
cached
API::Node getABarrierGuardNode(string kind, boolean branch, string model) {
exists(string type, string path, string branch_str |
// Translate the boolean `branch` into the string form used in model rows.
// A row whose branch is neither `"true"` nor `"false"` yields no node.
branch = true and branch_str = "true"
or
branch = false and branch_str = "false"
|
barrierGuardModel(type, path, branch_str, kind, model) and
result = getNodeFromPath(type, path)
)
}
/**
* Holds if a relevant summary exists for these parameters.
*/
@@ -785,15 +839,50 @@ module ModelOutput {
private import codeql.mad.ModelValidation as SharedModelVal
/**
* Holds if a CSV source model contributed `source` with the given `kind`.
* Holds if an external model contributed `source` with the given `kind`.
*/
API::Node getASourceNode(string kind) { result = getASourceNode(kind, _) }
/**
* Holds if a CSV sink model contributed `sink` with the given `kind`.
* Holds if an external model contributed `sink` with the given `kind`.
*/
API::Node getASinkNode(string kind) { result = getASinkNode(kind, _) }
/**
* Holds if an external model contributed `barrier` with the given `kind`.
*
* INTERNAL: Do not use.
*/
API::Node getABarrierNode(string kind) { result = getABarrierNode(kind, _) }
/**
* Holds if an external model contributed `barrier-guard` with the given `kind` and `branch`.
*
* INTERNAL: Do not use.
*/
API::Node getABarrierGuardNode(string kind, boolean branch) {
result = getABarrierGuardNode(kind, branch, _)
}
/**
* Holds if `node` is specified as a source with the given kind in an external model.
*/
predicate sourceNode(DataFlow::Node node, string kind) { node = getASourceNode(kind).asSource() }
/**
* Holds if `node` is specified as a sink with the given kind in an external model.
*/
predicate sinkNode(DataFlow::Node node, string kind) { node = getASinkNode(kind).asSink() }
/**
* Holds if `node` is specified as a barrier with the given kind in an external model.
*/
predicate barrierNode(DataFlow::Node node, string kind) {
// A node contributed directly by a barrier model.
node = getABarrierNode(kind).asSource()
or
// A node reported by `ExternalBarrierGuard`; presumably one protected by a modeled
// barrier guard (that module is defined elsewhere, confirm there).
node = DataFlow::ExternalBarrierGuard::getAnExternalBarrierNode(kind)
}
private module KindValConfig implements SharedModelVal::KindValidationConfigSig {
predicate summaryKind(string kind) { summaryModel(_, _, _, _, kind, _) }

View File

@@ -1,6 +1,8 @@
/**
* Defines extensible predicates for contributing library models from data extensions.
*/
overlay[local]
module;
/**
* Holds if the value at `(type, path)` should be seen as a flow
@@ -18,6 +20,26 @@ extensible predicate sourceModel(
*/
extensible predicate sinkModel(string type, string path, string kind, QlBuiltins::ExtensionId madId);
/**
* Holds if the value at `(type, path)` should be seen as a barrier
* of the given `kind`.
*
* `madId` is the data extension row number.
*/
extensible predicate barrierModel(
string type, string path, string kind, QlBuiltins::ExtensionId madId
);
/**
* Holds if the value at `(type, path)` should be seen as a barrier guard
* of the given `kind`.
*
* `path` is assumed to lead to a parameter of a call (possibly `self`), and
* the call is guarding the parameter.
*
* `branch` is a string column holding either `"true"` or `"false"`, indicating
* which branch of the guard is protecting the parameter.
*
* `madId` is the data extension row number.
*/
extensible predicate barrierGuardModel(
string type, string path, string branch, string kind, QlBuiltins::ExtensionId madId
);
/**
* Holds if in calls to `(type, path)`, the value referred to by `input`
* can flow to the value referred to by `output` and `madId` is the data

View File

@@ -142,15 +142,13 @@ API::Node getExtraSuccessorFromNode(API::Node node, AccessPathTokenBase token) {
// `DataFlow::DictionaryElementContent` just from seeing a subscript read, so we would
// need to add that. (also need to handle things like `DictionaryElementAny` which
// doesn't have any value for .getAnArgument())
(
token.getName() = "DictionaryElement" and
result = node.getSubscript(token.getAnArgument())
or
token.getName() = "DictionaryElementAny" and
result = node.getASubscript() and
not exists(token.getAnArgument())
// TODO: ListElement/SetElement/TupleElement
)
token.getName() = "DictionaryElement" and
result = node.getSubscript(token.getAnArgument())
or
token.getName() in ["DictionaryElementAny", "ListElement"] and
result = node.getASubscript() and
not exists(token.getAnArgument())
// TODO: SetElement/TupleElement
// Some features don't have MaD tokens yet, they would need to be added to API-graphs first.
// - decorators ("DecoratedClass", "DecoratedMember", "DecoratedParameter")
}
@@ -261,7 +259,7 @@ predicate isExtraValidTokenNameInIdentifyingAccessPath(string name) {
name =
[
"Member", "Instance", "Awaited", "Call", "Method", "Subclass", "DictionaryElement",
"DictionaryElementAny"
"DictionaryElementAny", "ListElement"
]
}
@@ -270,7 +268,7 @@ predicate isExtraValidTokenNameInIdentifyingAccessPath(string name) {
* in an identifying access path.
*/
predicate isExtraValidNoArgumentTokenInIdentifyingAccessPath(string name) {
name = ["Instance", "Awaited", "Call", "Subclass", "DictionaryElementAny"]
name = ["Instance", "Awaited", "Call", "Subclass", "DictionaryElementAny", "ListElement"]
}
/**

View File

@@ -11,6 +11,16 @@ extensions:
extensible: sinkModel
data: []
- addsTo:
pack: codeql/python-all
extensible: barrierModel
data: []
- addsTo:
pack: codeql/python-all
extensible: barrierGuardModel
data: []
- addsTo:
pack: codeql/python-all
extensible: summaryModel

View File

@@ -0,0 +1,12 @@
# Models-as-data rows for the `openai` package.
extensions:
# Sink: the `instructions` keyword argument of `beta.assistants.create`, reached from
# a value of the `OpenAI` type defined below, is a `prompt-injection` sink.
- addsTo:
pack: codeql/python-all
extensible: sinkModel
data:
- ['OpenAI', 'Member[beta].Member[assistants].Member[create].Argument[instructions:]', 'prompt-injection']
# Type: `OpenAI` names the return value of calling `openai.OpenAI`,
# `openai.AsyncOpenAI` or `openai.AzureOpenAI`.
- addsTo:
pack: codeql/python-all
extensible: typeModel
data:
- ['OpenAI', 'openai', 'Member[OpenAI,AsyncOpenAI,AzureOpenAI].ReturnValue']

View File

@@ -0,0 +1,90 @@
/**
* Provides consistency queries for checking that every database entity
* that can be discarded (i.e. everything but `@py_cobject` and `@externalDataElement`) in an overlay
* database is indeed discarded, by proxy of having exactly one `Discardable.getPath()`.
*/
import python
import semmle.python.Overlay
/**
 * A database entity of any type (`@top`), given a `toString` that reports which
 * dbscheme type it belongs to, so that consistency-query results are readable.
 */
class TopWithToString instanceof @top {
  /**
   * Gets the name of a dbscheme type that this entity is an instance of.
   *
   * Each label is spelled exactly like the dbscheme type it tests for. An entity
   * that is an instance of several of the listed types has one result per match,
   * and an entity matching none of them has no result.
   */
  string getDbType() {
    // Previously reported as "@source_element", the only label that did not
    // match the name of the type being tested.
    this instanceof @py_source_element and result = "@py_source_element"
    or
    this instanceof @py_object and result = "@py_object"
    or
    this instanceof @py_base_var and result = "@py_base_var"
    or
    this instanceof @location and result = "@location"
    or
    this instanceof @py_line and result = "@py_line"
    or
    this instanceof @py_comment and result = "@py_comment"
    or
    this instanceof @py_expr_parent and result = "@py_expr_parent"
    or
    this instanceof @py_expr_context and result = "@py_expr_context"
    or
    this instanceof @py_operator and result = "@py_operator"
    or
    this instanceof @py_boolop and result = "@py_boolop"
    or
    this instanceof @py_cmpop and result = "@py_cmpop"
    or
    this instanceof @py_unaryop and result = "@py_unaryop"
    or
    this instanceof @py_cmpop_list and result = "@py_cmpop_list"
    or
    this instanceof @py_alias_list and result = "@py_alias_list"
    or
    this instanceof @py_StringPart_list and result = "@py_StringPart_list"
    or
    this instanceof @py_comprehension_list and result = "@py_comprehension_list"
    or
    this instanceof @py_dict_item_list and result = "@py_dict_item_list"
    or
    this instanceof @py_pattern_list and result = "@py_pattern_list"
    or
    this instanceof @py_stmt_list and result = "@py_stmt_list"
    or
    this instanceof @py_str_list and result = "@py_str_list"
    or
    this instanceof @py_type_parameter_list and result = "@py_type_parameter_list"
    or
    this instanceof @externalDefect and result = "@externalDefect"
    or
    this instanceof @externalMetric and result = "@externalMetric"
    or
    this instanceof @externalDataElement and result = "@externalDataElement"
    or
    this instanceof @duplication_or_similarity and result = "@duplication_or_similarity"
    or
    this instanceof @svnentry and result = "@svnentry"
    or
    this instanceof @xmllocatable and result = "@xmllocatable"
    or
    this instanceof @yaml_locatable and result = "@yaml_locatable"
  }

  /**
   * Gets a textual representation of this entity: a type name from `getDbType()`,
   * or `"Unknown type"` when `getDbType()` has no result.
   */
  string toString() {
    result = this.getDbType()
    or
    not exists(this.getDbType()) and
    result = "Unknown type"
  }
}
/**
 * Holds if `el` violates the overlay-discard invariant, with `message` describing how.
 *
 * An entity is reported when either:
 * - it is not `Discardable` and is neither a `@py_cobject` nor an
 *   `@externalDataElement` (neither of those can be linked to a path), or
 * - it is `Discardable` but `getPath()` has zero results or more than one.
 */
query predicate consistencyTest(TopWithToString el, string message) {
  (
    message = "Not Discardable" and
    not el instanceof Discardable and
    // cannot be linked to a path
    not el instanceof @py_cobject and
    // cannot be linked to a path
    not el instanceof @externalDataElement
  )
  or
  exists(Discardable discardable, int pathCount |
    discardable = el and
    pathCount = count(discardable.getPath())
  |
    pathCount = 0 and
    message = "Discardable but no path found"
    or
    pathCount > 1 and
    message = "Discardable but multiple paths found (" + concat(discardable.getPath(), ", ") + ")"
  )
}

View File

@@ -0,0 +1,46 @@
overlay[local]
module;
/**
* A local predicate that always holds for the overlay variant and never holds for the base variant.
* This is used to define local predicates that behave differently for the base and overlay variant.
*
* It holds exactly when `databaseMetadata` contains the row `("isOverlay", "true")`.
*/
private predicate isOverlay() { databaseMetadata("isOverlay", "true") }
/**
 * Gets the name recorded in `files` for the file that contains the default location
 * of the XML `locatable`, found via `xmllocations` and `locations_default`.
 */
private string getXmlFile(@xmllocatable locatable) {
  exists(@location_default loc, @file f |
    xmllocations(locatable, loc) and
    locations_default(loc, f, _, _, _, _) and
    files(f, result)
  )
}
/**
 * Gets the file of the XML `locatable` as given by `getXmlFile`, but only when
 * evaluated in the base variant (that is, when `isOverlay()` does not hold).
 */
private string getXmlFileInBase(@xmllocatable locatable) {
  result = getXmlFile(locatable) and
  not isOverlay()
}
/**
 * Holds if the given `file` was extracted as part of the overlay and was extracted by the HTML/XML
 * extractor.
 *
 * A file qualifies when, in the overlay variant, it contains some XML locatable that has
 * no row in `files` and no row in `xmlNs`.
 */
private predicate overlayXmlExtracted(string file) {
  isOverlay() and
  exists(@xmllocatable xmlEntity | file = getXmlFile(xmlEntity) |
    not files(xmlEntity, _) and
    not xmlNs(xmlEntity, _, _, _)
  )
}
/**
 * Holds if the given XML `locatable` should be discarded, because it is part of the overlay base
 * and is in a file that was also extracted as part of the overlay database.
 */
overlay[discard_entity]
private predicate discardXmlLocatable(@xmllocatable locatable) {
  exists(string baseFile |
    baseFile = getXmlFileInBase(locatable) and
    (
      overlayChangedFiles(baseFile)
      or
      // The HTML/XML extractor is currently not incremental and may extract more files than those
      // included in overlayChangedFiles.
      overlayXmlExtracted(baseFile)
    )
  )
}

View File

@@ -1,7 +1,7 @@
/** Utilities for handling the zope libraries */
import python
private import semmle.python.pointsto.PointsTo
private import LegacyPointsTo
/** A method that belongs to a sub-class of `zope.interface.Interface` */
class ZopeInterfaceMethodValue extends PythonFunctionValue {

View File

@@ -5,6 +5,7 @@ private import semmle.python.pointsto.PointsTo
private import semmle.python.pointsto.MRO
private import semmle.python.pointsto.PointsToContext
private import semmle.python.types.Builtins
private import semmle.python.objects.ObjectAPI
/**
* A constant.

View File

@@ -5,6 +5,7 @@ private import semmle.python.pointsto.PointsTo
private import semmle.python.pointsto.PointsToContext
private import semmle.python.pointsto.MRO
private import semmle.python.types.Builtins
private import semmle.python.pointsto.Context
/** A property object. */
class PropertyInternal extends ObjectInternal, TProperty {

View File

@@ -5,6 +5,8 @@ private import semmle.python.pointsto.PointsTo
private import semmle.python.pointsto.MRO
private import semmle.python.pointsto.PointsToContext
private import semmle.python.types.Builtins
private import semmle.python.pointsto.Context
private import semmle.python.pointsto.Base
/** A class representing instances */
abstract class InstanceObject extends ObjectInternal {

View File

@@ -5,6 +5,7 @@ private import semmle.python.pointsto.PointsTo
private import semmle.python.pointsto.MRO
private import semmle.python.pointsto.PointsToContext
private import semmle.python.types.Builtins
private import semmle.python.types.ImportTime
/** A class representing modules */
abstract class ModuleObjectInternal extends ObjectInternal {

View File

@@ -4,10 +4,8 @@
*/
import python
private import LegacyPointsTo
private import TObject
private import semmle.python.objects.ObjectInternal
private import semmle.python.pointsto.PointsTo
private import semmle.python.pointsto.PointsToContext
private import semmle.python.pointsto.MRO
private import semmle.python.types.Builtins
@@ -696,7 +694,9 @@ abstract class FunctionValue extends CallableValue {
exists(ClassValue cls, string name |
cls.declaredAttribute(name) = this and
name != "__new__" and
exists(Expr expr, AstNode origin | expr.pointsTo(this, origin) | not origin instanceof Lambda)
exists(ExprWithPointsTo expr, AstNode origin | expr.pointsTo(this, origin) |
not origin instanceof Lambda
)
)
}
@@ -704,12 +704,14 @@ abstract class FunctionValue extends CallableValue {
abstract ClassValue getARaisedType();
/** Gets a call-site from where this function is called as a function */
CallNode getAFunctionCall() { result.getFunction().pointsTo() = this }
CallNode getAFunctionCall() {
result.getFunction().(ControlFlowNodeWithPointsTo).pointsTo() = this
}
/** Gets a call-site from where this function is called as a method */
CallNode getAMethodCall() {
exists(BoundMethodObjectInternal bm |
result.getFunction().pointsTo() = bm and
result.getFunction().(ControlFlowNodeWithPointsTo).pointsTo() = bm and
bm.getFunction() = this
)
}
@@ -753,7 +755,7 @@ class PythonFunctionValue extends FunctionValue {
* explicit return nodes that we can query and get the class of.
*/
result = this.getAReturnedNode().pointsTo().getClass()
result = this.getAReturnedNode().(ControlFlowNodeWithPointsTo).pointsTo().getClass()
}
}

View File

@@ -6,6 +6,7 @@ private import semmle.python.objects.ObjectInternal
private import semmle.python.pointsto.PointsTo
private import semmle.python.pointsto.PointsToContext
private import semmle.python.internal.CachedStages
private import semmle.python.pointsto.Context
/**
* Internal type backing `ObjectInternal` and `Value`

View File

@@ -12,6 +12,8 @@ import python
import semmle.python.essa.SsaDefinitions
private import semmle.python.types.Builtins
private import semmle.python.internal.CachedStages
private import semmle.python.types.Object
private import semmle.python.types.ClassObject
/*
* The following predicates exist in order to provide
@@ -42,24 +44,6 @@ private predicate class_defines_name(Class cls, string name) {
exists(SsaVariable var | name = var.getId() and var.getAUse() = cls.getANormalExit())
}
/** Holds if `expr` is a test (a branch) and `use` is within that test */
predicate test_contains(ControlFlowNode expr, ControlFlowNode use) {
expr.getNode() instanceof Expr and
expr.isBranch() and
expr.getAChild*() = use
}
/**
 * Holds if `f` is an import of the form `from .[...] import ...` and the enclosing
 * scope is an `__init__` module.
 *
 * The import qualifies when it has level 1 and no name, or when the imported module
 * name equals the name of the package of the enclosing module.
 */
predicate import_from_dot_in_init(ImportExprNode f) {
  exists(Module m | f.getScope() = m.getInitModule()) and
  (
    f.getNode().getImportedModuleName() = f.getEnclosingModule().getPackage().getName()
    or
    f.getNode().getLevel() = 1 and
    not exists(f.getNode().getName())
  )
}
/** Gets the pseudo-object representing the value referred to by an undefined variable */
Object undefinedVariable() { py_special_objects(result, "_semmle_undefined_value") }

View File

@@ -11,6 +11,8 @@
import python
private import semmle.python.pointsto.PointsToContext
private import semmle.python.types.FunctionObject
private import semmle.python.pointsto.Context
private newtype TTInvocation =
TInvocation(FunctionObject f, Context c) {

View File

@@ -6,7 +6,13 @@ private import semmle.python.pointsto.PointsToContext
private import semmle.python.pointsto.MRO
private import semmle.python.types.Builtins
private import semmle.python.types.Extensions
private import semmle.python.pointsto.Context
private import semmle.python.internal.CachedStages
private import semmle.python.types.Object
private import semmle.python.types.FunctionObject
private import semmle.python.types.ClassObject
private import semmle.python.pointsto.Base
private import semmle.python.types.ImportTime
/* Use this version for speed */
class CfgOrigin extends @py_object {
@@ -709,7 +715,7 @@ private module InterModulePointsTo {
i.getImportedModuleName() = name and
PointsToInternal::module_imported_as(value, name) and
origin = f and
context.appliesTo(f)
context.appliesTo(pragma[only_bind_into](f))
)
}

View File

@@ -1,6 +1,8 @@
import python
private import semmle.python.pointsto.PointsTo
private import semmle.python.objects.ObjectInternal
private import semmle.python.types.ImportTime
private import semmle.python.types.Version
/*
* A note on 'cost'. Cost doesn't represent the cost to compute,

View File

@@ -50,7 +50,7 @@ module CodeInjection {
}
private class SinkFromModel extends Sink {
SinkFromModel() { this = ModelOutput::getASinkNode("code-injection").asSink() }
SinkFromModel() { ModelOutput::sinkNode(this, "code-injection") }
}
/**
@@ -60,4 +60,11 @@ module CodeInjection {
/** DEPRECATED: Use ConstCompareAsSanitizerGuard instead. */
deprecated class StringConstCompareAsSanitizerGuard = ConstCompareAsSanitizerGuard;
/**
* A sanitizer defined via models-as-data with kind "code-injection".
*/
class SanitizerFromModel extends Sanitizer {
SanitizerFromModel() { ModelOutput::barrierNode(this, "code-injection") }
}
}

View File

@@ -85,7 +85,7 @@ module CommandInjection {
}
private class SinkFromModel extends Sink {
SinkFromModel() { this = ModelOutput::getASinkNode("command-injection").asSink() }
SinkFromModel() { ModelOutput::sinkNode(this, "command-injection") }
}
/**
@@ -95,4 +95,11 @@ module CommandInjection {
/** DEPRECATED: Use ConstCompareAsSanitizerGuard instead. */
deprecated class StringConstCompareAsSanitizerGuard = ConstCompareAsSanitizerGuard;
/**
* A sanitizer defined via models-as-data with kind "command-injection".
*/
class SanitizerFromModel extends Sanitizer {
SanitizerFromModel() { ModelOutput::barrierNode(this, "command-injection") }
}
}

View File

@@ -78,7 +78,7 @@ module LogInjection {
}
private class SinkFromModel extends Sink {
SinkFromModel() { this = ModelOutput::getASinkNode("log-injection").asSink() }
SinkFromModel() { ModelOutput::sinkNode(this, "log-injection") }
}
/**
@@ -106,4 +106,11 @@ module LogInjection {
this.getArg(0).asExpr().(StringLiteral).getText() in ["\r\n", "\n"]
}
}
/**
* A sanitizer defined via models-as-data with kind "log-injection".
*/
class SanitizerFromModel extends Sanitizer {
SanitizerFromModel() { ModelOutput::barrierNode(this, "log-injection") }
}
}

View File

@@ -57,7 +57,7 @@ module PathInjection {
*/
class FileSystemAccessAsSink extends Sink {
FileSystemAccessAsSink() {
this = any(FileSystemAccess e).getAPathArgument() and
this = any(FileSystemAccess e).getAVulnerablePathArgument() and
// since implementation of Path.open in pathlib.py is like
// ```py
// def open(self, ...):
@@ -88,7 +88,7 @@ module PathInjection {
private import semmle.python.frameworks.data.ModelsAsData
private class DataAsFileSink extends Sink {
DataAsFileSink() { this = ModelOutput::getASinkNode("path-injection").asSink() }
DataAsFileSink() { ModelOutput::sinkNode(this, "path-injection") }
}
/**
@@ -98,4 +98,11 @@ module PathInjection {
/** DEPRECATED: Use ConstCompareAsSanitizerGuard instead. */
deprecated class StringConstCompareAsSanitizerGuard = ConstCompareAsSanitizerGuard;
/**
* A sanitizer defined via models-as-data with kind "path-injection".
*/
class SanitizerFromModel extends Sanitizer {
SanitizerFromModel() { ModelOutput::barrierNode(this, "path-injection") }
}
}

Some files were not shown because too many files have changed in this diff Show More