mirror of
https://github.com/github/codeql.git
synced 2026-07-03 10:35:29 +02:00
Flips the Python dataflow trunk from the legacy CFG (semmle/python/Flow.qll) and legacy ESSA SSA (semmle/python/essa/*) to the new shared CFG facade (semmle.python.controlflow.internal.Cfg) and the new SSA adapter (semmle.python.dataflow.new.internal.SsaImpl), both introduced additively in the preceding PRs in this stack. This is the trunk-flip equivalent of the original draft PR #21894 (kept around as documentation), rebased on top of the four preparatory PRs: P1: Remove AstNode.getAFlowNode() and rewrite callers (#21919). P2: Qualify Flow.qll's AST references with Py:: prefix (#21920). P3: Add new shared-CFG-backed control flow graph (#21921). P4: Add new shared-SSA-backed SSA adapter (#21923). The Python dataflow library (semmle/python/dataflow/new/) now imports the new CFG facade and SSA adapter. All CFG-typed predicates (ControlFlowNode, CallNode, BasicBlock, NameNode, AttrNode, ...) are qualified with the Cfg:: prefix; SSA references switch from EssaVariable/EssaDefinition to SsaImpl::Definition/SourceVariable. GuardNode is redesigned to use the new CFG's outcome-node model (isAfterTrue / isAfterFalse) instead of the legacy ConditionBlock + flipped indirection. Only BarrierGuard<...> is preserved as public API. Framework files (Bottle, FastApi, Django, Tornado, Pyramid, Stdlib, ...) are updated to take CFG nodes from the new facade. A handful of dataflow consistency tweaks for the new CFG: - Augmented-assignment targets are treated as both load and store. - 'from X import *' produces uncertain SSA writes for unknown names. - CFG nodes are canonicalised so dataflow does not see equivalent pre/post-order pairs as distinct nodes. Two AST tweaks for the new CFG: - AstNodeImpl: omit PEP 695 type-parameter names from FunctionDefExpr / ClassDefExpr children. - ImportResolution: drop the legacy essa import. 
Test churn (~175 files): reblessed library- and query-test .expected files reflect slightly different CFG granularity, different toString output, and a handful of true alert deltas in security queries. Verification: all 367 lib + src + consistency-queries compile clean. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
192 lines
6.8 KiB
Plaintext
192 lines
6.8 KiB
Plaintext
/** Definitions for reasoning about whether files are closed. */
|
|
|
|
import python
|
|
import semmle.python.dataflow.new.internal.DataFlowDispatch
|
|
import semmle.python.ApiGraphs
|
|
private import semmle.python.dataflow.new.internal.ReExposedInstance
|
|
private import semmle.python.controlflow.internal.Cfg as Cfg
|
|
|
|
/**
 * A data-flow node, backed by a CFG node, where a file is opened.
 *
 * This class is abstract; concrete subclasses (such as `FileOpenCall`) define
 * which nodes count as file-open sites. Type tracking of open file objects
 * starts from instances of this class.
 */
|
|
abstract class FileOpenSource extends DataFlow::CfgNode { }
|
|
|
|
/** A call to the builtin function `open`, or to `os.open`. */
class FileOpenCall extends FileOpenSource {
  FileOpenCall() {
    // The builtin `open(...)`.
    this = API::builtin("open").getACall()
    or
    // The lower-level `os.open(...)`.
    this = API::moduleImport("os").getMember("open").getACall()
  }
}
|
|
|
|
/**
 * Gets a node that type tracker `t` has tracked from a `FileOpenSource`.
 */
private DataFlow::TypeTrackingNode fileOpenInstance(DataFlow::TypeTracker t) {
  // Base case: tracking starts at a file-open site.
  result instanceof FileOpenSource and
  t.start()
  or
  // Step case: extend an already-tracked node by one type-tracking step.
  exists(DataFlow::TypeTracker prev | result = fileOpenInstance(prev).track(prev, t))
}
|
|
|
|
/**
 * Holds if type tracking from a `FileOpenSource` reaches `node`, that is,
 * `node` is tracked to be an instance of an open file object.
 */
private predicate fileInstanceNode(DataFlow::Node node) {
  exists(DataFlow::TypeTrackingNode tracked |
    tracked = fileOpenInstance(DataFlow::TypeTracker::end())
  |
    tracked.flowsTo(node)
  )
}
|
|
|
|
/**
 * An instantiation of `ReExposedInstance` over `fileInstanceNode`.
 *
 * `FileOpen` consults `FileReExposed::isReExposed` to exclude calls that only
 * re-expose an already-tracked file rather than opening a new one.
 */
private module FileReExposed = ReExposedInstance<fileInstanceNode/1>;
|
|
|
|
/**
 * A call whose result is tracked to be an open file object.
 *
 * Besides direct opens, this covers calls to functions or methods that
 * transitively call `open` or similar and hand the file back.
 */
class FileOpen extends DataFlow::CallCfgNode {
  FileOpen() {
    // This call is reached by type tracking from some `FileOpenSource`.
    fileInstanceNode(this) and
    // An accessor that merely re-exposes a file held in an instance attribute
    // (e.g. `FileIO.fileno` returning `self._fd`) does not open a new file.
    // That flow comes from instance-attribute type tracking; the underlying
    // open is already tracked at its real creation site, so skip it here.
    not FileReExposed::isReExposed(this)
  }
}
|
|
|
|
/**
 * A call that may wrap a file object: a class instantiation, `os.fdopen`,
 * or `django.http.FileResponse`.
 */
class FileWrapperCall extends DataFlow::CallCfgNode {
  DataFlow::Node wrapped;

  FileWrapperCall() {
    // Known wrappers that take the wrapped file as their first positional argument.
    this =
      [
        API::moduleImport("os").getMember("fdopen").getACall(),
        API::moduleImport("django").getMember("http").getMember("FileResponse").getACall()
      ] and
    wrapped = this.getArg(0)
    or
    // Approximation: any argument passed to a class constructor is treated as
    // potentially wrapped by the new instance. This could be made more precise
    // by checking that the constructor writes the argument to a field.
    this.getFunction() = classTracker(_) and
    wrapped = this.getArg(_).getALocalSource()
  }

  /** Gets the node for the file that this call wraps. */
  DataFlow::Node getWrapped() { result = wrapped }
}
|
|
|
|
/**
 * A node where a file is closed.
 *
 * Subclasses decide which nodes count as closing a file.
 */
abstract class FileClose extends DataFlow::CfgNode {
  /**
   * Holds if this close still happens when an exception is raised at `fileRaises`.
   */
  predicate guardsExceptions(DataFlow::CfgNode fileRaises) {
    // Case 1: `fileRaises` flows locally from this close node, so the
    // potentially raising use comes after the close. A full CFG reachability
    // check is not feasible for performance, hence the local data-flow check.
    fileLocalFlow(this, fileRaises)
    or
    // Case 2: this close sits in, or is reachable from, a block entered via an
    // exception edge out of the raising node's block (a catch or finally).
    exists(Cfg::BasicBlock handlerBlock, Cfg::BasicBlock closeBlock |
      handlerBlock = fileRaises.asCfgNode().getBasicBlock().getAnExceptionalSuccessor() and
      closeBlock = this.asCfgNode().getBasicBlock()
    |
      bbReachableRefl(handlerBlock, closeBlock)
    )
  }
}
|
|
|
|
/** Holds if basic block `sink` is an immediate successor of basic block `src`. */
private predicate bbSuccessor(Cfg::BasicBlock src, Cfg::BasicBlock sink) {
  src.getASuccessor() = sink
}
|
|
|
|
/**
 * Holds if `sink` can be reached from `src` by one or more `bbSuccessor` steps.
 *
 * This is the transitive closure of `bbSuccessor`, computed via `fastTC`; the
 * reflexive case is added separately by `bbReachableRefl`.
 */
private predicate bbReachableStrict(Cfg::BasicBlock src, Cfg::BasicBlock sink) =
|
|
fastTC(bbSuccessor/2)(src, sink)
|
|
|
|
/**
 * Holds if `sink` is `src` itself, or is reachable from `src` through one or
 * more successor edges.
 */
private predicate bbReachableRefl(Cfg::BasicBlock src, Cfg::BasicBlock sink) {
  src = sink
  or
  bbReachableStrict(src, sink)
}
|
|
|
|
/**
 * A node on which a method named `close` is called, as in `f.close()`.
 *
 * The node is the first argument of `MethodCallNode.calls` (presumably the
 * receiver object of the call, not the call itself). Nothing here restricts
 * that object to being a file; callers relate it to an opened file through
 * data flow.
 */
class FileCloseCall extends FileClose {
  FileCloseCall() { any(DataFlow::MethodCallNode closeCall).calls(this, "close") }
}
|
|
|
|
/** The first positional argument of a call to `os.close`. */
class OsCloseCall extends FileClose {
  OsCloseCall() {
    exists(DataFlow::CallCfgNode closeCall |
      closeCall = API::moduleImport("os").getMember("close").getACall()
    |
      this = closeCall.getArg(0)
    )
  }
}
|
|
|
|
/**
 * The context expression of a `with` statement, treated as a point where the
 * file is closed.
 */
class WithStatement extends FileClose {
  With w;

  WithStatement() { w.getContextExpr() = this.asExpr() }

  override predicate guardsExceptions(DataFlow::CfgNode fileRaises) {
    // Anything raised from inside the body of the `with` statement is guarded.
    w.getBody().contains(fileRaises.asExpr())
    or
    // Otherwise fall back to the generic checks shared by all closes.
    super.guardsExceptions(fileRaises)
  }
}
|
|
|
|
/**
 * Holds if an exception may be raised at `raises`, given that `file` is a file object.
 *
 * Currently any method call on `file` (e.g. `file.write()`) is considered as
 * potentially raising. Nodes that are themselves a `FileOpen` or a `FileClose`
 * are not considered as `file`.
 */
private predicate mayRaiseWithFile(DataFlow::CfgNode file, DataFlow::CfgNode raises) {
  not file instanceof FileClose and
  not file instanceof FileOpen and
  exists(DataFlow::MethodCallNode methodCall | methodCall = raises |
    methodCall.getObject() = file
  )
}
|
|
|
|
/**
 * Holds if `nodeTo` is a file wrapper call and `nodeFrom` is the node it wraps.
 *
 * This is the extra step that lets file flow pass through wrapper classes.
 */
private predicate fileAdditionalLocalFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
  nodeFrom = nodeTo.(FileWrapperCall).getWrapped()
}
|
|
|
|
/**
 * Holds if `nodeFrom` flows locally to some node that is then wrapped by the
 * file wrapper call `nodeTo` (one local-flow leg followed by one wrapper step).
 */
private predicate fileLocalFlowHelper0(
  DataFlow::LocalSourceNode nodeFrom, DataFlow::LocalSourceNode nodeTo
) {
  exists(DataFlow::Node wrappedArg |
    fileAdditionalLocalFlowStep(wrappedArg, nodeTo) and
    nodeFrom.flowsTo(wrappedArg)
  )
}
|
|
|
|
/**
 * Holds if `nodeTo` is reachable from `nodeFrom` through zero or more
 * `fileLocalFlowHelper0` steps (its reflexive transitive closure), so
 * `nodeFrom` and `nodeTo` may be the same local source node.
 */
private predicate fileLocalFlowHelper1(
|
|
DataFlow::LocalSourceNode nodeFrom, DataFlow::LocalSourceNode nodeTo
|
|
) {
|
|
fileLocalFlowHelper0*(nodeFrom, nodeTo)
|
|
}
|
|
|
|
/**
 * Holds if data flows from `source` to `sink`, where the flow may pass through
 * any number of file wrapper calls before a final local-flow leg to `sink`.
 */
pragma[inline]
private predicate fileLocalFlow(DataFlow::Node source, DataFlow::Node sink) {
  exists(DataFlow::LocalSourceNode lastSource |
    lastSource.flowsTo(sink) and
    fileLocalFlowHelper1(source, lastSource)
  )
}
|
|
|
|
/** Holds if the file opened at `fo` flows to some `FileClose` node, i.e. it is closed. */
predicate fileIsClosed(FileOpen fo) { fileLocalFlow(fo, any(FileClose fc)) }
|
|
|
|
/**
 * Holds if the file opened at `fo` is returned to the caller, either directly
 * or as an element of a returned list or tuple literal.
 *
 * This makes the caller responsible for closing the file.
 */
predicate fileIsReturned(FileOpen fo) {
  exists(Return ret, Expr retVal |
    retVal =
      [
        ret.getValue(),
        ret.getValue().(List).getAnElt(),
        ret.getValue().(Tuple).getAnElt()
      ] and
    fileLocalFlow(fo, DataFlow::exprNode(retVal))
  )
}
|
|
|
|
/**
 * Holds if the file opened at `fo` flows to the value of an attribute write,
 * i.e. it is stored in a field.
 *
 * We assume that another method is then responsible for closing the file.
 */
predicate fileIsStoredInField(FileOpen fo) {
  fileLocalFlow(fo, any(DataFlow::AttrWrite aw).getValue())
}
|
|
|
|
/**
 * Holds if the file opened at `fo` is not closed although it is expected to be:
 * it is neither closed, nor returned to the caller, nor stored in a field.
 */
predicate fileNotClosed(FileOpen fo) {
  not (
    fileIsClosed(fo)
    or
    fileIsReturned(fo)
    or
    fileIsStoredInField(fo)
  )
}
|
|
|
|
/**
 * Holds if the file opened at `fo` is closed, but may stay open if an
 * exception is raised at `raises`.
 *
 * `raises` is a method call on a node that the file flows to, and no close of
 * the file guards exceptions raised at that node.
 */
predicate fileMayNotBeClosedOnException(FileOpen fo, DataFlow::Node raises) {
  fileIsClosed(fo) and
  exists(DataFlow::CfgNode fileUse |
    // `fileUse` is a use of the opened file on which `raises` calls a method.
    fileLocalFlow(fo, fileUse) and
    mayRaiseWithFile(fileUse, raises) and
    // Every close of this file fails to guard an exception at that use.
    forall(FileClose fc | fileLocalFlow(fo, fc) | not fc.guardsExceptions(fileUse))
  )
}
|