Fix TODO in containerStep

This commit is contained in:
Owen Mansel-Chan
2026-05-28 11:34:02 +01:00
parent ec13e1bcd3
commit 80c6f082d1

View File

@@ -11,21 +11,35 @@ private import semmle.python.ApiGraphs
*/
predicate defaultTaintSanitizer(DataFlow::Node node) { none() } // `none()` never holds, so this predicate holds for no `node`
/**
 * Holds if `contentSet` is content that default taint tracking reads implicitly,
 * so that taint on a container propagates to reads of that content.
 */
private predicate defaultTaintReadContent(DataFlow::ContentSet contentSet) {
  // List and set element content is imprecise to begin with, so these content sets
  // are matched directly through their store content; no wildcard is required.
  exists(DataFlow::Content stored | stored = contentSet.getAStoreContent() |
    stored instanceof DataFlow::ListElementContent
    or
    stored instanceof DataFlow::SetElementContent
  )
  or
  // Dictionary and tuple content is precise (one content per key or index). Using
  // the wildcard content sets keeps `Stage1::readSetEx` small: enumerating every
  // (node, key-or-index) pair would make the framework's read-set relation grow
  // quadratically. `ContentSet.getAReadContent` expands the wildcards back to the
  // specific contents when they are matched against stores.
  contentSet.isAnyDictionaryElement()
  or
  contentSet.isAnyTupleElement()
}
/**
 * Holds if default `TaintTracking::Configuration`s should allow implicit reads
 * of `c` at sinks and inputs to additional taint steps.
 *
 * The set of content is exactly the one accepted by `defaultTaintReadContent`.
 */
bindingset[node]
predicate defaultImplicitTaintRead(DataFlow::Node node, DataFlow::ContentSet c) {
  // `node` is only required to exist; the result does not depend on which node it
  // is, which is why the predicate is declared with `bindingset[node]`.
  exists(node) and
  defaultTaintReadContent(c)
}
private module Cached {
@@ -171,28 +185,15 @@ predicate stringManipulation(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeT
}
/**
 * Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to reading
 * content from containers (lists/sets/dictionaries/tuples): subscripts, iteration,
 * constructor invocation, methods.
 */
predicate containerStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
  // A data-flow read step counts as a taint step only when the content being read
  // is one of the content sets accepted by `defaultTaintReadContent`.
  exists(DataFlow::ContentSet contentSet |
    DataFlowPrivate::readStep(nodeFrom, contentSet, nodeTo) and
    defaultTaintReadContent(contentSet)
  )
}
/**