Python: Remove imprecise container steps

- remove `tupleStoreStep` and `dictStoreStep` from `containerStep`
   These steps are imprecise, whereas the content they store into is tracked precisely.
- add implicit reads to recover taint at sinks
- add implicit read steps for decoders
  to supplement the `AdditionalTaintStep`
  that now only covers when the full container is tainted.
This commit is contained in:
Rasmus Lerchedahl Petersen
2024-09-17 12:11:16 +02:00
committed by Owen Mansel-Chan
parent 7e6b10e8cf
commit b67694b2ab
16 changed files with 182 additions and 55 deletions

View File

@@ -1009,6 +1009,8 @@ predicate readStep(Node nodeFrom, ContentSet c, Node nodeTo) {
synthDictSplatParameterNodeReadStep(nodeFrom, c, nodeTo)
or
VariableCapture::readStep(nodeFrom, c, nodeTo)
or
Conversions::readStep(nodeFrom, c, nodeTo)
}
/** Data flows from a sequence to a subscript of the sequence. */
@@ -1064,6 +1066,40 @@ predicate attributeReadStep(Node nodeFrom, AttributeContent c, AttrRead nodeTo)
nodeTo.accesses(nodeFrom, c.getAttribute())
}
/**
 * Read steps across encoding and decoding operations.
 *
 * Each step goes from an input of an `Encoding`/`Decoding` to its output and
 * reads tuple-element or dictionary-element content.
 */
module Conversions {
  private import semmle.python.Concepts

  /** Holds if `c` is tuple-element content or dictionary-element content. */
  private predicate isTupleOrDictElementContent(ContentSet c) {
    c instanceof DictionaryElementContent
    or
    c instanceof TupleElementContent
  }

  /**
   * Holds if `nodeFrom` is an input to some `Decoding` whose output is `nodeTo`,
   * and `c` is tuple-element or dictionary-element content.
   */
  predicate decoderReadStep(Node nodeFrom, ContentSet c, Node nodeTo) {
    isTupleOrDictElementContent(c) and
    exists(Decoding dec |
      dec.getOutput() = nodeTo and
      dec.getAnInput() = nodeFrom
    )
  }

  /**
   * Holds if `nodeFrom` is an input to some `Encoding` whose output is `nodeTo`,
   * and `c` is tuple-element or dictionary-element content.
   */
  predicate encoderReadStep(Node nodeFrom, ContentSet c, Node nodeTo) {
    isTupleOrDictElementContent(c) and
    exists(Encoding enc |
      enc.getOutput() = nodeTo and
      enc.getAnInput() = nodeFrom
    )
  }

  /** Holds if there is a decoder or encoder read step from `nodeFrom` to `nodeTo` for content `c`. */
  predicate readStep(Node nodeFrom, ContentSet c, Node nodeTo) {
    encoderReadStep(nodeFrom, c, nodeTo)
    or
    decoderReadStep(nodeFrom, c, nodeTo)
  }
}
/**
* Holds if values stored inside content `c` are cleared at node `n`. For example,
* any value stored inside `f` is cleared at the pre-update node associated with `x`

View File

@@ -16,7 +16,16 @@ predicate defaultTaintSanitizer(DataFlow::Node node) { none() }
* of `c` at sinks and inputs to additional taint steps.
*/
bindingset[node]
predicate defaultImplicitTaintRead(DataFlow::Node node, DataFlow::ContentSet c) { none() }
predicate defaultImplicitTaintRead(DataFlow::Node node, DataFlow::ContentSet c) {
// Allow implicit reads of precise content (tuple elements and dictionary
// elements) at any node; imprecise content has already bubbled up.
exists(node) and
(
c instanceof DataFlow::TupleElementContent
or
c instanceof DataFlow::DictionaryElementContent
)
}
private module Cached {
/**
@@ -176,10 +185,6 @@ predicate containerStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
or
DataFlowPrivate::setStoreStep(nodeFrom, _, nodeTo)
or
DataFlowPrivate::tupleStoreStep(nodeFrom, _, nodeTo)
or
DataFlowPrivate::dictStoreStep(nodeFrom, _, nodeTo)
or
// comprehension, so there is taint-flow from `x` in `[x for x in xs]` to the
// resulting list of the list-comprehension.
//