mirror of
https://github.com/github/codeql.git
synced 2026-06-05 13:37:06 +02:00
Add wildcard ContentSets to avoid performance problems
This commit is contained in:
@@ -753,7 +753,7 @@ predicate jumpStepNotSharedWithTypeTracker(Node nodeFrom, Node nodeTo) {
|
||||
* As of 2024-04-02 the type-tracking library only supports precise content, so there is
|
||||
* no reason to include steps for list content right now.
|
||||
*/
|
||||
predicate storeStepCommon(Node nodeFrom, ContentSet c, Node nodeTo) {
|
||||
predicate storeStepCommon(Node nodeFrom, Content c, Node nodeTo) {
|
||||
tupleStoreStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
dictStoreStep(nodeFrom, c, nodeTo)
|
||||
@@ -767,29 +767,31 @@ predicate storeStepCommon(Node nodeFrom, ContentSet c, Node nodeTo) {
|
||||
* Holds if data can flow from `nodeFrom` to `nodeTo` via an assignment to
|
||||
* content `c`.
|
||||
*/
|
||||
predicate storeStep(Node nodeFrom, ContentSet c, Node nodeTo) {
|
||||
storeStepCommon(nodeFrom, c, nodeTo)
|
||||
predicate storeStep(Node nodeFrom, ContentSet cs, Node nodeTo) {
|
||||
exists(Content c | cs = singleton(c) |
|
||||
storeStepCommon(nodeFrom, c, nodeTo)
|
||||
or
|
||||
listStoreStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
setStoreStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
attributeStoreStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
matchStoreStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
any(Orm::AdditionalOrmSteps es).storeStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
synthStarArgsElementParameterNodeStoreStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
synthDictSplatArgumentNodeStoreStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
yieldStoreStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
VariableCapture::storeStep(nodeFrom, c, nodeTo)
|
||||
)
|
||||
or
|
||||
listStoreStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
setStoreStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
attributeStoreStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
matchStoreStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
any(Orm::AdditionalOrmSteps es).storeStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
FlowSummaryImpl::Private::Steps::summaryStoreStep(nodeFrom.(FlowSummaryNode).getSummaryNode(), c,
|
||||
FlowSummaryImpl::Private::Steps::summaryStoreStep(nodeFrom.(FlowSummaryNode).getSummaryNode(), cs,
|
||||
nodeTo.(FlowSummaryNode).getSummaryNode())
|
||||
or
|
||||
synthStarArgsElementParameterNodeStoreStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
synthDictSplatArgumentNodeStoreStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
yieldStoreStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
VariableCapture::storeStep(nodeFrom, c, nodeTo)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -985,7 +987,7 @@ predicate attributeStoreStep(Node nodeFrom, AttributeContent c, Node nodeTo) {
|
||||
/**
|
||||
* Subset of `readStep` that should be shared with type-tracking.
|
||||
*/
|
||||
predicate readStepCommon(Node nodeFrom, ContentSet c, Node nodeTo) {
|
||||
predicate readStepCommon(Node nodeFrom, Content c, Node nodeTo) {
|
||||
subscriptReadStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
iterableUnpackingReadStep(nodeFrom, c, nodeTo)
|
||||
@@ -994,23 +996,25 @@ predicate readStepCommon(Node nodeFrom, ContentSet c, Node nodeTo) {
|
||||
/**
|
||||
* Holds if data can flow from `nodeFrom` to `nodeTo` via a read of content `c`.
|
||||
*/
|
||||
predicate readStep(Node nodeFrom, ContentSet c, Node nodeTo) {
|
||||
readStepCommon(nodeFrom, c, nodeTo)
|
||||
predicate readStep(Node nodeFrom, ContentSet cs, Node nodeTo) {
|
||||
exists(Content c | cs = singleton(c) |
|
||||
readStepCommon(nodeFrom, c, nodeTo)
|
||||
or
|
||||
matchReadStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
forReadStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
attributeReadStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
synthDictSplatParameterNodeReadStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
VariableCapture::readStep(nodeFrom, c, nodeTo)
|
||||
)
|
||||
or
|
||||
matchReadStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
forReadStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
attributeReadStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
FlowSummaryImpl::Private::Steps::summaryReadStep(nodeFrom.(FlowSummaryNode).getSummaryNode(), c,
|
||||
FlowSummaryImpl::Private::Steps::summaryReadStep(nodeFrom.(FlowSummaryNode).getSummaryNode(), cs,
|
||||
nodeTo.(FlowSummaryNode).getSummaryNode())
|
||||
or
|
||||
synthDictSplatParameterNodeReadStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
VariableCapture::readStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
Conversions::readStep(nodeFrom, c, nodeTo)
|
||||
Conversions::readStep(nodeFrom, cs, nodeTo)
|
||||
}
|
||||
|
||||
/** Data flows from a sequence to a subscript of the sequence. */
|
||||
@@ -1074,11 +1078,7 @@ module Conversions {
|
||||
nodeFrom = decoding.getAnInput() and
|
||||
nodeTo = decoding.getOutput()
|
||||
) and
|
||||
(
|
||||
c instanceof TupleElementContent
|
||||
or
|
||||
c instanceof DictionaryElementContent
|
||||
)
|
||||
(c.isAnyTupleElement() or c.isAnyDictionaryElement())
|
||||
}
|
||||
|
||||
predicate encoderReadStep(Node nodeFrom, ContentSet c, Node nodeTo) {
|
||||
@@ -1086,11 +1086,7 @@ module Conversions {
|
||||
nodeFrom = encoding.getAnInput() and
|
||||
nodeTo = encoding.getOutput()
|
||||
) and
|
||||
(
|
||||
c instanceof TupleElementContent
|
||||
or
|
||||
c instanceof DictionaryElementContent
|
||||
)
|
||||
(c.isAnyTupleElement() or c.isAnyDictionaryElement())
|
||||
}
|
||||
|
||||
predicate formatReadStep(Node nodeFrom, ContentSet c, Node nodeTo) {
|
||||
@@ -1099,13 +1095,13 @@ module Conversions {
|
||||
fmt.getOp() instanceof Mod and
|
||||
fmt.getRight() = nodeFrom.asCfgNode()
|
||||
) and
|
||||
c instanceof TupleElementContent
|
||||
c.isAnyTupleElement()
|
||||
or
|
||||
// format_map
|
||||
// see https://docs.python.org/3/library/stdtypes.html#str.format_map
|
||||
nodeTo.(MethodCallNode).calls(_, "format_map") and
|
||||
nodeTo.(MethodCallNode).getArg(0) = nodeFrom and
|
||||
c instanceof DictionaryElementContent
|
||||
c.isAnyDictionaryElement()
|
||||
}
|
||||
|
||||
predicate readStep(Node nodeFrom, ContentSet c, Node nodeTo) {
|
||||
@@ -1122,18 +1118,20 @@ module Conversions {
|
||||
* any value stored inside `f` is cleared at the pre-update node associated with `x`
|
||||
* in `x.f = newValue`.
|
||||
*/
|
||||
predicate clearsContent(Node n, ContentSet c) {
|
||||
matchClearStep(n, c)
|
||||
predicate clearsContent(Node n, ContentSet cs) {
|
||||
exists(Content c | cs = singleton(c) |
|
||||
matchClearStep(n, c)
|
||||
or
|
||||
attributeClearStep(n, c)
|
||||
or
|
||||
dictClearStep(n, c)
|
||||
or
|
||||
dictSplatParameterNodeClearStep(n, c)
|
||||
or
|
||||
VariableCapture::clearsContent(n, c)
|
||||
)
|
||||
or
|
||||
attributeClearStep(n, c)
|
||||
or
|
||||
dictClearStep(n, c)
|
||||
or
|
||||
FlowSummaryImpl::Private::Steps::summaryClearsContent(n.(FlowSummaryNode).getSummaryNode(), c)
|
||||
or
|
||||
dictSplatParameterNodeClearStep(n, c)
|
||||
or
|
||||
VariableCapture::clearsContent(n, c)
|
||||
FlowSummaryImpl::Private::Steps::summaryClearsContent(n.(FlowSummaryNode).getSummaryNode(), cs)
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -898,19 +898,65 @@ class CapturedVariableContent extends Content, TCapturedVariableContent {
|
||||
override string getMaDRepresentation() { none() }
|
||||
}
|
||||
|
||||
/**
|
||||
* An entity that represents a set of `Content`s.
|
||||
*
|
||||
* Most `ContentSet`s are singletons (i.e. they consist of a single `Content`),
|
||||
* but `AnyDictionaryElement` and `AnyTupleElement` act as wildcards on the
|
||||
* read side: a read at such a `ContentSet` matches any specific dictionary
|
||||
* key / tuple index store, as well as (for dictionaries) the
|
||||
* "unknown-bucket" Content `DictionaryElementAnyContent`.
|
||||
*
|
||||
* Keeping these as wildcard `ContentSet`s (rather than enumerating one
|
||||
* `ContentSet` per key/index) keeps the dataflow `readSetEx` relation small
|
||||
* when implicit reads are used (e.g. at sinks via `defaultImplicitTaintRead`).
|
||||
*/
|
||||
private newtype TContentSet =
|
||||
TSingletonContent(Content c) or
|
||||
TAnyTupleElement() or
|
||||
TAnyDictionaryElement()
|
||||
|
||||
/**
|
||||
* An entity that represents a set of `Content`s.
|
||||
*
|
||||
* The set may be interpreted differently depending on whether it is
|
||||
* stored into (`getAStoreContent`) or read from (`getAReadContent`).
|
||||
*/
|
||||
class ContentSet instanceof Content {
|
||||
class ContentSet extends TContentSet {
|
||||
/** Holds if this content set is the singleton `{c}`. */
|
||||
predicate isSingleton(Content c) { this = TSingletonContent(c) }
|
||||
|
||||
/** Holds if this content set is the wildcard for all tuple elements. */
|
||||
predicate isAnyTupleElement() { this = TAnyTupleElement() }
|
||||
|
||||
/** Holds if this content set is the wildcard for all dictionary elements. */
|
||||
predicate isAnyDictionaryElement() { this = TAnyDictionaryElement() }
|
||||
|
||||
/** Gets a content that may be stored into when storing into this set. */
|
||||
Content getAStoreContent() { result = this }
|
||||
Content getAStoreContent() { this = TSingletonContent(result) }
|
||||
|
||||
/** Gets a content that may be read from when reading from this set. */
|
||||
Content getAReadContent() { result = this }
|
||||
Content getAReadContent() {
|
||||
this = TSingletonContent(result)
|
||||
or
|
||||
// Wildcard expansion: a read at "any tuple element" matches a store at any
|
||||
// specific tuple index. (Stores always target a specific index, so we don't
|
||||
// need a `TupleElementAnyContent` Content kind here.)
|
||||
this = TAnyTupleElement() and result instanceof TupleElementContent
|
||||
or
|
||||
this = TAnyDictionaryElement() and
|
||||
(result instanceof DictionaryElementContent or result instanceof DictionaryElementAnyContent)
|
||||
}
|
||||
|
||||
/** Gets a textual representation of this content set. */
|
||||
string toString() { result = super.toString() }
|
||||
string toString() {
|
||||
exists(Content c | this = TSingletonContent(c) | result = c.toString())
|
||||
or
|
||||
this = TAnyTupleElement() and result = "Any tuple element"
|
||||
or
|
||||
this = TAnyDictionaryElement() and result = "Any dictionary element"
|
||||
}
|
||||
}
|
||||
|
||||
/** Gets the singleton `ContentSet` wrapping the `Content` `c`. */
|
||||
ContentSet singleton(Content c) { result = TSingletonContent(c) }
|
||||
|
||||
@@ -66,21 +66,27 @@ module Input implements InputSig<Location, DataFlowImplSpecific::PythonDataFlow>
|
||||
}
|
||||
|
||||
string encodeContent(ContentSet cs, string arg) {
|
||||
cs = TListElementContent() and result = "ListElement" and arg = ""
|
||||
or
|
||||
cs = TSetElementContent() and result = "SetElement" and arg = ""
|
||||
or
|
||||
exists(int index |
|
||||
cs = TTupleElementContent(index) and result = "TupleElement" and arg = index.toString()
|
||||
exists(Content c | cs.isSingleton(c) |
|
||||
c = TListElementContent() and result = "ListElement" and arg = ""
|
||||
or
|
||||
c = TSetElementContent() and result = "SetElement" and arg = ""
|
||||
or
|
||||
exists(int index |
|
||||
c = TTupleElementContent(index) and result = "TupleElement" and arg = index.toString()
|
||||
)
|
||||
or
|
||||
exists(string key |
|
||||
c = TDictionaryElementContent(key) and result = "DictionaryElement" and arg = key
|
||||
)
|
||||
or
|
||||
c = TDictionaryElementAnyContent() and result = "DictionaryElementAny" and arg = ""
|
||||
or
|
||||
exists(string attr | c = TAttributeContent(attr) and result = "Attribute" and arg = attr)
|
||||
)
|
||||
or
|
||||
exists(string key |
|
||||
cs = TDictionaryElementContent(key) and result = "DictionaryElement" and arg = key
|
||||
)
|
||||
cs.isAnyTupleElement() and result = "AnyTupleElement" and arg = ""
|
||||
or
|
||||
cs = TDictionaryElementAnyContent() and result = "DictionaryElementAny" and arg = ""
|
||||
or
|
||||
exists(string attr | cs = TAttributeContent(attr) and result = "Attribute" and arg = attr)
|
||||
cs.isAnyDictionaryElement() and result = "AnyDictionaryElement" and arg = ""
|
||||
}
|
||||
|
||||
bindingset[token]
|
||||
@@ -139,27 +145,29 @@ module Private {
|
||||
predicate withContent = SC::withContent/1;
|
||||
|
||||
/** Gets a summary component that represents a list element. */
|
||||
SummaryComponent listElement() { result = content(any(ListElementContent c)) }
|
||||
SummaryComponent listElement() { result = content(singleton(any(ListElementContent c))) }
|
||||
|
||||
/** Gets a summary component that represents a set element. */
|
||||
SummaryComponent setElement() { result = content(any(SetElementContent c)) }
|
||||
SummaryComponent setElement() { result = content(singleton(any(SetElementContent c))) }
|
||||
|
||||
/** Gets a summary component that represents a tuple element. */
|
||||
SummaryComponent tupleElement(int index) {
|
||||
exists(TupleElementContent c | c.getIndex() = index and result = content(c))
|
||||
exists(TupleElementContent c | c.getIndex() = index and result = content(singleton(c)))
|
||||
}
|
||||
|
||||
/** Gets a summary component that represents a dictionary element. */
|
||||
SummaryComponent dictionaryElement(string key) {
|
||||
exists(DictionaryElementContent c | c.getKey() = key and result = content(c))
|
||||
exists(DictionaryElementContent c | c.getKey() = key and result = content(singleton(c)))
|
||||
}
|
||||
|
||||
/** Gets a summary component that represents a dictionary element at any key. */
|
||||
SummaryComponent dictionaryElementAny() { result = content(any(DictionaryElementAnyContent c)) }
|
||||
SummaryComponent dictionaryElementAny() {
|
||||
result = content(singleton(any(DictionaryElementAnyContent c)))
|
||||
}
|
||||
|
||||
/** Gets a summary component that represents an attribute element. */
|
||||
SummaryComponent attribute(string attr) {
|
||||
exists(AttributeContent c | c.getAttribute() = attr and result = content(c))
|
||||
exists(AttributeContent c | c.getAttribute() = attr and result = content(singleton(c)))
|
||||
}
|
||||
|
||||
/** Gets a summary component that represents the return value of a call. */
|
||||
|
||||
@@ -17,14 +17,15 @@ predicate defaultTaintSanitizer(DataFlow::Node node) { none() }
|
||||
*/
|
||||
bindingset[node]
|
||||
predicate defaultImplicitTaintRead(DataFlow::Node node, DataFlow::ContentSet c) {
|
||||
// We allow implicit reads of precise content
|
||||
// imprecise content has already bubled up.
|
||||
// We allow implicit reads of precise content; imprecise content has already
|
||||
// bubbled up. We use the wildcard content sets here rather than the
|
||||
// per-key/per-index ones to avoid blowing up the size of `Stage1::readSetEx`
|
||||
// (otherwise this predicate would expand to one row per (node, distinct key
|
||||
// or index) and the framework's read-set relation grows quadratically).
|
||||
// `ContentSet.getAReadContent` expands these wildcards back to the specific
|
||||
// contents when matching against stores.
|
||||
exists(node) and
|
||||
(
|
||||
c instanceof DataFlow::TupleElementContent
|
||||
or
|
||||
c instanceof DataFlow::DictionaryElementContent
|
||||
)
|
||||
(c.isAnyTupleElement() or c.isAnyDictionaryElement())
|
||||
}
|
||||
|
||||
private module Cached {
|
||||
|
||||
@@ -241,7 +241,7 @@ module TypeTrackingInput implements Shared::TypeTrackingInput<Location> {
|
||||
// is only fed set/list content)
|
||||
not nodeFrom instanceof DataFlowPublic::IterableElementNode
|
||||
or
|
||||
TypeTrackerSummaryFlow::basicStoreStep(nodeFrom, nodeTo, content)
|
||||
TypeTrackerSummaryFlow::basicStoreStep(nodeFrom, nodeTo, DataFlowPublic::singleton(content))
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -272,14 +272,15 @@ module TypeTrackingInput implements Shared::TypeTrackingInput<Location> {
|
||||
nodeFrom.asCfgNode() instanceof SequenceNode
|
||||
)
|
||||
or
|
||||
TypeTrackerSummaryFlow::basicLoadStep(nodeFrom, nodeTo, content)
|
||||
TypeTrackerSummaryFlow::basicLoadStep(nodeFrom, nodeTo, DataFlowPublic::singleton(content))
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if the `loadContent` of `nodeFrom` is stored in the `storeContent` of `nodeTo`.
|
||||
*/
|
||||
predicate loadStoreStep(Node nodeFrom, Node nodeTo, Content loadContent, Content storeContent) {
|
||||
TypeTrackerSummaryFlow::basicLoadStoreStep(nodeFrom, nodeTo, loadContent, storeContent)
|
||||
TypeTrackerSummaryFlow::basicLoadStoreStep(nodeFrom, nodeTo,
|
||||
DataFlowPublic::singleton(loadContent), DataFlowPublic::singleton(storeContent))
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -61,10 +61,13 @@ module EscapingCaptureFlowConfig implements DataFlow::ConfigSig {
|
||||
predicate allowImplicitRead(DataFlow::Node node, DataFlow::ContentSet cs) {
|
||||
isSink(node) and
|
||||
(
|
||||
cs.(DataFlow::TupleElementContent).getIndex() in [0 .. 10] or
|
||||
cs instanceof DataFlow::ListElementContent or
|
||||
cs instanceof DataFlow::SetElementContent or
|
||||
cs instanceof DataFlow::DictionaryElementAnyContent
|
||||
cs.isAnyTupleElement()
|
||||
or
|
||||
cs.isAnyDictionaryElement()
|
||||
or
|
||||
cs.getAStoreContent() instanceof DataFlow::ListElementContent
|
||||
or
|
||||
cs.getAStoreContent() instanceof DataFlow::SetElementContent
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,16 +6,16 @@ pat = ... # some pattern
|
||||
compiled_pat = re.compile(pat)
|
||||
|
||||
# see https://docs.python.org/3/library/re.html#functions
|
||||
ensure_not_tainted(
|
||||
# returns Match object, which is tested properly below. (note: with the flow summary
|
||||
# modeling, objects containing tainted values are not themselves tainted).
|
||||
re.search(pat, ts),
|
||||
re.match(pat, ts),
|
||||
re.fullmatch(pat, ts),
|
||||
ensure_tainted(
|
||||
# returns Match object, which is tested properly below. (note: the match objects contain
|
||||
# tainted values but are not themselves tainted - this test relies on implicit reads at sinks).
|
||||
re.search(pat, ts), # $ tainted
|
||||
re.match(pat, ts), # $ tainted
|
||||
re.fullmatch(pat, ts), # $ tainted
|
||||
|
||||
compiled_pat.search(ts),
|
||||
compiled_pat.match(ts),
|
||||
compiled_pat.fullmatch(ts),
|
||||
compiled_pat.search(ts), # $ tainted
|
||||
compiled_pat.match(ts), # $ tainted
|
||||
compiled_pat.fullmatch(ts), # $ tainted
|
||||
)
|
||||
|
||||
# Match object
|
||||
|
||||
Reference in New Issue
Block a user