mirror of
https://github.com/github/codeql.git
synced 2026-04-22 23:35:14 +02:00
Merge pull request #15711 from RasmusWL/tt-content
Python: Add type tracking for content
This commit is contained in:
@@ -5,9 +5,14 @@
|
||||
|
||||
private import internal.TypeTrackingImpl as Impl
|
||||
import Impl::Shared::TypeTracking<Impl::TypeTrackingInput>
|
||||
private import semmle.python.dataflow.new.internal.DataFlowPublic as DataFlowPublic
|
||||
|
||||
/** A string that may appear as the name of an attribute or access path. */
|
||||
class AttributeName = Impl::TypeTrackingInput::Content;
|
||||
/**
|
||||
* DEPRECATED.
|
||||
*
|
||||
* A string that may appear as the name of an attribute or access path.
|
||||
*/
|
||||
deprecated class AttributeName = Impl::TypeTrackingInput::Content;
|
||||
|
||||
/**
|
||||
* A summary of the steps needed to track a value to a given dataflow node.
|
||||
@@ -40,7 +45,11 @@ class TypeTracker extends Impl::TypeTracker {
|
||||
* Holds if this is the starting point of type tracking, and the value starts in the attribute named `attrName`.
|
||||
* The type tracking only ends after the attribute has been loaded.
|
||||
*/
|
||||
predicate startInAttr(string attrName) { this.startInContent(attrName) }
|
||||
predicate startInAttr(string attrName) {
|
||||
exists(DataFlowPublic::AttributeContent content | content.getAttribute() = attrName |
|
||||
this.startInContent(content)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* INTERNAL. DO NOT USE.
|
||||
@@ -48,9 +57,8 @@ class TypeTracker extends Impl::TypeTracker {
|
||||
* Gets the attribute associated with this type tracker.
|
||||
*/
|
||||
string getAttr() {
|
||||
result = this.getContent().asSome()
|
||||
or
|
||||
this.getContent().isNone() and
|
||||
result = ""
|
||||
if this.getContent().asSome() instanceof DataFlowPublic::AttributeContent
|
||||
then result = this.getContent().asSome().(DataFlowPublic::AttributeContent).getAttribute()
|
||||
else result = ""
|
||||
}
|
||||
}
|
||||
|
||||
@@ -642,23 +642,37 @@ predicate jumpStepNotSharedWithTypeTracker(Node nodeFrom, Node nodeTo) {
|
||||
// Field flow
|
||||
//--------
|
||||
/**
|
||||
* Holds if data can flow from `nodeFrom` to `nodeTo` via an assignment to
|
||||
* content `c`.
|
||||
* Subset of `storeStep` that should be shared with type-tracking.
|
||||
*
|
||||
* NOTE: This does not include attributeStoreStep right now, since it has its' own
|
||||
* modeling in the type-tracking library (which is slightly different due to
|
||||
* PostUpdateNodes).
|
||||
*
|
||||
* As of 2024-04-02 the type-tracking library only supports precise content, so there is
|
||||
* no reason to include steps for list content right now.
|
||||
*/
|
||||
predicate storeStep(Node nodeFrom, ContentSet c, Node nodeTo) {
|
||||
listStoreStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
setStoreStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
predicate storeStepCommon(Node nodeFrom, ContentSet c, Node nodeTo) {
|
||||
tupleStoreStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
dictStoreStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
moreDictStoreSteps(nodeFrom, c, nodeTo)
|
||||
or
|
||||
comprehensionStoreStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
iterableUnpackingStoreStep(nodeFrom, c, nodeTo)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if data can flow from `nodeFrom` to `nodeTo` via an assignment to
|
||||
* content `c`.
|
||||
*/
|
||||
predicate storeStep(Node nodeFrom, ContentSet c, Node nodeTo) {
|
||||
storeStepCommon(nodeFrom, c, nodeTo)
|
||||
or
|
||||
listStoreStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
setStoreStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
comprehensionStoreStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
attributeStoreStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
@@ -892,12 +906,19 @@ predicate attributeStoreStep(Node nodeFrom, AttributeContent c, Node nodeTo) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if data can flow from `nodeFrom` to `nodeTo` via a read of content `c`.
|
||||
* Subset of `readStep` that should be shared with type-tracking.
|
||||
*/
|
||||
predicate readStep(Node nodeFrom, ContentSet c, Node nodeTo) {
|
||||
predicate readStepCommon(Node nodeFrom, ContentSet c, Node nodeTo) {
|
||||
subscriptReadStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
iterableUnpackingReadStep(nodeFrom, c, nodeTo)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if data can flow from `nodeFrom` to `nodeTo` via a read of content `c`.
|
||||
*/
|
||||
predicate readStep(Node nodeFrom, ContentSet c, Node nodeTo) {
|
||||
readStepCommon(nodeFrom, c, nodeTo)
|
||||
or
|
||||
matchReadStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
/** Step Summaries and Type Tracking */
|
||||
|
||||
private import TypeTrackerSpecific
|
||||
private import semmle.python.dataflow.new.internal.DataFlowPublic as DataFlowPublic
|
||||
|
||||
cached
|
||||
private module Cached {
|
||||
@@ -12,10 +13,22 @@ private module Cached {
|
||||
LevelStep() or
|
||||
CallStep() or
|
||||
ReturnStep() or
|
||||
deprecated StoreStep(TypeTrackerContent content) { basicStoreStep(_, _, content) } or
|
||||
deprecated LoadStep(TypeTrackerContent content) { basicLoadStep(_, _, content) } or
|
||||
deprecated StoreStep(TypeTrackerContent content) {
|
||||
exists(DataFlowPublic::AttributeContent dfc | dfc.getAttribute() = content |
|
||||
basicStoreStep(_, _, dfc)
|
||||
)
|
||||
} or
|
||||
deprecated LoadStep(TypeTrackerContent content) {
|
||||
exists(DataFlowPublic::AttributeContent dfc | dfc.getAttribute() = content |
|
||||
basicLoadStep(_, _, dfc)
|
||||
)
|
||||
} or
|
||||
deprecated LoadStoreStep(TypeTrackerContent load, TypeTrackerContent store) {
|
||||
basicLoadStoreStep(_, _, load, store)
|
||||
exists(DataFlowPublic::AttributeContent dfcLoad, DataFlowPublic::AttributeContent dfcStore |
|
||||
dfcLoad.getAttribute() = load and dfcStore.getAttribute() = store
|
||||
|
|
||||
basicLoadStoreStep(_, _, dfcLoad, dfcStore)
|
||||
)
|
||||
} or
|
||||
deprecated WithContent(ContentFilter filter) { basicWithContentStep(_, _, filter) } or
|
||||
deprecated WithoutContent(ContentFilter filter) { basicWithoutContentStep(_, _, filter) } or
|
||||
@@ -29,13 +42,13 @@ private module Cached {
|
||||
// Restrict `content` to those that might eventually match a load.
|
||||
// We can't rely on `basicStoreStep` since `startInContent` might be used with
|
||||
// a content that has no corresponding store.
|
||||
exists(TypeTrackerContent loadContents |
|
||||
exists(DataFlowPublic::AttributeContent loadContents |
|
||||
(
|
||||
basicLoadStep(_, _, loadContents)
|
||||
or
|
||||
basicLoadStoreStep(_, _, loadContents, _)
|
||||
) and
|
||||
compatibleContents(content, loadContents)
|
||||
compatibleContents(content, loadContents.getAttribute())
|
||||
)
|
||||
}
|
||||
|
||||
@@ -45,13 +58,13 @@ private module Cached {
|
||||
content = noContent()
|
||||
or
|
||||
// As in MkTypeTracker, restrict `content` to those that might eventually match a store.
|
||||
exists(TypeTrackerContent storeContent |
|
||||
exists(DataFlowPublic::AttributeContent storeContent |
|
||||
(
|
||||
basicStoreStep(_, _, storeContent)
|
||||
or
|
||||
basicLoadStoreStep(_, _, _, storeContent)
|
||||
) and
|
||||
compatibleContents(storeContent, content)
|
||||
compatibleContents(storeContent.getAttribute(), content)
|
||||
)
|
||||
}
|
||||
|
||||
@@ -198,7 +211,10 @@ private module Cached {
|
||||
flowsToStoreStep(nodeFrom, nodeTo, content) and
|
||||
summary = StoreStep(content)
|
||||
or
|
||||
basicLoadStep(nodeFrom, nodeTo, content) and summary = LoadStep(content)
|
||||
exists(DataFlowPublic::AttributeContent dfc | dfc.getAttribute() = content |
|
||||
basicLoadStep(nodeFrom, nodeTo, dfc)
|
||||
) and
|
||||
summary = LoadStep(content)
|
||||
)
|
||||
or
|
||||
exists(TypeTrackerContent loadContent, TypeTrackerContent storeContent |
|
||||
@@ -281,7 +297,12 @@ deprecated private predicate smallstepProj(Node nodeFrom, StepSummary summary) {
|
||||
deprecated private predicate flowsToStoreStep(
|
||||
Node nodeFrom, TypeTrackingNode nodeTo, TypeTrackerContent content
|
||||
) {
|
||||
exists(Node obj | nodeTo.flowsTo(obj) and basicStoreStep(nodeFrom, obj, content))
|
||||
exists(Node obj |
|
||||
nodeTo.flowsTo(obj) and
|
||||
exists(DataFlowPublic::AttributeContent dfc | dfc.getAttribute() = content |
|
||||
basicStoreStep(nodeFrom, obj, dfc)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -292,7 +313,12 @@ deprecated private predicate flowsToLoadStoreStep(
|
||||
TypeTrackerContent storeContent
|
||||
) {
|
||||
exists(Node obj |
|
||||
nodeTo.flowsTo(obj) and basicLoadStoreStep(nodeFrom, obj, loadContent, storeContent)
|
||||
nodeTo.flowsTo(obj) and
|
||||
exists(DataFlowPublic::AttributeContent loadDfc, DataFlowPublic::AttributeContent storeDfc |
|
||||
loadDfc.getAttribute() = loadContent and storeDfc.getAttribute() = storeContent
|
||||
|
|
||||
basicLoadStoreStep(nodeFrom, obj, loadDfc, storeDfc)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@ deprecated class OptionalTypeTrackerContent extends string {
|
||||
OptionalTypeTrackerContent() {
|
||||
this = ""
|
||||
or
|
||||
this instanceof TypeTrackingImpl::TypeTrackingInput::Content
|
||||
this = any(DataFlowPublic::AttributeContent dfc).getAttribute()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -8,6 +8,7 @@ private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPr
|
||||
private import codeql.typetracking.internal.SummaryTypeTracker as SummaryTypeTracker
|
||||
private import semmle.python.dataflow.new.internal.FlowSummaryImpl as FlowSummaryImpl
|
||||
private import semmle.python.dataflow.new.internal.DataFlowDispatch as DataFlowDispatch
|
||||
private import semmle.python.dataflow.new.internal.IterableUnpacking as IterableUnpacking
|
||||
|
||||
private module SummaryTypeTrackerInput implements SummaryTypeTracker::Input {
|
||||
// Dataflow nodes
|
||||
@@ -97,24 +98,25 @@ private module SummaryTypeTrackerInput implements SummaryTypeTracker::Input {
|
||||
|
||||
private module TypeTrackerSummaryFlow = SummaryTypeTracker::SummaryFlow<SummaryTypeTrackerInput>;
|
||||
|
||||
/**
|
||||
* Gets the name of a possible piece of content. For Python, this is currently only attribute names,
|
||||
* using the name of the attribute for the corresponding content.
|
||||
*/
|
||||
private string getPossibleContentName() {
|
||||
Stages::TypeTracking::ref() and // the TypeTracking::append() etc. predicates that we want to cache depend on this predicate, so we can place the `ref()` call here to get around identical files.
|
||||
result = any(DataFlowPublic::AttrRef a).getAttributeName()
|
||||
}
|
||||
|
||||
module TypeTrackingInput implements Shared::TypeTrackingInput {
|
||||
class Node = DataFlowPublic::Node;
|
||||
|
||||
class LocalSourceNode = DataFlowPublic::LocalSourceNode;
|
||||
|
||||
class Content instanceof string {
|
||||
Content() { this = getPossibleContentName() }
|
||||
|
||||
string toString() { result = this }
|
||||
class Content extends DataFlowPublic::Content {
|
||||
Content() {
|
||||
// TODO: for now, it's not 100% clear if should support non-precise content in
|
||||
// type-tracking, or if it will lead to bad results. We start with only allowing
|
||||
// precise content, which should always be a good improvement! It also simplifies
|
||||
// the process of examining new results from non-precise content steps in the
|
||||
// future, since you will _only_ have to look over the results from the new
|
||||
// non-precise steps.
|
||||
this instanceof DataFlowPublic::AttributeContent
|
||||
or
|
||||
this instanceof DataFlowPublic::DictionaryElementContent
|
||||
or
|
||||
this instanceof DataFlowPublic::TupleElementContent
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -134,7 +136,27 @@ module TypeTrackingInput implements Shared::TypeTrackingInput {
|
||||
}
|
||||
|
||||
/** Holds if there is a simple local flow step from `nodeFrom` to `nodeTo` */
|
||||
predicate simpleLocalSmallStep = DataFlowPrivate::simpleLocalFlowStepForTypetracking/2;
|
||||
predicate simpleLocalSmallStep(Node nodeFrom, Node nodeTo) {
|
||||
DataFlowPrivate::simpleLocalFlowStepForTypetracking(nodeFrom, nodeTo) and
|
||||
// for `for k,v in foo` no need to do local flow step from the synthetic sequence
|
||||
// node for `k,v` to the tuple `k,v` -- since type-tracking only supports one level
|
||||
// of content tracking, and there is one read-step from `foo` the synthetic sequence
|
||||
// node required, we can skip the flow step from the synthetic sequence node to the
|
||||
// tuple itself, since the read-step from the tuple to the tuple elements will not
|
||||
// matter.
|
||||
not (
|
||||
IterableUnpacking::iterableUnpackingForReadStep(_, _, nodeFrom) and
|
||||
IterableUnpacking::iterableUnpackingTupleFlowStep(nodeFrom, nodeTo)
|
||||
) and
|
||||
// for nested iterable unpacking, such as `[[a]] = foo` or `((a,b),) = bar`, we can
|
||||
// ignore the flow steps from the synthetic sequence node to the real sequence node,
|
||||
// since we only support one level of content in type-trackers, and the nested
|
||||
// structure requires two levels at least to be useful.
|
||||
not exists(SequenceNode outer |
|
||||
outer.getAnElement() = nodeTo.asCfgNode() and
|
||||
IterableUnpacking::iterableUnpackingTupleFlowStep(nodeFrom, nodeTo)
|
||||
)
|
||||
}
|
||||
|
||||
/** Holds if there is a level step from `nodeFrom` to `nodeTo`, which may depend on the call graph. */
|
||||
predicate levelStepCall(Node nodeFrom, LocalSourceNode nodeTo) { none() }
|
||||
@@ -181,46 +203,68 @@ module TypeTrackingInput implements Shared::TypeTrackingInput {
|
||||
* Holds if `nodeFrom` is being written to the `content` content of the object in `nodeTo`.
|
||||
*/
|
||||
predicate storeStep(Node nodeFrom, Node nodeTo, Content content) {
|
||||
exists(DataFlowPublic::AttrWrite a |
|
||||
a.mayHaveAttributeName(content) and
|
||||
exists(DataFlowPublic::AttrWrite a, string attrName |
|
||||
content.(DataFlowPublic::AttributeContent).getAttribute() = attrName and
|
||||
a.mayHaveAttributeName(attrName) and
|
||||
nodeFrom = a.getValue() and
|
||||
nodeTo = a.getObject()
|
||||
)
|
||||
or
|
||||
exists(DataFlowPublic::ContentSet contents |
|
||||
contents.(DataFlowPublic::AttributeContent).getAttribute() = content
|
||||
// type-tracking doesn't really handle PostUpdateNodes, so for some assignment steps
|
||||
// like `my_dict["foo"] = foo` the data-flow step targets the PostUpdateNode for
|
||||
// `my_dict`, where we want to translate that into a type-tracking step that targets
|
||||
// the normal/non-PostUpdateNode for `my_dict`.
|
||||
exists(DataFlowPublic::Node storeTarget |
|
||||
DataFlowPrivate::storeStepCommon(nodeFrom, content, storeTarget)
|
||||
|
|
||||
TypeTrackerSummaryFlow::basicStoreStep(nodeFrom, nodeTo, contents)
|
||||
)
|
||||
not storeTarget instanceof DataFlowPrivate::SyntheticPostUpdateNode and
|
||||
nodeTo = storeTarget
|
||||
or
|
||||
nodeTo = storeTarget.(DataFlowPrivate::SyntheticPostUpdateNode).getPreUpdateNode()
|
||||
) and
|
||||
// when only supporting precise content, no need for IterableElementNode (since it
|
||||
// is only fed set/list content)
|
||||
not nodeFrom instanceof DataFlowPublic::IterableElementNode
|
||||
or
|
||||
TypeTrackerSummaryFlow::basicStoreStep(nodeFrom, nodeTo, content)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `nodeTo` is the result of accessing the `content` content of `nodeFrom`.
|
||||
*/
|
||||
predicate loadStep(Node nodeFrom, LocalSourceNode nodeTo, Content content) {
|
||||
exists(DataFlowPublic::AttrRead a |
|
||||
a.mayHaveAttributeName(content) and
|
||||
exists(DataFlowPublic::AttrRead a, string attrName |
|
||||
content.(DataFlowPublic::AttributeContent).getAttribute() = attrName and
|
||||
a.mayHaveAttributeName(attrName) and
|
||||
nodeFrom = a.getObject() and
|
||||
nodeTo = a
|
||||
)
|
||||
or
|
||||
exists(DataFlowPublic::ContentSet contents |
|
||||
contents.(DataFlowPublic::AttributeContent).getAttribute() = content
|
||||
|
|
||||
TypeTrackerSummaryFlow::basicLoadStep(nodeFrom, nodeTo, contents)
|
||||
DataFlowPrivate::readStepCommon(nodeFrom, content, nodeTo) and
|
||||
// Since we only support one level of content in type-trackers we don't actually
|
||||
// support `(aa, ab), (ba, bb) = ...`. Therefore we exclude the read-step from `(aa,
|
||||
// ab)` to `aa` (since it is not needed).
|
||||
not exists(SequenceNode outer |
|
||||
outer.getAnElement() = nodeFrom.asCfgNode() and
|
||||
IterableUnpacking::iterableUnpackingTupleFlowStep(_, nodeFrom)
|
||||
) and
|
||||
// Again, due to only supporting one level deep, for `for (k,v) in ...` we exclude read-step from
|
||||
// the tuple to `k` and `v`.
|
||||
not exists(DataFlowPublic::IterableSequenceNode seq, DataFlowPublic::IterableElementNode elem |
|
||||
IterableUnpacking::iterableUnpackingForReadStep(_, _, seq) and
|
||||
IterableUnpacking::iterableUnpackingConvertingReadStep(seq, _, elem) and
|
||||
IterableUnpacking::iterableUnpackingConvertingStoreStep(elem, _, nodeFrom) and
|
||||
nodeFrom.asCfgNode() instanceof SequenceNode
|
||||
)
|
||||
or
|
||||
TypeTrackerSummaryFlow::basicLoadStep(nodeFrom, nodeTo, content)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if the `loadContent` of `nodeFrom` is stored in the `storeContent` of `nodeTo`.
|
||||
*/
|
||||
predicate loadStoreStep(Node nodeFrom, Node nodeTo, Content loadContent, Content storeContent) {
|
||||
exists(DataFlowPublic::ContentSet loadContents, DataFlowPublic::ContentSet storeContents |
|
||||
loadContents.(DataFlowPublic::AttributeContent).getAttribute() = loadContent and
|
||||
storeContents.(DataFlowPublic::AttributeContent).getAttribute() = storeContent
|
||||
|
|
||||
TypeTrackerSummaryFlow::basicLoadStoreStep(nodeFrom, nodeTo, loadContents, storeContents)
|
||||
)
|
||||
TypeTrackerSummaryFlow::basicLoadStoreStep(nodeFrom, nodeTo, loadContent, storeContent)
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
Reference in New Issue
Block a user