PS: Add element content flow for reads and writes into arrays.

This commit is contained in:
Mathias Vorreiter Pedersen
2024-10-04 19:18:52 +01:00
parent 1f558a0b7f
commit cbfd0b363b
4 changed files with 183 additions and 25 deletions

View File

@@ -128,6 +128,8 @@ private module Cached {
n = member.getBase() and
not member.isStatic()
)
or
n = any(CfgNodes::ExprNodes::IndexCfgNode index).getBase()
}
cached
@@ -219,7 +221,16 @@ private module Cached {
}
// The kinds of content set: a single content, every element content, or one
// known element index paired with the unknown element index.
cached
newtype TContentSet = TSingletonContent(Content c)
newtype TContentSet =
TSingletonContent(Content c) or // the singleton set `{c}`
TAnyElementContent() or // all element contents
TKnownOrUnknownElementContent(Content::KnownElementContent c) // known index `c`, or an unknown index
/**
 * Holds if `cv` is a constant for which a dedicated known-element content is
 * created: an integer in the range 0 through 10, or any constant that has a
 * string value.
 */
private predicate trackKnownValue(ConstantValue cv) {
  cv.asInt() = [0 .. 10]
  or
  exists(cv.asString())
}
cached
newtype TContent =
@@ -227,15 +238,34 @@ private module Cached {
name = any(PropertyMember member).getName()
or
name = any(MemberExpr me).getMemberName()
}
} or
TKnownElementContent(ConstantValue cv) { trackKnownValue(cv) } or
TUnknownElementContent()
// Approximations of contents, as produced by `getContentApprox`.
cached
newtype TContentApprox = TNonElementContentApprox(Content c)
newtype TContentApprox =
// Non-element content is approximated by itself.
TNonElementContentApprox(Content c) { not c instanceof Content::ElementContent } or
// Approximation of element content at an unknown index.
TUnknownElementContentApprox() or
// One shared approximation for all element contents with an integer index.
TKnownIntegerElementContentApprox() or
// Approximation of other known indices by the string from `approxKnownElementIndex`.
TKnownElementContentApprox(string approx) { approx = approxKnownElementIndex(_) }
cached
newtype TDataFlowType = TUnknownDataFlowType()
}
// Union of the known-index and unknown-index element branches of `TContent`.
class TElementContent = TKnownElementContent or TUnknownElementContent;
/**
 * Gets a string for approximating known element indices.
 *
 * Has no result when `cv` has an integer value. Otherwise the result is
 * derived from `cv.serialize()`: the serialized string itself when it is
 * shorter than two characters, or its `prefix(2)`.
 */
private string approxKnownElementIndex(ConstantValue cv) {
  not exists(cv.asInt()) and
  exists(string serialized | serialized = cv.serialize() |
    result = serialized.prefix(2)
    or
    serialized.length() < 2 and
    result = serialized
  )
}
import Cached
/** Holds if `n` should be hidden from path explanations. */
@@ -477,26 +507,54 @@ predicate jumpStep(Node pred, Node succ) {
* content `c`.
*/
predicate storeStep(Node node1, ContentSet c, Node node2) {
node2.(PostUpdateNode).getPreUpdateNode().asExpr() =
any(CfgNodes::ExprNodes::MemberCfgNode var |
exists(CfgNodes::StmtNodes::AssignStmtCfgNode assign |
var = assign.getLeftHandSide() and
node1.asStmt() = assign.getRightHandSide()
|
c.isSingleton(any(Content::FieldContent ct | ct.getName() = var.getMemberName()))
)
).getBase()
// Member write: the right-hand side of the assignment (`node1`) is stored into
// the field named by the member access; `node2` is the post-update node of
// the base of that access.
exists(CfgNodes::ExprNodes::MemberCfgWriteAccessNode var, Content::FieldContent fc |
node2.(PostUpdateNode).getPreUpdateNode().asExpr() = var.getBase() and
node1.asStmt() = var.getAssignStmt().getRightHandSide() and
fc.getName() = var.getMemberName() and
c.isSingleton(fc)
)
or
// Index write: the right-hand side of the assignment (`node1`) is stored into
// an element of the base; `node2` is the post-update node of the base.
exists(
CfgNodes::ExprNodes::IndexCfgWriteNode var, Content::KnownElementContent ec, int index,
CfgNodes::ExprCfgNode e
|
node2.(PostUpdateNode).getPreUpdateNode().asExpr() = var.getBase() and
node1.asStmt() = var.getAssignStmt().getRightHandSide() and
c.isKnownOrUnknownElement(ec) and
// Only element contents whose index has an integer value can match here.
// NOTE(review): known contents with a string index are never selected by
// this disjunct - confirm whether that is intended.
index = ec.getIndex().asInt() and
e = var.getIndex()
|
// The index expression has a constant integer value: only that index matches.
index = e.getValue().asInt()
or
// The index expression has no constant integer value: the step holds for
// every integer-indexed `ec`.
not exists(e.getValue().asInt())
)
}
/**
 * Holds if there is a read step of content `c` from `node1` to `node2`.
 */
predicate readStep(Node node1, ContentSet c, Node node2) {
node2.asExpr() =
any(CfgNodes::ExprNodes::MemberCfgReadAccessNode var |
node1.asExpr() = var.getBase() and
c.isSingleton(any(Content::FieldContent ct | ct.getName() = var.getMemberName()))
)
// Member read: reads the field named by the member access from its base
// (`node1`) into the access expression itself (`node2`).
exists(CfgNodes::ExprNodes::MemberCfgReadAccessNode var, Content::FieldContent fc |
node2.asExpr() = var and
node1.asExpr() = var.getBase() and
fc.getName() = var.getMemberName() and
c.isSingleton(fc)
)
or
// Index read: reads an element of the base (`node1`) into the index
// expression itself (`node2`).
exists(
CfgNodes::ExprNodes::IndexCfgReadNode var, Content::KnownElementContent ec, int index,
CfgNodes::ExprCfgNode e
|
node2.asExpr() = var and
node1.asExpr() = var.getBase() and
c.isKnownOrUnknownElement(ec) and
// Only element contents whose index has an integer value can match here.
// NOTE(review): known contents with a string index are never selected by
// this disjunct - confirm whether that is intended.
index = ec.getIndex().asInt() and
e = var.getIndex()
|
// The index expression has a constant integer value: only that index matches.
index = e.getValue().asInt()
or
// The index expression has no constant integer value: the step holds for
// every integer-indexed `ec`.
not exists(e.getValue().asInt())
)
}
/**
@@ -584,7 +642,7 @@ class DataFlowExpr = CfgNodes::ExprCfgNode;
* Holds if access paths with `c` at their head always should be tracked at high
* precision. This disables adaptive access path precision for such access paths.
*/
predicate forceHighPrecision(Content c) { none() }
// Holds exactly for element content (`Content::ElementContent`).
predicate forceHighPrecision(Content c) { c instanceof Content::ElementContent }
class NodeRegion instanceof Unit {
string toString() { result = "NodeRegion" }
@@ -653,7 +711,18 @@ class ContentApprox extends TContentApprox {
}
/** Gets an approximated value for content `c`. */
ContentApprox getContentApprox(Content c) { result = TNonElementContentApprox(c) }
ContentApprox getContentApprox(Content c) {
// Element content at an unknown index.
c instanceof Content::UnknownElementContent and
result = TUnknownElementContentApprox()
or
// Element content at a known integer index: all such contents share one
// approximation.
exists(c.(Content::KnownElementContent).getIndex().asInt()) and
result = TKnownIntegerElementContentApprox()
or
// Element content at a known non-integer index: approximated by the string
// that `approxKnownElementIndex` yields for the index.
result =
TKnownElementContentApprox(approxKnownElementIndex(c.(Content::KnownElementContent).getIndex()))
or
// Everything else: `TNonElementContentApprox` only exists for contents that
// are not element contents, so this disjunct never applies to the cases above.
result = TNonElementContentApprox(c)
}
/**
* A unit class for adding additional jump steps.

View File

@@ -169,6 +169,26 @@ class Content extends TContent {
/** Provides different sub classes of `Content`. */
module Content {
/**
 * An element in a collection, for example an element in an array or in a hash.
 *
 * This is either a `KnownElementContent` or an `UnknownElementContent`.
 */
class ElementContent extends Content, TElementContent { }
/**
 * An element in a collection at a known index.
 *
 * The index is the constant value carried by the underlying
 * `TKnownElementContent` branch.
 */
class KnownElementContent extends ElementContent, TKnownElementContent {
  /** Gets the index in the collection. */
  ConstantValue getIndex() { this = TKnownElementContent(result) }

  /** Gets a textual representation: `element` followed by the index. */
  override string toString() { result = "element " + this.getIndex() }
}
/**
 * An element in a collection at an unknown index.
 *
 * This corresponds to the single `TUnknownElementContent` value.
 */
class UnknownElementContent extends ElementContent, TUnknownElementContent {
// Rendered without an index, unlike `KnownElementContent`.
override string toString() { result = "element" }
}
/** A field of an object. */
class FieldContent extends Content, TFieldContent {
private string name;
@@ -192,19 +212,66 @@ class ContentSet extends TContentSet {
/**
 * Holds if this content set is the singleton `{c}`.
 *
 * Both reading from and storing into such a set involve exactly `c`.
 */
predicate isSingleton(Content c) { this = TSingletonContent(c) }
/**
 * Holds if this content set represents all `ElementContent`s.
 *
 * Reading from such a set may read any `ElementContent`; storing into it
 * stores only into the unknown element content.
 */
predicate isAnyElement() { this = TAnyElementContent() }
/**
 * Holds if this content set represents a specific known element index, or an
 * unknown element index.
 *
 * Reading from such a set may read `c` or the unknown element content;
 * storing into it stores only into `c`.
 */
predicate isKnownOrUnknownElement(Content::KnownElementContent c) {
this = TKnownOrUnknownElementContent(c)
}
/**
 * Gets a textual representation of this content set.
 *
 * A singleton is rendered as its content, the any-element set as
 * `any element`, and a known-or-unknown set as the known content followed by
 * ` or unknown`.
 */
string toString() {
  this.isAnyElement() and
  result = "any element"
  or
  exists(Content::KnownElementContent known |
    this.isKnownOrUnknownElement(known) and
    result = known + " or unknown"
  )
  or
  exists(Content single | this.isSingleton(single) | result = single.toString())
}
/** Gets a content that may be stored into when storing into this set. */
Content getAStoreContent() { this.isSingleton(result) }
Content getAStoreContent() {
// A singleton set stores into its single content.
this.isSingleton(result)
or
// For reverse stores, `a[unknown][0] = x`, it is important that the read step
// from `a` to `a[unknown]` (which can read any element) gets translated into
// a reverse store step that stores only into `?`, i.e. the unknown element
// content.
this.isAnyElement() and
result = TUnknownElementContent()
or
// For reverse stores, `a[1][0] = x`, it is important that the read step
// from `a` to `a[1]` (which can read both elements stored at exactly index `1`
// and elements stored at an unknown index) gets translated into a reverse
// store step that stores only into `1`.
this.isKnownOrUnknownElement(result)
}
/**
 * Gets a content that may be read when this set is a known-or-unknown element
 * set: either the known element content itself, or the unknown element content.
 */
pragma[nomagic]
private Content getAnElementReadContent() {
  this.isKnownOrUnknownElement(result)
  or
  this.isKnownOrUnknownElement(_) and
  result = TUnknownElementContent()
}
/** Gets a content that may be read from when reading from this set. */
Content getAReadContent() { this.isSingleton(result) }
Content getAReadContent() {
// A singleton set reads its single content.
this.isSingleton(result)
or
// The any-element set reads every element content, known or unknown.
this.isAnyElement() and
result instanceof Content::ElementContent
or
// A known-or-unknown set reads the known content or the unknown element content.
result = this.getAnElementReadContent()
}
}
/**

View File

@@ -30,7 +30,19 @@ private module Cached {
*/
cached
predicate defaultAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo, string model) {
none()
// Although flow through collections is modeled precisely using stores/reads, we still
// allow flow out of a _tainted_ collection. This is needed in order to support taint-
// tracking configurations where the source is a collection.
exists(DataFlow::ContentSet c | readStep(nodeFrom, c, nodeTo) |
// A read of one specific element content (known or unknown index).
c.isSingleton(any(DataFlow::Content::ElementContent ec))
or
// A read of a known index or the unknown index.
c.isKnownOrUnknownElement(_)
// or
// TODO: We don't generate this one from readSteps yet, but we will as
// soon as we start on models-as-data.
// c.isAnyElement()
) and
// These steps carry an empty model string.
model = ""
}
/**

View File

@@ -53,11 +53,21 @@ private module SummaryTypeTrackerInput implements SummaryTypeTracker::Input {
class ContentFilter = TypeTrackingInput::ContentFilter;
// Gets the element filter for `content` when `content` is the any-element set
// or the singleton set of the unknown element content; has no result otherwise.
// NOTE(review): `Content` here is presumably an alias for `DataFlow::ContentSet`
// (it is queried with `isAnyElement`/`isSingleton`) - confirm against the module.
ContentFilter getFilterFromWithoutContentStep(Content content) {
none() // TODO
(
content.isAnyElement()
or
// Only the unknown element content qualifies here, not known indices.
content.isSingleton(any(DataFlow::Content::UnknownElementContent c))
) and
result = MkElementFilter()
}
// Gets the element filter for `content` when `content` is the any-element set
// or the singleton set of any element content; has no result otherwise.
// NOTE(review): `Content` here is presumably an alias for `DataFlow::ContentSet`
// (it is queried with `isAnyElement`/`isSingleton`) - confirm against the module.
ContentFilter getFilterFromWithContentStep(Content content) {
none() // TODO
(
content.isAnyElement()
or
// Any element content qualifies here, whether its index is known or unknown.
content.isSingleton(any(DataFlow::Content::ElementContent c))
) and
result = MkElementFilter()
}
// Summaries and their stacks