diff --git a/powershell/ql/lib/semmle/code/powershell/dataflow/internal/DataFlowPrivate.qll b/powershell/ql/lib/semmle/code/powershell/dataflow/internal/DataFlowPrivate.qll index 5a2e3f1145b..815df034a5a 100644 --- a/powershell/ql/lib/semmle/code/powershell/dataflow/internal/DataFlowPrivate.qll +++ b/powershell/ql/lib/semmle/code/powershell/dataflow/internal/DataFlowPrivate.qll @@ -128,6 +128,8 @@ private module Cached { n = member.getBase() and not member.isStatic() ) + or + n = any(CfgNodes::ExprNodes::IndexCfgNode index).getBase() } cached @@ -219,7 +221,16 @@ private module Cached { } cached - newtype TContentSet = TSingletonContent(Content c) + newtype TContentSet = + TSingletonContent(Content c) or + TAnyElementContent() or + TKnownOrUnknownElementContent(Content::KnownElementContent c) + + private predicate trackKnownValue(ConstantValue cv) { + exists(cv.asString()) + or + cv.asInt() = [0 .. 10] + } cached newtype TContent = @@ -227,15 +238,34 @@ private module Cached { name = any(PropertyMember member).getName() or name = any(MemberExpr me).getMemberName() - } + } or + TKnownElementContent(ConstantValue cv) { trackKnownValue(cv) } or + TUnknownElementContent() cached - newtype TContentApprox = TNonElementContentApprox(Content c) + newtype TContentApprox = + TNonElementContentApprox(Content c) { not c instanceof Content::ElementContent } or + TUnknownElementContentApprox() or + TKnownIntegerElementContentApprox() or + TKnownElementContentApprox(string approx) { approx = approxKnownElementIndex(_) } cached newtype TDataFlowType = TUnknownDataFlowType() } +class TElementContent = TKnownElementContent or TUnknownElementContent; + +/** Gets a string for approximating known element indices. */ +private string approxKnownElementIndex(ConstantValue cv) { + not exists(cv.asInt()) and + exists(string s | s = cv.serialize() | + s.length() < 2 and + result = s + or + result = s.prefix(2) + ) +} + import Cached /** Holds if `n` should be hidden from path explanations. 
*/ @@ -477,26 +507,54 @@ predicate jumpStep(Node pred, Node succ) { * content `c`. */ predicate storeStep(Node node1, ContentSet c, Node node2) { - node2.(PostUpdateNode).getPreUpdateNode().asExpr() = - any(CfgNodes::ExprNodes::MemberCfgNode var | - exists(CfgNodes::StmtNodes::AssignStmtCfgNode assign | - var = assign.getLeftHandSide() and - node1.asStmt() = assign.getRightHandSide() - | - c.isSingleton(any(Content::FieldContent ct | ct.getName() = var.getMemberName())) - ) - ).getBase() + exists(CfgNodes::ExprNodes::MemberCfgWriteAccessNode var, Content::FieldContent fc | + node2.(PostUpdateNode).getPreUpdateNode().asExpr() = var.getBase() and + node1.asStmt() = var.getAssignStmt().getRightHandSide() and + fc.getName() = var.getMemberName() and + c.isSingleton(fc) + ) + or + exists( + CfgNodes::ExprNodes::IndexCfgWriteNode var, Content::KnownElementContent ec, int index, + CfgNodes::ExprCfgNode e + | + node2.(PostUpdateNode).getPreUpdateNode().asExpr() = var.getBase() and + node1.asStmt() = var.getAssignStmt().getRightHandSide() and + c.isKnownOrUnknownElement(ec) and + index = ec.getIndex().asInt() and + e = var.getIndex() + | + index = e.getValue().asInt() + or + not exists(e.getValue().asInt()) + ) } /** * Holds if there is a read step of content `c` from `node1` to `node2`. 
*/ predicate readStep(Node node1, ContentSet c, Node node2) { - node2.asExpr() = - any(CfgNodes::ExprNodes::MemberCfgReadAccessNode var | - node1.asExpr() = var.getBase() and - c.isSingleton(any(Content::FieldContent ct | ct.getName() = var.getMemberName())) - ) + exists(CfgNodes::ExprNodes::MemberCfgReadAccessNode var, Content::FieldContent fc | + node2.asExpr() = var and + node1.asExpr() = var.getBase() and + fc.getName() = var.getMemberName() and + c.isSingleton(fc) + ) + or + exists( + CfgNodes::ExprNodes::IndexCfgReadNode var, Content::KnownElementContent ec, int index, + CfgNodes::ExprCfgNode e + | + node2.asExpr() = var and + node1.asExpr() = var.getBase() and + c.isKnownOrUnknownElement(ec) and + index = ec.getIndex().asInt() and + e = var.getIndex() + | + index = e.getValue().asInt() + or + not exists(e.getValue().asInt()) + ) } /** @@ -584,7 +642,7 @@ class DataFlowExpr = CfgNodes::ExprCfgNode; * Holds if access paths with `c` at their head always should be tracked at high * precision. This disables adaptive access path precision for such access paths. */ -predicate forceHighPrecision(Content c) { none() } +predicate forceHighPrecision(Content c) { c instanceof Content::ElementContent } class NodeRegion instanceof Unit { string toString() { result = "NodeRegion" } @@ -653,7 +711,18 @@ class ContentApprox extends TContentApprox { } /** Gets an approximated value for content `c`. */ -ContentApprox getContentApprox(Content c) { result = TNonElementContentApprox(c) } +ContentApprox getContentApprox(Content c) { + c instanceof Content::UnknownElementContent and + result = TUnknownElementContentApprox() + or + exists(c.(Content::KnownElementContent).getIndex().asInt()) and + result = TKnownIntegerElementContentApprox() + or + result = + TKnownElementContentApprox(approxKnownElementIndex(c.(Content::KnownElementContent).getIndex())) + or + result = TNonElementContentApprox(c) +} /** * A unit class for adding additional jump steps. 
diff --git a/powershell/ql/lib/semmle/code/powershell/dataflow/internal/DataFlowPublic.qll b/powershell/ql/lib/semmle/code/powershell/dataflow/internal/DataFlowPublic.qll index 5dd19b1eabe..03ceb54bfd2 100644 --- a/powershell/ql/lib/semmle/code/powershell/dataflow/internal/DataFlowPublic.qll +++ b/powershell/ql/lib/semmle/code/powershell/dataflow/internal/DataFlowPublic.qll @@ -169,6 +169,26 @@ class Content extends TContent { /** Provides different sub classes of `Content`. */ module Content { + /** An element in a collection, for example an element in an array or in a hash. */ + class ElementContent extends Content, TElementContent { } + + /** An element in a collection at a known index. */ + class KnownElementContent extends ElementContent, TKnownElementContent { + private ConstantValue cv; + + KnownElementContent() { this = TKnownElementContent(cv) } + + /** Gets the index in the collection. */ + ConstantValue getIndex() { result = cv } + + override string toString() { result = "element " + cv } + } + + /** An element in a collection at an unknown index. */ + class UnknownElementContent extends ElementContent, TUnknownElementContent { + override string toString() { result = "element" } + } + /** A field of an object. */ class FieldContent extends Content, TFieldContent { private string name; @@ -192,19 +212,66 @@ class ContentSet extends TContentSet { /** Holds if this content set is the singleton `{c}`. */ predicate isSingleton(Content c) { this = TSingletonContent(c) } + /** Holds if this content set represents all `ElementContent`s. */ + predicate isAnyElement() { this = TAnyElementContent() } + + /** + * Holds if this content set represents a specific known element index, or an + * unknown element index. + */ + predicate isKnownOrUnknownElement(Content::KnownElementContent c) { + this = TKnownOrUnknownElementContent(c) + } + /** Gets a textual representation of this content set. 
*/ string toString() { exists(Content c | this.isSingleton(c) and result = c.toString() ) + or + this.isAnyElement() and + result = "any element" + or + exists(Content::KnownElementContent c | + this.isKnownOrUnknownElement(c) and + result = c + " or unknown" + ) } - /** Gets a content that may be stored into when storing into this set. */ - Content getAStoreContent() { this.isSingleton(result) } + Content getAStoreContent() { + this.isSingleton(result) + or + // For reverse stores, `a[unknown][0] = x`, it is important that the read-step + // from `a` to `a[unknown]` (which can read any element), gets translated into + // a reverse store step that stores only into `?` + this.isAnyElement() and + result = TUnknownElementContent() + or + // For reverse stores, `a[1][0] = x`, it is important that the read-step + // from `a` to `a[1]` (which can read both elements stored at exactly index `1` + // and elements stored at unknown index), gets translated into a reverse store + // step that stores only into `1` + this.isKnownOrUnknownElement(result) + } + + pragma[nomagic] + private Content getAnElementReadContent() { + exists(Content::KnownElementContent c | this.isKnownOrUnknownElement(c) | + result = c or + result = TUnknownElementContent() + ) + } /** Gets a content that may be read from when reading from this set. 
*/ - Content getAReadContent() { this.isSingleton(result) } + Content getAReadContent() { + this.isSingleton(result) + or + this.isAnyElement() and + result instanceof Content::ElementContent + or + result = this.getAnElementReadContent() + } } /** diff --git a/powershell/ql/lib/semmle/code/powershell/dataflow/internal/TaintTrackingPrivate.qll b/powershell/ql/lib/semmle/code/powershell/dataflow/internal/TaintTrackingPrivate.qll index 08f53d78bc1..ac6a32de36d 100644 --- a/powershell/ql/lib/semmle/code/powershell/dataflow/internal/TaintTrackingPrivate.qll +++ b/powershell/ql/lib/semmle/code/powershell/dataflow/internal/TaintTrackingPrivate.qll @@ -30,7 +30,19 @@ private module Cached { */ cached predicate defaultAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo, string model) { - none() + // Although flow through collections is modeled precisely using stores/reads, we still + // allow flow out of a _tainted_ collection. This is needed in order to support taint- + // tracking configurations where the source is a collection. + exists(DataFlow::ContentSet c | readStep(nodeFrom, c, nodeTo) | + c.isSingleton(any(DataFlow::Content::ElementContent ec)) + or + c.isKnownOrUnknownElement(_) + // or + // TODO: We don't generate this one from readSteps yet, but we will as + // soon as we start on models-as-data. 
+ // c.isAnyElement() + ) and + model = "" } /** diff --git a/powershell/ql/lib/semmle/code/powershell/typetracking/internal/TypeTrackingImpl.qll b/powershell/ql/lib/semmle/code/powershell/typetracking/internal/TypeTrackingImpl.qll index b806c9400f2..e2d21739a57 100644 --- a/powershell/ql/lib/semmle/code/powershell/typetracking/internal/TypeTrackingImpl.qll +++ b/powershell/ql/lib/semmle/code/powershell/typetracking/internal/TypeTrackingImpl.qll @@ -53,11 +53,21 @@ private module SummaryTypeTrackerInput implements SummaryTypeTracker::Input { class ContentFilter = TypeTrackingInput::ContentFilter; ContentFilter getFilterFromWithoutContentStep(Content content) { - none() // TODO + ( + content.isAnyElement() + or + content.isSingleton(any(DataFlow::Content::UnknownElementContent c)) + ) and + result = MkElementFilter() } ContentFilter getFilterFromWithContentStep(Content content) { - none() // TODO + ( + content.isAnyElement() + or + content.isSingleton(any(DataFlow::Content::ElementContent c)) + ) and + result = MkElementFilter() } // Summaries and their stacks