Files
codeql/javascript/ql/lib/semmle/javascript/dataflow/Configuration.qll

1999 lines
68 KiB
Plaintext

/**
* Provides a class for performing customized inter-procedural data flow.
*
* The class in this module provides an interface for performing inter-procedural
* data flow from a custom set of source nodes to a custom set of sink nodes.
* Additional data flow edges can be specified, and conversely certain nodes or
* edges can be designated as _barriers_ that block flow.
*
* NOTE: The API of this library is not stable yet and may change in
* the future.
*
*
* # Technical overview
*
* This module implements a summarization-based inter-procedural data flow
* analysis. Data flow is tracked through local variables, imports and (some)
* object properties, as well as into and out of function calls. The latter
* is done by computing function summaries that record which function parameters
* and captured variables may flow into the function's return value.
*
* For example, for the function
*
* ```
* function choice(b, x, y) {
* return b ? x : y;
* }
* ```
*
* we determine that its second and third (but not the first) parameter may
* flow into its return value.
*
* Hence when we see a call `a = choice(b, c, d)`, we propagate flow from `c`
* to `a` and from `d` to `a` (but not from `b` to `a`).
*
* The inter-procedural data flow graph is represented by class `PathNode`
* and its member predicate `getASuccessor`. Each `PathNode` is a pair
* of an underlying `DataFlow::Node` and a `DataFlow::Configuration`, which
* can be accessed through member predicates `getNode` and `getConfiguration`,
* respectively.
*
* # Implementation details
*
* Overall, flow is tracked forwards, starting at the sources and looking
* for an inter-procedural path to a sink.
*
* Function summaries are computed by predicate `flowThroughCall`.
* Predicate `flowStep` computes a "one-step" flow relation, where, however,
* a single step may be based on a function summary, and hence already involve
* inter-procedural flow.
*
* Flow steps are classified as being "call", "return" or "level": a call step
* goes from an argument to a parameter, an return step from a return to a caller,
* and a level step is either a step that does not involve function calls
* or a step through a summary.
*
* Predicate `reachableFromSource` computes inter-procedural paths from
* sources along the `flowStep` relation, keeping track of whether any of
* these steps is a call step. Return steps are only allowed if no previous
* step was a call step to avoid confusion between different call sites.
*
* Predicate `onPath` builds on `reachableFromSource` to compute full
* paths from sources to sinks, this time starting with the sinks. Similar
* to above, it keeps track of whether any of the steps from a node to a
* sink is a return step, and only considers call steps for paths that do
* not contain a return step.
*
* Finally, we build `PathNode`s for all nodes that appear on a path
* computed by `onPath`.
*/
private import javascript
private import internal.FlowSteps
private import internal.AccessPaths
private import internal.CallGraphs
private import semmle.javascript.Unit
private import semmle.javascript.internal.CachedStages
/**
* A data flow tracking configuration for finding inter-procedural paths from
* sources to sinks.
*
* Each use of the data flow tracking library must define its own unique extension
* of this abstract class. A configuration defines a set of relevant sources
* (`isSource`) and sinks (`isSink`), and may additionally
* define additional edges beyond the standard data flow edges (`isAdditionalFlowStep`)
* and prohibit intermediate flow nodes and edges (`isBarrier`).
*/
abstract class Configuration extends string {
bindingset[this]
Configuration() { any() }
/**
* Gets the unique identifier of this configuration among all data flow tracking
* configurations.
*/
string getId() { result = this }
/**
* Holds if `source` is a relevant data flow source for this configuration.
*/
predicate isSource(DataFlow::Node source) { none() }
/**
* Gets the flow label to associate with sources added by the 1-argument `isSource` predicate.
*
* For taint-tracking configurations, this defaults to `taint` and for other data-flow configurations
* it defaults to `data`.
*
* Overriding this predicate is rarely needed, and overriding the 2-argument `isSource` predicate
* should be preferred when possible.
*/
FlowLabel getDefaultSourceLabel() { result = FlowLabel::data() }
/**
* Holds if `source` is a source of flow labeled with `lbl` that is relevant
* for this configuration.
*/
predicate isSource(DataFlow::Node source, FlowLabel lbl) { none() }
/**
* Holds if `sink` is a relevant data flow sink for this configuration.
*/
predicate isSink(DataFlow::Node sink) { none() }
/**
* Holds if `sink` is a sink of flow labeled with `lbl` that is relevant
* for this configuration.
*/
predicate isSink(DataFlow::Node sink, FlowLabel lbl) { none() }
/**
* Holds if `src -> trg` should be considered as a flow edge
* in addition to standard data flow edges.
*/
predicate isAdditionalFlowStep(DataFlow::Node src, DataFlow::Node trg) { none() }
/**
* INTERNAL: This predicate should not normally be used outside the data flow
* library.
*
* Holds if `src -> trg` should be considered as a flow edge
* in addition to standard data flow edges, with `valuePreserving`
* indicating whether the step preserves values or just taintedness.
*/
predicate isAdditionalFlowStep(DataFlow::Node src, DataFlow::Node trg, boolean valuePreserving) {
isAdditionalFlowStep(src, trg) and valuePreserving = true
}
/**
* Holds if `src -> trg` is a flow edge converting flow with label `inlbl` to
* flow with label `outlbl`.
*/
predicate isAdditionalFlowStep(
DataFlow::Node src, DataFlow::Node trg, FlowLabel inlbl, FlowLabel outlbl
) {
none()
}
/**
* Holds if the intermediate flow node `node` is prohibited.
*/
predicate isBarrier(DataFlow::Node node) {
exists(BarrierGuardNode guard |
isBarrierGuardInternal(guard) and
barrierGuardBlocksNode(guard, node, "")
)
}
/**
* Holds if flow from `pred` to `succ` is prohibited.
*/
predicate isBarrierEdge(DataFlow::Node pred, DataFlow::Node succ) { none() }
/**
* Holds if flow with label `lbl` cannot flow from `pred` to `succ`.
*/
predicate isBarrierEdge(DataFlow::Node pred, DataFlow::Node succ, FlowLabel lbl) { none() }
/**
* Holds if flow with label `lbl` cannot flow into `node`.
*/
predicate isLabeledBarrier(DataFlow::Node node, FlowLabel lbl) {
exists(BarrierGuardNode guard |
isBarrierGuardInternal(guard) and
barrierGuardBlocksNode(guard, node, lbl)
)
or
none() // relax type inference to account for overriding
}
/**
* Holds if data flow node `guard` can act as a barrier when appearing
* in a condition.
*
* For example, if `guard` is the comparison expression in
* `if(x == 'some-constant'){ ... x ... }`, it could block flow of
* `x` into the "then" branch.
*/
predicate isBarrierGuard(BarrierGuardNode guard) { none() }
/**
* Holds if `guard` is a barrier guard for this configuration, added through
* `isBarrierGuard` or `AdditionalBarrierGuardNode`.
*/
pragma[nomagic]
private predicate isBarrierGuardInternal(BarrierGuardNode guard) {
isBarrierGuard(guard)
or
guard.(AdditionalBarrierGuardNode).appliesTo(this)
}
/**
* Holds if data may flow from `source` to `sink` for this configuration.
*/
predicate hasFlow(DataFlow::Node source, DataFlow::Node sink) {
isSource(_, this, _) and
isSink(_, this, _) and
exists(SourcePathNode flowsource, SinkPathNode flowsink |
hasFlowPath(flowsource, flowsink) and
source = flowsource.getNode() and
sink = flowsink.getNode()
)
}
/**
* Holds if data may flow from `source` to `sink` for this configuration.
*/
predicate hasFlowPath(SourcePathNode source, SinkPathNode sink) {
flowsTo(source, _, sink, _, this)
}
/**
* EXPERIMENTAL. This API may change in the future.
*
* Holds if `pred` should be stored in the object `succ` under the property `prop`.
* The object `succ` must be a `DataFlow::SourceNode` for the object wherein the value is stored.
*/
predicate isAdditionalStoreStep(DataFlow::Node pred, DataFlow::SourceNode succ, string prop) {
none()
}
/**
* EXPERIMENTAL. This API may change in the future.
*
* Holds if the property `prop` of the object `pred` should be loaded into `succ`.
*/
predicate isAdditionalLoadStep(DataFlow::Node pred, DataFlow::Node succ, string prop) { none() }
/**
* EXPERIMENTAL. This API may change in the future.
*
* Holds if the property `prop` should be copied from the object `pred` to the object `succ`.
*/
predicate isAdditionalLoadStoreStep(DataFlow::Node pred, DataFlow::Node succ, string prop) {
none()
}
/**
* EXPERIMENTAL. This API may change in the future.
*
* Holds if the property `loadProp` should be copied from the object `pred` to the property `storeProp` of object `succ`.
*/
predicate isAdditionalLoadStoreStep(
DataFlow::Node pred, DataFlow::Node succ, string loadProp, string storeProp
) {
none()
}
}
/**
* A label describing the kind of information tracked by a flow configuration.
*
* There are two standard labels "data" and "taint".
* - "data" only propagates along value-preserving data flow, such as assignments
* and parameter-passing, and is the default flow source for a `DataFlow::Configuration`.
* - "taint" additionally permits flow through transformations such as string operations,
* and is the default flow source for a `TaintTracking::Configuration`.
*/
abstract class FlowLabel extends string {
bindingset[this]
FlowLabel() { any() }
/**
* Holds if this is the standard `FlowLabel::data()` flow label,
* describing values that directly originate from a flow source.
*/
final predicate isData() { this = FlowLabel::data() }
/**
* Holds if this is the standard `FlowLabel::taint()` flow label,
* describing values that are influenced ("tainted") by a flow
* source, but not necessarily directly derived from it.
*/
final predicate isTaint() { this = FlowLabel::taint() }
/**
* Holds if this is one of the standard flow labels `FlowLabel::data()`
* or `FlowLabel::taint()`.
*/
final predicate isDataOrTaint() { isData() or isTaint() }
}
/**
* A kind of taint tracked by a taint-tracking configuration.
*
* This is an alias of `FlowLabel`, so the two types can be used interchangeably.
*/
class TaintKind = FlowLabel;
/**
* A standard flow label, that is, either `FlowLabel::data()` or `FlowLabel::taint()`.
*/
class StandardFlowLabel extends FlowLabel {
StandardFlowLabel() { this = "data" or this = "taint" }
}
module FlowLabel {
/**
* Gets the standard flow label for describing values that directly originate from a flow source.
*/
FlowLabel data() { result = "data" }
/**
* Gets the standard flow label for describing values that are influenced ("tainted") by a flow
* source, but not necessarily directly derived from it.
*/
FlowLabel taint() { result = "taint" }
}
/**
* A node that can act as a barrier when appearing in a condition.
*
* To add a barrier guard to a configuration, define a subclass of this class overriding the
* `blocks` predicate, and then extend the configuration's `isBarrierGuard` predicate to include
* the new class.
*
* Note that it is generally a good idea to make the characteristic predicate of barrier guard
* classes as precise as possible: if two subclasses of `BarrierGuardNode` overlap, their
* implementations of `blocks` will _both_ apply to any configuration that includes either of them.
*/
abstract class BarrierGuardNode extends DataFlow::Node {
/**
* Holds if this node blocks expression `e` provided it evaluates to `outcome`.
*
* This will block all flow labels.
*/
abstract predicate blocks(boolean outcome, Expr e);
/**
* Holds if this node blocks expression `e` from flow of type `label`, provided it evaluates to `outcome`.
*/
predicate blocks(boolean outcome, Expr e, FlowLabel label) { none() }
}
/**
* Holds if data flow node `guard` acts as a barrier for data flow.
*
* `label` is bound to the blocked label, or the empty string if all labels should be blocked.
*/
pragma[nomagic]
private predicate barrierGuardBlocksExpr(
BarrierGuardNode guard, boolean outcome, Expr test, string label
) {
guard.blocks(outcome, test) and label = ""
or
guard.blocks(outcome, test, label)
or
// Handle labelled barrier guard functions specially, to avoid negative recursion
// through the non-abstract 3-argument version of blocks().
guard.(AdditionalBarrierGuardCall).internalBlocksLabel(outcome, test, label)
}
/**
* Holds if `guard` may block the flow of a value reachable through exploratory flow.
*/
pragma[nomagic]
private predicate barrierGuardIsRelevant(BarrierGuardNode guard) {
exists(Expr e |
barrierGuardBlocksExpr(guard, _, e, _) and
isRelevantForward(e.flow(), _)
)
}
/**
* Holds if data flow node `guard` acts as a barrier for data flow due to aliasing through
* an access path.
*
* `label` is bound to the blocked label, or the empty string if all labels should be blocked.
*/
pragma[nomagic]
private predicate barrierGuardBlocksAccessPath(
BarrierGuardNode guard, boolean outcome, AccessPath ap, string label
) {
barrierGuardIsRelevant(guard) and
barrierGuardBlocksExpr(guard, outcome, ap.getAnInstance(), label)
}
/**
* Holds if there exists an input variable of `ref` that blocks the label `label`.
*
* This predicate is outlined to give the optimizer a hint about the join ordering.
*/
pragma[nomagic]
private predicate barrierGuardBlocksSsaRefinement(
BarrierGuardNode guard, boolean outcome, SsaRefinementNode ref, string label
) {
barrierGuardIsRelevant(guard) and
guard.getEnclosingExpr() = ref.getGuard().getTest() and
forex(SsaVariable input | input = ref.getAnInput() |
barrierGuardBlocksExpr(guard, outcome, input.getAUse(), label)
)
}
/**
* Holds if the result of `guard` is used in the branching condition `cond`.
*
* `outcome` is bound to the outcome of `cond` for join-ordering purposes.
*/
pragma[nomagic]
private predicate barrierGuardUsedInCondition(
BarrierGuardNode guard, ConditionGuardNode cond, boolean outcome
) {
barrierGuardIsRelevant(guard) and
outcome = cond.getOutcome() and
(
cond.getTest() = guard.getEnclosingExpr()
or
cond.getTest().flow().getImmediatePredecessor+() = guard
)
}
/**
* Holds if data flow node `nd` acts as a barrier for data flow, possibly due to aliasing
* through an access path.
*
* `label` is bound to the blocked label, or the empty string if all labels should be blocked.
*/
pragma[nomagic]
private predicate barrierGuardBlocksNode(BarrierGuardNode guard, DataFlow::Node nd, string label) {
// 1) `nd` is a use of a refinement node that blocks its input variable
exists(SsaRefinementNode ref, boolean outcome |
nd = DataFlow::ssaDefinitionNode(ref) and
outcome = ref.getGuard().(ConditionGuardNode).getOutcome() and
barrierGuardBlocksSsaRefinement(guard, outcome, ref, label)
)
or
// 2) `nd` is an instance of an access path `p`, and dominated by a barrier for `p`
exists(AccessPath p, BasicBlock bb, ConditionGuardNode cond, boolean outcome |
nd = DataFlow::valueNode(p.getAnInstanceIn(bb)) and
barrierGuardUsedInCondition(guard, cond, outcome) and
barrierGuardBlocksAccessPath(guard, outcome, p, label) and
cond.dominates(bb)
)
}
/**
* Holds if `guard` should block flow along the edge `pred -> succ`.
*
* `label` is bound to the blocked label, or the empty string if all labels should be blocked.
*/
pragma[nomagic]
private predicate barrierGuardBlocksEdge(
BarrierGuardNode guard, DataFlow::Node pred, DataFlow::Node succ, string label
) {
exists(
SsaVariable input, SsaPhiNode phi, BasicBlock bb, ConditionGuardNode cond, boolean outcome
|
bb = getADominatedBasicBlock(guard, cond) and
pred = DataFlow::ssaDefinitionNode(input) and
succ = DataFlow::ssaDefinitionNode(phi) and
input = phi.getInputFromBlock(bb) and
outcome = cond.getOutcome() and
barrierGuardBlocksExpr(guard, outcome, input.getAUse(), label)
)
}
/**
* Gets a basicblock that is dominated by `cond`, where the test for `cond` cond is `guard`.
*
* This predicate exists to get a better join-order for the `barrierGuardBlocksEdge` predicate above.
*/
pragma[noinline]
private BasicBlock getADominatedBasicBlock(BarrierGuardNode guard, ConditionGuardNode cond) {
barrierGuardIsRelevant(guard) and
guard.getEnclosingExpr() = cond.getTest() and
cond.dominates(result)
}
/**
* Holds if there is a barrier edge `pred -> succ` in `cfg` either through an explicit barrier edge
* or one implied by a barrier guard.
*
* Only holds for barriers that should apply to all flow labels.
*/
private predicate isBarrierEdge(Configuration cfg, DataFlow::Node pred, DataFlow::Node succ) {
cfg.isBarrierEdge(pred, succ)
or
exists(DataFlow::BarrierGuardNode guard |
cfg.isBarrierGuard(guard) and
barrierGuardBlocksEdge(guard, pred, succ, "")
)
}
/**
* Holds if there is a labeled barrier edge `pred -> succ` in `cfg` either through an explicit barrier edge
* or one implied by a barrier guard.
*/
private predicate isLabeledBarrierEdge(
Configuration cfg, DataFlow::Node pred, DataFlow::Node succ, DataFlow::FlowLabel label
) {
cfg.isBarrierEdge(pred, succ, label)
or
exists(DataFlow::BarrierGuardNode guard |
cfg.isBarrierGuard(guard) and
barrierGuardBlocksEdge(guard, pred, succ, label)
)
}
/**
* A guard node that only blocks specific labels.
*/
abstract class LabeledBarrierGuardNode extends BarrierGuardNode {
override predicate blocks(boolean outcome, Expr e) { none() }
}
/**
* A data flow edge that should be added to all data flow configurations in
* addition to standard data flow edges.
*
* This class is a singleton, and thus subclasses do not need to specify a characteristic predicate.
*
* Note: For performance reasons, all subclasses of this class should be part
* of the standard library. Override `Configuration::isAdditionalFlowStep`
* for analysis-specific flow steps.
*/
class SharedFlowStep extends Unit {
/**
* Holds if `pred` → `succ` should be considered a data flow edge.
*/
predicate step(DataFlow::Node pred, DataFlow::Node succ) { none() }
/**
* Holds if `pred` → `succ` should be considered a data flow edge
* transforming values with label `predlbl` to have label `succlbl`.
*/
predicate step(
DataFlow::Node pred, DataFlow::Node succ, DataFlow::FlowLabel predlbl,
DataFlow::FlowLabel succlbl
) {
none()
}
/**
* Holds if `pred` should be stored in the object `succ` under the property `prop`.
* The object `succ` must be a `DataFlow::SourceNode` for the object wherein the value is stored.
*/
predicate storeStep(DataFlow::Node pred, DataFlow::SourceNode succ, string prop) { none() }
/**
* Holds if the property `prop` of the object `pred` should be loaded into `succ`.
*/
predicate loadStep(DataFlow::Node pred, DataFlow::Node succ, string prop) { none() }
/**
* Holds if the property `prop` should be copied from the object `pred` to the object `succ`.
*/
predicate loadStoreStep(DataFlow::Node pred, DataFlow::Node succ, string prop) { none() }
/**
* Holds if the property `loadProp` should be copied from the object `pred` to the property `storeProp` of object `succ`.
*/
predicate loadStoreStep(
DataFlow::Node pred, DataFlow::Node succ, string loadProp, string storeProp
) {
none()
}
}
/**
* Contains predicates for accessing the steps contributed by `SharedFlowStep` subclasses.
*/
cached
module SharedFlowStep {
cached
private module Internal {
// Forces this to be part of the `FlowSteps` stage.
// We use a public predicate in a private module to avoid warnings about this being unused.
cached
predicate forceStage() { Stages::FlowSteps::ref() }
}
/**
* Holds if `pred` → `succ` should be considered a data flow edge.
*/
cached
predicate step(DataFlow::Node pred, DataFlow::Node succ) {
any(SharedFlowStep s).step(pred, succ)
}
/**
* Holds if `pred` → `succ` should be considered a data flow edge
* transforming values with label `predlbl` to have label `succlbl`.
*/
cached
predicate step(
DataFlow::Node pred, DataFlow::Node succ, DataFlow::FlowLabel predlbl,
DataFlow::FlowLabel succlbl
) {
any(SharedFlowStep s).step(pred, succ, predlbl, succlbl)
}
/**
* Holds if `pred` should be stored in the object `succ` under the property `prop`.
* The object `succ` must be a `DataFlow::SourceNode` for the object wherein the value is stored.
*/
cached
predicate storeStep(DataFlow::Node pred, DataFlow::SourceNode succ, string prop) {
any(SharedFlowStep s).storeStep(pred, succ, prop)
}
/**
* Holds if the property `prop` of the object `pred` should be loaded into `succ`.
*/
cached
predicate loadStep(DataFlow::Node pred, DataFlow::Node succ, string prop) {
any(SharedFlowStep s).loadStep(pred, succ, prop)
}
/**
* Holds if the property `prop` should be copied from the object `pred` to the object `succ`.
*/
cached
predicate loadStoreStep(DataFlow::Node pred, DataFlow::Node succ, string prop) {
any(SharedFlowStep s).loadStoreStep(pred, succ, prop)
}
/**
* Holds if the property `loadProp` should be copied from the object `pred` to the property `storeProp` of object `succ`.
*/
cached
predicate loadStoreStep(
DataFlow::Node pred, DataFlow::Node succ, string loadProp, string storeProp
) {
any(SharedFlowStep s).loadStoreStep(pred, succ, loadProp, storeProp)
}
}
/**
* A collection of pseudo-properties that are used in multiple files.
*
* A pseudo-property represents the location where some value is stored in an object.
*
* For use with load/store steps in `DataFlow::SharedFlowStep` and TypeTracking.
*/
module PseudoProperties {
bindingset[s]
private string pseudoProperty(string s) { result = "$" + s + "$" }
bindingset[s, v]
private string pseudoProperty(string s, string v) { result = "$" + s + "|" + v + "$" }
/**
* Gets a pseudo-property for the location of elements in a `Set`
*/
string setElement() { result = pseudoProperty("setElement") }
/**
* Gets a pseudo-property for the location of elements in a JavaScript iterator.
*/
string iteratorElement() { result = pseudoProperty("iteratorElement") }
/**
* Gets a pseudo-property for the location of elements in an `Array`.
*/
string arrayElement() { result = pseudoProperty("arrayElement") }
/**
* Gets a pseudo-property for the location of the `i`th element in an `Array`.
*/
bindingset[i]
string arrayElement(int i) {
i < 5 and result = i.toString()
or
result = arrayElement()
}
/**
* Gets a pseudo-property for the location of elements in some array-like object. (Set, Array, or Iterator).
*/
string arrayLikeElement() { result = [setElement(), iteratorElement(), arrayElement()] }
/**
* Gets a pseudo-property for the location of map values, where the key is unknown.
*/
string mapValueUnknownKey() { result = pseudoProperty("mapValueUnknownKey") }
/**
* Gets a pseudo-property for the location of all the values in a map.
*/
string mapValueAll() { result = pseudoProperty("allMapValues") }
/**
* Gets a pseudo-property for the location of a map value where the key is `key`.
* The string value of the `key` is encoded in the result, and there is only a result if the string value of `key` is known.
*/
pragma[inline]
string mapValueKnownKey(DataFlow::Node key) {
result = mapValueKey(any(string s | key.mayHaveStringValue(s)))
}
/**
* Gets a pseudo-property for the location of a map value where the key is `key`.
*/
bindingset[key]
string mapValueKey(string key) { result = pseudoProperty("mapValue", key) }
/**
* Gets a pseudo-property for the location of a map value where the key is `key`.
*/
pragma[inline]
string mapValue(DataFlow::Node key) {
result = mapValueKnownKey(key)
or
not exists(mapValueKnownKey(key)) and
result = mapValueUnknownKey()
}
}
/**
* A data flow node that should be considered a source for some specific configuration,
* in addition to any other sources that configuration may recognize.
*/
abstract class AdditionalSource extends DataFlow::Node {
/**
* Holds if this data flow node should be considered a source node for
* configuration `cfg`.
*/
predicate isSourceFor(Configuration cfg) { none() }
/**
* Holds if this data flow node should be considered a source node for
* values labeled with `lbl` under configuration `cfg`.
*/
predicate isSourceFor(Configuration cfg, FlowLabel lbl) { none() }
}
/**
* A data flow node that should be considered a sink for some specific configuration,
* in addition to any other sinks that configuration may recognize.
*/
abstract class AdditionalSink extends DataFlow::Node {
/**
* Holds if this data flow node should be considered a sink node for
* configuration `cfg`.
*/
predicate isSinkFor(Configuration cfg) { none() }
/**
* Holds if this data flow node should be considered a sink node for
* values labeled with `lbl` under configuration `cfg`.
*/
predicate isSinkFor(Configuration cfg, FlowLabel lbl) { none() }
}
/**
* Additional flow step to model flow from import specifiers into the SSA variable
* corresponding to the imported variable.
*/
private class FlowStepThroughImport extends SharedFlowStep {
override predicate step(DataFlow::Node pred, DataFlow::Node succ) {
exists(ImportSpecifier specifier |
pred = DataFlow::valueNode(specifier) and
succ = DataFlow::ssaDefinitionNode(SSA::definition(specifier))
)
}
}
/**
* Holds if there is a flow step from `pred` to `succ` described by `summary`
* under configuration `cfg`, disregarding barriers.
*
* Summary steps through function calls are not taken into account.
*/
pragma[inline]
private predicate basicFlowStepNoBarrier(
DataFlow::Node pred, DataFlow::Node succ, PathSummary summary, DataFlow::Configuration cfg
) {
// Local flow
exists(FlowLabel predlbl, FlowLabel succlbl |
localFlowStep(pred, succ, cfg, predlbl, succlbl) and
not cfg.isBarrierEdge(pred, succ) and
summary = MkPathSummary(false, false, predlbl, succlbl)
)
or
// Flow through properties of objects
propertyFlowStep(pred, succ) and
summary = PathSummary::level()
or
// Flow through global variables
globalFlowStep(pred, succ) and
summary = PathSummary::level()
or
// Flow into function
callStep(pred, succ) and
summary = PathSummary::call()
or
// Flow out of function
returnStep(pred, succ) and
summary = PathSummary::return()
}
/**
* Holds if there is a flow step from `pred` to `succ` under configuration `cfg`,
* including both basic flow steps and steps into/out of properties.
*
* This predicate is field insensitive (it does not distinguish between `x` and `x.p`)
* and hence should only be used for purposes of approximation.
*/
pragma[noinline]
private predicate exploratoryFlowStep(
DataFlow::Node pred, DataFlow::Node succ, DataFlow::Configuration cfg
) {
isRelevantForward(pred, cfg) and
(
basicFlowStepNoBarrier(pred, succ, _, cfg) or
exploratoryLoadStep(pred, succ, cfg) or
isAdditionalLoadStoreStep(pred, succ, _, _, cfg) or
// the following three disjuncts taken together over-approximate flow through
// higher-order calls
exploratoryCallbackStep(pred, succ) or
succ = pred.(DataFlow::FunctionNode).getAParameter() or
exploratoryBoundInvokeStep(pred, succ)
)
}
/**
* Holds if `nd` is a source node for configuration `cfg`.
*/
private predicate isSource(DataFlow::Node nd, DataFlow::Configuration cfg, FlowLabel lbl) {
(cfg.isSource(nd) or nd.(AdditionalSource).isSourceFor(cfg)) and
lbl = cfg.getDefaultSourceLabel()
or
nd.(AdditionalSource).isSourceFor(cfg, lbl)
or
cfg.isSource(nd, lbl)
}
/**
* Holds if `nd` is a sink node for configuration `cfg`.
*/
private predicate isSink(DataFlow::Node nd, DataFlow::Configuration cfg, FlowLabel lbl) {
(cfg.isSink(nd) or nd.(AdditionalSink).isSinkFor(cfg)) and
lbl = any(StandardFlowLabel f)
or
nd.(AdditionalSink).isSinkFor(cfg, lbl)
or
cfg.isSink(nd, lbl)
}
/**
* Holds if there exists a load-step from `pred` to `succ` under configuration `cfg`,
* and the forwards exploratory flow has found a relevant store-step with the same property as the load-step.
*/
private predicate exploratoryLoadStep(
DataFlow::Node pred, DataFlow::Node succ, DataFlow::Configuration cfg
) {
exists(string prop | prop = getAForwardRelevantLoadProperty(cfg) |
isAdditionalLoadStep(pred, succ, prop, cfg)
or
basicLoadStep(pred, succ, prop)
)
}
/**
* Gets a property where the forwards exploratory flow has found a relevant store-step with that property.
* The property is therefore relevant for load-steps in the forward exploratory flow.
*
* This private predicate is only used in `exploratoryLoadStep`, and exists as a separate predicate to give the compiler a hint about join-ordering.
*/
pragma[noinline]
private string getAForwardRelevantLoadProperty(DataFlow::Configuration cfg) {
exists(DataFlow::Node previous | isRelevantForward(previous, cfg) |
basicStoreStep(previous, _, result) or
isAdditionalStoreStep(previous, _, result, cfg)
)
or
result = getAPropertyUsedInLoadStore(cfg)
}
/**
* Gets a property that is used in an `additionalLoadStoreStep` where the loaded and stored property are not the same.
*
* The properties from this predicate are used as a white-list of properties for load/store steps that should always be considered in the exploratory flow.
*/
private string getAPropertyUsedInLoadStore(DataFlow::Configuration cfg) {
exists(string loadProp, string storeProp |
isAdditionalLoadStoreStep(_, _, loadProp, storeProp, cfg) and
storeProp != loadProp and
result = [storeProp, loadProp]
)
}
/**
* Holds if there exists a store-step from `pred` to `succ` under configuration `cfg`,
* and somewhere in the program there exists a load-step that could possibly read the stored value.
*/
pragma[noinline]
private predicate exploratoryForwardStoreStep(
DataFlow::Node pred, DataFlow::Node succ, DataFlow::Configuration cfg
) {
exists(string prop |
basicLoadStep(_, _, prop) or
isAdditionalLoadStep(_, _, prop, cfg) or
prop = getAPropertyUsedInLoadStore(cfg)
|
isAdditionalStoreStep(pred, succ, prop, cfg) or
basicStoreStep(pred, succ, prop)
)
}
/**
* Holds if there exists a store-step from `pred` to `succ` under configuration `cfg`,
* and `succ` has been found to be relevant during the backwards exploratory flow,
* and the backwards exploratory flow has found a relevant load-step with the same property as the store-step.
*/
private predicate exploratoryBackwardStoreStep(
DataFlow::Node pred, DataFlow::Node succ, DataFlow::Configuration cfg
) {
exists(string prop | prop = getABackwardsRelevantStoreProperty(cfg) |
isAdditionalStoreStep(pred, succ, prop, cfg) or
basicStoreStep(pred, succ, prop)
)
}
/**
* Gets a property where the backwards exploratory flow has found a relevant load-step with that property.
* The property is therefore relevant for store-steps in the backwards exploratory flow.
*
* This private predicate is only used in `exploratoryBackwardStoreStep`, and exists as a separate predicate to give the compiler a hint about join-ordering.
*/
pragma[noinline]
private string getABackwardsRelevantStoreProperty(DataFlow::Configuration cfg) {
exists(DataFlow::Node mid | isRelevant(mid, cfg) |
basicLoadStep(mid, _, result) or
isAdditionalLoadStep(mid, _, result, cfg)
)
or
result = getAPropertyUsedInLoadStore(cfg)
}
/**
* Holds if `nd` may be reachable from a source under `cfg`.
*
* No call/return matching is done, so this is a relatively coarse over-approximation.
*/
private predicate isRelevantForward(DataFlow::Node nd, DataFlow::Configuration cfg) {
isSource(nd, cfg, _) and isLive()
or
exists(DataFlow::Node mid |
exploratoryFlowStep(mid, nd, cfg)
or
isRelevantForward(mid, cfg) and
exploratoryForwardStoreStep(mid, nd, cfg)
)
}
/**
* Holds if `nd` may be on a path from a source to a sink under `cfg`.
*
* No call/return matching is done, so this is a relatively coarse over-approximation.
*/
private predicate isRelevant(DataFlow::Node nd, DataFlow::Configuration cfg) {
isRelevantForward(nd, cfg) and isSink(nd, cfg, _)
or
exists(DataFlow::Node mid | isRelevant(mid, cfg) | isRelevantBackStep(mid, nd, cfg))
}
/**
* Holds if there is backwards data-flow step from `mid` to `nd` under `cfg`.
*/
private predicate isRelevantBackStep(
DataFlow::Node mid, DataFlow::Node nd, DataFlow::Configuration cfg
) {
exploratoryFlowStep(nd, mid, cfg)
or
isRelevantForward(nd, cfg) and
exploratoryBackwardStoreStep(nd, mid, cfg)
}
/**
* Holds if `pred` is an input to `f` which is passed to `succ` at `invk`; that is,
* either `pred` is an argument of `f` and `succ` the corresponding parameter, or
* `pred` is a variable definition whose value is captured by `f` at `succ`.
*/
private predicate callInputStep(
Function f, DataFlow::Node invk, DataFlow::Node pred, DataFlow::Node succ,
DataFlow::Configuration cfg
) {
(
isRelevant(pred, cfg) and
argumentPassing(invk, pred, f, succ)
or
isRelevant(pred, cfg) and
exists(LocalVariable variable, SsaDefinition def |
pred = DataFlow::capturedVariableNode(variable) and
calls(invk, f) and
captures(f, variable, def) and
succ = DataFlow::ssaDefinitionNode(def)
)
) and
not cfg.isBarrier(succ) and
not isBarrierEdge(cfg, pred, succ)
}
/**
* Holds if `input`, which is either an argument to `f` at `invk` or a definition
* that is captured by `f`, may flow to `nd` under configuration `cfg` (possibly through
* callees, but not containing any unmatched calls or returns) along a path summarized by
* `summary`.
*
* Note that the summary does not take the initial step from argument to parameter
* into account.
*/
pragma[nomagic]
private predicate reachableFromInput(
Function f, DataFlow::Node invk, DataFlow::Node input, DataFlow::Node nd,
DataFlow::Configuration cfg, PathSummary summary
) {
callInputStep(f, invk, input, nd, cfg) and
summary = PathSummary::level() and
not cfg.isLabeledBarrier(nd, summary.getEndLabel())
or
exists(DataFlow::Node mid, PathSummary oldSummary |
reachableFromInput(f, invk, input, mid, cfg, oldSummary) and
appendStep(mid, cfg, oldSummary, nd, summary)
)
}
/**
* Holds if there is a level step from `pred` to `succ` under `cfg` that can be appended
* to a path represented by `oldSummary` yielding a path represented by `newSummary`.
*/
pragma[noinline]
private predicate appendStep(
DataFlow::Node pred, DataFlow::Configuration cfg, PathSummary oldSummary, DataFlow::Node succ,
PathSummary newSummary
) {
exists(PathSummary stepSummary |
flowStep(pred, cfg, succ, stepSummary) and
stepSummary.isLevel() and
newSummary = oldSummary.append(stepSummary)
)
}
/**
* Holds if a function invoked at `output` may return an expression into which `input`,
* which is either an argument or a definition captured by the function, flows under
* configuration `cfg`, possibly through callees.
*/
private predicate flowThroughCall(
DataFlow::Node input, DataFlow::Node output, DataFlow::Configuration cfg, PathSummary summary
) {
exists(Function f, DataFlow::FunctionReturnNode ret |
ret.getFunction() = f and
(calls(output, f) or callsBound(output, f, _)) and // Do not consider partial calls
reachableFromInput(f, output, input, ret, cfg, summary) and
not isBarrierEdge(cfg, ret, output) and
not isLabeledBarrierEdge(cfg, ret, output, summary.getEndLabel()) and
not cfg.isLabeledBarrier(output, summary.getEndLabel())
)
or
exists(Function f, LocalVariable variable |
reachableFromInput(f, _, input, output, cfg, summary) and
output = DataFlow::capturedVariableNode(variable) and
getCapturedVariableDepth(variable) < getContainerDepth(f) and // Only step outwards
not cfg.isLabeledBarrier(output, summary.getEndLabel())
)
or
exists(Function f, DataFlow::Node invk, DataFlow::Node ret |
DataFlow::exceptionalFunctionReturnNode(ret, f) and
DataFlow::exceptionalInvocationReturnNode(output, invk.asExpr()) and
(calls(invk, f) or callsBound(invk, f, _)) and
reachableFromInput(f, invk, input, ret, cfg, summary) and
not isBarrierEdge(cfg, ret, output) and
not isLabeledBarrierEdge(cfg, ret, output, summary.getEndLabel()) and
not cfg.isLabeledBarrier(output, summary.getEndLabel())
)
or
// exception thrown inside an immediately awaited function call.
exists(DataFlow::FunctionNode f, DataFlow::Node invk, DataFlow::Node ret |
f.getFunction().isAsync()
|
(calls(invk, f.getFunction()) or callsBound(invk, f.getFunction(), _)) and
reachableFromInput(f.getFunction(), invk, input, ret, cfg, summary) and
output = invk.asExpr().getExceptionTarget() and
f.getExceptionalReturn() = getThrowTarget(ret) and
invk = getAwaitOperand(_)
)
}
/**
* Holds if `pred` may flow into property `prop` of `succ` under configuration `cfg`
* along a path summarized by `summary`.
*/
pragma[nomagic]
private predicate storeStep(
DataFlow::Node pred, DataFlow::Node succ, string prop, DataFlow::Configuration cfg,
PathSummary summary
) {
isRelevant(pred, cfg) and
basicStoreStep(pred, succ, prop) and
summary = PathSummary::level()
or
isRelevant(pred, cfg) and
isAdditionalStoreStep(pred, succ, prop, cfg) and
summary = PathSummary::level()
or
exists(Function f, DataFlow::Node mid, DataFlow::Node invk |
not f.isAsyncOrGenerator() and invk = succ
or
// store in an immediately awaited function call
f.isAsync() and
invk = getAwaitOperand(succ)
|
// `f` stores its parameter `pred` in property `prop` of a value that flows back to the caller,
// and `succ` is an invocation of `f`
reachableFromInput(f, invk, pred, mid, cfg, summary) and
(
returnedPropWrite(f, _, prop, mid)
or
exists(DataFlow::SourceNode base | base.flowsToExpr(f.getAReturnedExpr()) |
isAdditionalStoreStep(mid, base, prop, cfg)
)
or
invk instanceof DataFlow::NewNode and
receiverPropWrite(f, prop, mid)
)
)
}
/**
* Gets a dataflow-node for the operand of the await-expression `await`.
*/
private DataFlow::Node getAwaitOperand(DataFlow::Node await) {
exists(AwaitExpr awaitExpr |
result = awaitExpr.getOperand().getUnderlyingValue().flow() and
await.asExpr() = awaitExpr
)
}
/**
* Holds if property `prop` of `arg` is read inside a function and returned to the call `succ`.
*/
private predicate parameterPropRead(
DataFlow::Node arg, string prop, DataFlow::Node succ, DataFlow::Configuration cfg,
PathSummary summary
) {
exists(Function f, DataFlow::Node read, DataFlow::Node invk, DataFlow::Node parm |
reachesReturn(f, read, cfg, summary) and
parameterPropReadStep(parm, read, prop, cfg, arg, invk, f, succ)
)
}
// all the non-recursive parts of parameterPropRead outlined into a precomputed predicate
pragma[noinline]
private predicate parameterPropReadStep(
DataFlow::SourceNode parm, DataFlow::Node read, string prop, DataFlow::Configuration cfg,
DataFlow::Node arg, DataFlow::Node invk, Function f, DataFlow::Node succ
) {
(
not f.isAsyncOrGenerator() and invk = succ
or
// load from an immediately awaited function call
f.isAsync() and
invk = getAwaitOperand(succ)
) and
callInputStep(f, invk, arg, parm, cfg) and
(
read = parm.getAPropertyRead(prop)
or
exists(DataFlow::Node use | parm.flowsTo(use) | isAdditionalLoadStep(use, read, prop, cfg))
)
}
/**
* Holds if `read` may flow into a return statement of `f` under configuration `cfg`
* (possibly through callees) along a path summarized by `summary`.
*/
private predicate reachesReturn(
Function f, DataFlow::Node read, DataFlow::Configuration cfg, PathSummary summary
) {
isRelevant(read, cfg) and
returnExpr(f, read, _) and
summary = PathSummary::level() and
callInputStep(f, _, _, _, _) // check that a relevant result can exist.
or
exists(DataFlow::Node mid, PathSummary oldSummary, PathSummary newSummary |
flowStep(read, cfg, mid, oldSummary) and
reachesReturn(f, mid, cfg, newSummary) and
summary = oldSummary.append(newSummary)
)
}
/**
* Holds if the property `prop` of the object `pred` should be loaded into `succ`.
*/
private predicate isAdditionalLoadStep(
DataFlow::Node pred, DataFlow::Node succ, string prop, DataFlow::Configuration cfg
) {
SharedFlowStep::loadStep(pred, succ, prop)
or
cfg.isAdditionalLoadStep(pred, succ, prop)
}
/**
* Holds if `pred` should be stored in the object `succ` under the property `prop`.
*/
private predicate isAdditionalStoreStep(
DataFlow::Node pred, DataFlow::Node succ, string prop, DataFlow::Configuration cfg
) {
SharedFlowStep::storeStep(pred, succ, prop)
or
cfg.isAdditionalStoreStep(pred, succ, prop)
}
/**
* Holds if the property `loadProp` should be copied from the object `pred` to the property `storeProp` of object `succ`.
*/
private predicate isAdditionalLoadStoreStep(
DataFlow::Node pred, DataFlow::Node succ, string loadProp, string storeProp,
DataFlow::Configuration cfg
) {
SharedFlowStep::loadStoreStep(pred, succ, loadProp, storeProp)
or
cfg.isAdditionalLoadStoreStep(pred, succ, loadProp, storeProp)
or
loadProp = storeProp and
(
SharedFlowStep::loadStoreStep(pred, succ, loadProp)
or
cfg.isAdditionalLoadStoreStep(pred, succ, loadProp)
)
}
/**
* Holds if property `prop` of `pred` may flow into `succ` along a path summarized by
* `summary`.
*/
private predicate loadStep(
DataFlow::Node pred, DataFlow::Node succ, string prop, DataFlow::Configuration cfg,
PathSummary summary
) {
isRelevant(pred, cfg) and
basicLoadStep(pred, succ, prop) and
summary = PathSummary::level()
or
isRelevant(pred, cfg) and
isAdditionalLoadStep(pred, succ, prop, cfg) and
summary = PathSummary::level()
or
parameterPropRead(pred, prop, succ, cfg, summary)
}
/**
* Holds if there is flow to `base.startProp`, and `base.startProp` flows to `nd.endProp` under `cfg/summary`.
*
* If `onlyRelevantInCall` is true, the `base` object will not be propagated out of return edges, because
* the flow that originally reached `base.startProp` used a call edge.
*/
pragma[noopt]
private predicate reachableFromStoreBase(
string startProp, string endProp, DataFlow::Node base, DataFlow::Node nd,
DataFlow::Configuration cfg, TPathSummary summary, boolean onlyRelevantInCall
) {
exists(TPathSummary s1, TPathSummary s2, DataFlow::Node rhs |
storeStep(rhs, nd, startProp, cfg, s2) and
endProp = startProp and
base = nd and
exists(boolean hasCall, DataFlow::FlowLabel data |
hasCall = hasCall(s2) and
data = DataFlow::FlowLabel::data() and
summary = MkPathSummary(false, hasCall, data, data)
)
|
reachableFromSource(rhs, cfg, s1) and
onlyRelevantInCall = hasCall(s1)
or
reachableFromStoreBase(_, _, _, rhs, cfg, s1, onlyRelevantInCall)
)
or
exists(DataFlow::Node mid, PathSummary oldSummary, PathSummary newSummary |
reachableFromStoreBase(startProp, endProp, base, mid, cfg, oldSummary, onlyRelevantInCall) and
flowStep(mid, cfg, nd, newSummary) and
exists(boolean hasReturn |
hasReturn = newSummary.hasReturn() and
onlyRelevantInCall.booleanAnd(hasReturn) = false
)
or
exists(string midProp |
reachableFromStoreBase(startProp, midProp, base, mid, cfg, oldSummary, onlyRelevantInCall) and
isAdditionalLoadStoreStep(mid, nd, midProp, endProp, cfg) and
newSummary = PathSummary::level()
)
|
summary = oldSummary.appendValuePreserving(newSummary)
)
}
private boolean hasCall(PathSummary summary) { result = summary.hasCall() }
/**
* Holds if the value of `pred` is written to a property of some base object, and that base
* object may flow into the base of property read `succ` under configuration `cfg` along
* a path summarized by `summary`.
*
* In other words, `pred` may flow to `succ` through a property.
*/
pragma[noinline]
private predicate flowThroughProperty(
DataFlow::Node pred, DataFlow::Node succ, DataFlow::Configuration cfg, PathSummary summary
) {
exists(PathSummary oldSummary, PathSummary newSummary |
storeToLoad(pred, succ, cfg, oldSummary, newSummary) and
summary = oldSummary.append(newSummary)
)
}
/**
* Holds if the value of `pred` is written to a property of some base object, and that base
* object may flow into the base of property read `succ` under configuration `cfg` along
* a path whose last step is summarized by `newSummary`, and the previous steps are summarized
* by `oldSummary`.
*/
pragma[noinline]
private predicate storeToLoad(
DataFlow::Node pred, DataFlow::Node succ, DataFlow::Configuration cfg, PathSummary oldSummary,
PathSummary newSummary
) {
exists(
string storeProp, string loadProp, DataFlow::Node storeBase, DataFlow::Node loadBase,
PathSummary s1, PathSummary s2
|
storeStep(pred, storeBase, storeProp, cfg, s1) and
reachableFromStoreBase(storeProp, loadProp, storeBase, loadBase, cfg, s2, _) and
oldSummary = s1.appendValuePreserving(s2) and
loadStep(loadBase, succ, loadProp, cfg, newSummary)
)
}
/**
* Holds if `arg` and `cb` are passed as arguments to a function which in turn
* invokes `cb`, passing `arg` as its `i`th argument.
*
* All of this is done under configuration `cfg`, and `arg` flows along a path
* summarized by `summary`, while `cb` is only tracked locally.
*/
private predicate summarizedHigherOrderCall(
DataFlow::Node arg, DataFlow::Node cb, int i, DataFlow::Configuration cfg, PathSummary summary
) {
exists(
Function f, DataFlow::InvokeNode inner, int j, DataFlow::Node innerArg,
DataFlow::SourceNode cbParm, PathSummary oldSummary
|
// Captured flow does not need to be summarized - it is handled by the local case in `higherOrderCall`.
not arg = DataFlow::capturedVariableNode(_)
|
// direct higher-order call
summarizedHigherOrderCallAux(f, arg, innerArg, cfg, oldSummary, cbParm, inner, j, cb) and
inner = cbParm.getAnInvocation() and
i = j and
summary = oldSummary
or
// indirect higher-order call
summarizedHigherOrderCallAux(f, arg, innerArg, cfg, oldSummary, cbParm, inner, j, cb) and
exists(DataFlow::Node cbArg, PathSummary newSummary |
cbParm.flowsTo(cbArg) and
summarizedHigherOrderCall(innerArg, cbArg, i, cfg, newSummary) and
summary = oldSummary.append(PathSummary::call()).append(newSummary)
)
)
}
/**
* @see `summarizedHigherOrderCall`.
*/
pragma[noinline]
private predicate summarizedHigherOrderCallAux(
Function f, DataFlow::Node arg, DataFlow::Node innerArg, DataFlow::Configuration cfg,
PathSummary oldSummary, DataFlow::SourceNode cbParm, DataFlow::InvokeNode inner, int j,
DataFlow::Node cb
) {
exists(DataFlow::Node outer1, DataFlow::Node outer2 |
reachableFromInput(f, outer1, arg, innerArg, cfg, oldSummary) and
outer1 = pragma[only_bind_into](outer2) and
// Only track actual parameter flow.
argumentPassing(outer2, cb, f, cbParm) and
innerArg = inner.getArgument(j)
)
}
/**
* Holds if `arg` is passed as the `i`th argument to `callback` through a callback invocation.
*
* This can be a summarized call, that is, `arg` and `callback` flow into a call,
* `f(arg, callback)`, which performs the invocation.
*
* Alternatively, the callback can flow into a call `f(callback)` which itself provides the `arg`.
* That is, `arg` refers to a value defined in `f` or one of its callees.
*
* In the latter case, the summary will consists of both a `return` and `call` step, for the following reasons:
*
* - Having `return` in the summary ensures that arguments passsed to `f` can't propagate back out along this edge.
* This is, `arg` should be defined in `f` or one of its callees, since a context-dependent value (i.e. parameter)
* should not propagate to every callback passed to `f`.
* In reality, `arg` may refer to a parameter, but in that case, the `return` summary prevents the edge from ever
* being used.
*
* - Having `call` in the summary ensures that values we propagate into the callback definition along this edge
* can't propagate out to other callers of that function through a return statement.
*
* - The flow label mapping of the summary corresponds to the transformation from `arg` to the
* invocation of the callback.
*/
pragma[nomagic]
private predicate higherOrderCall(
DataFlow::Node arg, DataFlow::SourceNode callback, int i, DataFlow::Configuration cfg,
PathSummary summary
) {
// Summarized call
exists(DataFlow::Node cb |
summarizedHigherOrderCall(arg, cb, i, cfg, summary) and
callback.flowsTo(cb)
)
or
// Local invocation of a parameter
isRelevant(arg, cfg) and
exists(DataFlow::InvokeNode invoke |
arg = invoke.getArgument(i) and
invoke = callback.(DataFlow::ParameterNode).getACall() and
summary = PathSummary::call()
)
or
// Forwarding of the callback parameter (but not the argument).
exists(DataFlow::Node cbArg, DataFlow::SourceNode innerCb, PathSummary oldSummary |
higherOrderCall(arg, innerCb, i, cfg, oldSummary) and
callStep(cbArg, innerCb) and
callback.flowsTo(cbArg) and
// Prepend a 'return' summary to prevent context-dependent values (i.e. parameters) from using this edge.
summary = PathSummary::return().append(oldSummary)
)
}
/**
* Holds if `pred` is passed as an argument to a function `f` which also takes a
* callback parameter `cb` and then invokes `cb`, passing `pred` into parameter `succ`
* of `cb`.
*
* All of this is done under configuration `cfg`, and `arg` flows along a path
* summarized by `summary`, while `cb` is only tracked locally.
*/
private predicate flowIntoHigherOrderCall(
DataFlow::Node pred, DataFlow::Node succ, DataFlow::Configuration cfg, PathSummary summary
) {
exists(DataFlow::FunctionNode cb, int i, PathSummary oldSummary |
higherOrderCall(pred, cb, i, cfg, oldSummary) and
succ = cb.getParameter(i) and
summary = oldSummary.append(PathSummary::call())
)
or
exists(
DataFlow::SourceNode cb, DataFlow::FunctionNode f, int i, int boundArgs, PathSummary oldSummary
|
higherOrderCall(pred, cb, i, cfg, oldSummary) and
cb = CallGraph::getABoundFunctionReference(f, boundArgs, false) and
succ = f.getParameter(boundArgs + i) and
summary = oldSummary.append(PathSummary::call())
)
}
/**
* Holds if there is a flow step from `pred` to `succ` described by `summary`
* under configuration `cfg`.
*/
private predicate flowStep(
DataFlow::Node pred, DataFlow::Configuration cfg, DataFlow::Node succ, PathSummary summary
) {
(
basicFlowStepNoBarrier(pred, succ, summary, cfg) and
isRelevant(pred, cfg)
or
// Flow through a function that returns a value that depends on one of its arguments
// or a captured variable
flowThroughCall(pred, succ, cfg, summary)
or
// Flow through a property write/read pair
flowThroughProperty(pred, succ, cfg, summary)
or
// Flow into higher-order call
flowIntoHigherOrderCall(pred, succ, cfg, summary)
) and
not cfg.isBarrier(succ) and
not isBarrierEdge(cfg, pred, succ) and
not isLabeledBarrierEdge(cfg, pred, succ, summary.getEndLabel()) and
not cfg.isLabeledBarrier(succ, summary.getEndLabel())
}
/**
* Holds if `source` can flow to `sink` under configuration `cfg`
* in zero or more steps.
*/
pragma[nomagic]
private predicate flowsTo(
PathNode flowsource, DataFlow::Node source, SinkPathNode flowsink, DataFlow::Node sink,
DataFlow::Configuration cfg
) {
flowsource.wraps(source, cfg) and
flowsink = flowsource.getASuccessor*() and
flowsink.wraps(sink, id(cfg))
}
/**
* Holds if `nd` is reachable from a source under `cfg` along a path summarized by
* `summary`.
*/
pragma[nomagic]
private predicate reachableFromSource(
DataFlow::Node nd, DataFlow::Configuration cfg, PathSummary summary
) {
exists(FlowLabel lbl |
isSource(nd, cfg, lbl) and
not cfg.isBarrier(nd) and
not cfg.isLabeledBarrier(nd, lbl) and
summary = PathSummary::level(lbl)
)
or
exists(DataFlow::Node pred, PathSummary oldSummary, PathSummary newSummary |
reachableFromSource(pred, cfg, oldSummary) and
flowStep(pred, cfg, nd, newSummary) and
summary = oldSummary.append(newSummary)
)
}
/**
* Holds if `nd` can be reached from a source under `cfg`, and in turn a sink is
* reachable from `nd`, where the path from the source to `nd` is summarized by `summary`.
*/
private predicate onPath(DataFlow::Node nd, DataFlow::Configuration cfg, PathSummary summary) {
reachableFromSource(nd, cfg, summary) and
isSink(nd, cfg, summary.getEndLabel()) and
not cfg.isBarrier(nd) and
not cfg.isLabeledBarrier(nd, summary.getEndLabel())
or
exists(DataFlow::Node mid, PathSummary stepSummary |
onPathStep(nd, cfg, summary, stepSummary, mid) and
onPath(mid, id(cfg), summary.append(stepSummary))
)
}
/**
* Holds if `nd` can be reached from a source under `cfg`,
* and there is a flowStep from `nd` (with summary `summary`) to `mid` (with summary `stepSummary`).
*
* This predicate has been outlined from `onPath` to give the optimizer a hint about join-ordering.
*/
pragma[noinline]
private predicate onPathStep(
DataFlow::Node nd, DataFlow::Configuration cfg, PathSummary summary, PathSummary stepSummary,
DataFlow::Node mid
) {
reachableFromSource(nd, cfg, summary) and
flowStep(nd, id(cfg), mid, stepSummary)
}
/**
* Holds if there is a configuration that has at least one source and at least one sink.
*/
pragma[noinline]
private predicate isLive() {
exists(DataFlow::Configuration cfg | isSource(_, cfg, _) and isSink(_, cfg, _))
}
/**
* A data flow node on an inter-procedural path from a source.
*/
private newtype TPathNode =
MkSourceNode(DataFlow::Node nd, DataFlow::Configuration cfg) { isSourceNode(nd, cfg, _) } or
MkMidNode(DataFlow::Node nd, DataFlow::Configuration cfg, PathSummary summary) {
isLive() and
onPath(nd, cfg, summary)
} or
MkSinkNode(DataFlow::Node nd, DataFlow::Configuration cfg) { isSinkNode(nd, cfg, _) }
/**
* Holds if `nd` is a source node for configuration `cfg`, and there is a path from `nd` to a sink
* with the given `summary`.
*/
private predicate isSourceNode(DataFlow::Node nd, DataFlow::Configuration cfg, PathSummary summary) {
exists(FlowLabel lbl | summary = PathSummary::level(lbl) |
isSource(nd, cfg, lbl) and
isLive() and
onPath(nd, cfg, summary)
)
}
/**
* Holds if `nd` is a sink node for configuration `cfg`, and there is a path from a source to `nd`
* with the given `summary`.
*/
private predicate isSinkNode(DataFlow::Node nd, DataFlow::Configuration cfg, PathSummary summary) {
isSink(nd, cfg, summary.getEndLabel()) and
isLive() and
onPath(nd, cfg, summary)
}
/**
* Maps `cfg` to itself.
*
* This is an auxiliary predicate that is needed in some places to prevent us
* from computing a cross-product of all path nodes belonging to the same configuration.
*/
bindingset[cfg, result]
private DataFlow::Configuration id(DataFlow::Configuration cfg) { result >= cfg and cfg >= result }
/**
* A data-flow node on an inter-procedural path from a source to a sink.
*
* A path node wraps a data-flow node `nd` and a data-flow configuration `cfg` such that `nd` is
* on a path from a source to a sink under `cfg`.
*
* There are three kinds of path nodes:
*
* - source nodes: wrapping a source node and a configuration such that there is a path from that
* source to some sink under the configuration;
* - sink nodes: wrapping a sink node and a configuration such that there is a path from some source
* to that sink under the configuration;
* - mid nodes: wrapping a node, a configuration and a path summary such that there is a path from
* some source to the node with the given summary that can be extended to a path to some sink node,
* all under the configuration.
*/
class PathNode extends TPathNode {
DataFlow::Node nd;
Configuration cfg;
PathNode() {
this = MkSourceNode(nd, cfg) or
this = MkMidNode(nd, cfg, _) or
this = MkSinkNode(nd, cfg)
}
/** Holds if this path node wraps data-flow node `n` and configuration `c`. */
predicate wraps(DataFlow::Node n, DataFlow::Configuration c) { nd = n and cfg = c }
/** Gets the underlying configuration of this path node. */
DataFlow::Configuration getConfiguration() { result = cfg }
/** Gets the underlying data-flow node of this path node. */
DataFlow::Node getNode() { result = nd }
/** Gets a successor node of this path node. */
final PathNode getASuccessor() { result = getASuccessor(this) }
/** Gets a textual representation of this path node. */
string toString() { result = nd.toString() }
/**
* Holds if this path node is at the specified location.
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
nd.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
}
/**
* Gets a summary for the path node.
*/
PathSummary getPathSummary() {
this = MkMidNode(_, _, result)
or
this = MkSinkNode(_, _) and getASuccessor(MkMidNode(_, _, result)) = this
or
this = MkSourceNode(_, _) and getASuccessor(this) = MkMidNode(_, _, result)
}
/**
* Gets a flow label for the path node.
*/
FlowLabel getFlowLabel() { result = getPathSummary().getEndLabel() }
}
/** Gets the mid node corresponding to `src`. */
private MidPathNode initialMidNode(SourcePathNode src) {
exists(DataFlow::Node nd, Configuration cfg, PathSummary summary |
result.wraps(nd, cfg, summary) and
src = MkSourceNode(nd, cfg) and
isSourceNode(nd, cfg, summary)
)
}
/** Gets the mid node corresponding to `snk`. */
private MidPathNode finalMidNode(SinkPathNode snk) {
exists(DataFlow::Node nd, Configuration cfg, PathSummary summary |
result.wraps(nd, cfg, summary) and
snk = MkSinkNode(nd, cfg) and
isSinkNode(nd, cfg, summary)
)
}
/**
* Holds if `nd` is a mid node wrapping `(predNd, cfg, summary)`, and there is a flow step
* from `predNd` to `succNd` under `cfg` with summary `newSummary`.
*
* This helper predicate exists to clarify the intended join order in `getASuccessor` below.
*/
pragma[noinline]
private predicate midNodeStep(
PathNode nd, DataFlow::Node predNd, Configuration cfg, PathSummary summary, DataFlow::Node succNd,
PathSummary newSummary
) {
nd = MkMidNode(predNd, cfg, summary) and
flowStep(predNd, id(cfg), succNd, newSummary)
}
/**
* Gets a node to which data from `nd` may flow in one step.
*/
private PathNode getASuccessor(PathNode nd) {
// source node to mid node
result = initialMidNode(nd)
or
// mid node to mid node
exists(
Configuration cfg, DataFlow::Node predNd, PathSummary summary, DataFlow::Node succNd,
PathSummary newSummary
|
midNodeStep(nd, predNd, cfg, summary, succNd, newSummary) and
result = MkMidNode(succNd, id(cfg), summary.append(newSummary))
)
or
// mid node to sink node
nd = finalMidNode(result)
}
private PathNode getASuccessorIfHidden(PathNode nd) {
nd.(MidPathNode).isHidden() and
result = getASuccessor(nd)
}
/**
* A path node corresponding to an intermediate node on a path from a source to a sink.
*
* A mid node is a triple `(nd, cfg, summary)` where `nd` is a data-flow node and `cfg`
* is a configuration such that `nd` is on a path from a source to a sink under `cfg`
* summarized by `summary`.
*/
class MidPathNode extends PathNode, MkMidNode {
PathSummary summary;
MidPathNode() { this = MkMidNode(nd, cfg, summary) }
/** Holds if this path node wraps data-flow node `n`, configuration `c` and summary `s`. */
predicate wraps(DataFlow::Node n, DataFlow::Configuration c, PathSummary s) {
nd = n and cfg = c and summary = s
}
/**
* Holds if this node is hidden from paths in path explanation queries, except
* in cases where it is the source or sink.
*/
predicate isHidden() {
// Skip phi, refinement, and capture nodes
nd.(DataFlow::SsaDefinitionNode).getSsaVariable().getDefinition() instanceof
SsaImplicitDefinition
or
// Skip SSA definition of parameter as its location coincides with the parameter node
nd = DataFlow::ssaDefinitionNode(SSA::definition(any(SimpleParameter p)))
or
// Skip to the top of big left-leaning string concatenation trees.
nd = any(AddExpr add).flow() and
nd = any(AddExpr add).getAnOperand().flow()
or
// Skip the exceptional return on functions, as this highlights the entire function.
nd = any(DataFlow::FunctionNode f).getExceptionalReturn()
or
// Skip the special return node for functions, as this highlights the entire function (and the returned expr is the previous node).
nd = any(DataFlow::FunctionNode f).getReturnNode()
or
// Skip the synthetic 'this' node, as a ThisExpr will be the next node anyway
nd = DataFlow::thisNode(_)
or
// Skip captured variable nodes as the successor will be a use of that variable anyway.
nd = DataFlow::capturedVariableNode(_)
}
}
/**
* A path node corresponding to a flow source.
*/
class SourcePathNode extends PathNode, MkSourceNode {
SourcePathNode() { this = MkSourceNode(nd, cfg) }
}
/**
* A path node corresponding to a flow sink.
*/
class SinkPathNode extends PathNode, MkSinkNode {
SinkPathNode() { this = MkSinkNode(nd, cfg) }
}
/**
* Provides the query predicates needed to include a graph in a path-problem query.
*/
module PathGraph {
/** Holds if `nd` is a node in the graph of data flow path explanations. */
query predicate nodes(PathNode nd) { not nd.(MidPathNode).isHidden() }
/**
* Gets a node to which data from `nd` may flow in one step, skipping over hidden nodes.
*/
private PathNode succ0(PathNode nd) {
result = getASuccessorIfHidden*(nd.getASuccessor()) and
// skip hidden nodes
nodes(nd) and
nodes(result)
}
/**
* Gets a node to which data from `nd` may flow in one step, where outgoing edges from intermediate
* nodes are merged with any incoming edge from a corresponding source node.
*
* For example, assume that `src` is a source node for `nd1`, which has `nd2` as its direct
* successor. Then `succ0` will yield two edges `src` &rarr; `nd1` and `nd1` &rarr; `nd2`,
* to which `succ1` will add the edge `src` &rarr; `nd2`.
*/
private PathNode succ1(PathNode nd) {
result = succ0(nd)
or
result = succ0(initialMidNode(nd))
}
/**
* Gets a node to which data from `nd` may flow in one step, where incoming edges into intermediate
* nodes are merged with any outgoing edge to a corresponding sink node.
*
* For example, assume that `snk` is a source node for `nd2`, which has `nd1` as its direct
* predecessor. Then `succ1` will yield two edges `nd1` &rarr; `nd2` and `nd2` &rarr; `snk`,
* while `succ2` will yield just one edge `nd1` &rarr; `snk`.
*/
private PathNode succ2(PathNode nd) {
result = succ1(nd)
or
succ1(nd) = finalMidNode(result)
}
/** Holds if `pred` &rarr; `succ` is an edge in the graph of data flow path explanations. */
query predicate edges(PathNode pred, PathNode succ) {
succ = succ2(pred) and
// skip over uninteresting edges
not succ = initialMidNode(pred) and
not pred = finalMidNode(succ)
}
}
/**
* Gets a logical `and` expression, or parenthesized expression, that contains `guard`.
*/
private Expr getALogicalAndParent(BarrierGuardNode guard) {
barrierGuardIsRelevant(guard) and result = guard.asExpr()
or
result.(LogAndExpr).getAnOperand() = getALogicalAndParent(guard)
or
result.getUnderlyingValue() = getALogicalAndParent(guard)
}
/**
* Gets a logical `or` expression, or parenthesized expression, that contains `guard`.
*/
private Expr getALogicalOrParent(BarrierGuardNode guard) {
barrierGuardIsRelevant(guard) and result = guard.asExpr()
or
result.(LogOrExpr).getAnOperand() = getALogicalOrParent(guard)
or
result.getUnderlyingValue() = getALogicalOrParent(guard)
}
/**
* A `BarrierGuardNode` that controls which data flow
* configurations it is used in.
*
* Note: For performance reasons, all subclasses of this class should be part
* of the standard library. Override `Configuration::isBarrierGuard`
* for analysis-specific barrier guards.
*/
abstract class AdditionalBarrierGuardNode extends BarrierGuardNode {
abstract predicate appliesTo(Configuration cfg);
}
/**
* A function that returns the result of a barrier guard.
*/
private class BarrierGuardFunction extends Function {
DataFlow::ParameterNode sanitizedParameter;
BarrierGuardNode guard;
boolean guardOutcome;
string label;
int paramIndex;
BarrierGuardFunction() {
barrierGuardIsRelevant(guard) and
exists(Expr e |
exists(Expr returnExpr |
returnExpr = guard.asExpr()
or
// ad hoc support for conjunctions:
getALogicalAndParent(guard) = returnExpr and guardOutcome = true
or
// ad hoc support for disjunctions:
getALogicalOrParent(guard) = returnExpr and guardOutcome = false
|
exists(SsaExplicitDefinition ssa |
ssa.getDef().getSource() = returnExpr and
ssa.getVariable().getAUse() = getAReturnedExpr()
)
or
returnExpr = getAReturnedExpr()
) and
sanitizedParameter.flowsToExpr(e) and
barrierGuardBlocksExpr(guard, guardOutcome, e, label)
) and
sanitizedParameter.getParameter() = getParameter(paramIndex)
}
/**
* Holds if this function sanitizes argument `e` of call `call`, provided the call evaluates to `outcome`.
*/
predicate isBarrierCall(DataFlow::CallNode call, Expr e, boolean outcome, string lbl) {
exists(DataFlow::Node arg |
argumentPassing(pragma[only_bind_into](call), pragma[only_bind_into](arg),
pragma[only_bind_into](this), pragma[only_bind_into](sanitizedParameter)) and
arg.asExpr() = e and
arg = call.getArgument(paramIndex) and
outcome = guardOutcome and
lbl = label
)
}
/**
* Holds if this function applies to the flow in `cfg`.
*/
predicate appliesTo(Configuration cfg) { cfg.isBarrierGuard(guard) }
}
/**
* A call that sanitizes an argument.
*/
private class AdditionalBarrierGuardCall extends AdditionalBarrierGuardNode, DataFlow::CallNode {
BarrierGuardFunction f;
AdditionalBarrierGuardCall() { f.isBarrierCall(this, _, _, _) }
override predicate blocks(boolean outcome, Expr e) { f.isBarrierCall(this, e, outcome, "") }
predicate internalBlocksLabel(boolean outcome, Expr e, DataFlow::FlowLabel label) {
f.isBarrierCall(this, e, outcome, label)
}
override predicate appliesTo(Configuration cfg) { f.appliesTo(cfg) }
}
/**
* A guard node for a variable in a negative condition, such as `x` in `if(!x)`.
* Can be added to a `isBarrier` in a data-flow configuration to block flow through such checks.
*/
class VarAccessBarrier extends DataFlow::Node {
VarAccessBarrier() {
exists(ConditionGuardNode guard, SsaRefinementNode refinement |
this = DataFlow::ssaDefinitionNode(refinement) and
refinement.getGuard() = guard and
guard.getTest() instanceof VarAccess and
guard.getOutcome() = false
)
}
}
/**
* Holds if there is a path without unmatched return steps from `source` to `sink`.
*/
predicate hasPathWithoutUnmatchedReturn(SourcePathNode source, SinkPathNode sink) {
exists(MidPathNode mid |
source.getASuccessor*() = mid and
sink = mid.getASuccessor() and
mid.getPathSummary().hasReturn() = false
)
}