// codeql/javascript/ql/lib/semmle/javascript/dataflow/Configuration.qll

/**
 * Provides a class for performing customized inter-procedural data flow.
 *
 * The class in this module provides an interface for performing inter-procedural
 * data flow from a custom set of source nodes to a custom set of sink nodes.
 * Additional data flow edges can be specified, and conversely certain nodes or
 * edges can be designated as _barriers_ that block flow.
 *
 * NOTE: The API of this library is not stable yet and may change in
 *       the future.
 *
 *
 * # Technical overview
 *
 * This module implements a summarization-based inter-procedural data flow
 * analysis. Data flow is tracked through local variables, imports and (some)
 * object properties, as well as into and out of function calls. The latter
 * is done by computing function summaries that record which function parameters
 * and captured variables may flow into the function's return value.
 *
 * For example, for the function
 *
 * ```
 * function choice(b, x, y) {
 *   return b ? x : y;
 * }
 * ```
 *
 * we determine that its second and third (but not the first) parameter may
 * flow into its return value.
 *
 * Hence when we see a call `a = choice(b, c, d)`, we propagate flow from `c`
 * to `a` and from `d` to `a` (but not from `b` to `a`).
 *
 * The inter-procedural data flow graph is represented by class `PathNode`
 * and its member predicate `getASuccessor`. Each `PathNode` is a pair
 * of an underlying `DataFlow::Node` and a `DataFlow::Configuration`, which
 * can be accessed through member predicates `getNode` and `getConfiguration`,
 * respectively.
 *
 * # Implementation details
 *
 * Overall, flow is tracked forwards, starting at the sources and looking
 * for an inter-procedural path to a sink.
 *
 * Function summaries are computed by predicate `flowThroughCall`.
 * Predicate `flowStep` computes a "one-step" flow relation, where, however,
 * a single step may be based on a function summary, and hence already involve
 * inter-procedural flow.
 *
 * Flow steps are classified as being "call", "return" or "level": a call step
 * goes from an argument to a parameter, a return step from a return to a caller,
 * and a level step is either a step that does not involve function calls
 * or a step through a summary.
 *
 * Predicate `reachableFromSource` computes inter-procedural paths from
 * sources along the `flowStep` relation, keeping track of whether any of
 * these steps is a call step. Return steps are only allowed if no previous
 * step was a call step to avoid confusion between different call sites.
 *
 * Predicate `onPath` builds on `reachableFromSource` to compute full
 * paths from sources to sinks, this time starting with the sinks. Similar
 * to above, it keeps track of whether any of the steps from a node to a
 * sink is a return step, and only considers call steps for paths that do
 * not contain a return step.
 *
 * Finally, we build `PathNode`s for all nodes that appear on a path
 * computed by `onPath`.
 */

private import javascript
private import internal.FlowSteps
private import internal.AccessPaths
private import internal.CallGraphs
private import semmle.javascript.Unit
private import semmle.javascript.internal.CachedStages

/**
 * A data flow tracking configuration for finding inter-procedural paths from
 * sources to sinks.
 *
 * Each use of the data flow tracking library must define its own unique extension
 * of this abstract class. A configuration defines a set of relevant sources
 * (`isSource`) and sinks (`isSink`), and may additionally contribute extra edges
 * beyond the standard data flow edges (`isAdditionalFlowStep`) and prohibit
 * intermediate flow nodes and edges (`isBarrier`, `isBarrierEdge`).
 */
abstract class Configuration extends string {
  bindingset[this]
  Configuration() { any() }

  /**
   * Gets the unique identifier of this configuration among all data flow tracking
   * configurations.
   *
   * The identifier is the string value of the configuration itself.
   */
  string getId() { result = this }

  /**
   * Holds if `source` is a relevant data flow source for this configuration.
   */
  predicate isSource(DataFlow::Node source) { none() }

  /**
   * Gets the flow label to associate with sources added by the 1-argument `isSource` predicate.
   *
   * For taint-tracking configurations, this defaults to `taint` and for other data-flow configurations
   * it defaults to `data`.
   *
   * Overriding this predicate is rarely needed, and overriding the 2-argument `isSource` predicate
   * should be preferred when possible.
   */
  FlowLabel getDefaultSourceLabel() { result = FlowLabel::data() }

  /**
   * Holds if `source` is a source of flow labeled with `lbl` that is relevant
   * for this configuration.
   */
  predicate isSource(DataFlow::Node source, FlowLabel lbl) { none() }

  /**
   * Holds if `sink` is a relevant data flow sink for this configuration.
   */
  predicate isSink(DataFlow::Node sink) { none() }

  /**
   * Holds if `sink` is a sink of flow labeled with `lbl` that is relevant
   * for this configuration.
   */
  predicate isSink(DataFlow::Node sink, FlowLabel lbl) { none() }

  /**
   * Holds if `src -> trg` should be considered as a flow edge
   * in addition to standard data flow edges.
   */
  predicate isAdditionalFlowStep(DataFlow::Node src, DataFlow::Node trg) { none() }

  /**
   * INTERNAL: This predicate should not normally be used outside the data flow
   * library.
   *
   * Holds if `src -> trg` should be considered as a flow edge
   * in addition to standard data flow edges, with `valuePreserving`
   * indicating whether the step preserves values or just taintedness.
   *
   * By default, every step of the 2-argument `isAdditionalFlowStep` is
   * treated as value-preserving.
   */
  predicate isAdditionalFlowStep(DataFlow::Node src, DataFlow::Node trg, boolean valuePreserving) {
    this.isAdditionalFlowStep(src, trg) and valuePreserving = true
  }

  /**
   * Holds if `src -> trg` is a flow edge converting flow with label `inlbl` to
   * flow with label `outlbl`.
   */
  predicate isAdditionalFlowStep(
    DataFlow::Node src, DataFlow::Node trg, FlowLabel inlbl, FlowLabel outlbl
  ) {
    none()
  }

  /**
   * Holds if the intermediate flow node `node` is prohibited.
   *
   * By default, this holds for nodes blocked by a barrier guard of this
   * configuration that blocks all flow labels.
   */
  predicate isBarrier(DataFlow::Node node) {
    exists(BarrierGuardNode guard |
      this.isBarrierGuardInternal(guard) and
      // the empty label stands for "all labels are blocked"
      barrierGuardBlocksNode(guard, node, "")
    )
  }

  /**
   * Holds if flow from `pred` to `succ` is prohibited.
   */
  predicate isBarrierEdge(DataFlow::Node pred, DataFlow::Node succ) { none() }

  /**
   * Holds if flow with label `lbl` cannot flow from `pred` to `succ`.
   */
  predicate isBarrierEdge(DataFlow::Node pred, DataFlow::Node succ, FlowLabel lbl) { none() }

  /**
   * Holds if flow with label `lbl` cannot flow into `node`.
   *
   * By default, this holds for nodes blocked for `lbl` by a barrier guard of
   * this configuration.
   */
  predicate isLabeledBarrier(DataFlow::Node node, FlowLabel lbl) {
    exists(BarrierGuardNode guard |
      this.isBarrierGuardInternal(guard) and
      barrierGuardBlocksNode(guard, node, lbl)
    )
    or
    none() // relax type inference to account for overriding
  }

  /**
   * Holds if data flow node `guard` can act as a barrier when appearing
   * in a condition.
   *
   * For example, if `guard` is the comparison expression in
   * `if(x == 'some-constant'){ ... x ... }`, it could block flow of
   * `x` into the "then" branch.
   */
  predicate isBarrierGuard(BarrierGuardNode guard) { none() }

  /**
   * Holds if `guard` is a barrier guard for this configuration, added through
   * `isBarrierGuard` or `AdditionalBarrierGuardNode`.
   */
  pragma[nomagic]
  private predicate isBarrierGuardInternal(BarrierGuardNode guard) {
    this.isBarrierGuard(guard)
    or
    guard.(AdditionalBarrierGuardNode).appliesTo(this)
  }

  /**
   * Holds if data may flow from `source` to `sink` for this configuration.
   */
  predicate hasFlow(DataFlow::Node source, DataFlow::Node sink) {
    // Require that this configuration has at least one source and one sink
    // before looking for paths between them.
    isSource(_, this, _) and
    isSink(_, this, _) and
    exists(SourcePathNode flowsource, SinkPathNode flowsink |
      this.hasFlowPath(flowsource, flowsink) and
      source = flowsource.getNode() and
      sink = flowsink.getNode()
    )
  }

  /**
   * Holds if data may flow from `source` to `sink` for this configuration,
   * where both end points are given as path nodes.
   */
  predicate hasFlowPath(SourcePathNode source, SinkPathNode sink) {
    flowsTo(source, _, sink, _, this)
  }

  /**
   * EXPERIMENTAL. This API may change in the future.
   *
   * Holds if `pred` should be stored in the object `succ` under the property `prop`.
   * The object `succ` must be a `DataFlow::SourceNode` for the object wherein the value is stored.
   */
  predicate isAdditionalStoreStep(DataFlow::Node pred, DataFlow::SourceNode succ, string prop) {
    none()
  }

  /**
   * EXPERIMENTAL. This API may change in the future.
   *
   * Holds if the property `prop` of the object `pred` should be loaded into `succ`.
   */
  predicate isAdditionalLoadStep(DataFlow::Node pred, DataFlow::Node succ, string prop) { none() }

  /**
   * EXPERIMENTAL. This API may change in the future.
   *
   * Holds if the property `prop` should be copied from the object `pred` to the object `succ`.
   */
  predicate isAdditionalLoadStoreStep(DataFlow::Node pred, DataFlow::Node succ, string prop) {
    none()
  }

  /**
   * EXPERIMENTAL. This API may change in the future.
   *
   * Holds if the property `loadProp` should be copied from the object `pred` to the property `storeProp` of object `succ`.
   */
  predicate isAdditionalLoadStoreStep(
    DataFlow::Node pred, DataFlow::Node succ, string loadProp, string storeProp
  ) {
    none()
  }
}

/**
 * A label describing the kind of information tracked by a flow configuration.
 *
 * There are two standard labels "data" and "taint".
 * - "data" only propagates along value-preserving data flow, such as assignments
 *   and parameter-passing, and is the default flow source for a `DataFlow::Configuration`.
 * - "taint" additionally permits flow through transformations such as string operations,
 *   and is the default flow source for a `TaintTracking::Configuration`.
 */
abstract class FlowLabel extends string {
  bindingset[this]
  FlowLabel() { any() }

  /**
   * Holds if this is the standard `FlowLabel::data()` flow label,
   * describing values that directly originate from a flow source.
   */
  final predicate isData() { this = FlowLabel::data() }

  /**
   * Holds if this is the standard `FlowLabel::taint()` flow label,
   * describing values that are influenced ("tainted") by a flow
   * source, but not necessarily directly derived from it.
   */
  final predicate isTaint() { this = FlowLabel::taint() }

  /**
   * Holds if this is one of the standard flow labels `FlowLabel::data()`
   * or `FlowLabel::taint()`.
   */
  final predicate isDataOrTaint() { this.isData() or this.isTaint() }
}

/**
 * A kind of taint tracked by a taint-tracking configuration.
 *
 * This is an alias of `FlowLabel`, so the two types can be used interchangeably.
 */
class TaintKind = FlowLabel;

/**
 * One of the two standard flow labels: the label with string value `"data"`
 * (`FlowLabel::data()`) or the one with string value `"taint"` (`FlowLabel::taint()`).
 */
class StandardFlowLabel extends FlowLabel {
  StandardFlowLabel() { this = ["data", "taint"] }
}

/**
 * Provides accessors for the two standard flow labels `data` and `taint`.
 */
module FlowLabel {
  /**
   * Gets the standard flow label for describing values that directly originate from a flow source.
   */
  FlowLabel data() { result = "data" }

  /**
   * Gets the standard flow label for describing values that are influenced ("tainted") by a flow
   * source, but not necessarily directly derived from it.
   */
  FlowLabel taint() { result = "taint" }
}

/**
 * A node that can act as a barrier when appearing in a condition.
 *
 * To add a barrier guard to a configuration, define a subclass of this class overriding the
 * `blocks` predicate, and then extend the configuration's `isBarrierGuard` predicate to include
 * the new class.
 *
 * Note that it is generally a good idea to make the characteristic predicate of barrier guard
 * classes as precise as possible: if two subclasses of `BarrierGuardNode` overlap, their
 * implementations of `blocks` will _both_ apply to any configuration that includes either of them.
 */
abstract class BarrierGuardNode extends DataFlow::Node {
  /**
   * Holds if this node blocks expression `e` provided it evaluates to `outcome`.
   *
   * This will block all flow labels.
   */
  abstract predicate blocks(boolean outcome, Expr e);

  /**
   * Holds if this node blocks expression `e` from flow of type `label`, provided it evaluates to `outcome`.
   *
   * The default implementation blocks nothing; override it to block individual labels.
   */
  predicate blocks(boolean outcome, Expr e, FlowLabel label) { none() }
}

/**
 * Holds if `guard` blocks flow through the expression `test` when it evaluates
 * to `outcome`.
 *
 * `label` is the blocked flow label, or the empty string if the guard blocks
 * every label.
 */
pragma[nomagic]
private predicate barrierGuardBlocksExpr(
  BarrierGuardNode guard, boolean outcome, Expr test, string label
) {
  // Label-specific blocking declared through the 3-argument `blocks`.
  guard.blocks(outcome, test, label)
  or
  // Handle labelled barrier guard functions specially, to avoid negative recursion
  // through the non-abstract 3-argument version of blocks().
  guard.(AdditionalBarrierGuardCall).internalBlocksLabel(outcome, test, label)
  or
  // Label-agnostic blocking declared through the 2-argument `blocks`.
  label = "" and
  guard.blocks(outcome, test)
}

/**
 * Holds if `guard` blocks at least one expression whose data flow node was
 * reached by the forward exploratory flow of some configuration.
 */
pragma[nomagic]
private predicate barrierGuardIsRelevant(BarrierGuardNode guard) {
  exists(Expr blocked | barrierGuardBlocksExpr(guard, _, blocked, _) |
    isRelevantForward(blocked.flow(), _)
  )
}

/**
 * Holds if the relevant barrier guard `guard` blocks some instance of the
 * access path `ap` when it evaluates to `outcome`, so that it can act as a
 * barrier for other instances of `ap` as well.
 *
 * `label` is the blocked flow label, or the empty string if the guard blocks
 * every label.
 */
pragma[nomagic]
private predicate barrierGuardBlocksAccessPath(
  BarrierGuardNode guard, boolean outcome, AccessPath ap, string label
) {
  exists(Expr instance | instance = ap.getAnInstance() |
    barrierGuardBlocksExpr(guard, outcome, instance, label) and
    barrierGuardIsRelevant(guard)
  )
}

/**
 * Holds if `guard` is the test of the guard of the refinement node `ref`, and
 * `ref` has at least one input variable, every one of which has a use that
 * `guard` blocks for `label` under `outcome`.
 *
 * This predicate is outlined to give the optimizer a hint about the join ordering.
 */
pragma[nomagic]
private predicate barrierGuardBlocksSsaRefinement(
  BarrierGuardNode guard, boolean outcome, SsaRefinementNode ref, string label
) {
  ref.getGuard().getTest() = guard.getEnclosingExpr() and
  barrierGuardIsRelevant(guard) and
  // `forex`: all inputs must be blocked, and there must be at least one input
  forex(SsaVariable v | v = ref.getAnInput() |
    barrierGuardBlocksExpr(guard, outcome, v.getAUse(), label)
  )
}

/**
 * Holds if the relevant barrier guard `guard` determines the branching
 * condition `cond`: either the test of `cond` is the expression enclosing
 * `guard`, or `guard` is reachable from the test's data flow node by one or
 * more immediate-predecessor steps.
 *
 * `outcome` is bound to the outcome of `cond` for join-ordering purposes.
 */
pragma[nomagic]
private predicate barrierGuardUsedInCondition(
  BarrierGuardNode guard, ConditionGuardNode cond, boolean outcome
) {
  barrierGuardIsRelevant(guard) and
  exists(Expr test | test = cond.getTest() |
    test = guard.getEnclosingExpr()
    or
    test.flow().getImmediatePredecessor+() = guard
  ) and
  cond.getOutcome() = outcome
}

/**
 * Holds if `guard` prevents flow into the data flow node `nd`, possibly due to
 * aliasing through an access path.
 *
 * `label` is the blocked flow label, or the empty string if the guard blocks
 * every label.
 */
pragma[nomagic]
private predicate barrierGuardBlocksNode(BarrierGuardNode guard, DataFlow::Node nd, string label) {
  // Case 1: `nd` is the definition node of an SSA refinement guarded by `guard`,
  // with the outcome taken from the refinement's own condition guard.
  exists(SsaRefinementNode refinement | nd = DataFlow::ssaDefinitionNode(refinement) |
    barrierGuardBlocksSsaRefinement(guard,
      refinement.getGuard().(ConditionGuardNode).getOutcome(), refinement, label)
  )
  or
  // Case 2: `nd` is an instance of an access path that `guard` blocks, located
  // in a basic block dominated by a condition in which `guard` is used.
  exists(AccessPath path, BasicBlock block, ConditionGuardNode branch, boolean taken |
    barrierGuardUsedInCondition(guard, branch, taken) and
    barrierGuardBlocksAccessPath(guard, taken, path, label) and
    branch.dominates(block)
  |
    nd = DataFlow::valueNode(path.getAnInstanceIn(block))
  )
}

/**
 * Holds if `guard` should block flow along the edge `pred -> succ`, where
 * `pred` is an input of the phi node `succ` that arrives from a basic block
 * dominated by a condition testing `guard`, and `guard` blocks a use of that
 * input under the condition's outcome.
 *
 * `label` is the blocked flow label, or the empty string if the guard blocks
 * every label.
 */
pragma[nomagic]
private predicate barrierGuardBlocksEdge(
  BarrierGuardNode guard, DataFlow::Node pred, DataFlow::Node succ, string label
) {
  exists(SsaPhiNode join, SsaVariable operand, ConditionGuardNode branch, BasicBlock origin |
    origin = getADominatedBasicBlock(guard, branch) and
    operand = join.getInputFromBlock(origin)
  |
    pred = DataFlow::ssaDefinitionNode(operand) and
    succ = DataFlow::ssaDefinitionNode(join) and
    barrierGuardBlocksExpr(guard, branch.getOutcome(), operand.getAUse(), label)
  )
}

/**
 * Gets a basic block that is dominated by `cond`, where the test of `cond` is
 * the expression enclosing the relevant barrier guard `guard`.
 *
 * This predicate exists to get a better join-order for the `barrierGuardBlocksEdge` predicate above.
 */
pragma[noinline]
private BasicBlock getADominatedBasicBlock(BarrierGuardNode guard, ConditionGuardNode cond) {
  cond.getTest() = guard.getEnclosingExpr() and
  barrierGuardIsRelevant(guard) and
  cond.dominates(result)
}

/**
 * Holds if `pred -> succ` is a barrier edge in `cfg`, either because `cfg`
 * declares it explicitly or because one of its barrier guards implies it.
 *
 * Only holds for barriers that should apply to all flow labels.
 *
 * NOTE(review): only guards from `cfg.isBarrierGuard` are consulted here; guards
 * contributed through `AdditionalBarrierGuardNode` are not. Confirm this is intended.
 */
private predicate isBarrierEdge(Configuration cfg, DataFlow::Node pred, DataFlow::Node succ) {
  // the empty label stands for "all labels are blocked"
  exists(DataFlow::BarrierGuardNode guard | cfg.isBarrierGuard(guard) |
    barrierGuardBlocksEdge(guard, pred, succ, "")
  )
  or
  cfg.isBarrierEdge(pred, succ)
}

/**
 * Holds if `pred -> succ` is a barrier edge for flow labeled `label` in `cfg`,
 * either because `cfg` declares it explicitly or because one of its barrier
 * guards implies it.
 *
 * NOTE(review): only guards from `cfg.isBarrierGuard` are consulted here; guards
 * contributed through `AdditionalBarrierGuardNode` are not. Confirm this is intended.
 */
private predicate isLabeledBarrierEdge(
  Configuration cfg, DataFlow::Node pred, DataFlow::Node succ, DataFlow::FlowLabel label
) {
  exists(DataFlow::BarrierGuardNode guard | cfg.isBarrierGuard(guard) |
    barrierGuardBlocksEdge(guard, pred, succ, label)
  )
  or
  cfg.isBarrierEdge(pred, succ, label)
}

/**
 * A guard node that only blocks specific labels.
 *
 * The label-agnostic 2-argument `blocks` predicate is overridden to hold for
 * nothing, so subclasses only need to provide the 3-argument `blocks`.
 */
abstract class LabeledBarrierGuardNode extends BarrierGuardNode {
  override predicate blocks(boolean outcome, Expr e) { none() }
}

/**
 * A data flow edge that should be added to all data flow configurations in
 * addition to standard data flow edges.
 *
 * This class is a singleton, and thus subclasses do not need to specify a characteristic predicate.
 *
 * Every member predicate holds for nothing by default; subclasses override the
 * ones for which they contribute steps.
 *
 * Note: For performance reasons, all subclasses of this class should be part
 * of the standard library. Override `Configuration::isAdditionalFlowStep`
 * for analysis-specific flow steps.
 */
class SharedFlowStep extends Unit {
  /**
   * Holds if `pred` &rarr; `succ` should be considered a data flow edge.
   */
  predicate step(DataFlow::Node pred, DataFlow::Node succ) { none() }

  /**
   * Holds if `pred` &rarr; `succ` should be considered a data flow edge
   * transforming values with label `predlbl` to have label `succlbl`.
   */
  predicate step(
    DataFlow::Node pred, DataFlow::Node succ, DataFlow::FlowLabel predlbl,
    DataFlow::FlowLabel succlbl
  ) {
    none()
  }

  /**
   * Holds if `pred` should be stored in the object `succ` under the property `prop`.
   * The object `succ` must be a `DataFlow::SourceNode` for the object wherein the value is stored.
   */
  predicate storeStep(DataFlow::Node pred, DataFlow::SourceNode succ, string prop) { none() }

  /**
   * Holds if the property `prop` of the object `pred` should be loaded into `succ`.
   */
  predicate loadStep(DataFlow::Node pred, DataFlow::Node succ, string prop) { none() }

  /**
   * Holds if the property `prop` should be copied from the object `pred` to the object `succ`.
   */
  predicate loadStoreStep(DataFlow::Node pred, DataFlow::Node succ, string prop) { none() }

  /**
   * Holds if the property `loadProp` should be copied from the object `pred` to the property `storeProp` of object `succ`.
   */
  predicate loadStoreStep(
    DataFlow::Node pred, DataFlow::Node succ, string loadProp, string storeProp
  ) {
    none()
  }
}

/**
 * Provides cached predicates that collect the steps contributed by all
 * `SharedFlowStep` subclasses.
 */
cached
module SharedFlowStep {
  cached
  private module Internal {
    // Forces this to be part of the `FlowSteps` stage.
    // We use a public predicate in a private module to avoid warnings about this being unused.
    cached
    predicate forceStage() { Stages::FlowSteps::ref() }
  }

  /**
   * Holds if some `SharedFlowStep` declares `pred` &rarr; `succ` as a data flow edge.
   */
  cached
  predicate step(DataFlow::Node pred, DataFlow::Node succ) {
    exists(SharedFlowStep s | s.step(pred, succ))
  }

  /**
   * Holds if some `SharedFlowStep` declares `pred` &rarr; `succ` as a data flow edge
   * that turns values labeled `predlbl` into values labeled `succlbl`.
   */
  cached
  predicate step(
    DataFlow::Node pred, DataFlow::Node succ, DataFlow::FlowLabel predlbl,
    DataFlow::FlowLabel succlbl
  ) {
    exists(SharedFlowStep s | s.step(pred, succ, predlbl, succlbl))
  }

  /**
   * Holds if some `SharedFlowStep` declares that `pred` is stored in the object `succ`
   * under the property `prop`.
   * The object `succ` must be a `DataFlow::SourceNode` for the object wherein the value is stored.
   */
  cached
  predicate storeStep(DataFlow::Node pred, DataFlow::SourceNode succ, string prop) {
    exists(SharedFlowStep s | s.storeStep(pred, succ, prop))
  }

  /**
   * Holds if some `SharedFlowStep` declares that the property `prop` of the object `pred`
   * is loaded into `succ`.
   */
  cached
  predicate loadStep(DataFlow::Node pred, DataFlow::Node succ, string prop) {
    exists(SharedFlowStep s | s.loadStep(pred, succ, prop))
  }

  /**
   * Holds if some `SharedFlowStep` declares that the property `prop` is copied from the
   * object `pred` to the object `succ`.
   */
  cached
  predicate loadStoreStep(DataFlow::Node pred, DataFlow::Node succ, string prop) {
    exists(SharedFlowStep s | s.loadStoreStep(pred, succ, prop))
  }

  /**
   * Holds if some `SharedFlowStep` declares that the property `loadProp` of the object `pred`
   * is copied to the property `storeProp` of the object `succ`.
   */
  cached
  predicate loadStoreStep(
    DataFlow::Node pred, DataFlow::Node succ, string loadProp, string storeProp
  ) {
    exists(SharedFlowStep s | s.loadStoreStep(pred, succ, loadProp, storeProp))
  }
}

/**
 * A collection of pseudo-properties that are used in multiple files.
 *
 * A pseudo-property represents the location where some value is stored in an object.
 *
 * For use with load/store steps in `DataFlow::SharedFlowStep` and TypeTracking.
 */
module PseudoProperties {
  /** Gets the pseudo-property name obtained by wrapping `s` in `$` characters. */
  bindingset[s]
  private string pseudoProperty(string s) { result = "$" + s + "$" }

  /** Gets the pseudo-property name `$s|v$`. */
  bindingset[s, v]
  private string pseudoProperty(string s, string v) { result = pseudoProperty(s + "|" + v) }

  /**
   * Gets the pseudo-property under which the elements of a `Set` are stored.
   */
  string setElement() { result = pseudoProperty("setElement") }

  /**
   * Gets the pseudo-property under which the elements of a JavaScript iterator are stored.
   */
  string iteratorElement() { result = pseudoProperty("iteratorElement") }

  /**
   * Gets the pseudo-property under which the elements of an `Array` are stored.
   */
  string arrayElement() { result = pseudoProperty("arrayElement") }

  /**
   * Gets a pseudo-property for the location of the `i`th element in an `Array`.
   *
   * The generic `arrayElement()` pseudo-property is always a result; for `i < 5`
   * the decimal string of `i` is a result as well.
   */
  bindingset[i]
  string arrayElement(int i) {
    result = arrayElement()
    or
    i < 5 and
    result = i.toString()
  }

  /**
   * Gets a pseudo-property for the location of elements in some array-like object. (Set, Array, or Iterator).
   */
  string arrayLikeElement() {
    result = setElement()
    or
    result = iteratorElement()
    or
    result = arrayElement()
  }

  /**
   * Gets the pseudo-property for map values stored under an unknown key.
   */
  string mapValueUnknownKey() { result = pseudoProperty("mapValueUnknownKey") }

  /**
   * Gets a pseudo-property for the location of all the values in a map.
   */
  string mapValueAll() { result = pseudoProperty("allMapValues") }

  /**
   * Gets a pseudo-property for the location of a map value where the key is `key`.
   * The string value of the `key` is encoded in the result, and there is only a result if the string value of `key` is known.
   */
  pragma[inline]
  string mapValueKnownKey(DataFlow::Node key) {
    exists(string s | key.mayHaveStringValue(s) | result = mapValueKey(s))
  }

  /**
   * Gets the pseudo-property for the map value stored under the string key `key`.
   */
  bindingset[key]
  string mapValueKey(string key) { result = pseudoProperty("mapValue", key) }

  /**
   * Gets a pseudo-property for the location of a map value where the key is `key`:
   * the known-key pseudo-property if a string value of `key` is known, and
   * the unknown-key pseudo-property otherwise.
   */
  pragma[inline]
  string mapValue(DataFlow::Node key) {
    if exists(mapValueKnownKey(key))
    then result = mapValueKnownKey(key)
    else result = mapValueUnknownKey()
  }
}

/**
 * A data flow node that should be considered a source for some specific configuration,
 * in addition to any other sources that configuration may recognize.
 *
 * Both `isSourceFor` predicates hold for nothing by default; subclasses override
 * the one that fits.
 */
abstract class AdditionalSource extends DataFlow::Node {
  /**
   * Holds if this data flow node should be considered a source node for
   * configuration `cfg`.
   */
  predicate isSourceFor(Configuration cfg) { none() }

  /**
   * Holds if this data flow node should be considered a source node for
   * values labeled with `lbl` under configuration `cfg`.
   */
  predicate isSourceFor(Configuration cfg, FlowLabel lbl) { none() }
}

/**
 * A data flow node that should be considered a sink for some specific configuration,
 * in addition to any other sinks that configuration may recognize.
 *
 * Both `isSinkFor` predicates hold for nothing by default; subclasses override
 * the one that fits.
 */
abstract class AdditionalSink extends DataFlow::Node {
  /**
   * Holds if this data flow node should be considered a sink node for
   * configuration `cfg`.
   */
  predicate isSinkFor(Configuration cfg) { none() }

  /**
   * Holds if this data flow node should be considered a sink node for
   * values labeled with `lbl` under configuration `cfg`.
   */
  predicate isSinkFor(Configuration cfg, FlowLabel lbl) { none() }
}

/**
 * A shared flow step from the value node of an import specifier to the SSA
 * definition node of the variable that the specifier defines.
 */
private class FlowStepThroughImport extends SharedFlowStep {
  override predicate step(DataFlow::Node pred, DataFlow::Node succ) {
    exists(ImportSpecifier spec | pred = DataFlow::valueNode(spec) |
      succ = DataFlow::ssaDefinitionNode(SSA::definition(spec))
    )
  }
}

/**
 * Holds if there is a flow step from `pred` to `succ` described by `summary`
 * under configuration `cfg`, disregarding barrier nodes and barrier guards.
 * Barrier edges that `cfg` declares through its 2-argument `isBarrierEdge`
 * are still honoured for local flow.
 *
 * Summary steps through function calls are not taken into account.
 */
pragma[inline]
private predicate basicFlowStepNoBarrier(
  DataFlow::Node pred, DataFlow::Node succ, PathSummary summary, DataFlow::Configuration cfg
) {
  // Local flow, which may change the flow label
  exists(FlowLabel inLabel, FlowLabel outLabel |
    localFlowStep(pred, succ, cfg, inLabel, outLabel) and
    not cfg.isBarrierEdge(pred, succ)
  |
    summary = MkPathSummary(false, false, inLabel, outLabel)
  )
  or
  // Flow through properties of objects, or through global variables
  summary = PathSummary::level() and
  (
    propertyFlowStep(pred, succ)
    or
    globalFlowStep(pred, succ)
  )
  or
  // Flow into function
  summary = PathSummary::call() and
  callStep(pred, succ)
  or
  // Flow out of function
  summary = PathSummary::return() and
  returnStep(pred, succ)
}

/**
 * Holds if `pred` is relevant for the forward exploratory flow of `cfg` and there
 * is a flow step from `pred` to `succ`, which may be a basic flow step or a step
 * out of or across properties.
 *
 * This predicate is field insensitive (it does not distinguish between `x` and `x.p`)
 * and hence should only be used for purposes of approximation.
 */
pragma[noinline]
private predicate exploratoryFlowStep(
  DataFlow::Node pred, DataFlow::Node succ, DataFlow::Configuration cfg
) {
  isRelevantForward(pred, cfg) and
  (
    basicFlowStepNoBarrier(pred, succ, _, cfg)
    or
    exploratoryLoadStep(pred, succ, cfg)
    or
    isAdditionalLoadStoreStep(pred, succ, _, _, cfg)
    or
    // the remaining three disjuncts taken together over-approximate flow through
    // higher-order calls
    exploratoryCallbackStep(pred, succ)
    or
    pred.(DataFlow::FunctionNode).getAParameter() = succ
    or
    exploratoryBoundInvokeStep(pred, succ)
  )
}

/**
 * Holds if `nd` is a source node of flow labeled `lbl` for configuration `cfg`.
 *
 * Sources declared without a label, either by `cfg` or by an `AdditionalSource`,
 * receive the default source label of `cfg`.
 */
private predicate isSource(DataFlow::Node nd, DataFlow::Configuration cfg, FlowLabel lbl) {
  // explicitly labeled sources
  cfg.isSource(nd, lbl)
  or
  nd.(AdditionalSource).isSourceFor(cfg, lbl)
  or
  // unlabeled sources
  lbl = cfg.getDefaultSourceLabel() and
  (
    cfg.isSource(nd)
    or
    nd.(AdditionalSource).isSourceFor(cfg)
  )
}

/**
 * Holds if `nd` is a sink node of flow labeled `lbl` for configuration `cfg`.
 *
 * Sinks declared without a label, either by `cfg` or by an `AdditionalSink`,
 * accept every standard flow label.
 */
private predicate isSink(DataFlow::Node nd, DataFlow::Configuration cfg, FlowLabel lbl) {
  // explicitly labeled sinks
  cfg.isSink(nd, lbl)
  or
  nd.(AdditionalSink).isSinkFor(cfg, lbl)
  or
  // unlabeled sinks
  exists(StandardFlowLabel std | lbl = std) and
  (
    cfg.isSink(nd)
    or
    nd.(AdditionalSink).isSinkFor(cfg)
  )
}

/**
 * Holds if there is a load step from `pred` to `succ` under configuration `cfg`
 * that reads a property which the forward exploratory flow considers relevant
 * for loads (see `getAForwardRelevantLoadProperty`).
 */
private predicate exploratoryLoadStep(
  DataFlow::Node pred, DataFlow::Node succ, DataFlow::Configuration cfg
) {
  exists(string prop |
    basicLoadStep(pred, succ, prop)
    or
    isAdditionalLoadStep(pred, succ, prop, cfg)
  |
    prop = getAForwardRelevantLoadProperty(cfg)
  )
}

/**
 * Gets a property that is relevant for load steps in the forward exploratory flow
 * of `cfg`: either a property written by a store step out of a forward-relevant
 * node, or a property from `getAPropertyUsedInLoadStore`.
 *
 * This private predicate is only used in `exploratoryLoadStep`, and exists as a separate predicate to give the compiler a hint about join-ordering.
 */
pragma[noinline]
private string getAForwardRelevantLoadProperty(DataFlow::Configuration cfg) {
  result = getAPropertyUsedInLoadStore(cfg)
  or
  exists(DataFlow::Node origin |
    isAdditionalStoreStep(origin, _, result, cfg)
    or
    basicStoreStep(origin, _, result)
  |
    isRelevantForward(origin, cfg)
  )
}

/**
 * Gets the loaded or the stored property of an additional load/store step under
 * `cfg` whose loaded and stored properties differ.
 *
 * The properties from this predicate are used as an allow-list of properties for load/store steps that should always be considered in the exploratory flow.
 */
private string getAPropertyUsedInLoadStore(DataFlow::Configuration cfg) {
  exists(string loaded, string stored |
    loaded != stored and
    isAdditionalLoadStoreStep(_, _, loaded, stored, cfg)
  |
    result = loaded
    or
    result = stored
  )
}

/**
 * Holds if there is a store step from `pred` to `succ` under configuration `cfg`
 * into a property that some load step reads, or that is listed by
 * `getAPropertyUsedInLoadStore`.
 */
pragma[noinline]
private predicate exploratoryForwardStoreStep(
  DataFlow::Node pred, DataFlow::Node succ, DataFlow::Configuration cfg
) {
  exists(string prop |
    basicStoreStep(pred, succ, prop)
    or
    isAdditionalStoreStep(pred, succ, prop, cfg)
  |
    // the stored property must be readable somewhere
    prop = getAPropertyUsedInLoadStore(cfg)
    or
    isAdditionalLoadStep(_, _, prop, cfg)
    or
    basicLoadStep(_, _, prop)
  )
}

/**
 * Holds if there is a store step from `pred` to `succ` under configuration `cfg`
 * that writes a property which the backward exploratory flow considers relevant
 * for stores (see `getABackwardsRelevantStoreProperty`).
 */
private predicate exploratoryBackwardStoreStep(
  DataFlow::Node pred, DataFlow::Node succ, DataFlow::Configuration cfg
) {
  exists(string prop |
    basicStoreStep(pred, succ, prop)
    or
    isAdditionalStoreStep(pred, succ, prop, cfg)
  |
    prop = getABackwardsRelevantStoreProperty(cfg)
  )
}

/**
 * Gets a property that is relevant for store steps in the backward exploratory flow
 * of `cfg`: either a property read by a load step out of a relevant node, or a
 * property from `getAPropertyUsedInLoadStore`.
 *
 * This private predicate is only used in `exploratoryBackwardStoreStep`, and exists as a separate predicate to give the compiler a hint about join-ordering.
 */
pragma[noinline]
private string getABackwardsRelevantStoreProperty(DataFlow::Configuration cfg) {
  result = getAPropertyUsedInLoadStore(cfg)
  or
  exists(DataFlow::Node reader |
    isAdditionalLoadStep(reader, _, result, cfg)
    or
    basicLoadStep(reader, _, result)
  |
    isRelevant(reader, cfg)
  )
}

/**
 * Holds if the forward exploratory flow under `cfg` reaches `nd`.
 *
 * Calls and returns are not matched against each other, so this is only a
 * coarse over-approximation of what is reachable from a source.
 */
private predicate isRelevantForward(DataFlow::Node nd, DataFlow::Configuration cfg) {
  // base case: a source, provided some configuration is live at all
  isLive() and
  isSource(nd, cfg, _)
  or
  // an exploratory flow step leads into `nd`
  exploratoryFlowStep(_, nd, cfg)
  or
  // a store step leads into `nd` from a node that is already forward-relevant
  exists(DataFlow::Node stored |
    exploratoryForwardStoreStep(stored, nd, cfg) and
    isRelevantForward(stored, cfg)
  )
}

/**
 * Holds if the backward exploratory flow under `cfg` reaches `nd`, starting from
 * sinks that were reached by the forward exploratory flow.
 *
 * Calls and returns are not matched against each other, so this is only a
 * coarse over-approximation of the nodes lying on a source-to-sink path.
 */
private predicate isRelevant(DataFlow::Node nd, DataFlow::Configuration cfg) {
  // base case: a forward-relevant sink
  isSink(nd, cfg, _) and
  isRelevantForward(nd, cfg)
  or
  // recursive case: one backward step from a node already known to be relevant
  exists(DataFlow::Node later |
    isRelevantBackStep(later, nd, cfg) and
    isRelevant(later, cfg)
  )
}

/**
 * Holds if the backward exploration may step from `mid` to `nd` under `cfg`, that is,
 * there is an exploratory flow step from `nd` to `mid`, or `nd` is forward-relevant and
 * there is a backward-relevant store step from `nd` to `mid`.
 */
private predicate isRelevantBackStep(
  DataFlow::Node mid, DataFlow::Node nd, DataFlow::Configuration cfg
) {
  exploratoryBackwardStoreStep(nd, mid, cfg) and
  isRelevantForward(nd, cfg)
  or
  exploratoryFlowStep(nd, mid, cfg)
}

/**
 * Holds if the relevant node `pred` is handed to function `f` at invocation `invk`,
 * arriving at `succ` inside `f`.
 *
 * Two forms of input are covered: `pred` is an argument at `invk` and `succ` is the
 * corresponding parameter of `f`; or `pred` is the node of a variable captured by `f`
 * and `succ` is the SSA definition through which `f` captures it.
 *
 * The step is dropped if `succ` is a barrier or `pred -> succ` is a barrier edge under `cfg`.
 */
private predicate callInputStep(
  Function f, DataFlow::Node invk, DataFlow::Node pred, DataFlow::Node succ,
  DataFlow::Configuration cfg
) {
  isRelevant(pred, cfg) and
  (
    // argument to parameter
    argumentPassing(invk, pred, f, succ)
    or
    // captured variable to its definition inside `f`
    exists(LocalVariable captured, SsaDefinition capturedDef |
      calls(invk, f) and
      captures(f, captured, capturedDef) and
      pred = DataFlow::capturedVariableNode(captured) and
      succ = DataFlow::ssaDefinitionNode(capturedDef)
    )
  ) and
  not isBarrierEdge(cfg, pred, succ) and
  not cfg.isBarrier(succ)
}

/**
 * Holds if `input` enters function `f` at invocation `invk` (as an argument or as a
 * captured definition, see `callInputStep`) and from there reaches `nd` under `cfg`
 * using only level steps, along a path summarized by `summary`.
 *
 * The initial step from `input` into `f` does not contribute to `summary`: the node
 * it leads to is associated with the plain level summary.
 */
pragma[nomagic]
private predicate reachableFromInput(
  Function f, DataFlow::Node invk, DataFlow::Node input, DataFlow::Node nd,
  DataFlow::Configuration cfg, PathSummary summary
) {
  // base case: the step into `f`
  summary = PathSummary::level() and
  callInputStep(f, invk, input, nd, cfg) and
  not cfg.isLabeledBarrier(nd, summary.getEndLabel())
  or
  // inductive case: extend an already known path by one level step
  exists(DataFlow::Node prev, PathSummary prefix |
    appendStep(prev, cfg, prefix, nd, summary) and
    reachableFromInput(f, invk, input, prev, cfg, prefix)
  )
}

/**
 * Holds if `pred` has a flow step to `succ` under `cfg` whose summary is a level summary,
 * and `newSummary` is the result of appending that step's summary to `oldSummary`.
 */
pragma[noinline]
private predicate appendStep(
  DataFlow::Node pred, DataFlow::Configuration cfg, PathSummary oldSummary, DataFlow::Node succ,
  PathSummary newSummary
) {
  exists(PathSummary step | step.isLevel() |
    flowStep(pred, cfg, succ, step) and
    newSummary = oldSummary.append(step)
  )
}

/**
 * Holds if `input`, an argument to or a definition captured by some function, flows
 * through that function to `output` under configuration `cfg` along a path summarized
 * by `summary`.
 *
 * Four ways of leaving the function are covered:
 *  - through its return node, to the invocation `output`;
 *  - into a captured-variable node `output` whose captured-variable depth is smaller than
 *    the container depth of the function;
 *  - through its exceptional return node, to the exceptional return `output` of the invocation;
 *  - through an exception in an async function whose call is the operand of an `await`,
 *    to the exception target `output` of that call.
 */
private predicate flowThroughCall(
  DataFlow::Node input, DataFlow::Node output, DataFlow::Configuration cfg, PathSummary summary
) {
  // ordinary return value
  exists(Function callee, DataFlow::FunctionReturnNode returnNode |
    returnNode.getFunction() = callee
  |
    // Do not consider partial calls
    (calls(output, callee) or callsBound(output, callee, _)) and
    reachableFromInput(callee, output, input, returnNode, cfg, summary) and
    not cfg.isLabeledBarrier(output, summary.getEndLabel()) and
    not isLabeledBarrierEdge(cfg, returnNode, output, summary.getEndLabel()) and
    not isBarrierEdge(cfg, returnNode, output)
  )
  or
  // write to a captured variable
  exists(Function callee, LocalVariable captured |
    output = DataFlow::capturedVariableNode(captured) and
    // Only step outwards
    getCapturedVariableDepth(captured) < getContainerDepth(callee)
  |
    reachableFromInput(callee, _, input, output, cfg, summary) and
    not cfg.isLabeledBarrier(output, summary.getEndLabel())
  )
  or
  // exceptional return
  exists(Function callee, DataFlow::Node call, DataFlow::Node excReturn |
    DataFlow::exceptionalFunctionReturnNode(excReturn, callee) and
    DataFlow::exceptionalInvocationReturnNode(output, call.asExpr())
  |
    (calls(call, callee) or callsBound(call, callee, _)) and
    reachableFromInput(callee, call, input, excReturn, cfg, summary) and
    not cfg.isLabeledBarrier(output, summary.getEndLabel()) and
    not isLabeledBarrierEdge(cfg, excReturn, output, summary.getEndLabel()) and
    not isBarrierEdge(cfg, excReturn, output)
  )
  or
  // exception thrown inside an immediately awaited function call.
  exists(DataFlow::FunctionNode asyncFn, DataFlow::Node call, DataFlow::Node thrown |
    call = getAwaitOperand(_) and
    asyncFn.getFunction().isAsync()
  |
    (calls(call, asyncFn.getFunction()) or callsBound(call, asyncFn.getFunction(), _)) and
    asyncFn.getExceptionalReturn() = getThrowTarget(thrown) and
    output = call.asExpr().getExceptionTarget() and
    reachableFromInput(asyncFn.getFunction(), call, input, thrown, cfg, summary)
  )
}

/**
 * Holds if `pred` may be stored in property `prop` of `succ` under configuration `cfg`
 * along a path summarized by `summary`.
 *
 * The store is either a direct (basic or additional) store step out of a relevant node,
 * in which case `summary` is the level summary, or it happens inside a function that is
 * invoked at `succ` (or whose invocation is the operand of the await expression `succ`,
 * for async functions) and that receives `pred` as an input.
 */
pragma[nomagic]
private predicate storeStep(
  DataFlow::Node pred, DataFlow::Node succ, string prop, DataFlow::Configuration cfg,
  PathSummary summary
) {
  // direct store out of a relevant node
  summary = PathSummary::level() and
  isRelevant(pred, cfg) and
  (
    basicStoreStep(pred, succ, prop)
    or
    isAdditionalStoreStep(pred, succ, prop, cfg)
  )
  or
  exists(Function callee, DataFlow::Node stored, DataFlow::Node call |
    // store in an immediately awaited function call
    callee.isAsync() and
    call = getAwaitOperand(succ)
    or
    call = succ and
    not callee.isAsyncOrGenerator()
  |
    // `callee` stores its input `pred` in property `prop` of a value that flows back to the
    // caller, and `call` is an invocation of `callee`
    (
      call instanceof DataFlow::NewNode and
      receiverPropWrite(callee, prop, stored)
      or
      returnedPropWrite(callee, _, prop, stored)
      or
      exists(DataFlow::SourceNode returned |
        returned.flowsToExpr(callee.getAReturnedExpr())
      |
        isAdditionalStoreStep(stored, returned, prop, cfg)
      )
    ) and
    reachableFromInput(callee, call, pred, stored, cfg, summary)
  )
}

/**
 * Gets the data-flow node of the underlying value of the operand of the
 * await expression that `await` represents.
 */
private DataFlow::Node getAwaitOperand(DataFlow::Node await) {
  exists(AwaitExpr e | e = await.asExpr() |
    result = e.getOperand().getUnderlyingValue().flow()
  )
}

/**
 * Holds if `arg` is passed into a function in which property `prop` of the receiving
 * parameter is read, and the value read reaches a return of that function along a path
 * summarized by `summary`, so that it arrives at `succ` under configuration `cfg`.
 */
private predicate parameterPropRead(
  DataFlow::Node arg, string prop, DataFlow::Node succ, DataFlow::Configuration cfg,
  PathSummary summary
) {
  exists(Function callee, DataFlow::Node propRead |
    parameterPropReadStep(_, propRead, prop, cfg, arg, _, callee, succ) and
    reachesReturn(callee, propRead, cfg, summary)
  )
}

/**
 * Holds for the non-recursive part of `parameterPropRead`, outlined so that it can be
 * precomputed: `arg` is an input to `f` at `invk` that arrives at `parm`, `read` loads
 * property `prop` from `parm` (directly, or by an additional load step from a node that
 * `parm` flows to), and `succ` is `invk` itself or, for an async `f`, the await
 * expression whose operand is `invk`.
 */
pragma[noinline]
private predicate parameterPropReadStep(
  DataFlow::SourceNode parm, DataFlow::Node read, string prop, DataFlow::Configuration cfg,
  DataFlow::Node arg, DataFlow::Node invk, Function f, DataFlow::Node succ
) {
  callInputStep(f, invk, arg, parm, cfg) and
  (
    // load from an immediately awaited function call
    f.isAsync() and
    invk = getAwaitOperand(succ)
    or
    invk = succ and
    not f.isAsyncOrGenerator()
  ) and
  (
    exists(DataFlow::Node parmUse | isAdditionalLoadStep(parmUse, read, prop, cfg) |
      parm.flowsTo(parmUse)
    )
    or
    read = parm.getAPropertyRead(prop)
  )
}

/**
 * Holds if `read` reaches a returned expression of `f` under configuration `cfg`
 * along a path of flow steps summarized by `summary`.
 *
 * If `read` is itself returned from `f`, the summary is the level summary.
 */
private predicate reachesReturn(
  Function f, DataFlow::Node read, DataFlow::Configuration cfg, PathSummary summary
) {
  // base case: `read` is a relevant node returned from `f`
  summary = PathSummary::level() and
  returnExpr(f, read, _) and
  isRelevant(read, cfg) and
  callInputStep(f, _, _, _, _) // check that a relevant result can exist.
  or
  // recursive case: take one flow step and continue from there
  exists(DataFlow::Node next, PathSummary head, PathSummary tail |
    reachesReturn(f, next, cfg, tail) and
    flowStep(read, cfg, next, head)
  |
    summary = head.append(tail)
  )
}

/**
 * Holds if property `prop` of `pred` is loaded into `succ`, either by a shared flow
 * step or by an additional load step of configuration `cfg`.
 */
private predicate isAdditionalLoadStep(
  DataFlow::Node pred, DataFlow::Node succ, string prop, DataFlow::Configuration cfg
) {
  cfg.isAdditionalLoadStep(pred, succ, prop)
  or
  SharedFlowStep::loadStep(pred, succ, prop)
}

/**
 * Holds if `pred` is stored in property `prop` of `succ`, either by a shared flow
 * step or by an additional store step of configuration `cfg`.
 */
private predicate isAdditionalStoreStep(
  DataFlow::Node pred, DataFlow::Node succ, string prop, DataFlow::Configuration cfg
) {
  cfg.isAdditionalStoreStep(pred, succ, prop)
  or
  SharedFlowStep::storeStep(pred, succ, prop)
}

/**
 * Holds if property `loadProp` of `pred` is copied into property `storeProp` of `succ`,
 * either by a shared flow step or by an additional load/store step of configuration `cfg`.
 *
 * Both the four-argument forms (which name the two properties separately) and the
 * three-argument forms (for which `loadProp` and `storeProp` are the same) are included.
 */
private predicate isAdditionalLoadStoreStep(
  DataFlow::Node pred, DataFlow::Node succ, string loadProp, string storeProp,
  DataFlow::Configuration cfg
) {
  cfg.isAdditionalLoadStoreStep(pred, succ, loadProp, storeProp)
  or
  SharedFlowStep::loadStoreStep(pred, succ, loadProp, storeProp)
  or
  // single-property variants: the property keeps its name
  storeProp = loadProp and
  (
    cfg.isAdditionalLoadStoreStep(pred, succ, loadProp)
    or
    SharedFlowStep::loadStoreStep(pred, succ, loadProp)
  )
}

/**
 * Holds if property `prop` of `pred` may be loaded into `succ` under configuration `cfg`
 * along a path summarized by `summary`.
 *
 * The load is either a direct (basic or additional) load step out of a relevant node,
 * with the level summary, or a property read performed on a parameter inside a callee
 * (see `parameterPropRead`).
 */
private predicate loadStep(
  DataFlow::Node pred, DataFlow::Node succ, string prop, DataFlow::Configuration cfg,
  PathSummary summary
) {
  parameterPropRead(pred, prop, succ, cfg, summary)
  or
  summary = PathSummary::level() and
  isRelevant(pred, cfg) and
  (
    isAdditionalLoadStep(pred, succ, prop, cfg)
    or
    basicLoadStep(pred, succ, prop)
  )
}

/**
 * Holds if there is flow to `base.startProp`, and `base.startProp` flows to `nd.endProp` under `cfg/summary`.
 *
 * If `onlyRelevantInCall` is true, the `base` object will not be propagated out of return edges, because
 * the flow that originally reached `base.startProp` used a call edge.
 *
 * The predicate has two cases:
 *  - the base case, where `nd` is itself the target of a store step into `startProp`, so that
 *    `base = nd` and `endProp = startProp`;
 *  - the recursive case, where a node `mid` already known to be reachable from the store base is
 *    extended either by an ordinary flow step to `nd` (keeping `endProp`), or by an additional
 *    load/store step that copies property `midProp` of `mid` into property `endProp` of `nd`.
 *
 * NOTE(review): this predicate is annotated `pragma[noopt]`, which presumably is why intermediate
 * values such as `hasCall` and `hasReturn` are bound to explicit variables. Do not reorder the
 * conjuncts without re-checking the resulting join order.
 */
pragma[noopt]
private predicate reachableFromStoreBase(
  string startProp, string endProp, DataFlow::Node base, DataFlow::Node nd,
  DataFlow::Configuration cfg, TPathSummary summary, boolean onlyRelevantInCall
) {
  // Base case: a store step `rhs -> nd.startProp`.
  exists(TPathSummary s1, TPathSummary s2, DataFlow::Node rhs |
    storeStep(rhs, nd, startProp, cfg, s2) and
    endProp = startProp and
    base = nd and
    exists(boolean hasCall, DataFlow::FlowLabel data |
      hasCall = hasCall(s2) and
      data = DataFlow::FlowLabel::data() and
      // Fresh summary built from `false`, the call flag of the store step, and the `data` label twice.
      // NOTE(review): the argument meaning (return flag, call flag, start label, end label) is
      // inferred from usage here - confirm against the definition of `MkPathSummary`.
      summary = MkPathSummary(false, hasCall, data, data)
    )
  |
    // The stored value must itself be reachable: either directly from a source, in which case
    // `onlyRelevantInCall` records whether that path contained a call ...
    reachableFromSource(rhs, cfg, s1) and
    onlyRelevantInCall = hasCall(s1)
    or
    // ... or from an earlier store base, whose `onlyRelevantInCall` flag is inherited.
    reachableFromStoreBase(_, _, _, rhs, cfg, s1, onlyRelevantInCall)
  )
  or
  // Recursive case: extend a known path ending at `mid` by one more step to `nd`.
  exists(DataFlow::Node mid, PathSummary oldSummary, PathSummary newSummary |
    reachableFromStoreBase(startProp, endProp, base, mid, cfg, oldSummary, onlyRelevantInCall) and
    flowStep(mid, cfg, nd, newSummary) and
    exists(boolean hasReturn |
      hasReturn = newSummary.hasReturn() and
      // Reject the step only when it contains a return AND `onlyRelevantInCall` is true.
      onlyRelevantInCall.booleanAnd(hasReturn) = false
    )
    or
    // Alternatively, the property is copied (possibly under a different name) by an
    // additional load/store step, which counts as a level step.
    exists(string midProp |
      reachableFromStoreBase(startProp, midProp, base, mid, cfg, oldSummary, onlyRelevantInCall) and
      isAdditionalLoadStoreStep(mid, nd, midProp, endProp, cfg) and
      newSummary = PathSummary::level()
    )
  |
    summary = oldSummary.appendValuePreserving(newSummary)
  )
}

/** Gets the value of `summary.hasCall()`; a predicate-style wrapper around that member predicate. */
private boolean hasCall(PathSummary summary) {
  summary.hasCall() = result
}

/**
 * Holds if `pred` may flow to `succ` through a property under configuration `cfg`:
 * `pred` is stored in a property of some base object, and `succ` is a load of the
 * corresponding property from a node that the base object reaches (see `storeToLoad`).
 *
 * `summary` is the summary of the steps up to the load, with the summary of the load
 * step appended.
 */
pragma[noinline]
private predicate flowThroughProperty(
  DataFlow::Node pred, DataFlow::Node succ, DataFlow::Configuration cfg, PathSummary summary
) {
  exists(PathSummary beforeLoad, PathSummary loadSummary |
    summary = beforeLoad.append(loadSummary)
  |
    storeToLoad(pred, succ, cfg, beforeLoad, loadSummary)
  )
}

/**
 * Holds if `pred` is stored in a property of some base object under configuration `cfg`,
 * the base object reaches a node from which the (possibly renamed) property is loaded
 * into `succ`, and:
 *
 *  - `oldSummary` summarizes the store step followed, value-preservingly, by the path
 *    of the base object to the node being loaded from;
 *  - `newSummary` summarizes the final load step.
 */
pragma[noinline]
private predicate storeToLoad(
  DataFlow::Node pred, DataFlow::Node succ, DataFlow::Configuration cfg, PathSummary oldSummary,
  PathSummary newSummary
) {
  exists(
    DataFlow::Node writtenBase, string writtenProp, DataFlow::Node readBase, string readProp,
    PathSummary storeSummary, PathSummary baseSummary
  |
    loadStep(readBase, succ, readProp, cfg, newSummary) and
    reachableFromStoreBase(writtenProp, readProp, writtenBase, readBase, cfg, baseSummary, _) and
    storeStep(pred, writtenBase, writtenProp, cfg, storeSummary)
  |
    oldSummary = storeSummary.appendValuePreserving(baseSummary)
  )
}

/**
 * Holds if `arg` and `cb` are both passed to some function, and inside that function
 * (possibly after being forwarded through further calls) `cb` is invoked with `arg` as
 * its `i`th argument.
 *
 * `arg` is tracked under configuration `cfg` along a path summarized by `summary`;
 * `cb` is only tracked locally. Captured-variable nodes are excluded as `arg`.
 */
private predicate summarizedHigherOrderCall(
  DataFlow::Node arg, DataFlow::Node cb, int i, DataFlow::Configuration cfg, PathSummary summary
) {
  exists(
    Function outerFn, DataFlow::InvokeNode innerCall, int argIndex, DataFlow::Node passedOn,
    DataFlow::SourceNode cbParam, PathSummary prefix
  |
    // Captured flow does not need to be summarized - it is handled by the local case in `higherOrderCall`.
    not arg = DataFlow::capturedVariableNode(_) and
    summarizedHigherOrderCallAux(outerFn, arg, passedOn, cfg, prefix, cbParam, innerCall,
      argIndex, cb)
  |
    // direct higher-order call
    innerCall = cbParam.getAnInvocation() and
    i = argIndex and
    summary = prefix
    or
    // indirect higher-order call
    exists(DataFlow::Node forwardedCb, PathSummary rest |
      cbParam.flowsTo(forwardedCb) and
      summarizedHigherOrderCall(passedOn, forwardedCb, i, cfg, rest)
    |
      summary = prefix.append(PathSummary::call()).append(rest)
    )
  )
}

/**
 * Holds if `arg` and `cb` are inputs to `f` at the same invocation, `cb` is passed to
 * parameter `cbParm` of `f`, and inside `f` the value of `arg` reaches `innerArg`, the
 * `j`th argument of invocation `inner`, along a path summarized by `oldSummary`.
 *
 * This is the part shared by both cases of `summarizedHigherOrderCall`.
 */
pragma[noinline]
private predicate summarizedHigherOrderCallAux(
  Function f, DataFlow::Node arg, DataFlow::Node innerArg, DataFlow::Configuration cfg,
  PathSummary oldSummary, DataFlow::SourceNode cbParm, DataFlow::InvokeNode inner, int j,
  DataFlow::Node cb
) {
  exists(DataFlow::Node reachCall, DataFlow::Node passCall |
    innerArg = inner.getArgument(j) and
    // Only track actual parameter flow.
    argumentPassing(passCall, cb, f, cbParm) and
    reachCall = pragma[only_bind_into](passCall) and
    reachableFromInput(f, reachCall, arg, innerArg, cfg, oldSummary)
  )
}

/**
 * Holds if `arg` is passed as the `i`th argument to `callback` through a callback invocation.
 *
 * This can be a summarized call, that is, `arg` and `callback` flow into a call,
 * `f(arg, callback)`, which performs the invocation.
 *
 * Alternatively, the callback can flow into a call `f(callback)` which itself provides the `arg`.
 * That is, `arg` refers to a value defined in `f` or one of its callees.
 *
 * In the latter case, the summary will consist of both a `return` and a `call` step, for the
 * following reasons:
 *
 * - Having `return` in the summary ensures that arguments passed to `f` can't propagate back out
 *   along this edge. That is, `arg` should be defined in `f` or one of its callees, since a
 *   context-dependent value (i.e. a parameter) should not propagate to every callback passed to `f`.
 *   In reality, `arg` may refer to a parameter, but in that case the `return` summary prevents
 *   the edge from ever being used.
 *
 * - Having `call` in the summary ensures that values we propagate into the callback definition
 *   along this edge can't propagate out to other callers of that function through a return
 *   statement.
 *
 * - The flow label mapping of the summary corresponds to the transformation from `arg` to the
 *   invocation of the callback.
 */
pragma[nomagic]
private predicate higherOrderCall(
  DataFlow::Node arg, DataFlow::SourceNode callback, int i, DataFlow::Configuration cfg,
  PathSummary summary
) {
  // Local invocation of a parameter
  exists(DataFlow::InvokeNode call |
    call = callback.(DataFlow::ParameterNode).getACall() and
    arg = call.getArgument(i)
  |
    isRelevant(arg, cfg) and
    summary = PathSummary::call()
  )
  or
  // Summarized call
  exists(DataFlow::Node cbUse | callback.flowsTo(cbUse) |
    summarizedHigherOrderCall(arg, cbUse, i, cfg, summary)
  )
  or
  // Forwarding of the callback parameter (but not the argument).
  exists(DataFlow::SourceNode forwardedTo, DataFlow::Node cbUse, PathSummary inner |
    callback.flowsTo(cbUse) and
    callStep(cbUse, forwardedTo) and
    higherOrderCall(arg, forwardedTo, i, cfg, inner)
  |
    // Prepend a 'return' summary to prevent context-dependent values (i.e. parameters) from using this edge.
    summary = PathSummary::return().append(inner)
  )
}

/**
 * Holds if `pred` is passed as the `i`th argument to some callback through a callback
 * invocation (see `higherOrderCall`), and `succ` is the parameter of the callback that
 * receives it.
 *
 * The callback is either a function node, in which case `succ` is its `i`th parameter,
 * or a bound function reference with `boundArgs` bound arguments, in which case `succ`
 * is parameter `boundArgs + i` of the underlying function.
 *
 * All of this is done under configuration `cfg`; `summary` is the summary of the
 * higher-order call with a `call` step appended.
 */
private predicate flowIntoHigherOrderCall(
  DataFlow::Node pred, DataFlow::Node succ, DataFlow::Configuration cfg, PathSummary summary
) {
  exists(DataFlow::SourceNode cb, int i, PathSummary prefix |
    higherOrderCall(pred, cb, i, cfg, prefix) and
    summary = prefix.append(PathSummary::call())
  |
    // the callback is a function
    succ = cb.(DataFlow::FunctionNode).getParameter(i)
    or
    // the callback is a bound reference to a function
    exists(DataFlow::FunctionNode target, int boundArgs |
      cb = CallGraph::getABoundFunctionReference(target, boundArgs, false) and
      succ = target.getParameter(boundArgs + i)
    )
  )
}

/**
 * Holds if `pred` flows to `succ` in one step under configuration `cfg`, where the
 * step is described by `summary`.
 *
 * A step is a basic flow step out of a relevant node, flow through a call, flow through
 * a property store/load pair, or flow into a higher-order call. Steps into a barrier
 * node, across a barrier edge, or blocked by a labeled barrier for the end label of
 * `summary` are excluded.
 */
private predicate flowStep(
  DataFlow::Node pred, DataFlow::Configuration cfg, DataFlow::Node succ, PathSummary summary
) {
  (
    // Flow into higher-order call
    flowIntoHigherOrderCall(pred, succ, cfg, summary)
    or
    // Flow through a property write/read pair
    flowThroughProperty(pred, succ, cfg, summary)
    or
    // Flow through a function that returns a value that depends on one of its arguments
    // or a captured variable
    flowThroughCall(pred, succ, cfg, summary)
    or
    // Ordinary step out of a relevant node
    isRelevant(pred, cfg) and
    basicFlowStepNoBarrier(pred, succ, summary, cfg)
  ) and
  not cfg.isLabeledBarrier(succ, summary.getEndLabel()) and
  not isLabeledBarrierEdge(cfg, pred, succ, summary.getEndLabel()) and
  not isBarrierEdge(cfg, pred, succ) and
  not cfg.isBarrier(succ)
}

/**
 * Holds if path node `flowsource`, wrapping `source` and `cfg`, reaches sink path node
 * `flowsink`, wrapping `sink` and the same configuration, in zero or more successor steps.
 */
pragma[nomagic]
private predicate flowsTo(
  PathNode flowsource, DataFlow::Node source, SinkPathNode flowsink, DataFlow::Node sink,
  DataFlow::Configuration cfg
) {
  flowsink.wraps(sink, id(cfg)) and
  flowsink = flowsource.getASuccessor*() and
  flowsource.wraps(source, cfg)
}

/**
 * Holds if `nd` can be reached from a source under `cfg` by a sequence of flow steps
 * whose combined summary is `summary`.
 *
 * A source that is not blocked by a barrier is reachable with the level summary for
 * its flow label.
 */
pragma[nomagic]
private predicate reachableFromSource(
  DataFlow::Node nd, DataFlow::Configuration cfg, PathSummary summary
) {
  // recursive case: one more flow step from a reachable node
  exists(DataFlow::Node prev, PathSummary prefix, PathSummary step |
    flowStep(prev, cfg, nd, step) and
    reachableFromSource(prev, cfg, prefix)
  |
    summary = prefix.append(step)
  )
  or
  // base case: `nd` is an unblocked source
  exists(FlowLabel label | summary = PathSummary::level(label) |
    isSource(nd, cfg, label) and
    not cfg.isLabeledBarrier(nd, label) and
    not cfg.isBarrier(nd)
  )
}

/**
 * Holds if `nd` is reachable from a source under `cfg` along a path summarized by
 * `summary`, and a sink can in turn be reached from `nd`.
 *
 * Either `nd` is itself an unblocked sink for the end label of `summary`, or a flow
 * step leads from `nd` to another node that is on a path.
 */
private predicate onPath(DataFlow::Node nd, DataFlow::Configuration cfg, PathSummary summary) {
  // recursive case: a successor of `nd` is on a path
  exists(DataFlow::Node next, PathSummary step |
    onPath(next, id(cfg), summary.append(step)) and
    onPathStep(nd, cfg, summary, step, next)
  )
  or
  // base case: `nd` is a reachable sink
  isSink(nd, cfg, summary.getEndLabel()) and
  reachableFromSource(nd, cfg, summary) and
  not cfg.isLabeledBarrier(nd, summary.getEndLabel()) and
  not cfg.isBarrier(nd)
}

/**
 * Holds if `nd` is reachable from a source under `cfg` with summary `summary`, and
 * there is a flow step from `nd` to `mid` described by `stepSummary`.
 *
 * Outlined from `onPath`, and not inlined, as a join-ordering hint for the optimizer.
 */
pragma[noinline]
private predicate onPathStep(
  DataFlow::Node nd, DataFlow::Configuration cfg, PathSummary summary, PathSummary stepSummary,
  DataFlow::Node mid
) {
  flowStep(nd, id(cfg), mid, stepSummary) and
  reachableFromSource(nd, cfg, summary)
}

/**
 * Holds if some configuration has both a source and a sink.
 */
pragma[noinline]
private predicate isLive() {
  exists(DataFlow::Configuration live | isSink(_, live, _) | isSource(_, live, _))
}

/**
 * The underlying representation of path nodes: source nodes and sink nodes pair a
 * data-flow node with a configuration, while mid nodes additionally carry the summary
 * of the path that reached them.
 */
private newtype TPathNode =
  // a source from which a path starts
  MkSourceNode(DataFlow::Node node, DataFlow::Configuration config) {
    isSourceNode(node, config, _)
  } or
  // a node on a path, together with the summary of the path so far
  MkMidNode(DataFlow::Node node, DataFlow::Configuration config, PathSummary pathSummary) {
    onPath(node, config, pathSummary) and
    isLive()
  } or
  // a sink at which a path ends
  MkSinkNode(DataFlow::Node node, DataFlow::Configuration config) { isSinkNode(node, config, _) }

/**
 * Holds if `nd` is a source for configuration `cfg` with some flow label, `summary` is
 * the level summary for that label, and `nd` is on a path to a sink with that summary.
 */
private predicate isSourceNode(DataFlow::Node nd, DataFlow::Configuration cfg, PathSummary summary) {
  exists(FlowLabel label |
    isSource(nd, cfg, label) and
    summary = PathSummary::level(label)
  |
    onPath(nd, cfg, summary) and
    isLive()
  )
}

/**
 * Holds if `nd` is a sink for configuration `cfg` with the end label of `summary`,
 * and `nd` is on a path from a source with that `summary`.
 */
private predicate isSinkNode(DataFlow::Node nd, DataFlow::Configuration cfg, PathSummary summary) {
  onPath(nd, cfg, summary) and
  isLive() and
  isSink(nd, cfg, summary.getEndLabel())
}

/**
 * Maps `cfg` to itself.
 *
 * This is an auxiliary predicate that is needed in some places to prevent us
 * from computing a cross-product of all path nodes belonging to the same configuration.
 *
 * The `bindingset` annotation names both `cfg` and `result`, and the body compares the
 * two with a pair of inequalities rather than a single equality.
 * NOTE(review): presumably this is so that the optimizer cannot use the predicate to bind
 * one side from the other; do not simplify it to `result = cfg` without re-checking
 * the performance of the callers.
 */
bindingset[cfg, result]
private DataFlow::Configuration id(DataFlow::Configuration cfg) { result >= cfg and cfg >= result }

/**
 * A node in the graph of inter-procedural paths from sources to sinks.
 *
 * Every path node wraps a data-flow node `nd` and a data-flow configuration `cfg`, and
 * comes in one of three kinds:
 *
 *  - source nodes, for a source from which some sink is reachable under the configuration;
 *  - sink nodes, for a sink that is reachable from some source under the configuration;
 *  - mid nodes, which additionally carry a path summary, for a node that is reachable from
 *    some source with that summary and from which a sink is reachable, all under the
 *    configuration.
 */
class PathNode extends TPathNode {
  DataFlow::Node nd;
  Configuration cfg;

  PathNode() {
    this = MkSinkNode(nd, cfg)
    or
    this = MkMidNode(nd, cfg, _)
    or
    this = MkSourceNode(nd, cfg)
  }

  /** Holds if `n` is the data-flow node and `c` the configuration wrapped by this path node. */
  predicate wraps(DataFlow::Node n, DataFlow::Configuration c) { n = nd and c = cfg }

  /** Gets the configuration wrapped by this path node. */
  DataFlow::Configuration getConfiguration() { cfg = result }

  /** Gets the data-flow node wrapped by this path node. */
  DataFlow::Node getNode() { nd = result }

  /** Gets a path node that directly succeeds this one. */
  final PathNode getASuccessor() { getASuccessor(this) = result }

  /** Gets the string representation of this path node, which is that of the wrapped data-flow node. */
  string toString() { nd.toString() = result }

  /**
   * Holds if the wrapped data-flow node is at the specified location.
   * The location spans column `startcolumn` of line `startline` to
   * column `endcolumn` of line `endline` in file `filepath`.
   * For more information, see
   * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
   */
  predicate hasLocationInfo(
    string filepath, int startline, int startcolumn, int endline, int endcolumn
  ) {
    nd.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
  }

  /**
   * Gets a path summary associated with this path node: its own summary for a mid node,
   * the summary of a mid-node successor for a source node, and the summary of a
   * mid-node predecessor for a sink node.
   */
  PathSummary getPathSummary() {
    // source node: take the summary of a mid node that follows it
    this = MkSourceNode(_, _) and
    MkMidNode(_, _, result) = getASuccessor(this)
    or
    // sink node: take the summary of a mid node that precedes it
    this = MkSinkNode(_, _) and
    this = getASuccessor(MkMidNode(_, _, result))
    or
    // mid node: the summary is part of the node
    this = MkMidNode(_, _, result)
  }

  /**
   * Gets the end label of a path summary of this path node.
   */
  FlowLabel getFlowLabel() { result = this.getPathSummary().getEndLabel() }
}

/**
 * Gets the mid node that wraps the same data-flow node and configuration as the source
 * node `src`, with the summary determined by `isSourceNode`.
 */
private MidPathNode initialMidNode(SourcePathNode src) {
  exists(DataFlow::Node node, Configuration config, PathSummary pathSummary |
    isSourceNode(node, config, pathSummary) and
    src = MkSourceNode(node, config)
  |
    result.wraps(node, config, pathSummary)
  )
}

/**
 * Gets a mid node that wraps the same data-flow node and configuration as the sink
 * node `snk`, with a summary for which `isSinkNode` holds.
 */
private MidPathNode finalMidNode(SinkPathNode snk) {
  exists(DataFlow::Node node, Configuration config, PathSummary pathSummary |
    isSinkNode(node, config, pathSummary) and
    snk = MkSinkNode(node, config)
  |
    result.wraps(node, config, pathSummary)
  )
}

/**
 * Holds if `nd` is the mid node for `(predNd, cfg, summary)` and `predNd` has a flow
 * step to `succNd` under `cfg` that is described by `newSummary`.
 *
 * Outlined from `getASuccessor`, and not inlined, to make the intended join order explicit.
 */
pragma[noinline]
private predicate midNodeStep(
  PathNode nd, DataFlow::Node predNd, Configuration cfg, PathSummary summary, DataFlow::Node succNd,
  PathSummary newSummary
) {
  flowStep(predNd, id(cfg), succNd, newSummary) and
  nd = MkMidNode(predNd, cfg, summary)
}

/**
 * Gets a path node that directly succeeds `nd`: the initial mid node of a source node,
 * the mid node reached by one flow step from a mid node, or the sink node of a final
 * mid node.
 */
private PathNode getASuccessor(PathNode nd) {
  // mid node to sink node
  nd = finalMidNode(result)
  or
  // source node to mid node
  result = initialMidNode(nd)
  or
  // mid node to mid node
  exists(Configuration config, PathSummary soFar, DataFlow::Node target, PathSummary step |
    midNodeStep(nd, _, config, soFar, target, step)
  |
    result = MkMidNode(target, id(config), soFar.append(step))
  )
}

/** Gets a direct successor of `nd`, provided that `nd` is a hidden mid node. */
private PathNode getASuccessorIfHidden(PathNode nd) {
  result = getASuccessor(nd) and
  nd.(MidPathNode).isHidden()
}

/**
 * A path node for an intermediate node on a path from a source to a sink.
 *
 * A mid node is a triple `(nd, cfg, summary)`: `nd` is a data-flow node on a path from
 * a source to a sink under configuration `cfg`, and `summary` summarizes that path.
 */
class MidPathNode extends PathNode, MkMidNode {
  PathSummary summary;

  MidPathNode() { MkMidNode(nd, cfg, summary) = this }

  /** Holds if `n`, `c` and `s` are the data-flow node, configuration and summary wrapped by this path node. */
  predicate wraps(DataFlow::Node n, DataFlow::Configuration c, PathSummary s) {
    n = nd and c = cfg and s = summary
  }

  /**
   * Holds if this node should be left out of the paths shown by path explanation
   * queries, unless it is the source or the sink of the path.
   */
  predicate isHidden() {
    // Skip captured variable nodes as the successor will be a use of that variable anyway.
    nd = DataFlow::capturedVariableNode(_)
    or
    // Skip the synthetic 'this' node, as a ThisExpr will be the next node anyway
    nd = DataFlow::thisNode(_)
    or
    // Skip the special return node for functions, as this highlights the entire function (and the returned expr is the previous node).
    nd = any(DataFlow::FunctionNode f).getReturnNode()
    or
    // Skip the exceptional return on functions, as this highlights the entire function.
    nd = any(DataFlow::FunctionNode f).getExceptionalReturn()
    or
    // Skip to the top of big left-leaning string concatenation trees.
    exists(AddExpr self, AddExpr parent |
      nd = self.flow() and
      nd = parent.getAnOperand().flow()
    )
    or
    // Skip SSA definition of parameter as its location coincides with the parameter node
    nd = DataFlow::ssaDefinitionNode(SSA::definition(any(SimpleParameter p)))
    or
    // Skip phi, refinement, and capture nodes
    nd.(DataFlow::SsaDefinitionNode).getSsaVariable().getDefinition() instanceof
      SsaImplicitDefinition
  }
}

/**
 * A path node that represents a flow source.
 */
class SourcePathNode extends PathNode, MkSourceNode {
  SourcePathNode() { MkSourceNode(nd, cfg) = this }
}

/**
 * A path node that represents a flow sink.
 */
class SinkPathNode extends PathNode, MkSinkNode {
  SinkPathNode() { MkSinkNode(nd, cfg) = this }
}

/**
 * Provides the `nodes` and `edges` query predicates that a path-problem query needs
 * in order to include a graph of path explanations.
 */
module PathGraph {
  /** Holds if `nd` is shown in the graph of data flow path explanations, that is, it is not a hidden mid node. */
  query predicate nodes(PathNode nd) { not nd.(MidPathNode).isHidden() }

  /**
   * Gets a visible node that is reached from the visible node `nd` by one successor
   * step followed by any number of steps out of hidden nodes.
   */
  private PathNode succ0(PathNode nd) {
    // both end points must be visible; hidden nodes are only passed through
    nodes(nd) and
    nodes(result) and
    result = getASuccessorIfHidden*(nd.getASuccessor())
  }

  /**
   * Gets a node to which data from `nd` may flow in one step, where outgoing edges from
   * the initial mid node of a source node are also attributed to the source node itself.
   *
   * For example, assume that `src` is a source node for `nd1`, which has `nd2` as its direct
   * successor. Then `succ0` will yield two edges `src` &rarr; `nd1` and `nd1` &rarr; `nd2`,
   * to which `succ1` will add the edge `src` &rarr; `nd2`.
   */
  private PathNode succ1(PathNode nd) {
    exists(PathNode start |
      start = nd
      or
      start = initialMidNode(nd)
    |
      result = succ0(start)
    )
  }

  /**
   * Gets a node to which data from `nd` may flow in one step, where incoming edges into
   * the final mid node of a sink node are also attributed to the sink node itself.
   *
   * For example, assume that `snk` is a sink node for `nd2`, which has `nd1` as its direct
   * predecessor. Then `succ1` will yield two edges `nd1` &rarr; `nd2` and `nd2` &rarr; `snk`,
   * to which `succ2` will add the edge `nd1` &rarr; `snk`.
   */
  private PathNode succ2(PathNode nd) {
    exists(PathNode next | next = succ1(nd) |
      result = next
      or
      next = finalMidNode(result)
    )
  }

  /** Holds if `pred` &rarr; `succ` is an edge in the graph of data flow path explanations. */
  query predicate edges(PathNode pred, PathNode succ) {
    // skip over uninteresting edges
    not pred = finalMidNode(succ) and
    not succ = initialMidNode(pred) and
    succ = succ2(pred)
  }
}

/**
 * Gets the expression of the relevant barrier guard `guard`, or an expression that
 * contains it through any nesting of logical `and` operands and expressions whose
 * underlying value is such an expression.
 */
private Expr getALogicalAndParent(BarrierGuardNode guard) {
  // base case: the guard itself
  result = guard.asExpr() and
  barrierGuardIsRelevant(guard)
  or
  // recursive case: one level further out
  exists(Expr inner | inner = getALogicalAndParent(guard) |
    result.(LogAndExpr).getAnOperand() = inner
    or
    result.getUnderlyingValue() = inner
  )
}

/**
 * Gets an expression that contains `guard` through a chain of logical `or`
 * operands and/or underlying-value wrappers (such as parentheses).
 *
 * The chain starts at the expression of `guard` itself, which is only
 * considered when the guard is relevant.
 */
private Expr getALogicalOrParent(BarrierGuardNode guard) {
  // base case: the guard's own expression
  result = guard.asExpr() and
  barrierGuardIsRelevant(guard)
  or
  // recursive case: step outwards from an expression already in the chain
  exists(Expr inner | inner = getALogicalOrParent(guard) |
    inner = result.(LogOrExpr).getAnOperand()
    or
    inner = result.getUnderlyingValue()
  )
}

/**
 * A `BarrierGuardNode` that controls which data flow
 * configurations it is used in.
 *
 * Note: For performance reasons, all subclasses of this class should be part
 * of the standard library. Override `Configuration::isBarrierGuard`
 * for analysis-specific barrier guards.
 */
abstract class AdditionalBarrierGuardNode extends BarrierGuardNode {
  /**
   * Holds if this barrier guard should be used in the data flow configuration `cfg`.
   */
  abstract predicate appliesTo(Configuration cfg);
}

/**
 * A function that returns the result of a barrier guard.
 *
 * The guard must block an expression that one of the function's parameters
 * flows to, and the guard's expression (or a logical `and`/`or` expression
 * containing it) must be returned, either directly or through an SSA variable.
 */
private class BarrierGuardFunction extends Function {
  /** The parameter of this function that flows to the expression blocked by `guard`. */
  DataFlow::ParameterNode sanitizedParameter;
  /** The relevant barrier guard whose result this function returns. */
  BarrierGuardNode guard;
  /** The outcome of `guard` for which the expression is blocked. */
  boolean guardOutcome;
  /** The label reported by `barrierGuardBlocksExpr` for the blocked expression. */
  string label;
  /** The index of `sanitizedParameter` among this function's parameters. */
  int paramIndex;

  BarrierGuardFunction() {
    barrierGuardIsRelevant(guard) and
    exists(Expr e |
      exists(Expr returnExpr |
        // the guard itself is the returned expression; no constraint on the outcome
        returnExpr = guard.asExpr()
        or
        // ad hoc support for conjunctions:
        getALogicalAndParent(guard) = returnExpr and guardOutcome = true
        or
        // ad hoc support for disjunctions:
        getALogicalOrParent(guard) = returnExpr and guardOutcome = false
      |
        // `returnExpr` is the source of an explicit SSA definition, and a use
        // of the defined variable is returned
        exists(SsaExplicitDefinition ssa |
          ssa.getDef().getSource() = returnExpr and
          ssa.getVariable().getAUse() = getAReturnedExpr()
        )
        or
        // `returnExpr` is returned directly
        returnExpr = getAReturnedExpr()
      ) and
      // the guard blocks an expression `e` that the parameter flows to
      sanitizedParameter.flowsToExpr(e) and
      barrierGuardBlocksExpr(guard, guardOutcome, e, label)
    ) and
    sanitizedParameter.getParameter() = getParameter(paramIndex)
  }

  /**
   * Holds if this function sanitizes argument `e` of call `call`, provided the call evaluates to `outcome`.
   *
   * `lbl` is the label associated with the underlying guard, `e` is the argument
   * of `call` at the index of the sanitized parameter, and `outcome` is the
   * outcome recorded for the underlying guard.
   */
  predicate isBarrierCall(DataFlow::CallNode call, Expr e, boolean outcome, string lbl) {
    exists(DataFlow::Node arg |
      // `arg` is passed by `call` into the sanitized parameter of this function
      argumentPassing(pragma[only_bind_into](call), pragma[only_bind_into](arg),
        pragma[only_bind_into](this), pragma[only_bind_into](sanitizedParameter)) and
      arg.asExpr() = e and
      arg = call.getArgument(paramIndex) and
      outcome = guardOutcome and
      lbl = label
    )
  }

  /**
   * Holds if this function applies to the flow in `cfg`, that is,
   * the underlying guard is a barrier guard of `cfg`.
   */
  predicate appliesTo(Configuration cfg) { cfg.isBarrierGuard(guard) }
}

/**
 * A call that sanitizes an argument.
 *
 * The call targets a `BarrierGuardFunction`, and acts as a barrier guard
 * for the argument that function sanitizes.
 */
private class AdditionalBarrierGuardCall extends AdditionalBarrierGuardNode, DataFlow::CallNode {
  /** A barrier guard function for which this call is a barrier call. */
  BarrierGuardFunction f;

  AdditionalBarrierGuardCall() { f.isBarrierCall(this, _, _, _) }

  // only barrier calls whose label is the empty string are reported here
  override predicate blocks(boolean outcome, Expr e) { f.isBarrierCall(this, e, outcome, "") }

  /**
   * Holds if this call is a barrier call of `f` for argument `e` with label `label`,
   * provided the call evaluates to `outcome`.
   */
  predicate internalBlocksLabel(boolean outcome, Expr e, DataFlow::FlowLabel label) {
    f.isBarrierCall(this, e, outcome, label)
  }

  // this call applies to exactly the configurations that `f` applies to
  override predicate appliesTo(Configuration cfg) { f.appliesTo(cfg) }
}

/**
 * A guard node for a variable in a negative condition, such as `x` in `if(!x)`.
 *
 * Such nodes can be included in the `isBarrier` predicate of a data-flow
 * configuration to block flow through these checks.
 */
class VarAccessBarrier extends DataFlow::Node {
  VarAccessBarrier() {
    exists(SsaRefinementNode ref, ConditionGuardNode cond |
      // the refinement is guarded by a plain variable access evaluating to `false`
      cond = ref.getGuard() and
      cond.getOutcome() = false and
      cond.getTest() instanceof VarAccess
    |
      this = DataFlow::ssaDefinitionNode(ref)
    )
  }
}

/**
 * Holds if there is a path without unmatched return steps from `source` to `sink`.
 *
 * More precisely, `sink` must be a direct successor of an intermediate node that
 * is reachable from `source` in zero or more successor steps, and whose path
 * summary has `hasReturn()` equal to `false`.
 */
predicate hasPathWithoutUnmatchedReturn(SourcePathNode source, SinkPathNode sink) {
  exists(MidPathNode last | last.getPathSummary().hasReturn() = false |
    last = source.getASuccessor*() and
    last.getASuccessor() = sink
  )
}