Sync dataflow libraries again

2026-01-29 22:32:58 +01:00 · 2021-12-07 07:00:06 -05:00
parent 1a299d2e09
commit 01bfbde9ae
7 changed files with 1333 additions and 549 deletions
--- a/ql/lib/semmle/go/dataflow/internal/DataFlowImpl.qll
+++ b/ql/lib/semmle/go/dataflow/internal/DataFlowImpl.qll
--- a/ql/lib/semmle/go/dataflow/internal/DataFlowImpl2.qll
+++ b/ql/lib/semmle/go/dataflow/internal/DataFlowImpl2.qll
--- a/ql/lib/semmle/go/dataflow/internal/DataFlowImplCommon.qll
+++ b/ql/lib/semmle/go/dataflow/internal/DataFlowImplCommon.qll
@@ -2,6 +2,42 @@ private import DataFlowImplSpecific::Private
 private import DataFlowImplSpecific::Public
 import Cached

+module DataFlowImplCommonPublic { // declares `FlowFeature` and one subclass per `TFlowFeature` branch
+  private newtype TFlowFeature =
+    TFeatureHasSourceCallContext() or
+    TFeatureHasSinkCallContext() or
+    TFeatureEqualSourceSinkCallContext()
+
+  /** A flow configuration feature for use in `Configuration::getAFeature()`. */
+  class FlowFeature extends TFlowFeature {
+    string toString() { none() } // no result here; each feature subclass below overrides it
+  }
+
+  /**
+   * A flow configuration feature that implies that sources have some existing
+   * call context.
+   */
+  class FeatureHasSourceCallContext extends FlowFeature, TFeatureHasSourceCallContext {
+    override string toString() { result = "FeatureHasSourceCallContext" }
+  }
+
+  /**
+   * A flow configuration feature that implies that sinks have some existing
+   * call context.
+   */
+  class FeatureHasSinkCallContext extends FlowFeature, TFeatureHasSinkCallContext {
+    override string toString() { result = "FeatureHasSinkCallContext" }
+  }
+
+  /**
+   * A flow configuration feature that implies that source-sink pairs have some
+   * shared existing call context.
+   */
+  class FeatureEqualSourceSinkCallContext extends FlowFeature, TFeatureEqualSourceSinkCallContext {
+    override string toString() { result = "FeatureEqualSourceSinkCallContext" }
+  }
+}
+
 /**
 * The cost limits for the `AccessPathFront` to `AccessPathApprox` expansion.
 *
@@ -251,7 +287,7 @@ private module Cached {
  predicate forceCachingInSameStage() { any() }

  cached
-  predicate nodeEnclosingCallable(Node n, DataFlowCallable c) { c = n.getEnclosingCallable() }
+  predicate nodeEnclosingCallable(Node n, DataFlowCallable c) { c = nodeGetEnclosingCallable(n) }

  cached
  predicate callEnclosingCallable(DataFlowCall call, DataFlowCallable c) {
@@ -316,9 +352,7 @@ private module Cached {
  }

  cached
-  predicate parameterNode(Node n, DataFlowCallable c, int i) {
-    n.(ParameterNode).isParameterOf(c, i)
-  }
+  predicate parameterNode(Node p, DataFlowCallable c, int pos) { isParameterNode(p, c, pos) }

  cached
  predicate argumentNode(Node n, DataFlowCall call, int pos) {
@@ -801,6 +835,9 @@ private module Cached {
    exists(Node n | getNodeEnclosingCallable(n) = callable | isUnreachableInCallCached(n, call))
  }

+  cached // cached alias: holds exactly when `allowParameterReturnInSelf(p)` holds
+  predicate allowParameterReturnInSelfCached(ParamNode p) { allowParameterReturnInSelf(p) }
+
  cached
  newtype TCallContext =
    TAnyCallContext() or
@@ -1236,6 +1273,13 @@ class TypedContent extends MkTypedContent {

  /** Gets a textual representation of this content. */
  string toString() { result = c.toString() }
+
+  /**
+   * Holds if access paths with this `TypedContent` at their head should always
+   * be tracked at high precision. This disables adaptive access path precision
+   * for such access paths.
+   */
+  predicate forceHighPrecision() { forceHighPrecision(c) } // delegates to the non-member predicate, applied to `c`
 }

 /**
--- a/ql/lib/semmle/go/dataflow/internal/DataFlowImplConsistency.qll
+++ b/ql/lib/semmle/go/dataflow/internal/DataFlowImplConsistency.qll
@@ -0,0 +1,196 @@
+/**
+ * Provides consistency queries for checking invariants in the language-specific
+ * data-flow classes and predicates.
+ */
+
+private import DataFlowImplSpecific::Private
+private import DataFlowImplSpecific::Public
+private import tainttracking1.TaintTrackingParameter::Private
+private import tainttracking1.TaintTrackingParameter::Public
+
+module Consistency {
+  private newtype TConsistencyConfiguration = MkConsistencyConfiguration() // single-branch type backing `ConsistencyConfiguration`
+
+  /** A class for configuring the consistency queries. By default, no node is excluded. */
+  class ConsistencyConfiguration extends TConsistencyConfiguration {
+    string toString() { none() }
+
+    /** Holds if `n` should be excluded from the consistency test `postWithInFlow`. */
+    predicate postWithInFlowExclude(Node n) { none() }
+
+    /** Holds if `n` should be excluded from the consistency test `argHasPostUpdate`. */
+    predicate argHasPostUpdateExclude(ArgumentNode n) { none() }
+  }
+
+  private class RelevantNode extends Node { // an argument, parameter, return or out node, or an endpoint of some step
+    RelevantNode() {
+      this instanceof ArgumentNode or
+      this instanceof ParameterNode or
+      this instanceof ReturnNode or
+      this = getAnOutNode(_, _) or
+      simpleLocalFlowStep(this, _) or
+      simpleLocalFlowStep(_, this) or
+      jumpStep(this, _) or
+      jumpStep(_, this) or
+      storeStep(this, _, _) or
+      storeStep(_, _, this) or
+      readStep(this, _, _) or
+      readStep(_, _, this) or
+      defaultAdditionalTaintStep(this, _) or
+      defaultAdditionalTaintStep(_, this)
+    }
+  }
+
+  query predicate uniqueEnclosingCallable(Node n, string msg) {
+    exists(int c |
+      n instanceof RelevantNode and
+      c = count(nodeGetEnclosingCallable(n)) and
+      c != 1 and // both zero and several enclosing callables are reported
+      msg = "Node should have one enclosing callable but has " + c + "."
+    )
+  }
+
+  query predicate uniqueType(Node n, string msg) {
+    exists(int c |
+      n instanceof RelevantNode and
+      c = count(getNodeType(n)) and
+      c != 1 and // both zero and several types are reported
+      msg = "Node should have one type but has " + c + "."
+    )
+  }
+
+  query predicate uniqueNodeLocation(Node n, string msg) { // applies to every node, not only `RelevantNode`s
+    exists(int c |
+      c =
+        count(string filepath, int startline, int startcolumn, int endline, int endcolumn |
+          n.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+        ) and
+      c != 1 and
+      msg = "Node should have one location but has " + c + "."
+    )
+  }
+
+  query predicate missingLocation(string msg) {
+    exists(int c |
+      c =
+        strictcount(Node n | // `strictcount` has no result when nothing matches, so no row is reported then
+          not exists(string filepath, int startline, int startcolumn, int endline, int endcolumn |
+            n.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+          )
+        ) and
+      msg = "Nodes without location: " + c
+    )
+  }
+
+  query predicate uniqueNodeToString(Node n, string msg) {
+    exists(int c |
+      c = count(n.toString()) and
+      c != 1 and
+      msg = "Node should have one toString but has " + c + "."
+    )
+  }
+
+  query predicate missingToString(string msg) {
+    exists(int c |
+      c = strictcount(Node n | not exists(n.toString())) and
+      msg = "Nodes without toString: " + c
+    )
+  }
+
+  query predicate parameterCallable(ParameterNode p, string msg) {
+    exists(DataFlowCallable c | isParameterNode(p, c, _) and c != nodeGetEnclosingCallable(p)) and
+    msg = "Callable mismatch for parameter."
+  }
+
+  query predicate localFlowIsLocal(Node n1, Node n2, string msg) {
+    simpleLocalFlowStep(n1, n2) and
+    nodeGetEnclosingCallable(n1) != nodeGetEnclosingCallable(n2) and
+    msg = "Local flow step does not preserve enclosing callable."
+  }
+
+  private DataFlowType typeRepr() { result = getNodeType(_) } // any type that some node has
+
+  query predicate compatibleTypesReflexive(DataFlowType t, string msg) {
+    t = typeRepr() and
+    not compatibleTypes(t, t) and
+    msg = "Type compatibility predicate is not reflexive."
+  }
+
+  query predicate unreachableNodeCCtx(Node n, DataFlowCall call, string msg) {
+    isUnreachableInCall(n, call) and
+    exists(DataFlowCallable c |
+      c = nodeGetEnclosingCallable(n) and
+      not viableCallable(call) = c // the callable containing `n` is not among `viableCallable(call)`
+    ) and
+    msg = "Call context for isUnreachableInCall is inconsistent with call graph."
+  }
+
+  query predicate localCallNodes(DataFlowCall call, Node n, string msg) {
+    (
+      n = getAnOutNode(call, _) and
+      msg = "OutNode and call does not share enclosing callable."
+      or
+      n.(ArgumentNode).argumentOf(call, _) and
+      msg = "ArgumentNode and call does not share enclosing callable."
+    ) and
+    nodeGetEnclosingCallable(n) != call.getEnclosingCallable() // applies to both disjuncts above
+  }
+
+  // This predicate helps the compiler forget that in some languages
+  // it is impossible for a result of `getPreUpdateNode` to be an
+  // instance of `PostUpdateNode`.
+  private Node getPre(PostUpdateNode n) {
+    result = n.getPreUpdateNode()
+    or
+    none()
+  }
+
+  query predicate postIsNotPre(PostUpdateNode n, string msg) {
+    getPre(n) = n and
+    msg = "PostUpdateNode should not equal its pre-update node."
+  }
+
+  query predicate postHasUniquePre(PostUpdateNode n, string msg) {
+    exists(int c |
+      c = count(n.getPreUpdateNode()) and
+      c != 1 and
+      msg = "PostUpdateNode should have one pre-update node but has " + c + "."
+    )
+  }
+
+  query predicate uniquePostUpdate(Node n, string msg) {
+    1 < strictcount(PostUpdateNode post | post.getPreUpdateNode() = n) and // that is, at least two
+    msg = "Node has multiple PostUpdateNodes."
+  }
+
+  query predicate postIsInSameCallable(PostUpdateNode n, string msg) {
+    nodeGetEnclosingCallable(n) != nodeGetEnclosingCallable(n.getPreUpdateNode()) and
+    msg = "PostUpdateNode does not share callable with its pre-update node."
+  }
+
+  private predicate hasPost(Node n) { exists(PostUpdateNode post | post.getPreUpdateNode() = n) }
+
+  query predicate reverseRead(Node n, string msg) {
+    exists(Node n2 | readStep(n, _, n2) and hasPost(n2) and not hasPost(n)) and
+    msg = "Origin of readStep is missing a PostUpdateNode."
+  }
+
+  query predicate argHasPostUpdate(ArgumentNode n, string msg) {
+    not hasPost(n) and
+    not any(ConsistencyConfiguration c).argHasPostUpdateExclude(n) and
+    msg = "ArgumentNode is missing PostUpdateNode."
+  }
+
+  // This predicate helps the compiler forget that in some languages
+  // it is impossible for a `PostUpdateNode` to be the target of
+  // `simpleLocalFlowStep`.
+  private predicate isPostUpdateNode(Node n) { n instanceof PostUpdateNode or none() }
+
+  query predicate postWithInFlow(Node n, string msg) {
+    isPostUpdateNode(n) and
+    not clearsContent(n, _) and // nodes that clear some content are exempt
+    simpleLocalFlowStep(_, n) and
+    not any(ConsistencyConfiguration c).postWithInFlowExclude(n) and
+    msg = "PostUpdateNode should not be the target of local flow."
+  }
+}
--- a/ql/lib/semmle/go/dataflow/internal/FlowSummaryImpl.qll
+++ b/ql/lib/semmle/go/dataflow/internal/FlowSummaryImpl.qll
@@ -85,6 +85,9 @@ module Public {
    /** Holds if this stack contains summary component `c`. */
    predicate contains(SummaryComponent c) { c = this.drop(_).head() }

+    /** Gets the bottom element of this stack, i.e. the head after dropping `length() - 1` elements. */
+    SummaryComponent bottom() { result = this.drop(this.length() - 1).head() }
+
    /** Gets a textual representation of this stack. */
    string toString() {
      exists(SummaryComponent head, SummaryComponentStack tail |
@@ -124,6 +127,38 @@ module Public {
    SummaryComponentStack return(ReturnKind rk) { result = singleton(SummaryComponent::return(rk)) }
  }

+  private predicate noComponentSpecificCsv(SummaryComponent sc) { // negation wrapped in its own predicate; used by `getComponentCsv`
+    not exists(getComponentSpecificCsv(sc))
+  }
+
+  /** Gets a textual representation of component `sc` used for flow summaries. */
+  private string getComponentCsv(SummaryComponent sc) {
+    result = getComponentSpecificCsv(sc)
+    or
+    noComponentSpecificCsv(sc) and // the generic renderings below apply only without a specific one
+    (
+      exists(int i | sc = TParameterSummaryComponent(i) and result = "Parameter[" + i + "]")
+      or
+      exists(int i | sc = TArgumentSummaryComponent(i) and result = "Argument[" + i + "]")
+      or
+      sc = TReturnSummaryComponent(getReturnValueKind()) and result = "ReturnValue"
+    )
+  }
+
+  /** Gets a textual representation of stack `stack` used for flow summaries. */
+  string getComponentStackCsv(SummaryComponentStack stack) {
+    exists(SummaryComponent head, SummaryComponentStack tail |
+      head = stack.head() and
+      tail = stack.tail() and
+      result = getComponentCsv(head) + " of " + getComponentStackCsv(tail) // head first, then the rest
+    )
+    or
+    exists(SummaryComponent c | // base case: a single-component stack
+      stack = TSingletonSummaryComponentStack(c) and
+      result = getComponentCsv(c)
+    )
+  }
+
  /**
   * A class that exists for QL technical reasons only (the IPA type used
   * to represent component stacks needs to be bounded).
@@ -186,10 +221,19 @@ module Private {
    TArgumentSummaryComponent(int i) { parameterPosition(i) } or
    TReturnSummaryComponent(ReturnKind rk)

+  private TSummaryComponent thisParam() {
+    result = TParameterSummaryComponent(instanceParameterPosition()) // parameter component at the instance-parameter position
+  }
+
  newtype TSummaryComponentStack =
    TSingletonSummaryComponentStack(SummaryComponent c) or
    TConsSummaryComponentStack(SummaryComponent head, SummaryComponentStack tail) {
      tail.(RequiredSummaryComponentStack).required(head)
+      or
+      tail.(RequiredSummaryComponentStack).required(TParameterSummaryComponent(_)) and
+      head = thisParam()
+      or
+      derivedFluentFlowPush(_, _, _, head, tail, _)
    }

  pragma[nomagic]
@@ -198,20 +242,130 @@ module Private {
    boolean preservesValue
  ) {
    c.propagatesFlow(input, output, preservesValue)
+    or
+    // observe side effects of callbacks on input arguments
+    c.propagatesFlow(output, input, preservesValue) and
+    preservesValue = true and
+    isCallbackParameter(input) and
+    isContentOfArgument(output, _)
+    or
+    // flow from the receiver of a callback into the instance-parameter
+    exists(SummaryComponentStack s, SummaryComponentStack callbackRef |
+      c.propagatesFlow(s, _, _) or c.propagatesFlow(_, s, _)
+    |
+      callbackRef = s.drop(_) and
+      (isCallbackParameter(callbackRef) or callbackRef.head() = TReturnSummaryComponent(_)) and
+      input = callbackRef.tail() and
+      output = TConsSummaryComponentStack(thisParam(), input) and
+      preservesValue = true
+    )
+    or
+    exists(SummaryComponentStack arg, SummaryComponentStack return |
+      derivedFluentFlow(c, input, arg, return, preservesValue)
+    |
+      arg.length() = 1 and
+      output = return
+      or
+      exists(SummaryComponent head, SummaryComponentStack tail |
+        derivedFluentFlowPush(c, input, arg, head, tail, 0) and
+        output = SummaryComponentStack::push(head, tail)
+      )
+    )
+    or
+    // Chain together summaries where values get passed into callbacks along the way
+    exists(SummaryComponentStack mid, boolean preservesValue1, boolean preservesValue2 |
+      c.propagatesFlow(input, mid, preservesValue1) and
+      c.propagatesFlow(mid, output, preservesValue2) and
+      mid.drop(mid.length() - 2) =
+        SummaryComponentStack::push(TParameterSummaryComponent(_),
+          SummaryComponentStack::singleton(TArgumentSummaryComponent(_))) and
+      preservesValue = preservesValue1.booleanAnd(preservesValue2)
+    )
+  }
+
+  /**
+   * Holds if `c` has a flow summary from `input` to `arg`, where `arg`
+   * writes to (contents of) the `i`th argument, and `c` has a
+   * value-preserving flow summary from the `i`th argument to a return value
+   * (`return`).
+   *
+   * In such a case, we derive flow from `input` to (contents of) the return
+   * value.
+   *
+   * As an example, this simplifies modeling of fluent methods:
+   * given `StringBuilder.append(x)` with a specified value flow from qualifier to
+   * return value and taint flow from argument 0 to the qualifier, this
+   * allows us to infer taint flow from argument 0 to the return value.
+   */
+  pragma[nomagic]
+  private predicate derivedFluentFlow(
+    SummarizedCallable c, SummaryComponentStack input, SummaryComponentStack arg,
+    SummaryComponentStack return, boolean preservesValue
+  ) {
+    exists(int i |
+      summary(c, input, arg, preservesValue) and
+      isContentOfArgument(arg, i) and
+      summary(c, SummaryComponentStack::singleton(TArgumentSummaryComponent(i)), return, true) and
+      return.bottom() = TReturnSummaryComponent(_) // `return` must bottom out in a return component
+    )
+  }
+
+  pragma[nomagic] // `head` is the component of `arg` at index `i`; `tail` is the stack it is to be pushed onto
+  private predicate derivedFluentFlowPush(
+    SummarizedCallable c, SummaryComponentStack input, SummaryComponentStack arg,
+    SummaryComponent head, SummaryComponentStack tail, int i
+  ) {
+    derivedFluentFlow(c, input, arg, tail, _) and // base case: `tail` is the derived return stack
+    head = arg.drop(i).head() and
+    i = arg.length() - 2 // the component directly above the bottom element of `arg`
+    or
+    exists(SummaryComponent head0, SummaryComponentStack tail0 |
+      derivedFluentFlowPush(c, input, arg, head0, tail0, i + 1) and // recursive case: continue from index `i + 1`
+      head = arg.drop(i).head() and
+      tail = SummaryComponentStack::push(head0, tail0) // the stack built so far
+    )
+  }
+
+  private predicate isCallbackParameter(SummaryComponentStack s) {
+    s.head() = TParameterSummaryComponent(_) and exists(s.tail()) // a parameter component with a tail beneath it
+  }
+
+  private predicate isContentOfArgument(SummaryComponentStack s, int i) {
+    s.head() = TContentSummaryComponent(_) and isContentOfArgument(s.tail(), i) // any number of content components on top ...
+    or
+    s = TSingletonSummaryComponentStack(TArgumentSummaryComponent(i)) // ... of the singleton stack for argument `i`
+  }
+
+  private predicate outputState(SummarizedCallable c, SummaryComponentStack s) {
+    summary(c, _, s, _) // the output stack of a summary of `c`
+    or
+    exists(SummaryComponentStack out |
+      outputState(c, out) and
+      out.head() = TContentSummaryComponent(_) and
+      s = out.tail() // the rest of an output state whose head is a content component
+    )
+    or
+    // Add the argument node corresponding to the requested post-update node.
+    inputState(c, s) and isCallbackParameter(s)
+  }
+
+  private predicate inputState(SummarizedCallable c, SummaryComponentStack s) {
+    summary(c, s, _, _)
+    or
+    exists(SummaryComponentStack inp | inputState(c, inp) and s = inp.tail())
+    or
+    exists(SummaryComponentStack out |
+      outputState(c, out) and
+      out.head() = TParameterSummaryComponent(_) and
+      s = out.tail()
+    )
  }

  private newtype TSummaryNodeState =
-    TSummaryNodeInputState(SummaryComponentStack s) {
-      exists(SummaryComponentStack input |
-        summary(_, input, _, _) and
-        s = input.drop(_)
-      )
-    } or
-    TSummaryNodeOutputState(SummaryComponentStack s) {
-      exists(SummaryComponentStack output |
-        summary(_, _, output, _) and
-        s = output.drop(_)
-      )
+    TSummaryNodeInputState(SummaryComponentStack s) { inputState(_, s) } or
+    TSummaryNodeOutputState(SummaryComponentStack s) { outputState(_, s) } or
+    TSummaryNodeClearsContentState(int i, boolean post) {
+      any(SummarizedCallable sc).clearsContent(i, _) and post in [false, true]
    }

  /**
@@ -238,20 +392,14 @@ module Private {
    pragma[nomagic]
    predicate isInputState(SummarizedCallable c, SummaryComponentStack s) {
      this = TSummaryNodeInputState(s) and
-      exists(SummaryComponentStack input |
-        summary(c, input, _, _) and
-        s = input.drop(_)
-      )
+      inputState(c, s)
    }

    /** Holds if this state is a valid output state for `c`. */
    pragma[nomagic]
    predicate isOutputState(SummarizedCallable c, SummaryComponentStack s) {
      this = TSummaryNodeOutputState(s) and
-      exists(SummaryComponentStack output |
-        summary(c, _, output, _) and
-        s = output.drop(_)
-      )
+      outputState(c, s)
    }

    /** Gets a textual representation of this state. */
@@ -265,6 +413,12 @@ module Private {
        this = TSummaryNodeOutputState(s) and
        result = "to write: " + s
      )
+      or
+      exists(int i, boolean post, string postStr |
+        this = TSummaryNodeClearsContentState(i, post) and
+        (if post = true then postStr = " (post)" else postStr = "") and
+        result = "clear: " + i + postStr
+      )
    }
  }

@@ -286,6 +440,11 @@ module Private {
    not parameterReadState(c, state, _)
    or
    state.isOutputState(c, _)
+    or
+    exists(int i |
+      c.clearsContent(i, _) and
+      state = TSummaryNodeClearsContentState(i, _)
+    )
  }

  pragma[noinline]
@@ -321,6 +480,8 @@ module Private {
    parameterReadState(c, _, i)
    or
    isParameterPostUpdate(_, c, i)
+    or
+    c.clearsContent(i, _)
  }

  private predicate callbackOutput(
@@ -331,19 +492,12 @@ module Private {
    receiver = summaryNodeInputState(c, s.drop(1))
  }

-  private Node pre(Node post) {
-    summaryPostUpdateNode(post, result)
-    or
-    not summaryPostUpdateNode(post, _) and
-    result = post
-  }
-
  private predicate callbackInput(
    SummarizedCallable c, SummaryComponentStack s, Node receiver, int i
  ) {
    any(SummaryNodeState state).isOutputState(c, s) and
    s.head() = TParameterSummaryComponent(i) and
-    receiver = pre(summaryNodeOutputState(c, s.drop(1)))
+    receiver = summaryNodeInputState(c, s.drop(1))
  }

  /** Holds if a call targeting `receiver` should be synthesized inside `c`. */
@@ -395,11 +549,17 @@ module Private {
        or
        exists(int i | head = TParameterSummaryComponent(i) |
          result =
-            getCallbackParameterType(getNodeType(summaryNodeOutputState(pragma[only_bind_out](c),
+            getCallbackParameterType(getNodeType(summaryNodeInputState(pragma[only_bind_out](c),
                  s.drop(1))), i)
        )
      )
    )
+    or
+    exists(SummarizedCallable c, int i, ParamNode p |
+      n = summaryNode(c, TSummaryNodeClearsContentState(i, false)) and
+      p.isParameterOf(c, i) and
+      result = getNodeType(p)
+    )
  }

  /** Holds if summary node `out` contains output of kind `rk` from call `c`. */
@@ -421,10 +581,19 @@ module Private {
  }

  /** Holds if summary node `post` is a post-update node with pre-update node `pre`. */
-  predicate summaryPostUpdateNode(Node post, ParamNode pre) {
+  predicate summaryPostUpdateNode(Node post, Node pre) {
    exists(SummarizedCallable c, int i |
      isParameterPostUpdate(post, c, i) and
-      pre.isParameterOf(c, i)
+      pre.(ParamNode).isParameterOf(c, i)
+      or
+      pre = summaryNode(c, TSummaryNodeClearsContentState(i, false)) and
+      post = summaryNode(c, TSummaryNodeClearsContentState(i, true))
+    )
+    or
+    exists(SummarizedCallable callable, SummaryComponentStack s |
+      callbackInput(callable, s, _, _) and
+      pre = summaryNodeOutputState(callable, s) and
+      post = summaryNodeInputState(callable, s)
    )
  }

@@ -436,6 +605,22 @@ module Private {
    )
  }

+  /**
+   * Holds if flow is allowed to pass from parameter `p` to a return
+   * node and back out to `p`.
+   */
+  predicate summaryAllowParameterReturnInSelf(ParamNode p) {
+    exists(SummarizedCallable c, int i | p.isParameterOf(c, i) |
+      c.clearsContent(i, _) // `c` clears some content at position `i`
+      or // or `c` has a summary whose input and output stacks both bottom out in argument `i`
+      exists(SummaryComponentStack inputContents, SummaryComponentStack outputContents |
+        summary(c, inputContents, outputContents, _) and
+        inputContents.bottom() = pragma[only_bind_into](TArgumentSummaryComponent(i)) and
+        outputContents.bottom() = pragma[only_bind_into](TArgumentSummaryComponent(i))
+      )
+    )
+  }
+
  /** Provides a compilation of flow summaries to atomic data-flow steps. */
  module Steps {
    /**
@@ -456,13 +641,11 @@ module Private {
        preservesValue = false and not summary(c, inputContents, outputContents, true)
      )
      or
-      // If flow through a method updates a parameter from some input A, and that
-      // parameter also is returned through B, then we'd like a combined flow from A
-      // to B as well. As an example, this simplifies modeling of fluent methods:
-      // for `StringBuilder.append(x)` with a specified value flow from qualifier to
-      // return value and taint flow from argument 0 to the qualifier, then this
-      // allows us to infer taint flow from argument 0 to the return value.
-      summaryPostUpdateNode(pred, succ) and preservesValue = true
+      exists(SummarizedCallable c, int i |
+        pred.(ParamNode).isParameterOf(c, i) and
+        succ = summaryNode(c, TSummaryNodeClearsContentState(i, _)) and
+        preservesValue = true
+      )
    }

    /**
@@ -490,10 +673,39 @@ module Private {
    }

    /**
-     * Holds if values stored inside content `c` are cleared when passed as
-     * input of type `input` in `call`.
+     * Holds if values stored inside content `c` are cleared at `n`. `n` is a
+     * synthesized summary node, so in order for values to be cleared at calls
+     * to the relevant method, it is important that flow does not pass over
+     * the argument, either via use-use flow or def-use flow.
+     *
+     * Example:
+     *
+     * ```
+     * a.b = taint;
+     * a.clearB(); // assume we have a flow summary for `clearB` that clears `b` on the qualifier
+     * sink(a.b);
+     * ```
+     *
+     * In the above, flow should not pass from `a` on the first line (or the second
+     * line) to `a` on the third line. Instead, there will be synthesized flow from
+     * `a` on line 2 to the post-update node for `a` on that line (via an intermediate
+     * node where field `b` is cleared).
     */
-    predicate summaryClearsContent(ArgNode arg, Content c) {
+    predicate summaryClearsContent(Node n, Content c) {
+      exists(SummarizedCallable sc, int i |
+        n = summaryNode(sc, TSummaryNodeClearsContentState(i, true)) and
+        sc.clearsContent(i, c)
+      )
+    }
+
+    /**
+     * Holds if values stored inside content `c` are cleared inside a
+     * callable to which `arg` is an argument.
+     *
+     * In such cases, it is important to prevent use-use flow out of
+     * `arg` (see comment for `summaryClearsContent`).
+     */
+    predicate summaryClearsContentArg(ArgNode arg, Content c) {
      exists(DataFlowCall call, int i |
        viableCallable(call).(SummarizedCallable).clearsContent(i, c) and
        arg.argumentOf(call, i)
@@ -553,25 +765,6 @@ module Private {
        ret.getKind() = rk
      )
    }
-
-    /**
-     * Holds if data is written into content `c` of argument `arg` using a flow summary.
-     *
-     * Depending on the type of `c`, this predicate may be relevant to include in the
-     * definition of `clearsContent()`.
-     */
-    predicate summaryStoresIntoArg(Content c, Node arg) {
-      exists(ParamUpdateReturnKind rk, ReturnNodeExt ret, PostUpdateNode out |
-        exists(DataFlowCall call, SummarizedCallable callable |
-          getNodeEnclosingCallable(ret) = callable and
-          viableCallable(call) = callable and
-          summaryStoreStep(_, c, ret) and
-          ret.getKind() = pragma[only_bind_into](rk) and
-          out = rk.getAnOutNode(call) and
-          arg = out.getPreUpdateNode()
-        )
-      )
-    }
  }

  /**
@@ -629,22 +822,6 @@ module Private {
      )
    }

-    /** Holds if specification component `c` parses as return value `n`. */
-    predicate parseReturn(string c, int n) {
-      specSplit(_, c, _) and
-      (
-        c = "ReturnValue" and n = 0
-        or
-        c.regexpCapture("ReturnValue\\[([-0-9]+)\\]", 1).toInt() = n
-        or
-        exists(int n1, int n2 |
-          c.regexpCapture("ReturnValue\\[([-0-9]+)\\.\\.([0-9]+)\\]", 1).toInt() = n1 and
-          c.regexpCapture("ReturnValue\\[([-0-9]+)\\.\\.([0-9]+)\\]", 2).toInt() = n2 and
-          n = [n1 .. n2]
-        )
-      )
-    }
-
    private SummaryComponent interpretComponent(string c) {
      specSplit(_, c, _) and
      (
@@ -652,9 +829,7 @@ module Private {
        or
        exists(int pos | parseParam(c, pos) and result = SummaryComponent::parameter(pos))
        or
-        exists(int pos |
-          parseReturn(c, pos) and result = SummaryComponent::return(getReturnKind(pos))
-        )
+        c = "ReturnValue" and result = SummaryComponent::return(getReturnValueKind())
        or
        result = interpretComponentSpecific(c)
      )
@@ -721,12 +896,15 @@ module Private {
      not exists(interpretComponent(c))
    }

-    private predicate inputNeedsReference(string c) { parseArg(c, _) }
+    private predicate inputNeedsReference(string c) {
+      c = "Argument" or
+      parseArg(c, _)
+    }

    private predicate outputNeedsReference(string c) {
+      c = "Argument" or
      parseArg(c, _) or
-      c = "ReturnValue" or
-      parseReturn(c, _)
+      c = "ReturnValue"
    }

    private predicate sourceElementRef(InterpretNode ref, string output, string kind) {
@@ -759,20 +937,15 @@ module Private {
        exists(int pos |
          node.asNode().(PostUpdateNode).getPreUpdateNode().(ArgNode).argumentOf(mid.asCall(), pos)
        |
-          parseArg(c, pos)
+          c = "Argument" or parseArg(c, pos)
        )
        or
        exists(int pos | node.asNode().(ParamNode).isParameterOf(mid.asCallable(), pos) |
          c = "Parameter" or parseParam(c, pos)
        )
        or
-        exists(int pos |
-          node.asNode() = getAnOutNodeExt(mid.asCall(), TValueReturn(getReturnKind(pos)))
-        |
-          c = "ReturnValue" and pos = 0
-          or
-          parseReturn(c, pos)
-        )
+        c = "ReturnValue" and
+        node.asNode() = getAnOutNodeExt(mid.asCall(), TValueReturn(getReturnValueKind()))
        or
        interpretOutputSpecific(c, mid, node)
      )
@@ -787,16 +960,14 @@ module Private {
        interpretInput(input, idx + 1, ref, mid) and
        specSplit(input, c, idx)
      |
-        exists(int pos | node.asNode().(ArgNode).argumentOf(mid.asCall(), pos) | parseArg(c, pos))
+        exists(int pos | node.asNode().(ArgNode).argumentOf(mid.asCall(), pos) |
+          c = "Argument" or parseArg(c, pos)
+        )
        or
-        exists(int pos, ReturnNodeExt ret |
-          (
-            c = "ReturnValue" and pos = 0
-            or
-            parseReturn(c, pos)
-          ) and
+        exists(ReturnNodeExt ret |
+          c = "ReturnValue" and
          ret = node.asNode() and
-          ret.getKind().(ValueReturnKind).getKind() = getReturnKind(pos) and
+          ret.getKind().(ValueReturnKind).getKind() = getReturnValueKind() and
          mid.asCallable() = getNodeEnclosingCallable(ret)
        )
        or
@@ -831,19 +1002,130 @@ module Private {
  module TestOutput {
    /** A flow summary to include in the `summary/3` query predicate. */
    abstract class RelevantSummarizedCallable extends SummarizedCallable {
-      /** Gets the string representation of this callable used by `summary/3`. */
-      string getFullString() { result = this.toString() }
+      /** Gets the CSV prefix for this callable that `summary/1` prepends to each row. */
+      abstract string getCallableCsv();
+
+      /** Holds if `summary/1` should include this flow; by default, all of `propagatesFlow`. */
+      predicate relevantSummary(
+        SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
+      ) {
+        this.propagatesFlow(input, output, preservesValue)
+      }
    }

-    /** A query predicate for outputting flow summaries in QL tests. */
-    query predicate summary(string callable, string flow, boolean preservesValue) {
+    /** Gets the summary kind: `value` if `preservesValue` is true, `taint` if it is false. */
+    private string renderKind(boolean preservesValue) {
+      preservesValue = true and result = "value"
+      or
+      preservesValue = false and result = "taint"
+    }
+
+    /**
+     * A query predicate for outputting flow summaries in semicolon-separated format in QL tests.
+     * The syntax is: "namespace;type;overrides;name;signature;ext;inputspec;outputspec;kind",
+     * ext is hardcoded to empty.
+     */
+    query predicate summary(string csv) {
      exists(
-        RelevantSummarizedCallable c, SummaryComponentStack input, SummaryComponentStack output
+        RelevantSummarizedCallable c, SummaryComponentStack input, SummaryComponentStack output,
+        boolean preservesValue
      |
-        callable = c.getFullString() and
-        c.propagatesFlow(input, output, preservesValue) and
-        flow = input + " -> " + output
+        c.relevantSummary(input, output, preservesValue) and
+        csv =
+          c.getCallableCsv() + ";;" + getComponentStackCsv(input) + ";" + // ";;" leaves `ext` empty
+            getComponentStackCsv(output) + ";" + renderKind(preservesValue)
      )
    }
  }
+
+  /**
+   * Provides query predicates for rendering the generated data flow graph for
+   * a summarized callable.
+   *
+   * Import this module into a `.ql` file of `@kind graph` to render the graph.
+   * The graph is restricted to callables from `RelevantSummarizedCallable`.
+   */
+  module RenderSummarizedCallable {
+    /** A summarized callable to include in the graph. */
+    abstract class RelevantSummarizedCallable extends SummarizedCallable { }
+
+    private newtype TNodeOrCall = // graph vertices: data-flow nodes and calls of relevant callables
+      MkNode(Node n) {
+        exists(RelevantSummarizedCallable c |
+          n = summaryNode(c, _)
+          or
+          n.(ParamNode).isParameterOf(c, _)
+        )
+      } or
+      MkCall(DataFlowCall call) {
+        call = summaryDataFlowCall(_) and
+        call.getEnclosingCallable() instanceof RelevantSummarizedCallable
+      }
+
+    private class NodeOrCall extends TNodeOrCall { // exactly one of `asNode`/`asCall` has a result
+      Node asNode() { this = MkNode(result) }
+
+      DataFlowCall asCall() { this = MkCall(result) }
+
+      string toString() {
+        result = this.asNode().toString()
+        or
+        result = this.asCall().toString()
+      }
+
+      /**
+       * Holds if this element is at the specified location.
+       * The location spans column `startcolumn` of line `startline` to
+       * column `endcolumn` of line `endline` in file `filepath`.
+       * For more information, see
+       * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
+       */
+      predicate hasLocationInfo(
+        string filepath, int startline, int startcolumn, int endline, int endcolumn
+      ) {
+        this.asNode().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+        or
+        this.asCall().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+      }
+    }
+
+    query predicate nodes(NodeOrCall n, string key, string val) {
+      key = "semmle.label" and val = n.toString() // label every vertex with its `toString()`
+    }
+
+    private predicate edgesComponent(NodeOrCall a, NodeOrCall b, string value) { // one label per step kind
+      exists(boolean preservesValue |
+        Private::Steps::summaryLocalStep(a.asNode(), b.asNode(), preservesValue) and
+        if preservesValue = true then value = "value" else value = "taint"
+      )
+      or
+      exists(Content c |
+        Private::Steps::summaryReadStep(a.asNode(), c, b.asNode()) and
+        value = "read (" + c + ")"
+        or
+        Private::Steps::summaryStoreStep(a.asNode(), c, b.asNode()) and
+        value = "store (" + c + ")"
+        or
+        Private::Steps::summaryClearsContent(a.asNode(), c) and
+        b = a and // drawn as a self-loop on `a`
+        value = "clear (" + c + ")"
+      )
+      or
+      summaryPostUpdateNode(b.asNode(), a.asNode()) and // note: `b` is the first argument here
+      value = "post-update"
+      or
+      b.asCall() = summaryDataFlowCall(a.asNode()) and
+      value = "receiver"
+      or
+      exists(int i |
+        summaryArgumentNode(b.asCall(), a.asNode(), i) and
+        value = "argument (" + i + ")"
+      )
+    }
+
+    query predicate edges(NodeOrCall a, NodeOrCall b, string key, string value) {
+      key = "semmle.label" and
+      value = strictconcat(string s | edgesComponent(a, b, s) | s, " / ") // joins all labels for (a, b); needs at least one
+    }
+  }
 }
--- a/ql/lib/semmle/go/dataflow/internal/tainttracking1/TaintTrackingImpl.qll
+++ b/ql/lib/semmle/go/dataflow/internal/tainttracking1/TaintTrackingImpl.qll
@@ -75,25 +75,25 @@ abstract class Configuration extends DataFlow::Configuration {
  predicate isSanitizer(DataFlow::Node node) { none() }

  final override predicate isBarrier(DataFlow::Node node) {
-    isSanitizer(node) or
+    this.isSanitizer(node) or
    defaultTaintSanitizer(node)
  }

  /** Holds if taint propagation into `node` is prohibited. */
  predicate isSanitizerIn(DataFlow::Node node) { none() }

-  final override predicate isBarrierIn(DataFlow::Node node) { isSanitizerIn(node) }
+  final override predicate isBarrierIn(DataFlow::Node node) { this.isSanitizerIn(node) }

  /** Holds if taint propagation out of `node` is prohibited. */
  predicate isSanitizerOut(DataFlow::Node node) { none() }

-  final override predicate isBarrierOut(DataFlow::Node node) { isSanitizerOut(node) }
+  final override predicate isBarrierOut(DataFlow::Node node) { this.isSanitizerOut(node) }

  /** Holds if taint propagation through nodes guarded by `guard` is prohibited. */
  predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() }

  final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) {
-    isSanitizerGuard(guard) or defaultTaintSanitizerGuard(guard)
+    this.isSanitizerGuard(guard) or defaultTaintSanitizerGuard(guard) // keep the default guards too
  }

  /**
@@ -103,7 +103,7 @@ abstract class Configuration extends DataFlow::Configuration {
  predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { none() }

  final override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
-    isAdditionalTaintStep(node1, node2) or
+    this.isAdditionalTaintStep(node1, node2) or
    defaultAdditionalTaintStep(node1, node2)
  }

--- a/ql/lib/semmle/go/dataflow/internal/tainttracking2/TaintTrackingImpl.qll
+++ b/ql/lib/semmle/go/dataflow/internal/tainttracking2/TaintTrackingImpl.qll
@@ -75,25 +75,25 @@ abstract class Configuration extends DataFlow::Configuration {
  predicate isSanitizer(DataFlow::Node node) { none() }

  final override predicate isBarrier(DataFlow::Node node) {
-    isSanitizer(node) or
+    this.isSanitizer(node) or
    defaultTaintSanitizer(node)
  }

  /** Holds if taint propagation into `node` is prohibited. */
  predicate isSanitizerIn(DataFlow::Node node) { none() }

-  final override predicate isBarrierIn(DataFlow::Node node) { isSanitizerIn(node) }
+  final override predicate isBarrierIn(DataFlow::Node node) { this.isSanitizerIn(node) }

  /** Holds if taint propagation out of `node` is prohibited. */
  predicate isSanitizerOut(DataFlow::Node node) { none() }

-  final override predicate isBarrierOut(DataFlow::Node node) { isSanitizerOut(node) }
+  final override predicate isBarrierOut(DataFlow::Node node) { this.isSanitizerOut(node) }

  /** Holds if taint propagation through nodes guarded by `guard` is prohibited. */
  predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() }

  final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) {
-    isSanitizerGuard(guard) or defaultTaintSanitizerGuard(guard)
+    this.isSanitizerGuard(guard) or defaultTaintSanitizerGuard(guard) // keep the default guards too
  }

  /**
@@ -103,7 +103,7 @@ abstract class Configuration extends DataFlow::Configuration {
  predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { none() }

  final override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
-    isAdditionalTaintStep(node1, node2) or
+    this.isAdditionalTaintStep(node1, node2) or
    defaultAdditionalTaintStep(node1, node2)
  }