Merge branch 'master' into java/spring-3653-2

2026-03-01 05:13:41 +01:00 · 2020-07-08 13:06:51 +02:00
parent 3fef5cabf1 6ef7288848
commit 48e4759632
1579 changed files with 80355 additions and 44092 deletions
--- a/java/ql/src/semmle/code/Location.qll
+++ b/java/ql/src/semmle/code/Location.qll
@@ -90,16 +90,16 @@ class Top extends @top {

 /** A location maps language elements to positions in source files. */
 class Location extends @location {
-  /** Gets the line number where this location starts. */
+  /** Gets the 1-based line number (inclusive) where this location starts. */
  int getStartLine() { locations_default(this, _, result, _, _, _) }

-  /** Gets the column number where this location starts. */
+  /** Gets the 1-based column number (inclusive) where this location starts. */
  int getStartColumn() { locations_default(this, _, _, result, _, _) }

-  /** Gets the line number where this location ends. */
+  /** Gets the 1-based line number (inclusive) where this location ends. */
  int getEndLine() { locations_default(this, _, _, _, result, _) }

-  /** Gets the column number where this location ends. */
+  /** Gets the 1-based column number (inclusive) where this location ends. */
  int getEndColumn() { locations_default(this, _, _, _, _, result) }

  /**
--- a/java/ql/src/semmle/code/java/Expr.qll
+++ b/java/ql/src/semmle/code/java/Expr.qll
@@ -60,6 +60,12 @@ class Expr extends ExprParent, @expr {
  /** Gets the statement containing this expression, if any. */
  Stmt getEnclosingStmt() { statementEnclosingExpr(this, result) }

+  /**
+   * Gets a statement that directly or transitively contains this expression, if any.
+   * This is equivalent to `this.getEnclosingStmt().getEnclosingStmt*()`.
+   */
+  Stmt getAnEnclosingStmt() { result = this.getEnclosingStmt().getEnclosingStmt*() }
+
  /** Gets a child of this expression. */
  Expr getAChildExpr() { exprs(result, _, _, this, _) }

@@ -1237,7 +1243,7 @@ class VariableAssign extends VariableUpdate {
  }

  /**
-   * Gets the source of this assignment, if any.
+   * Gets the source (right-hand side) of this assignment, if any.
   *
   * An initialization in a `CatchClause` or `EnhancedForStmt` is implicit and
   * does not have a source.
--- a/java/ql/src/semmle/code/java/Javadoc.qll
+++ b/java/ql/src/semmle/code/java/Javadoc.qll
@@ -79,7 +79,7 @@ abstract class JavadocElement extends @javadocElement, Top {
  abstract string getText();
 }

-/** A Javadoc tag. */
+/** A Javadoc block tag. This does not include inline tags. */
 class JavadocTag extends JavadocElement, JavadocParent, @javadocTag {
  /** Gets the name of this Javadoc tag. */
  string getTagName() { javadocTag(this, result, _, _) }
--- a/java/ql/src/semmle/code/java/dataflow/internal/ContainerFlow.qll
+++ b/java/ql/src/semmle/code/java/dataflow/internal/ContainerFlow.qll
@@ -159,6 +159,57 @@ private predicate taintPreservingArgumentToQualifier(Method method, int arg) {
  method.(CollectionMethod).hasName("offer") and arg = 0
 }

+/**
+ * Holds if `method` is a library method that returns tainted data if its
+ * `arg`th argument is tainted.
+ */
+private predicate taintPreservingArgumentToMethod(Method method, int arg) {
+  method.getDeclaringType().hasQualifiedName("java.util", "Collections") and
+  (
+    method
+        .hasName(["checkedCollection", "checkedList", "checkedMap", "checkedNavigableMap",
+              "checkedNavigableSet", "checkedSet", "checkedSortedMap", "checkedSortedSet",
+              "enumeration", "list", "max", "min", "singleton", "singletonList",
+              "synchronizedCollection", "synchronizedList", "synchronizedMap",
+              "synchronizedNavigableMap", "synchronizedNavigableSet", "synchronizedSet",
+              "synchronizedSortedMap", "synchronizedSortedSet", "unmodifiableCollection",
+              "unmodifiableList", "unmodifiableMap", "unmodifiableNavigableMap",
+              "unmodifiableNavigableSet", "unmodifiableSet", "unmodifiableSortedMap",
+              "unmodifiableSortedSet"]) and
+    arg = 0
+    or
+    method.hasName(["nCopies", "singletonMap"]) and arg = 1 // nCopies(n, o), singletonMap(key, value)
+  )
+  or
+  method.getDeclaringType().hasQualifiedName("java.util", "Arrays") and
+  (
+    method.hasName(["copyOf", "copyOfRange", "spliterator", "stream"]) and
+    arg = 0 // the source array is the first parameter of all of these
+  )
+}
+
+/**
+ * Holds if `method` is a library method that writes tainted data to the
+ * `output`th argument if the `input`th argument is tainted.
+ */
+private predicate taintPreservingArgToArg(Method method, int input, int output) {
+  method.getDeclaringType().hasQualifiedName("java.util", "Collections") and
+  (
+    method.hasName(["copy", "fill"]) and // copy(dest, src), fill(list, obj)
+    input = 1 and
+    output = 0
+    or
+    method.hasName("replaceAll") and input = 2 and output = 0 // replaceAll(list, oldVal, newVal)
+  )
+  or
+  method.getDeclaringType().hasQualifiedName("java.util", "Arrays") and
+  (
+    method.hasName("fill") and // fill(a, val) and fill(a, fromIndex, toIndex, val)
+    output = 0 and
+    input = method.getNumberOfParameters() - 1 // the fill value is the last parameter
+  )
+}
+
 private predicate argToQualifierStep(Expr tracked, Expr sink) {
  exists(Method m, int i, MethodAccess ma |
    taintPreservingArgumentToQualifier(m, i) and
@@ -168,13 +219,52 @@ private predicate argToQualifierStep(Expr tracked, Expr sink) {
  )
 }

+/** Holds if `sink` is a call to a method that passes taint from its argument `tracked`. */
+private predicate argToMethodStep(Expr tracked, MethodAccess sink) {
+  exists(Method m |
+    m = sink.getMethod() and
+    (
+      exists(int i |
+        taintPreservingArgumentToMethod(m, i) and
+        tracked = sink.getArgument(i)
+      )
+      or
+      m.getDeclaringType().hasQualifiedName("java.util", "Arrays") and
+      m.hasName("asList") and // asList(T... a) is varargs, so no single index applies
+      tracked = sink.getAnArgument()
+    )
+  )
+}
+
+/**
+ * Holds if `tracked` and `sink` are arguments to a method that transfers taint
+ * between arguments.
+ */
+private predicate argToArgStep(Expr tracked, Expr sink) {
+  exists(MethodAccess ma, Method method, int input, int output |
+    ma.getMethod() = method and
+    ma.getArgument(input) = tracked and
+    ma.getArgument(output) = sink and
+    (
+      taintPreservingArgToArg(method, input, output)
+      or
+      method.getDeclaringType().hasQualifiedName("java.util", "Collections") and
+      method.hasName("addAll") and // addAll(c, T... elements)
+      input >= 1 and // any of the varargs elements following the target collection
+      output = 0
+    )
+  )
+}
+
 /**
 * Holds if the step from `n1` to `n2` is either extracting a value from a
 * container, inserting a value into a container, or transforming one container
 * to another. This is restricted to cases where `n2` is the returned value of
 * a call.
 */
-predicate containerReturnValueStep(Expr n1, Expr n2) { qualifierToMethodStep(n1, n2) }
+predicate containerReturnValueStep(Expr n1, Expr n2) {
+  qualifierToMethodStep(n1, n2) or argToMethodStep(n1, n2)
+}

 /**
 * Holds if the step from `n1` to `n2` is either extracting a value from a
@@ -183,7 +273,8 @@ predicate containerReturnValueStep(Expr n1, Expr n2) { qualifierToMethodStep(n1,
 */
 predicate containerUpdateStep(Expr n1, Expr n2) {
  qualifierToArgumentStep(n1, n2) or
-  argToQualifierStep(n1, n2)
+  argToQualifierStep(n1, n2) or
+  argToArgStep(n1, n2)
 }

 /**
--- a/java/ql/src/semmle/code/java/dataflow/internal/DataFlowDispatch.qll
+++ b/java/ql/src/semmle/code/java/dataflow/internal/DataFlowDispatch.qll
@@ -2,14 +2,13 @@ private import java
 private import DataFlowPrivate
 import semmle.code.java.dispatch.VirtualDispatch

-cached
 private module DispatchImpl {
  /**
   * Holds if the set of viable implementations that can be called by `ma`
   * might be improved by knowing the call context. This is the case if the
   * qualifier is the `i`th parameter of the enclosing callable `c`.
   */
-  private predicate benefitsFromCallContext(MethodAccess ma, Callable c, int i) {
+  private predicate mayBenefitFromCallContext(MethodAccess ma, Callable c, int i) {
    exists(Parameter p |
      2 <= strictcount(viableImpl(ma)) and
      ma.getQualifier().(VarAccess).getVariable() = p and
@@ -28,7 +27,7 @@ private module DispatchImpl {
  pragma[nomagic]
  private predicate relevantContext(Call ctx, int i) {
    exists(Callable c |
-      benefitsFromCallContext(_, c, i) and
+      mayBenefitFromCallContext(_, c, i) and
      c = viableCallable(ctx)
    )
  }
@@ -53,14 +52,23 @@ private module DispatchImpl {
    )
  }

+  /**
+   * Holds if the set of viable implementations that can be called by `ma`
+   * might be improved by knowing the call context. This is the case if the
+   * qualifier is a parameter of the enclosing callable `c`.
+   */
+  predicate mayBenefitFromCallContext(MethodAccess ma, Callable c) {
+    mayBenefitFromCallContext(ma, c, _)
+  }
+
  /**
   * Gets a viable dispatch target of `ma` in the context `ctx`. This is
   * restricted to those `ma`s for which a context might make a difference.
   */
-  private Method viableImplInCallContext(MethodAccess ma, Call ctx) {
+  Method viableImplInCallContext(MethodAccess ma, Call ctx) {
    result = viableImpl(ma) and
    exists(int i, Callable c, Method def, RefType t, boolean exact |
-      benefitsFromCallContext(ma, c, i) and
+      mayBenefitFromCallContext(ma, c, i) and
      c = viableCallable(ctx) and
      contextArgHasType(ctx, i, t, exact) and
      ma.getMethod() = def
@@ -136,57 +144,6 @@ private module DispatchImpl {
      )
    )
  }
-
-  /**
-   * Holds if the call context `ctx` reduces the set of viable dispatch
-   * targets of `ma` in `c`.
-   */
-  cached
-  predicate reducedViableImplInCallContext(MethodAccess ma, Callable c, Call ctx) {
-    exists(int tgts, int ctxtgts |
-      benefitsFromCallContext(ma, c, _) and
-      c = viableCallable(ctx) and
-      ctxtgts = count(viableImplInCallContext(ma, ctx)) and
-      tgts = strictcount(viableImpl(ma)) and
-      ctxtgts < tgts
-    )
-  }
-
-  /**
-   * Gets a viable dispatch target of `ma` in the context `ctx`. This is
-   * restricted to those `ma`s for which the context makes a difference.
-   */
-  cached
-  Method prunedViableImplInCallContext(MethodAccess ma, Call ctx) {
-    result = viableImplInCallContext(ma, ctx) and
-    reducedViableImplInCallContext(ma, _, ctx)
-  }
-
-  /**
-   * Holds if flow returning from `m` to `ma` might return further and if
-   * this path restricts the set of call sites that can be returned to.
-   */
-  cached
-  predicate reducedViableImplInReturn(Method m, MethodAccess ma) {
-    exists(int tgts, int ctxtgts |
-      benefitsFromCallContext(ma, _, _) and
-      m = viableImpl(ma) and
-      ctxtgts = count(Call ctx | m = viableImplInCallContext(ma, ctx)) and
-      tgts = strictcount(Call ctx | viableCallable(ctx) = ma.getEnclosingCallable()) and
-      ctxtgts < tgts
-    )
-  }
-
-  /**
-   * Gets a viable dispatch target of `ma` in the context `ctx`. This is
-   * restricted to those `ma`s and results for which the return flow from the
-   * result to `ma` restricts the possible context `ctx`.
-   */
-  cached
-  Method prunedViableImplInCallContextReverse(MethodAccess ma, Call ctx) {
-    result = viableImplInCallContext(ma, ctx) and
-    reducedViableImplInReturn(result, ma)
-  }
 }

 import DispatchImpl
--- a/java/ql/src/semmle/code/java/dataflow/internal/DataFlowImpl.qll
+++ b/java/ql/src/semmle/code/java/dataflow/internal/DataFlowImpl.qll
--- a/java/ql/src/semmle/code/java/dataflow/internal/DataFlowImpl2.qll
+++ b/java/ql/src/semmle/code/java/dataflow/internal/DataFlowImpl2.qll
--- a/java/ql/src/semmle/code/java/dataflow/internal/DataFlowImpl3.qll
+++ b/java/ql/src/semmle/code/java/dataflow/internal/DataFlowImpl3.qll
--- a/java/ql/src/semmle/code/java/dataflow/internal/DataFlowImpl4.qll
+++ b/java/ql/src/semmle/code/java/dataflow/internal/DataFlowImpl4.qll
--- a/java/ql/src/semmle/code/java/dataflow/internal/DataFlowImpl5.qll
+++ b/java/ql/src/semmle/code/java/dataflow/internal/DataFlowImpl5.qll
--- a/java/ql/src/semmle/code/java/dataflow/internal/DataFlowImplCommon.qll
+++ b/java/ql/src/semmle/code/java/dataflow/internal/DataFlowImplCommon.qll
@@ -22,7 +22,7 @@ private module Cached {
    exists(int i |
      viableParam(call, i, p) and
      arg.argumentOf(call, i) and
-      compatibleTypes(getErasedNodeTypeBound(arg), getErasedNodeTypeBound(p))
+      compatibleTypes(getNodeType(arg), getNodeType(p))
    )
  }

@@ -147,174 +147,140 @@ private module Cached {
      }
    }

-    private module LocalFlowBigStep {
-      private predicate localFlowEntry(Node n) {
-        Cand::cand(_, n) and
-        (
-          n instanceof ParameterNode or
-          n instanceof OutNode or
-          readStep(_, _, n) or
-          n instanceof CastNode
-        )
-      }
-
-      private predicate localFlowExit(Node n) {
-        Cand::cand(_, n) and
-        (
-          n instanceof ArgumentNode
-          or
-          n instanceof ReturnNode
-          or
-          readStep(n, _, _)
-          or
-          n instanceof CastNode
-          or
-          n =
-            any(PostUpdateNode pun | Cand::parameterValueFlowsToPreUpdateCand(_, pun))
-                .getPreUpdateNode()
-        )
-      }
-
-      pragma[nomagic]
-      private predicate localFlowStepPlus(Node node1, Node node2) {
-        localFlowEntry(node1) and
-        simpleLocalFlowStep(node1, node2) and
-        node1 != node2
-        or
-        exists(Node mid |
-          localFlowStepPlus(node1, mid) and
-          simpleLocalFlowStep(mid, node2) and
-          not mid instanceof CastNode
-        )
-      }
-
-      pragma[nomagic]
-      predicate localFlowBigStep(Node node1, Node node2) {
-        localFlowStepPlus(node1, node2) and
-        localFlowExit(node2)
-      }
-    }
-
    /**
     * The final flow-through calculation:
     *
-     * - Input access paths are abstracted with a `ContentOption` parameter
-     *   that represents the head of the access path. `TContentNone()` means that
-     *   the access path is unrestricted.
+     * - Calculated flow is either value-preserving (`read = TReadStepTypesNone()`)
+     *   or summarized as a single read step with before and after types recorded
+     *   in the `ReadStepTypesOption` parameter.
     * - Types are checked using the `compatibleTypes()` relation.
     */
    private module Final {
      /**
       * Holds if `p` can flow to `node` in the same callable using only
-       * value-preserving steps, not taking call contexts into account.
+       * value-preserving steps and possibly a single read step, not taking
+       * call contexts into account.
       *
-       * `contentIn` describes the content of `p` that can flow to `node`
-       * (if any).
+       * If a read step was taken, then `read` captures the `Content`, the
+       * container type, and the content type.
       */
-      predicate parameterValueFlow(ParameterNode p, Node node, ContentOption contentIn) {
-        parameterValueFlow0(p, node, contentIn) and
+      predicate parameterValueFlow(ParameterNode p, Node node, ReadStepTypesOption read) {
+        parameterValueFlow0(p, node, read) and
        if node instanceof CastingNode
        then
          // normal flow through
-          contentIn = TContentNone() and
-          compatibleTypes(getErasedNodeTypeBound(p), getErasedNodeTypeBound(node))
+          read = TReadStepTypesNone() and
+          compatibleTypes(getNodeType(p), getNodeType(node))
          or
          // getter
-          exists(Content fIn |
-            contentIn.getContent() = fIn and
-            compatibleTypes(fIn.getType(), getErasedNodeTypeBound(node))
-          )
+          compatibleTypes(read.getContentType(), getNodeType(node))
        else any()
      }

      pragma[nomagic]
-      private predicate parameterValueFlow0(ParameterNode p, Node node, ContentOption contentIn) {
+      private predicate parameterValueFlow0(ParameterNode p, Node node, ReadStepTypesOption read) {
        p = node and
        Cand::cand(p, _) and
-        contentIn = TContentNone()
+        read = TReadStepTypesNone()
        or
        // local flow
        exists(Node mid |
-          parameterValueFlow(p, mid, contentIn) and
-          LocalFlowBigStep::localFlowBigStep(mid, node)
+          parameterValueFlow(p, mid, read) and
+          simpleLocalFlowStep(mid, node)
        )
        or
        // read
-        exists(Node mid, Content f |
-          parameterValueFlow(p, mid, TContentNone()) and
-          readStep(mid, f, node) and
-          contentIn.getContent() = f and
+        exists(Node mid |
+          parameterValueFlow(p, mid, TReadStepTypesNone()) and
+          readStepWithTypes(mid, read.getContainerType(), read.getContent(), node,
+            read.getContentType()) and
          Cand::parameterValueFlowReturnCand(p, _, true) and
-          compatibleTypes(getErasedNodeTypeBound(p), f.getContainerType())
+          compatibleTypes(getNodeType(p), read.getContainerType())
        )
        or
+        parameterValueFlow0_0(TReadStepTypesNone(), p, node, read)
+      }
+
+      pragma[nomagic]
+      private predicate parameterValueFlow0_0(
+        ReadStepTypesOption mustBeNone, ParameterNode p, Node node, ReadStepTypesOption read
+      ) {
        // flow through: no prior read
        exists(ArgumentNode arg |
-          parameterValueFlowArg(p, arg, TContentNone()) and
-          argumentValueFlowsThrough(arg, contentIn, node)
+          parameterValueFlowArg(p, arg, mustBeNone) and
+          argumentValueFlowsThrough(arg, read, node)
        )
        or
        // flow through: no read inside method
        exists(ArgumentNode arg |
-          parameterValueFlowArg(p, arg, contentIn) and
-          argumentValueFlowsThrough(arg, TContentNone(), node)
+          parameterValueFlowArg(p, arg, read) and
+          argumentValueFlowsThrough(arg, mustBeNone, node)
        )
      }

      pragma[nomagic]
      private predicate parameterValueFlowArg(
-        ParameterNode p, ArgumentNode arg, ContentOption contentIn
+        ParameterNode p, ArgumentNode arg, ReadStepTypesOption read
      ) {
-        parameterValueFlow(p, arg, contentIn) and
+        parameterValueFlow(p, arg, read) and
        Cand::argumentValueFlowsThroughCand(arg, _, _)
      }

      pragma[nomagic]
      private predicate argumentValueFlowsThrough0(
-        DataFlowCall call, ArgumentNode arg, ReturnKind kind, ContentOption contentIn
+        DataFlowCall call, ArgumentNode arg, ReturnKind kind, ReadStepTypesOption read
      ) {
        exists(ParameterNode param | viableParamArg(call, param, arg) |
-          parameterValueFlowReturn(param, kind, contentIn)
+          parameterValueFlowReturn(param, kind, read)
        )
      }

      /**
-       * Holds if `arg` flows to `out` through a call using only value-preserving steps,
-       * not taking call contexts into account.
+       * Holds if `arg` flows to `out` through a call using only
+       * value-preserving steps and possibly a single read step, not taking
+       * call contexts into account.
       *
-       * `contentIn` describes the content of `arg` that can flow to `out` (if any).
+       * If a read step was taken, then `read` captures the `Content`, the
+       * container type, and the content type.
       */
      pragma[nomagic]
-      predicate argumentValueFlowsThrough(ArgumentNode arg, ContentOption contentIn, Node out) {
+      predicate argumentValueFlowsThrough(ArgumentNode arg, ReadStepTypesOption read, Node out) {
        exists(DataFlowCall call, ReturnKind kind |
-          argumentValueFlowsThrough0(call, arg, kind, contentIn) and
+          argumentValueFlowsThrough0(call, arg, kind, read) and
          out = getAnOutNode(call, kind)
        |
          // normal flow through
-          contentIn = TContentNone() and
-          compatibleTypes(getErasedNodeTypeBound(arg), getErasedNodeTypeBound(out))
+          read = TReadStepTypesNone() and
+          compatibleTypes(getNodeType(arg), getNodeType(out))
          or
          // getter
-          exists(Content fIn |
-            contentIn.getContent() = fIn and
-            compatibleTypes(getErasedNodeTypeBound(arg), fIn.getContainerType()) and
-            compatibleTypes(fIn.getType(), getErasedNodeTypeBound(out))
-          )
+          compatibleTypes(getNodeType(arg), read.getContainerType()) and
+          compatibleTypes(read.getContentType(), getNodeType(out))
        )
      }

+      /**
+       * Holds if `arg` flows to `out` through a call using only
+       * value-preserving steps and a single read step, not taking call
+       * contexts into account, thus representing a getter-step.
+       */
+      predicate getterStep(ArgumentNode arg, Content c, Node out) {
+        argumentValueFlowsThrough(arg, TReadStepTypesSome(_, c, _), out)
+      }
+
      /**
       * Holds if `p` can flow to a return node of kind `kind` in the same
-       * callable using only value-preserving steps.
+       * callable using only value-preserving steps and possibly a single read
+       * step.
       *
-       * `contentIn` describes the content of `p` that can flow to the return
-       * node (if any).
+       * If a read step was taken, then `read` captures the `Content`, the
+       * container type, and the content type.
       */
      private predicate parameterValueFlowReturn(
-        ParameterNode p, ReturnKind kind, ContentOption contentIn
+        ParameterNode p, ReturnKind kind, ReadStepTypesOption read
      ) {
        exists(ReturnNode ret |
-          parameterValueFlow(p, ret, contentIn) and
+          parameterValueFlow(p, ret, read) and
          kind = ret.getKind()
        )
      }
@@ -323,13 +289,94 @@ private module Cached {
    import Final
  }

+  import FlowThrough
+
+  cached
+  private module DispatchWithCallContext {
+    /**
+     * Holds if the call context `ctx` reduces the set of viable run-time
+     * dispatch targets of call `call` in `c`.
+     */
+    cached
+    predicate reducedViableImplInCallContext(DataFlowCall call, DataFlowCallable c, DataFlowCall ctx) {
+      exists(int tgts, int ctxtgts |
+        mayBenefitFromCallContext(call, c) and
+        c = viableCallable(ctx) and
+        ctxtgts = count(viableImplInCallContext(call, ctx)) and
+        tgts = strictcount(viableCallable(call)) and
+        ctxtgts < tgts
+      )
+    }
+
+    /**
+     * Gets a viable run-time dispatch target for the call `call` in the
+     * context `ctx`. This is restricted to those calls for which a context
+     * makes a difference.
+     */
+    cached
+    DataFlowCallable prunedViableImplInCallContext(DataFlowCall call, DataFlowCall ctx) {
+      result = viableImplInCallContext(call, ctx) and
+      reducedViableImplInCallContext(call, _, ctx)
+    }
+
+    /**
+     * Holds if flow returning from callable `c` to call `call` might return
+     * further and if this path restricts the set of call sites that can be
+     * returned to.
+     */
+    cached
+    predicate reducedViableImplInReturn(DataFlowCallable c, DataFlowCall call) {
+      exists(int tgts, int ctxtgts |
+        mayBenefitFromCallContext(call, _) and
+        c = viableCallable(call) and
+        ctxtgts = count(DataFlowCall ctx | c = viableImplInCallContext(call, ctx)) and
+        tgts = strictcount(DataFlowCall ctx | viableCallable(ctx) = call.getEnclosingCallable()) and
+        ctxtgts < tgts
+      )
+    }
+
+    /**
+     * Gets a viable run-time dispatch target for the call `call` in the
+     * context `ctx`. This is restricted to those calls and results for which
+     * the return flow from the result to `call` restricts the possible context
+     * `ctx`.
+     */
+    cached
+    DataFlowCallable prunedViableImplInCallContextReverse(DataFlowCall call, DataFlowCall ctx) {
+      result = viableImplInCallContext(call, ctx) and
+      reducedViableImplInReturn(result, call)
+    }
+  }
+
+  import DispatchWithCallContext
+
  /**
   * Holds if `p` can flow to the pre-update node associated with post-update
   * node `n`, in the same callable, using only value-preserving steps.
   */
  cached
  predicate parameterValueFlowsToPreUpdate(ParameterNode p, PostUpdateNode n) {
-    parameterValueFlow(p, n.getPreUpdateNode(), TContentNone())
+    parameterValueFlow(p, n.getPreUpdateNode(), TReadStepTypesNone())
+  }
+
+  private predicate store(
+    Node node1, Content c, Node node2, DataFlowType contentType, DataFlowType containerType
+  ) {
+    storeStep(node1, c, node2) and
+    readStep(_, c, _) and
+    contentType = getNodeType(node1) and
+    containerType = getNodeType(node2)
+    or
+    exists(Node n1, Node n2 |
+      n1 = node1.(PostUpdateNode).getPreUpdateNode() and
+      n2 = node2.(PostUpdateNode).getPreUpdateNode()
+    |
+      argumentValueFlowsThrough(n2, TReadStepTypesSome(containerType, c, contentType), n1)
+      or
+      readStep(n2, c, n1) and
+      contentType = getNodeType(n1) and
+      containerType = getNodeType(n2)
+    )
  }

  /**
@@ -340,21 +387,10 @@ private module Cached {
   * been stored into, in order to handle cases like `x.f1.f2 = y`.
   */
  cached
-  predicate store(Node node1, Content f, Node node2) {
-    storeStep(node1, f, node2) and readStep(_, f, _)
-    or
-    exists(Node n1, Node n2 |
-      n1 = node1.(PostUpdateNode).getPreUpdateNode() and
-      n2 = node2.(PostUpdateNode).getPreUpdateNode()
-    |
-      argumentValueFlowsThrough(n2, TContentSome(f), n1)
-      or
-      readStep(n2, f, n1)
-    )
+  predicate store(Node node1, TypedContent tc, Node node2, DataFlowType contentType) {
+    store(node1, tc.getContent(), node2, contentType, tc.getContainerType())
  }

-  import FlowThrough
-
  /**
   * Holds if the call context `call` either improves virtual dispatch in
   * `callable` or if it allows us to prune unreachable nodes in `callable`.
@@ -397,10 +433,13 @@ private module Cached {
    TBooleanNone() or
    TBooleanSome(boolean b) { b = true or b = false }

+  cached
+  newtype TTypedContent = MkTypedContent(Content c, DataFlowType t) { store(_, c, _, _, t) }
+
  cached
  newtype TAccessPathFront =
    TFrontNil(DataFlowType t) or
-    TFrontHead(Content f)
+    TFrontHead(TypedContent tc)

  cached
  newtype TAccessPathFrontOption =
@@ -415,25 +454,38 @@ class CastingNode extends Node {
  CastingNode() {
    this instanceof ParameterNode or
    this instanceof CastNode or
-    this instanceof OutNodeExt
+    this instanceof OutNodeExt or
+    // For reads, `x.f`, we want to check that the tracked type after the read (which
+    // is obtained by popping the head of the access path stack) is compatible with
+    // the type of `x.f`.
+    readStep(_, _, this)
  }
 }

-newtype TContentOption =
-  TContentNone() or
-  TContentSome(Content f)
+private predicate readStepWithTypes(
+  Node n1, DataFlowType container, Content c, Node n2, DataFlowType content
+) {
+  readStep(n1, c, n2) and
+  container = getNodeType(n1) and
+  content = getNodeType(n2)
+}

-private class ContentOption extends TContentOption {
-  Content getContent() { this = TContentSome(result) }
-
-  predicate hasContent() { exists(this.getContent()) }
-
-  string toString() {
-    result = this.getContent().toString()
-    or
-    not this.hasContent() and
-    result = "<none>"
+private newtype TReadStepTypesOption =
+  TReadStepTypesNone() or
+  TReadStepTypesSome(DataFlowType container, Content c, DataFlowType content) {
+    readStepWithTypes(_, container, c, _, content)
  }
+
+private class ReadStepTypesOption extends TReadStepTypesOption {
+  predicate isSome() { this instanceof TReadStepTypesSome }
+
+  DataFlowType getContainerType() { this = TReadStepTypesSome(result, _, _) }
+
+  Content getContent() { this = TReadStepTypesSome(_, result, _) }
+
+  DataFlowType getContentType() { this = TReadStepTypesSome(_, _, result) }
+
+  string toString() { if this.isSome() then result = "Some(..)" else result = "None()" }
 }

 /**
@@ -678,9 +730,6 @@ DataFlowCallable resolveCall(DataFlowCall call, CallContext cc) {
  result = viableCallable(call) and cc instanceof CallContextReturn
 }

-pragma[noinline]
-DataFlowType getErasedNodeTypeBound(Node n) { result = getErasedRepr(n.getTypeBound()) }
-
 predicate read = readStep/3;

 /** An optional Boolean value. */
@@ -692,6 +741,23 @@ class BooleanOption extends TBooleanOption {
  }
 }

+/** Content tagged with the type of a containing object. */
+class TypedContent extends MkTypedContent {
+  private Content c;
+  private DataFlowType t;
+
+  TypedContent() { this = MkTypedContent(c, t) }
+
+  /** Gets the content. */
+  Content getContent() { result = c }
+
+  /** Gets the container type. */
+  DataFlowType getContainerType() { result = t }
+
+  /** Gets a textual representation of this content. */
+  string toString() { result = c.toString() }
+}
+
 /**
 * The front of an access path. This is either a head or a nil.
 */
@@ -702,25 +768,36 @@ abstract class AccessPathFront extends TAccessPathFront {

  abstract boolean toBoolNonEmpty();

-  predicate headUsesContent(Content f) { this = TFrontHead(f) }
+  predicate headUsesContent(TypedContent tc) { this = TFrontHead(tc) }
+
+  predicate isClearedAt(Node n) {
+    exists(TypedContent tc |
+      this.headUsesContent(tc) and
+      clearsContent(n, tc.getContent())
+    )
+  }
 }

 class AccessPathFrontNil extends AccessPathFront, TFrontNil {
-  override string toString() {
-    exists(DataFlowType t | this = TFrontNil(t) | result = ppReprType(t))
-  }
+  private DataFlowType t;

-  override DataFlowType getType() { this = TFrontNil(result) }
+  AccessPathFrontNil() { this = TFrontNil(t) }
+
+  override string toString() { result = ppReprType(t) }
+
+  override DataFlowType getType() { result = t }

  override boolean toBoolNonEmpty() { result = false }
 }

 class AccessPathFrontHead extends AccessPathFront, TFrontHead {
-  override string toString() { exists(Content f | this = TFrontHead(f) | result = f.toString()) }
+  private TypedContent tc;

-  override DataFlowType getType() {
-    exists(Content head | this = TFrontHead(head) | result = head.getContainerType())
-  }
+  AccessPathFrontHead() { this = TFrontHead(tc) }
+
+  override string toString() { result = tc.toString() }
+
+  override DataFlowType getType() { result = tc.getContainerType() }

  override boolean toBoolNonEmpty() { result = true }
 }
--- a/java/ql/src/semmle/code/java/dataflow/internal/DataFlowImplConsistency.qll
+++ b/java/ql/src/semmle/code/java/dataflow/internal/DataFlowImplConsistency.qll
@@ -37,21 +37,12 @@ module Consistency {
    )
  }

-  query predicate uniqueTypeBound(Node n, string msg) {
+  query predicate uniqueType(Node n, string msg) {
    exists(int c |
      n instanceof RelevantNode and
-      c = count(n.getTypeBound()) and
+      c = count(getNodeType(n)) and
      c != 1 and
-      msg = "Node should have one type bound but has " + c + "."
-    )
-  }
-
-  query predicate uniqueTypeRepr(Node n, string msg) {
-    exists(int c |
-      n instanceof RelevantNode and
-      c = count(getErasedRepr(n.getTypeBound())) and
-      c != 1 and
-      msg = "Node should have one type representation but has " + c + "."
+      msg = "Node should have one type but has " + c + "."
    )
  }

@@ -104,7 +95,7 @@ module Consistency {
    msg = "Local flow step does not preserve enclosing callable."
  }

-  private DataFlowType typeRepr() { result = getErasedRepr(any(Node n).getTypeBound()) }
+  private DataFlowType typeRepr() { result = getNodeType(_) }

  query predicate compatibleTypesReflexive(DataFlowType t, string msg) {
    t = typeRepr() and
--- a/java/ql/src/semmle/code/java/dataflow/internal/DataFlowPrivate.qll
+++ b/java/ql/src/semmle/code/java/dataflow/internal/DataFlowPrivate.qll
@@ -129,15 +129,6 @@ private predicate instanceFieldAssign(Expr src, FieldAccess fa) {
  )
 }

-/**
- * Gets an upper bound on the type of `f`.
- */
-private Type getFieldTypeBound(Field f) {
-  fieldTypeFlow(f, result, _)
-  or
-  not fieldTypeFlow(f, _, _) and result = f.getType()
-}
-
 private newtype TContent =
  TFieldContent(InstanceField f) or
  TCollectionContent() or
@@ -154,12 +145,6 @@ class Content extends TContent {
  predicate hasLocationInfo(string path, int sl, int sc, int el, int ec) {
    path = "" and sl = 0 and sc = 0 and el = 0 and ec = 0
  }
-
-  /** Gets the erased type of the object containing this content. */
-  abstract DataFlowType getContainerType();
-
-  /** Gets the erased type of this content. */
-  abstract DataFlowType getType();
 }

 private class FieldContent extends Content, TFieldContent {
@@ -174,26 +159,14 @@ private class FieldContent extends Content, TFieldContent {
  override predicate hasLocationInfo(string path, int sl, int sc, int el, int ec) {
    f.getLocation().hasLocationInfo(path, sl, sc, el, ec)
  }
-
-  override DataFlowType getContainerType() { result = getErasedRepr(f.getDeclaringType()) }
-
-  override DataFlowType getType() { result = getErasedRepr(getFieldTypeBound(f)) }
 }

 private class CollectionContent extends Content, TCollectionContent {
  override string toString() { result = "collection" }
-
-  override DataFlowType getContainerType() { none() }
-
-  override DataFlowType getType() { none() }
 }

 private class ArrayContent extends Content, TArrayContent {
  override string toString() { result = "array" }
-
-  override DataFlowType getContainerType() { none() }
-
-  override DataFlowType getType() { none() }
 }

 /**
@@ -222,12 +195,21 @@ predicate readStep(Node node1, Content f, Node node2) {
  )
 }

+/**
+ * Holds if values stored inside content `c` are cleared at node `n`. For example,
+ * any value stored inside `f` is cleared at the pre-update node associated with `x`
+ * in `x.f = newValue`.
+ */
+predicate clearsContent(Node n, Content c) {
+  n = any(PostUpdateNode pun | storeStep(_, c, pun)).getPreUpdateNode()
+}
+
 /**
 * Gets a representative (boxed) type for `t` for the purpose of pruning
 * possible flow. A single type is used for all numeric types to account for
 * numeric conversions, and otherwise the erasure is used.
 */
-DataFlowType getErasedRepr(Type t) {
+private DataFlowType getErasedRepr(Type t) {
  exists(Type e | e = t.getErasure() |
    if e instanceof NumericOrCharType
    then result.(BoxedType).getPrimitiveType().getName() = "double"
@@ -240,6 +222,9 @@ DataFlowType getErasedRepr(Type t) {
  t instanceof NullType and result instanceof TypeObject
 }

+pragma[noinline]
+DataFlowType getNodeType(Node n) { result = getErasedRepr(n.getTypeBound()) }
+
 /** Gets a string representation of a type returned by `getErasedRepr`. */
 string ppReprType(Type t) {
  if t.(BoxedType).getPrimitiveType().getName() = "double"
--- a/java/ql/src/semmle/code/java/dataflow/internal/TaintTrackingUtil.qll
+++ b/java/ql/src/semmle/code/java/dataflow/internal/TaintTrackingUtil.qll
@@ -416,9 +416,9 @@ private predicate unsafeEscape(MethodAccess ma) {
 /** Access to a method that passes taint from an argument. */
 private predicate argToMethodStep(Expr tracked, MethodAccess sink) {
  exists(Method m, int i |
-    m = sink.(MethodAccess).getMethod() and
+    m = sink.getMethod() and
    taintPreservingArgumentToMethod(m, i) and
-    tracked = sink.(MethodAccess).getArgument(i)
+    tracked = sink.getArgument(i)
  )
  or
  exists(MethodAccess ma |
--- a/java/ql/src/semmle/code/java/dataflow/internal/tainttracking1/TaintTrackingImpl.qll
+++ b/java/ql/src/semmle/code/java/dataflow/internal/tainttracking1/TaintTrackingImpl.qll
@@ -26,7 +26,7 @@ private import TaintTrackingParameter::Private
 * To create a configuration, extend this class with a subclass whose
 * characteristic predicate is a unique singleton string. For example, write
 *
- * ```
+ * ```ql
 * class MyAnalysisConfiguration extends TaintTracking::Configuration {
 *   MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" }
 *   // Override `isSource` and `isSink`.
@@ -41,7 +41,7 @@ private import TaintTrackingParameter::Private
 * Then, to query whether there is flow between some `source` and `sink`,
 * write
 *
- * ```
+ * ```ql
 * exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink))
 * ```
 *
--- a/java/ql/src/semmle/code/java/dataflow/internal/tainttracking2/TaintTrackingImpl.qll
+++ b/java/ql/src/semmle/code/java/dataflow/internal/tainttracking2/TaintTrackingImpl.qll
@@ -26,7 +26,7 @@ private import TaintTrackingParameter::Private
 * To create a configuration, extend this class with a subclass whose
 * characteristic predicate is a unique singleton string. For example, write
 *
- * ```
+ * ```ql
 * class MyAnalysisConfiguration extends TaintTracking::Configuration {
 *   MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" }
 *   // Override `isSource` and `isSink`.
@@ -41,7 +41,7 @@ private import TaintTrackingParameter::Private
 * Then, to query whether there is flow between some `source` and `sink`,
 * write
 *
- * ```
+ * ```ql
 * exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink))
 * ```
 *
--- a/java/ql/src/semmle/code/java/security/Encryption.qll
+++ b/java/ql/src/semmle/code/java/security/Encryption.qll
@@ -1,3 +1,7 @@
+/**
+ * Provides predicates and classes relating to encryption in Java.
+ */
+
 import java

 class SSLClass extends RefType {
@@ -85,8 +89,10 @@ private string algorithmRegex(string algorithmString) {
      "((^|.*[A-Z]{2}|.*[^a-zA-Z])(" + algorithmString.toLowerCase() + ")([^a-z].*|$))"
 }

-/** Gets a blacklist of algorithms that are known to be insecure. */
-private string algorithmBlacklist() {
+/**
+ * Gets the name of an algorithm that is known to be insecure.
+ */
+string getAnInsecureAlgorithmName() {
  result = "DES" or
  result = "RC2" or
  result = "RC4" or
@@ -94,32 +100,40 @@ private string algorithmBlacklist() {
  result = "ARCFOUR" // a variant of RC4
 }

-// These are only bad if they're being used for encryption.
-private string hashAlgorithmBlacklist() {
+/**
+ * Gets the name of a hash algorithm that is insecure if it is being used for
+ * encryption.
+ */
+string getAnInsecureHashAlgorithmName() {
  result = "SHA1" or
  result = "MD5"
 }

-private string rankedAlgorithmBlacklist(int i) {
+private string rankedInsecureAlgorithm(int i) {
  // In this case we know these are being used for encryption, so we want to match
  // weak hash algorithms too.
-  result = rank[i](string s | s = algorithmBlacklist() or s = hashAlgorithmBlacklist())
-}
-
-private string algorithmBlacklistString(int i) {
-  i = 1 and result = rankedAlgorithmBlacklist(i)
-  or
-  result = rankedAlgorithmBlacklist(i) + "|" + algorithmBlacklistString(i - 1)
-}
-
-/** Gets a regex for matching strings that look like they contain a blacklisted algorithm. */
-string algorithmBlacklistRegex() {
  result =
-    algorithmRegex(algorithmBlacklistString(max(int i | exists(rankedAlgorithmBlacklist(i)))))
+    rank[i](string s | s = getAnInsecureAlgorithmName() or s = getAnInsecureHashAlgorithmName())
 }

-/** Gets a whitelist of algorithms that are known to be secure. */
-private string algorithmWhitelist() {
+private string insecureAlgorithmString(int i) {
+  i = 1 and result = rankedInsecureAlgorithm(i)
+  or
+  result = rankedInsecureAlgorithm(i) + "|" + insecureAlgorithmString(i - 1)
+}
+
+/**
+ * Gets a regular expression for matching strings that look like they contain
+ * the name of an insecure algorithm, including insecure hash algorithms.
+ */
+string getInsecureAlgorithmRegex() {
+  result = algorithmRegex(insecureAlgorithmString(max(int i | exists(rankedInsecureAlgorithm(i)))))
+}
+
+/**
+ * Gets the name of an algorithm that is known to be secure.
+ */
+string getASecureAlgorithmName() {
  result = "RSA" or
  result = "SHA256" or
  result = "SHA512" or
@@ -130,20 +144,50 @@ private string algorithmWhitelist() {
  result = "ECIES"
 }

-private string rankedAlgorithmWhitelist(int i) { result = rank[i](algorithmWhitelist()) }
+private string rankedSecureAlgorithm(int i) { result = rank[i](getASecureAlgorithmName()) }

-private string algorithmWhitelistString(int i) {
-  i = 1 and result = rankedAlgorithmWhitelist(i)
+private string secureAlgorithmString(int i) {
+  i = 1 and result = rankedSecureAlgorithm(i)
  or
-  result = rankedAlgorithmWhitelist(i) + "|" + algorithmWhitelistString(i - 1)
+  result = rankedSecureAlgorithm(i) + "|" + secureAlgorithmString(i - 1)
 }

-/** Gets a regex for matching strings that look like they contain a whitelisted algorithm. */
-string algorithmWhitelistRegex() {
-  result =
-    algorithmRegex(algorithmWhitelistString(max(int i | exists(rankedAlgorithmWhitelist(i)))))
+/**
+ * Gets a regular expression for matching strings that look like they
+ * contain an algorithm that is known to be secure.
+ */
+string getSecureAlgorithmRegex() {
+  result = algorithmRegex(secureAlgorithmString(max(int i | exists(rankedSecureAlgorithm(i)))))
 }

+/**
+ * DEPRECATED: Terminology has been updated. Use `getAnInsecureAlgorithmName()`
+ * instead.
+ */
+deprecated string algorithmBlacklist() { result = getAnInsecureAlgorithmName() }
+
+/**
+ * DEPRECATED: Terminology has been updated. Use
+ * `getAnInsecureHashAlgorithmName()` instead.
+ */
+deprecated string hashAlgorithmBlacklist() { result = getAnInsecureHashAlgorithmName() }
+
+/**
+ * DEPRECATED: Terminology has been updated. Use `getInsecureAlgorithmRegex()` instead.
+ */
+deprecated string algorithmBlacklistRegex() { result = getInsecureAlgorithmRegex() }
+
+/**
+ * DEPRECATED: Terminology has been updated. Use `getASecureAlgorithmName()`
+ * instead.
+ */
+deprecated string algorithmWhitelist() { result = getASecureAlgorithmName() }
+
+/**
+ * DEPRECATED: Terminology has been updated. Use `getSecureAlgorithmRegex()` instead.
+ */
+deprecated string algorithmWhitelistRegex() { result = getSecureAlgorithmRegex() }
+
 /**
 * Any use of a cryptographic element that specifies an encryption
 * algorithm. For example, methods returning ciphers, decryption methods,
--- a/java/ql/src/semmle/code/java/security/FileReadWrite.qll
+++ b/java/ql/src/semmle/code/java/security/FileReadWrite.qll
@@ -9,9 +9,9 @@ private predicate fileRead(VarAccess fileAccess, Expr fileReadingExpr) {
    cie = fileReadingExpr and
    cie.getArgument(0) = fileAccess
  |
-    cie.getConstructedType().hasQualifiedName("java.io", "RandomAccessFile") or
-    cie.getConstructedType().hasQualifiedName("java.io", "FileReader") or
-    cie.getConstructedType().hasQualifiedName("java.io", "FileInputStream")
+    cie
+        .getConstructedType()
+        .hasQualifiedName("java.io", ["RandomAccessFile", "FileReader", "FileInputStream"])
  )
  or
  exists(MethodAccess ma, Method filesMethod |
@@ -22,13 +22,9 @@ private predicate fileRead(VarAccess fileAccess, Expr fileReadingExpr) {
      // represented by the first argument.
      filesMethod.getDeclaringType().hasQualifiedName("java.nio.file", "Files") and
      fileAccess = ma.getArgument(0) and
-      (
-        filesMethod.hasName("readAllBytes") or
-        filesMethod.hasName("readAllLines") or
-        filesMethod.hasName("newBufferedReader") or
-        filesMethod.hasName("newInputReader") or
-        filesMethod.hasName("newByteChannel")
-      )
+      filesMethod
+          .hasName(["readAllBytes", "readAllLines", "readString", "lines", "newBufferedReader",
+                "newInputStream", "newByteChannel"])
    )
  )
  or