From ad461a87b45113fb29a63dd1ebf2274021b4a5e1 Mon Sep 17 00:00:00 2001 From: Anders Schack-Mulligen Date: Mon, 8 May 2023 13:56:26 +0200 Subject: [PATCH] Dataflow: Strengthen tracked types. --- .../java/dataflow/internal/DataFlowImpl.qll | 92 +++++++++++++++---- .../dataflow/internal/DataFlowPrivate.qll | 13 ++- 2 files changed, 82 insertions(+), 23 deletions(-) diff --git a/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowImpl.qll b/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowImpl.qll index 77110c0db0d..8520f308f55 100644 --- a/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowImpl.qll +++ b/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowImpl.qll @@ -1135,8 +1135,8 @@ module Impl { DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow ); - bindingset[node, state, t, ap] - predicate filter(NodeEx node, FlowState state, Typ t, Ap ap); + bindingset[node, state, t0, ap] + predicate filter(NodeEx node, FlowState state, Typ t0, Ap ap, Typ t); bindingset[typ, contentType] predicate typecheckStore(Typ typ, DataFlowType contentType); @@ -1199,9 +1199,20 @@ module Impl { NodeEx node, FlowState state, Cc cc, ParamNodeOption summaryCtx, TypOption argT, ApOption argAp, Typ t, Ap ap, ApApprox apa ) { - fwdFlow0(node, state, cc, summaryCtx, argT, argAp, t, ap, apa) and + fwdFlow1(node, state, cc, summaryCtx, argT, argAp, _, t, ap, apa) + } + + private predicate fwdFlow1( + NodeEx node, FlowState state, Cc cc, ParamNodeOption summaryCtx, TypOption argT, + ApOption argAp, Typ t0, Typ t, Ap ap, ApApprox apa + ) { + fwdFlow0(node, state, cc, summaryCtx, argT, argAp, t0, ap, apa) and PrevStage::revFlow(node, state, apa) and - filter(node, state, t, ap) + filter(node, state, t0, ap, t) + } + + private predicate typeStrengthen(Typ t0, Ap ap, Typ t) { + fwdFlow1(_, _, _, _, _, _, t0, t, ap, _) and t0 != t } pragma[assume_small_delta] @@ -1331,6 +1342,11 @@ module Impl { private predicate fwdFlowConsCand(Typ t2, Ap cons, Content c, Typ t1, Ap tail) { fwdFlowStore(_, t1, tail, c, t2, _, _, _, _, _, _) and cons = apCons(c, t1, tail) + or + exists(Typ t0 | + typeStrengthen(t0, cons, t2) and + fwdFlowConsCand(t0, cons, c, t1, tail) + ) } pragma[nomagic] @@ -1955,10 +1971,10 @@ module Impl { ) } - bindingset[node, state, t, ap] - predicate filter(NodeEx node, FlowState state, Typ t, Ap ap) { + bindingset[node, state, t0, ap] + predicate filter(NodeEx node, FlowState state, Typ t0, Ap ap, Typ t) { PrevStage::revFlowState(state) and - exists(t) and + t0 = t and exists(ap) and not stateBarrier(node, state) and ( @@ -2210,10 +2226,16 @@ module Impl { ) } - bindingset[node, state, t, ap] - predicate filter(NodeEx node, FlowState state, Typ t, Ap ap) { + bindingset[node, state, t0, ap] + predicate filter(NodeEx node, FlowState state, Typ t0, Ap ap, Typ t) { exists(state) and - (if castingNodeEx(node) then compatibleTypes(node.getDataFlowType(), t) else any()) and + // We can get away with not using type strengthening here, since we aren't + // going to use the tracked types in the construction of Stage 4 access + // paths. For Stage 4 and onwards, the tracked types must be consistent as + // the cons candidates including types are used to construct subsequent + // access path approximations. + t0 = t and + (if castingNodeEx(node) then compatibleTypes(node.getDataFlowType(), t0) else any()) and ( notExpectsContent(node) or @@ -2325,11 +2347,18 @@ module Impl { ) } - bindingset[node, state, t, ap] - predicate filter(NodeEx node, FlowState state, Typ t, Ap ap) { + bindingset[node, state, t0, ap] + predicate filter(NodeEx node, FlowState state, Typ t0, Ap ap, Typ t) { exists(state) and not clear(node, ap) and - (if castingNodeEx(node) then compatibleTypes(node.getDataFlowType(), t) else any()) and + ( + if castingNodeEx(node) + then + exists(Typ nt | nt = node.getDataFlowType() | + if typeStrongerThan(nt, t0) then t = nt else (compatibleTypes(nt, t0) and t = t0) + ) + else t = t0 + ) and ( notExpectsContent(node) or @@ -2601,9 +2630,16 @@ module Impl { ) } - bindingset[node, state, t, ap] - predicate filter(NodeEx node, FlowState state, Typ t, Ap ap) { - (if castingNodeEx(node) then compatibleTypes(node.getDataFlowType(), t) else any()) and + bindingset[node, state, t0, ap] + predicate filter(NodeEx node, FlowState state, Typ t0, Ap ap, Typ t) { + ( + if castingNodeEx(node) + then + exists(Typ nt | nt = node.getDataFlowType() | + if typeStrongerThan(nt, t0) then t = nt else (compatibleTypes(nt, t0) and t = t0) + ) + else t = t0 + ) and exists(state) and exists(ap) } @@ -2812,9 +2848,7 @@ module Impl { ap = TAccessPathNil() or // ... or a step from an existing PathNode to another node. - pathStep(_, node, state, cc, sc, t, ap) and - Stage5::revFlow(node, state, ap.getApprox()) and - (if castingNodeEx(node) then compatibleTypes(node.getDataFlowType(), t) else any()) + pathStep(_, node, state, cc, sc, t, ap) } or TPathNodeSink(NodeEx node, FlowState state) { exists(PathNodeMid sink | @@ -3332,13 +3366,31 @@ module Impl { ap = mid.getAp() } + private predicate pathStep( + PathNodeMid mid, NodeEx node, FlowState state, CallContext cc, SummaryCtx sc, DataFlowType t, + AccessPath ap + ) { + exists(DataFlowType t0 | + pathStep0(mid, node, state, cc, sc, t0, ap) and + Stage5::revFlow(node, state, ap.getApprox()) and + ( + if castingNodeEx(node) + then + exists(DataFlowType nt | nt = node.getDataFlowType() | + if typeStrongerThan(nt, t0) then t = nt else (compatibleTypes(nt, t0) and t = t0) + ) + else t = t0 + ) + ) + } + /** * Holds if data may flow from `mid` to `node`. The last step in or out of * a callable is recorded by `cc`. */ pragma[assume_small_delta] pragma[nomagic] - private predicate pathStep( + private predicate pathStep0( PathNodeMid mid, NodeEx node, FlowState state, CallContext cc, SummaryCtx sc, DataFlowType t, AccessPath ap ) { diff --git a/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowPrivate.qll b/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowPrivate.qll index 22f84241c96..4782e65b7e7 100644 --- a/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowPrivate.qll +++ b/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowPrivate.qll @@ -176,7 +176,7 @@ predicate expectsContent(Node n, ContentSet c) { * possible flow. A single type is used for all numeric types to account for * numeric conversions, and otherwise the erasure is used. */ -DataFlowType getErasedRepr(Type t) { +RefType getErasedRepr(Type t) { exists(Type e | e = t.getErasure() | if e instanceof NumericOrCharType then result.(BoxedType).getPrimitiveType().getName() = "double" @@ -189,6 +189,15 @@ DataFlowType getErasedRepr(Type t) { t instanceof NullType and result instanceof TypeObject } +class DataFlowType extends SrcRefType { + DataFlowType() { this = getErasedRepr(_) } +} + +pragma[nomagic] +predicate typeStrongerThan(DataFlowType t1, DataFlowType t2) { + t1.getASourceSupertype+() = t2 +} + pragma[noinline] DataFlowType getNodeType(Node n) { result = getErasedRepr(n.getTypeBound()) @@ -259,8 +268,6 @@ class DataFlowCallable extends TDataFlowCallable { class DataFlowExpr = Expr; -class DataFlowType = RefType; - private newtype TDataFlowCall = TCall(Call c) or TSummaryCall(SummarizedCallable c, Node receiver) {