Merge pull request #17863 from aschackmull/shared/universal-flow

Shared: Add a Universal Flow library and refactor TypeFlow to use it.
This commit is contained in:
Anders Schack-Mulligen
2024-11-06 13:46:13 +01:00
committed by GitHub
5 changed files with 397 additions and 248 deletions

View File

@@ -28,14 +28,9 @@ signature module TypeFlowInput<LocationSig Location> {
}
/**
* Holds if data can flow from `n1` to `n2` in one step, and `n1` is not
* necessarily functionally determined by `n2`.
*/
predicate joinStep(TypeFlowNode n1, TypeFlowNode n2);
/**
* Holds if data can flow from `n1` to `n2` in one step, and `n1` is
* functionally determined by `n2`.
* Holds if data can flow from `n1` to `n2` in one step.
*
* For a given `n2`, this predicate must include all possible `n1` that can flow to `n2`.
*/
predicate step(TypeFlowNode n1, TypeFlowNode n2);

View File

@@ -0,0 +1,351 @@
/**
* Provides predicates for proving data flow properties that hold for all
* paths, that is, reachability is computed using universal quantification over
* the step relation.
*
* Regular data flow search for the existence of a path, that is, reachability
* using existential quantification over the step relation. Hence, this library
* computes the dual reachability predicate that states that a certain property
* always holds for a given node regardless of the path taken.
*
* As a simple comparison, the computed predicate is essentially equivalent to
* the folllowing:
* ```ql
* predicate hasProperty(FlowNode n, Prop t) {
* basecase(n, t)
* or
* forex(FlowNode mid | step(mid, n) | hasProperty(mid, t))
* }
* ```
* More complex property propagation is supported, and strongly connected
* components in the flow graph are handled.
*
* As an initial such property calculation, the library computes the set of
* nodes that are always null. These are then subtracted from the graph such
* that subsequently calculated properties hold under the assumption that the
* value is not null.
*/
private import codeql.util.Location
private import codeql.util.Unit
/** Provides the input specification. */
signature module UniversalFlowInput<LocationSig Location> {
/**
* A node for which certain data flow properties may be proved. For example,
* expressions and method declarations.
*/
class FlowNode {
/** Gets a textual representation of this node. */
string toString();
/** Gets the location of this node. */
Location getLocation();
}
/**
* Holds if data can flow from `n1` to `n2` in one step.
*
* For a given `n2`, this predicate must include all possible `n1` that can flow to `n2`.
*/
predicate step(FlowNode n1, FlowNode n2);
/** Holds if `n` represents a `null` value. */
predicate isNullValue(FlowNode n);
/**
* Holds if `n` should be excluded from the set of null values even if
* the null analysis determines that `n` is always null.
*/
default predicate isExcludedFromNullAnalysis(FlowNode n) { none() }
}
/**
* Provides an implementation of universal flow using input `I`.
*/
module Make<LocationSig Location, UniversalFlowInput<Location> I> {
private import I
/**
* Holds if data can flow from `n1` to `n2` in one step, and `n1` is
* functionally determined by `n2`.
*/
private predicate uniqStep(FlowNode n1, FlowNode n2) { n1 = unique(FlowNode n | step(n, n2)) }
/**
* Holds if data can flow from `n1` to `n2` in one step, and `n1` is not
* functionally determined by `n2`.
*/
private predicate joinStep(FlowNode n1, FlowNode n2) { step(n1, n2) and not uniqStep(n1, n2) }
/** Holds if `null` is the only value that flows to `n`. */
private predicate isNull(FlowNode n) {
isNullValue(n)
or
not isExcludedFromNullAnalysis(n) and
(
exists(FlowNode mid | isNull(mid) and uniqStep(mid, n))
or
forex(FlowNode mid | joinStep(mid, n) | isNull(mid))
)
}
private predicate uniqStepNotNull(FlowNode n1, FlowNode n2) {
uniqStep(n1, n2) and not isNull(n1)
}
private import Internal
/** Provides access to internal step relations. */
module Internal {
/**
* Holds if data can flow from `n1` to `n2` in one step, `n1` is not necessarily
* functionally determined by `n2`, and `n1` might take a non-null value.
*/
predicate joinStepNotNull(FlowNode n1, FlowNode n2) { joinStep(n1, n2) and not isNull(n1) }
/**
* Holds if data can flow from `n1` to `n2` in one step, excluding join
* steps from nodes that are always null.
*/
predicate anyStep(FlowNode n1, FlowNode n2) {
joinStepNotNull(n1, n2) or uniqStepNotNull(n1, n2)
}
}
private predicate sccEdge(FlowNode n1, FlowNode n2) { anyStep(n1, n2) and anyStep+(n2, n1) }
private module Scc = QlBuiltins::EquivalenceRelation<FlowNode, sccEdge/2>;
private class FlowScc = Scc::EquivalenceClass;
/** Holds if `n` is part of an SCC of size 2 or more represented by `scc`. */
private predicate sccRepr(FlowNode n, FlowScc scc) { scc = Scc::getEquivalenceClass(n) }
private predicate sccJoinStepNotNull(FlowNode n, FlowScc scc) {
exists(FlowNode mid |
joinStepNotNull(n, mid) and
sccRepr(mid, scc) and
not sccRepr(n, scc)
)
}
private signature class NodeSig;
private signature module Edge {
class Node;
predicate edge(FlowNode n1, Node n2);
}
private signature module RankedEdge<NodeSig Node> {
predicate edgeRank(int r, FlowNode n1, Node n2);
int lastRank(Node n);
}
private module RankEdge<Edge E> implements RankedEdge<E::Node> {
private import E
/**
* Holds if `r` is a ranking of the incoming edges `(n1,n2)` to `n2`. The used
* ordering is not necessarily total, so the ranking may have gaps.
*/
private predicate edgeRank1(int r, FlowNode n1, Node n2) {
n1 =
rank[r](FlowNode n, int startline, int startcolumn |
edge(n, n2) and
n.getLocation().hasLocationInfo(_, startline, startcolumn, _, _)
|
n order by startline, startcolumn
)
}
/**
* Holds if `r2` is a ranking of the ranks from `edgeRank1`. This removes the
* gaps from the ranking.
*/
private predicate edgeRank2(int r2, int r1, Node n) {
r1 = rank[r2](int r | edgeRank1(r, _, n) | r)
}
/** Holds if `r` is a ranking of the incoming edges `(n1,n2)` to `n2`. */
predicate edgeRank(int r, FlowNode n1, Node n2) {
exists(int r1 |
edgeRank1(r1, n1, n2) and
edgeRank2(r, r1, n2)
)
}
int lastRank(Node n) { result = max(int r | edgeRank(r, _, n)) }
}
private signature module PropPropagation {
class Prop;
predicate candProp(FlowNode n, Prop t);
bindingset[t]
predicate supportsProp(FlowNode n, Prop t);
}
/** Implements recursion through `forall` by way of edge ranking. */
private module ForAll<NodeSig Node, RankedEdge<Node> E, PropPropagation T> {
/**
* Holds if `t` is a property that holds on one of the incoming edges to `n` and
* thus is a candidate property for `n`.
*/
pragma[nomagic]
private predicate candJoinProp(Node n, T::Prop t) {
exists(FlowNode mid |
T::candProp(mid, t) and
E::edgeRank(_, mid, n)
)
}
/**
* Holds if `t` is a candidate property for `n` that is also valid for data coming
* through the edges into `n` ranked from `1` to `r`.
*/
private predicate flowJoin(int r, Node n, T::Prop t) {
(
r = 1 and candJoinProp(n, t)
or
flowJoin(r - 1, n, t) and E::edgeRank(r, _, n)
) and
forall(FlowNode mid | E::edgeRank(r, mid, n) | T::supportsProp(mid, t))
}
/**
* Holds if `t` is a candidate property for `n` that is also valid for data
* coming through all the incoming edges, and therefore is a valid property for
* `n`.
*/
predicate flowJoin(Node n, T::Prop t) { flowJoin(E::lastRank(n), n, t) }
}
private module JoinStep implements Edge {
class Node = FlowNode;
predicate edge = joinStepNotNull/2;
}
private module SccJoinStep implements Edge {
class Node = FlowScc;
predicate edge = sccJoinStepNotNull/2;
}
private module RankedJoinStep = RankEdge<JoinStep>;
private module RankedSccJoinStep = RankEdge<SccJoinStep>;
signature module NullaryPropertySig {
predicate hasPropertyBase(FlowNode n);
default predicate barrier(FlowNode n) { none() }
}
/**
* Calculates a (nullary) property using universal flow given a base case
* relation.
*/
module FlowNullary<NullaryPropertySig P> {
private module Propagation implements PropPropagation {
class Prop = Unit;
predicate candProp(FlowNode n, Unit u) { hasProperty(n) and exists(u) }
predicate supportsProp = candProp/2;
}
/**
* Holds if all flow reaching `n` originates from nodes in
* `hasPropertyBase`.
*/
predicate hasProperty(FlowNode n) {
P::hasPropertyBase(n)
or
not P::barrier(n) and
(
exists(FlowNode mid | hasProperty(mid) and uniqStepNotNull(mid, n))
or
// The following is an optimized version of
// `forex(FlowNode mid | joinStepNotNull(mid, n) | hasPropery(mid))`
ForAll<FlowNode, RankedJoinStep, Propagation>::flowJoin(n, _)
or
exists(FlowScc scc |
sccRepr(n, scc) and
// Optimized version of
// `forex(FlowNode mid | sccJoinStepNotNull(mid, scc) | hasPropery(mid))`
ForAll<FlowScc, RankedSccJoinStep, Propagation>::flowJoin(scc, _)
)
)
}
}
signature module PropertySig {
class Prop;
bindingset[t1, t2]
default predicate propImplies(Prop t1, Prop t2) { t1 = t2 }
predicate hasPropertyBase(FlowNode n, Prop t);
default predicate barrier(FlowNode n) { none() }
}
/**
* Calculates a unary property using universal flow given a base case
* relation.
*/
module Flow<PropertySig P> {
private module Propagation implements PropPropagation {
class Prop = P::Prop;
predicate candProp = hasProperty/2;
bindingset[t]
predicate supportsProp(FlowNode n, Prop t) {
exists(Prop t0 | hasProperty(n, t0) and P::propImplies(t0, t))
}
}
/**
* Holds if all flow reaching `n` originates from nodes in
* `hasPropertyBase`. The property `t` is taken from one of those origins
* such that all other origins imply `t`.
*/
predicate hasProperty(FlowNode n, P::Prop t) {
P::hasPropertyBase(n, t)
or
not P::barrier(n) and
(
exists(FlowNode mid | hasProperty(mid, t) and uniqStepNotNull(mid, n))
or
// The following is an optimized version of
// ```
// exists(FlowNode mid | joinStepNotNull(mid, n) | hasPropery(mid, t)) and
// forall(FlowNode mid | joinStepNotNull(mid, n) | hasPropery(mid, _)) and
// forall(FlowNode mid, P::Prop t0 | joinStepNotNull(mid, n) and hasPropery(mid, t0) |
// P::propImplies(t0, t)
// )
// ```
ForAll<FlowNode, RankedJoinStep, Propagation>::flowJoin(n, t)
or
exists(FlowScc scc |
sccRepr(n, scc) and
// Optimized version of
// ```
// exists(FlowNode mid | sccJoinStepNotNull(mid, n) | hasPropery(mid, t)) and
// forall(FlowNode mid | sccJoinStepNotNull(mid, n) | hasPropery(mid, _)) and
// forall(FlowNode mid, P::Prop t0 | sccJoinStepNotNull(mid, n) and hasPropery(mid, t0) |
// P::propImplies(t0, t)
// )
// ```
ForAll<FlowScc, RankedSccJoinStep, Propagation>::flowJoin(scc, t)
)
)
}
}
}

View File

@@ -1,189 +1,34 @@
private import codeql.typeflow.TypeFlow
private import codeql.typeflow.UniversalFlow as UniversalFlow
private import codeql.util.Location
private import codeql.util.Unit
module TypeFlow<LocationSig Location, TypeFlowInput<Location> I> {
private import I
/** Holds if `null` is the only value that flows to `n`. */
predicate isNull(TypeFlowNode n) {
isNullValue(n)
or
exists(TypeFlowNode mid | isNull(mid) and step(mid, n))
or
forex(TypeFlowNode mid | joinStep(mid, n) | isNull(mid)) and
not isExcludedFromNullAnalysis(n)
private module UfInput implements UniversalFlow::UniversalFlowInput<Location> {
class FlowNode = TypeFlowNode;
predicate step = I::step/2;
predicate isNullValue = I::isNullValue/1;
predicate isExcludedFromNullAnalysis = I::isExcludedFromNullAnalysis/1;
}
/**
* Holds if data can flow from `n1` to `n2` in one step, `n1` is not necessarily
* functionally determined by `n2`, and `n1` might take a non-null value.
*/
private predicate joinStepNotNull(TypeFlowNode n1, TypeFlowNode n2) {
joinStep(n1, n2) and not isNull(n1)
}
private module UnivFlow = UniversalFlow::Make<Location, UfInput>;
private predicate anyStep(TypeFlowNode n1, TypeFlowNode n2) {
joinStepNotNull(n1, n2) or step(n1, n2)
}
private module ExactTypeProperty implements UnivFlow::PropertySig {
class Prop = Type;
private predicate sccEdge(TypeFlowNode n1, TypeFlowNode n2) {
anyStep(n1, n2) and anyStep+(n2, n1)
}
private module Scc = QlBuiltins::EquivalenceRelation<TypeFlowNode, sccEdge/2>;
private class TypeFlowScc = Scc::EquivalenceClass;
/** Holds if `n` is part of an SCC of size 2 or more represented by `scc`. */
private predicate sccRepr(TypeFlowNode n, TypeFlowScc scc) { scc = Scc::getEquivalenceClass(n) }
private predicate sccJoinStepNotNull(TypeFlowNode n, TypeFlowScc scc) {
exists(TypeFlowNode mid |
joinStepNotNull(n, mid) and
sccRepr(mid, scc) and
not sccRepr(n, scc)
)
}
private signature class NodeSig;
private signature module Edge {
class Node;
predicate edge(TypeFlowNode n1, Node n2);
}
private signature module RankedEdge<NodeSig Node> {
predicate edgeRank(int r, TypeFlowNode n1, Node n2);
int lastRank(Node n);
}
private module RankEdge<Edge E> implements RankedEdge<E::Node> {
private import E
/**
* Holds if `r` is a ranking of the incoming edges `(n1,n2)` to `n2`. The used
* ordering is not necessarily total, so the ranking may have gaps.
*/
private predicate edgeRank1(int r, TypeFlowNode n1, Node n2) {
n1 =
rank[r](TypeFlowNode n, int startline, int startcolumn |
edge(n, n2) and
n.getLocation().hasLocationInfo(_, startline, startcolumn, _, _)
|
n order by startline, startcolumn
)
}
/**
* Holds if `r2` is a ranking of the ranks from `edgeRank1`. This removes the
* gaps from the ranking.
*/
private predicate edgeRank2(int r2, int r1, Node n) {
r1 = rank[r2](int r | edgeRank1(r, _, n) | r)
}
/** Holds if `r` is a ranking of the incoming edges `(n1,n2)` to `n2`. */
predicate edgeRank(int r, TypeFlowNode n1, Node n2) {
exists(int r1 |
edgeRank1(r1, n1, n2) and
edgeRank2(r, r1, n2)
)
}
int lastRank(Node n) { result = max(int r | edgeRank(r, _, n)) }
}
private signature module TypePropagation {
class Typ;
predicate candType(TypeFlowNode n, Typ t);
bindingset[t]
predicate supportsType(TypeFlowNode n, Typ t);
}
/** Implements recursion through `forall` by way of edge ranking. */
private module ForAll<NodeSig Node, RankedEdge<Node> E, TypePropagation T> {
/**
* Holds if `t` is a bound that holds on one of the incoming edges to `n` and
* thus is a candidate bound for `n`.
*/
pragma[nomagic]
private predicate candJoinType(Node n, T::Typ t) {
exists(TypeFlowNode mid |
T::candType(mid, t) and
E::edgeRank(_, mid, n)
)
}
/**
* Holds if `t` is a candidate bound for `n` that is also valid for data coming
* through the edges into `n` ranked from `1` to `r`.
*/
private predicate flowJoin(int r, Node n, T::Typ t) {
(
r = 1 and candJoinType(n, t)
or
flowJoin(r - 1, n, t) and E::edgeRank(r, _, n)
) and
forall(TypeFlowNode mid | E::edgeRank(r, mid, n) | T::supportsType(mid, t))
}
/**
* Holds if `t` is a candidate bound for `n` that is also valid for data
* coming through all the incoming edges, and therefore is a valid bound for
* `n`.
*/
predicate flowJoin(Node n, T::Typ t) { flowJoin(E::lastRank(n), n, t) }
}
private module JoinStep implements Edge {
class Node = TypeFlowNode;
predicate edge = joinStepNotNull/2;
}
private module SccJoinStep implements Edge {
class Node = TypeFlowScc;
predicate edge = sccJoinStepNotNull/2;
}
private module RankedJoinStep = RankEdge<JoinStep>;
private module RankedSccJoinStep = RankEdge<SccJoinStep>;
private module ExactTypePropagation implements TypePropagation {
class Typ = Type;
predicate candType = exactType/2;
predicate supportsType = exactType/2;
predicate hasPropertyBase = exactTypeBase/2;
}
/**
* Holds if the runtime type of `n` is exactly `t` and if this bound is a
* non-trivial lower bound, that is, `t` has a subtype.
*/
private predicate exactType(TypeFlowNode n, Type t) {
exactTypeBase(n, t)
or
exists(TypeFlowNode mid | exactType(mid, t) and step(mid, n))
or
// The following is an optimized version of
// `forex(TypeFlowNode mid | joinStepNotNull(mid, n) | exactType(mid, t))`
ForAll<TypeFlowNode, RankedJoinStep, ExactTypePropagation>::flowJoin(n, t)
or
exists(TypeFlowScc scc |
sccRepr(n, scc) and
// Optimized version of
// `forex(TypeFlowNode mid | sccJoinStepNotNull(mid, scc) | exactType(mid, t))`
ForAll<TypeFlowScc, RankedSccJoinStep, ExactTypePropagation>::flowJoin(scc, t)
)
}
private predicate exactType = UnivFlow::Flow<ExactTypeProperty>::hasProperty/2;
/**
* Gets the source declaration of a direct supertype of this type, excluding itself.
@@ -214,17 +59,13 @@ module TypeFlow<LocationSig Location, TypeFlowInput<Location> I> {
)
}
private module TypeFlowPropagation implements TypePropagation {
class Typ = Type;
private module TypeFlowProperty implements UnivFlow::PropertySig {
class Prop = Type;
predicate candType = typeFlow/2;
bindingset[t1, t2]
predicate propImplies(Type t1, Type t2) { getAnAncestor(pragma[only_bind_out](t1)) = t2 }
bindingset[t]
predicate supportsType(TypeFlowNode mid, Type t) {
exists(Type midtyp | exactType(mid, midtyp) or typeFlow(mid, midtyp) |
getAnAncestor(pragma[only_bind_out](midtyp)) = t
)
}
predicate hasPropertyBase(TypeFlowNode n, Prop t) { typeFlowBase(n, t) or exactType(n, t) }
}
/**
@@ -232,16 +73,8 @@ module TypeFlow<LocationSig Location, TypeFlowInput<Location> I> {
* likely to be better than the static type of `n`.
*/
private predicate typeFlow(TypeFlowNode n, Type t) {
typeFlowBase(n, t)
or
exists(TypeFlowNode mid | typeFlow(mid, t) and step(mid, n))
or
ForAll<TypeFlowNode, RankedJoinStep, TypeFlowPropagation>::flowJoin(n, t)
or
exists(TypeFlowScc scc |
sccRepr(n, scc) and
ForAll<TypeFlowScc, RankedSccJoinStep, TypeFlowPropagation>::flowJoin(scc, t)
)
UnivFlow::Flow<TypeFlowProperty>::hasProperty(n, t) and
not exactType(n, t)
}
pragma[nomagic]
@@ -329,22 +162,20 @@ module TypeFlow<LocationSig Location, TypeFlowInput<Location> I> {
*/
private predicate unionTypeFlowBaseCand(TypeFlowNode n, Type t, boolean exact) {
exists(TypeFlowNode next |
joinStepNotNull(n, next) and
UnivFlow::Internal::joinStepNotNull(n, next) and
bestTypeFlowOrTypeFlowBase(n, t, exact) and
not bestTypeFlowOrTypeFlowBase(next, t, exact) and
not exactType(next, _)
)
}
private module HasUnionTypePropagation implements TypePropagation {
class Typ = Unit;
predicate candType(TypeFlowNode mid, Unit unit) {
exists(unit) and
(unionTypeFlowBaseCand(mid, _, _) or hasUnionTypeFlow(mid))
module UnionTypeFlowProperty implements UnivFlow::NullaryPropertySig {
predicate hasPropertyBase(TypeFlowNode n) {
unionTypeFlowBaseCand(n, _, _) or
instanceofDisjunctionGuarded(n, _)
}
predicate supportsType = candType/2;
predicate barrier(TypeFlowNode n) { exactType(n, _) }
}
/**
@@ -352,25 +183,7 @@ module TypeFlow<LocationSig Location, TypeFlowInput<Location> I> {
* `unionTypeFlowBaseCand`, such that we can compute a union type bound for `n`.
* Disregards nodes for which we have an exact bound.
*/
private predicate hasUnionTypeFlow(TypeFlowNode n) {
not exactType(n, _) and
(
// Optimized version of
// `forex(TypeFlowNode mid | joinStepNotNull(mid, n) | unionTypeFlowBaseCand(mid, _, _) or hasUnionTypeFlow(mid))`
ForAll<TypeFlowNode, RankedJoinStep, HasUnionTypePropagation>::flowJoin(n, _)
or
exists(TypeFlowScc scc |
sccRepr(n, scc) and
// Optimized version of
// `forex(TypeFlowNode mid | sccJoinStep(mid, scc) | unionTypeFlowBaseCand(mid, _, _) or hasUnionTypeFlow(mid))`
ForAll<TypeFlowScc, RankedSccJoinStep, HasUnionTypePropagation>::flowJoin(scc, _)
)
or
exists(TypeFlowNode mid | step(mid, n) and hasUnionTypeFlow(mid))
or
instanceofDisjunctionGuarded(n, _)
)
}
private predicate hasUnionTypeFlow = UnivFlow::FlowNullary<UnionTypeFlowProperty>::hasProperty/1;
pragma[nomagic]
private Type getTypeBound(TypeFlowNode n) {
@@ -383,9 +196,9 @@ module TypeFlow<LocationSig Location, TypeFlowInput<Location> I> {
private predicate unionTypeFlow0(TypeFlowNode n, Type t, boolean exact) {
hasUnionTypeFlow(n) and
(
exists(TypeFlowNode mid | anyStep(mid, n) |
unionTypeFlowBaseCand(mid, t, exact) or unionTypeFlow(mid, t, exact)
)
exists(TypeFlowNode mid | UnivFlow::Internal::anyStep(mid, n) | unionTypeFlow(mid, t, exact))
or
unionTypeFlowBaseCand(n, t, exact)
or
instanceofDisjunctionGuarded(n, t) and exact = false
)
@@ -470,6 +283,7 @@ module TypeFlow<LocationSig Location, TypeFlowInput<Location> I> {
*/
predicate bestUnionType(TypeFlowNode n, Type t, boolean exact) {
unionTypeFlow(n, t, exact) and
not exactType(n, _) and
not irrelevantUnionType(n) and
not irrelevantUnionTypePart(n, t, exact)
}