mirror of
https://github.com/github/codeql.git
synced 2025-12-18 01:33:15 +01:00
451 lines
16 KiB
Plaintext
451 lines
16 KiB
Plaintext
/**
|
|
* Provides an implementation of global (interprocedural) data flow. This file
|
|
* re-exports the local (intraprocedural) data flow analysis from
|
|
* `DataFlowImplSpecific::Public` and adds a global analysis, mainly exposed
|
|
* through the `Global` and `GlobalWithState` modules.
|
|
*/
|
|
|
|
private import DataFlowImplCommon
|
|
private import DataFlowImplSpecific::Private
|
|
import DataFlowImplSpecific::Public
|
|
import DataFlowImplCommonPublic
|
|
private import DataFlowImpl
|
|
|
|
/** The input signature of a global data flow configuration that does not use flow state. */
signature module ConfigSig {
  /** Holds if `source` is a relevant data flow source. */
  predicate isSource(Node source);

  /** Holds if `sink` is a relevant data flow sink. */
  predicate isSink(Node sink);

  /**
   * Holds if data flow through `node` is prohibited. Such a node is removed
   * from the data flow graph altogether.
   *
   * Defaults to no barriers.
   */
  default predicate isBarrier(Node node) { none() }

  /** Holds if data flow into `node` is prohibited. Defaults to no such nodes. */
  default predicate isBarrierIn(Node node) { none() }

  /** Holds if data flow out of `node` is prohibited. Defaults to no such nodes. */
  default predicate isBarrierOut(Node node) { none() }

  /**
   * Holds if data may flow from `node1` to `node2` in addition to the normal
   * data-flow steps. Defaults to no additional steps.
   */
  default predicate isAdditionalFlowStep(Node node1, Node node2) { none() }

  /**
   * Holds if an arbitrary number of implicit read steps of content `c` may be
   * taken at `node`. Defaults to no implicit reads.
   */
  default predicate allowImplicitRead(Node node, ContentSet c) { none() }

  /**
   * Holds if `node` should never be skipped over in the `PathGraph` and in path
   * explanations.
   *
   * By default this holds for both end-points of every additional flow step.
   */
  default predicate neverSkip(Node node) {
    // `node` is the target of an additional step ...
    isAdditionalFlowStep(_, node)
    or
    // ... or `node` is the origin of an additional step.
    isAdditionalFlowStep(node, _)
  }

  /**
   * Gets the virtual dispatch branching limit when calculating field flow.
   *
   * The default is 2. Override with a smaller value to improve performance (a
   * value of 0 disables field flow), or with a larger value to get more results.
   */
  default int fieldFlowBranchLimit() { result = 2 }

  /**
   * Gets a data flow configuration feature to add restrictions to the set of
   * valid flow paths. By default no feature is enabled.
   *
   * - `FeatureHasSourceCallContext`:
   *    Assume that sources have some existing call context to disallow
   *    conflicting return-flow directly following the source.
   * - `FeatureHasSinkCallContext`:
   *    Assume that sinks have some existing call context to disallow
   *    conflicting argument-to-parameter flow directly preceding the sink.
   * - `FeatureEqualSourceSinkCallContext`:
   *    Implies both of the above and additionally ensures that the entire flow
   *    path preserves the call context.
   *
   * These features are generally not relevant for typical end-to-end data flow
   * queries, but should only be used for constructing paths that need to
   * somehow be pluggable in another path context.
   */
  default FlowFeature getAFeature() { none() }

  /**
   * Holds if sources should be grouped in the result of `flowPath`.
   * By default sources are not grouped.
   */
  default predicate sourceGrouping(Node source, string sourceGroup) { none() }

  /**
   * Holds if sinks should be grouped in the result of `flowPath`.
   * By default sinks are not grouped.
   */
  default predicate sinkGrouping(Node sink, string sinkGroup) { none() }

  /**
   * Holds if hidden nodes should be included in the data flow graph.
   * By default this does not hold.
   *
   * This feature should only be used for debugging or when the data flow graph
   * is not visualized (as it is in a `path-problem` query).
   */
  default predicate includeHiddenNodes() { none() }
}
|
|
|
|
/** The input signature of a global data flow configuration that uses flow state. */
signature module StateConfigSig {
  /** The type of the flow states used by this configuration. */
  bindingset[this]
  class FlowState;

  /**
   * Holds if `source` is a relevant data flow source with the given initial
   * `state`.
   */
  predicate isSource(Node source, FlowState state);

  /** Holds if `sink` is a relevant data flow sink accepting `state`. */
  predicate isSink(Node sink, FlowState state);

  /**
   * Holds if data flow through `node` is prohibited. Such a node is removed
   * from the data flow graph altogether.
   *
   * Defaults to no barriers.
   */
  default predicate isBarrier(Node node) { none() }

  /**
   * Holds if data flow through `node` is prohibited when the flow state is
   * `state`. Defaults to no state-specific barriers.
   */
  default predicate isBarrier(Node node, FlowState state) { none() }

  /** Holds if data flow into `node` is prohibited. Defaults to no such nodes. */
  default predicate isBarrierIn(Node node) { none() }

  /** Holds if data flow out of `node` is prohibited. Defaults to no such nodes. */
  default predicate isBarrierOut(Node node) { none() }

  /**
   * Holds if data may flow from `node1` to `node2` in addition to the normal
   * data-flow steps. Defaults to no additional steps.
   */
  default predicate isAdditionalFlowStep(Node node1, Node node2) { none() }

  /**
   * Holds if data may flow from `node1` to `node2` in addition to the normal
   * data-flow steps. This step is only applicable in `state1` and updates the
   * flow state to `state2`. Defaults to no additional steps.
   */
  default predicate isAdditionalFlowStep(
    Node node1, FlowState state1, Node node2, FlowState state2
  ) {
    none()
  }

  /**
   * Holds if an arbitrary number of implicit read steps of content `c` may be
   * taken at `node`. Defaults to no implicit reads.
   */
  default predicate allowImplicitRead(Node node, ContentSet c) { none() }

  /**
   * Holds if `node` should never be skipped over in the `PathGraph` and in path
   * explanations.
   *
   * By default this holds for both end-points of every additional flow step,
   * whether or not the step is state-specific.
   */
  default predicate neverSkip(Node node) {
    // End-points of state-changing additional steps.
    isAdditionalFlowStep(_, _, node, _)
    or
    isAdditionalFlowStep(node, _, _, _)
    or
    // End-points of state-agnostic additional steps.
    isAdditionalFlowStep(_, node)
    or
    isAdditionalFlowStep(node, _)
  }

  /**
   * Gets the virtual dispatch branching limit when calculating field flow.
   *
   * The default is 2. Override with a smaller value to improve performance (a
   * value of 0 disables field flow), or with a larger value to get more results.
   */
  default int fieldFlowBranchLimit() { result = 2 }

  /**
   * Gets a data flow configuration feature to add restrictions to the set of
   * valid flow paths. By default no feature is enabled.
   *
   * - `FeatureHasSourceCallContext`:
   *    Assume that sources have some existing call context to disallow
   *    conflicting return-flow directly following the source.
   * - `FeatureHasSinkCallContext`:
   *    Assume that sinks have some existing call context to disallow
   *    conflicting argument-to-parameter flow directly preceding the sink.
   * - `FeatureEqualSourceSinkCallContext`:
   *    Implies both of the above and additionally ensures that the entire flow
   *    path preserves the call context.
   *
   * These features are generally not relevant for typical end-to-end data flow
   * queries, but should only be used for constructing paths that need to
   * somehow be pluggable in another path context.
   */
  default FlowFeature getAFeature() { none() }

  /**
   * Holds if sources should be grouped in the result of `flowPath`.
   * By default sources are not grouped.
   */
  default predicate sourceGrouping(Node source, string sourceGroup) { none() }

  /**
   * Holds if sinks should be grouped in the result of `flowPath`.
   * By default sinks are not grouped.
   */
  default predicate sinkGrouping(Node sink, string sinkGroup) { none() }

  /**
   * Holds if hidden nodes should be included in the data flow graph.
   * By default this does not hold.
   *
   * This feature should only be used for debugging or when the data flow graph
   * is not visualized (as it is in a `path-problem` query).
   */
  default predicate includeHiddenNodes() { none() }
}
|
|
|
|
/**
 * Gets the exploration limit for `partialFlow` and `partialFlowRev`.
 *
 * The limit is measured in approximate number of interprocedural steps.
 */
signature int explorationLimitSig();
|
|
|
|
/**
 * The signature of the result of a global data flow computation.
 */
signature module GlobalFlowSig {
  /**
   * A `Node` augmented with a call context (except for sinks) and an access path.
   *
   * Only those `PathNode`s that are reachable from a source, and which can
   * reach a sink, are generated.
   */
  class PathNode;

  /**
   * Holds if data can flow from `source` to `sink`, both given as path nodes.
   *
   * The corresponding paths are generated from the end-points and the graph
   * included in the module `PathGraph`.
   */
  predicate flowPath(PathNode source, PathNode sink);

  /** Holds if data can flow from the node `source` to the node `sink`. */
  predicate flow(Node source, Node sink);

  /** Holds if data can flow from some source to the node `sink`. */
  predicate flowTo(Node sink);

  /** Holds if data can flow from some source to the expression `sink`. */
  predicate flowToExpr(DataFlowExpr sink);
}
|
|
|
|
/**
 * Constructs a global data flow computation from the state-less
 * configuration `Config`.
 */
module Global<ConfigSig Config> implements GlobalFlowSig {
  /**
   * The full, state-aware configuration passed to `Impl`: the members of
   * `Config` combined with those provided by `DefaultState<Config>`.
   */
  private module C implements FullStateConfigSig {
    import Config
    import DefaultState<Config>
  }

  import Impl<C>
}
|
|
|
|
/**
 * DEPRECATED: Use `Global` instead.
 *
 * This module simply re-exports `Global<Config>`.
 */
deprecated module Make<ConfigSig Config> implements GlobalFlowSig {
  import Global<Config>
}
|
|
|
|
/**
 * Constructs a global data flow computation from the configuration `Config`,
 * which uses flow state.
 */
module GlobalWithState<StateConfigSig Config> implements GlobalFlowSig {
  /** The configuration passed to `Impl`; it consists solely of the members of `Config`. */
  private module C implements FullStateConfigSig {
    import Config
  }

  import Impl<C>
}
|
|
|
|
/**
 * DEPRECATED: Use `GlobalWithState` instead.
 *
 * This module simply re-exports `GlobalWithState<Config>`.
 */
deprecated module MakeWithState<StateConfigSig Config> implements GlobalFlowSig {
  import GlobalWithState<Config>
}
|
|
|
|
/**
 * The signature of a path node: an element with a textual representation,
 * a location, and an underlying data flow `Node`.
 */
signature class PathNodeSig {
  /** Gets a textual representation of this element. */
  string toString();

  /**
   * Holds if this element is at the specified location.
   *
   * The location spans column `startcolumn` of line `startline` to
   * column `endcolumn` of line `endline` in file `filepath`.
   * For more information, see
   * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
   */
  predicate hasLocationInfo(
    string filepath, int startline, int startcolumn, int endline, int endcolumn
  );

  /** Gets the underlying `Node`. */
  Node getNode();
}
|
|
|
|
/**
 * The signature of a graph of data flow path explanations over the path
 * node type `PathNode`.
 */
signature module PathGraphSig<PathNodeSig PathNode> {
  /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */
  predicate edges(PathNode a, PathNode b);

  /** Holds if `n` is a node in the graph of data flow path explanations. */
  predicate nodes(PathNode n, string key, string val);

  /**
   * Holds if `(arg, par, ret, out)` forms a subpath-tuple.
   *
   * That is, flow through a subpath between `par` and `ret` with the
   * connecting edges `arg -> par` and `ret -> out` is summarized as the edge
   * `arg -> out`.
   */
  predicate subpaths(PathNode arg, PathNode par, PathNode ret, PathNode out);
}
|
|
|
|
/**
 * Constructs a `PathGraph` as the disjoint union of the two given `PathGraph`s.
 */
module MergePathGraph<
  PathNodeSig PathNode1, PathNodeSig PathNode2, PathGraphSig<PathNode1> Graph1,
  PathGraphSig<PathNode2> Graph2>
{
  // One branch per input graph, so that nodes of the two graphs stay distinct.
  private newtype TPathNode =
    TPathNode1(PathNode1 first) or
    TPathNode2(PathNode2 second)

  /**
   * A node in the merged graph of path explanations, that is, a node of
   * either the first or the second given graph.
   */
  class PathNode extends TPathNode {
    /** Gets this node as a node of the first given `PathGraph`, if it is one. */
    PathNode1 asPathNode1() { TPathNode1(result) = this }

    /** Gets this node as a node of the second given `PathGraph`, if it is one. */
    PathNode2 asPathNode2() { TPathNode2(result) = this }

    /** Gets a textual representation of this element. */
    string toString() {
      result = this.asPathNode1().toString()
      or
      result = this.asPathNode2().toString()
    }

    /**
     * Holds if this element is at the specified location.
     *
     * The location spans column `startcolumn` of line `startline` to
     * column `endcolumn` of line `endline` in file `filepath`.
     * For more information, see
     * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
     */
    predicate hasLocationInfo(
      string filepath, int startline, int startcolumn, int endline, int endcolumn
    ) {
      this.asPathNode1().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
      or
      this.asPathNode2().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
    }

    /** Gets the underlying `Node`. */
    Node getNode() {
      result = this.asPathNode1().getNode()
      or
      result = this.asPathNode2().getNode()
    }
  }

  /**
   * Provides the query predicates needed to include a graph in a path-problem query.
   *
   * Each predicate holds when the corresponding predicate of `Graph1` or of
   * `Graph2` holds for the projections of its arguments onto that graph.
   */
  module PathGraph implements PathGraphSig<PathNode> {
    /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */
    query predicate edges(PathNode a, PathNode b) {
      Graph1::edges(a.asPathNode1(), b.asPathNode1())
      or
      Graph2::edges(a.asPathNode2(), b.asPathNode2())
    }

    /** Holds if `n` is a node in the graph of data flow path explanations. */
    query predicate nodes(PathNode n, string key, string val) {
      Graph1::nodes(n.asPathNode1(), key, val)
      or
      Graph2::nodes(n.asPathNode2(), key, val)
    }

    /**
     * Holds if `(arg, par, ret, out)` forms a subpath-tuple.
     *
     * That is, flow through a subpath between `par` and `ret` with the
     * connecting edges `arg -> par` and `ret -> out` is summarized as the edge
     * `arg -> out`.
     */
    query predicate subpaths(PathNode arg, PathNode par, PathNode ret, PathNode out) {
      Graph1::subpaths(arg.asPathNode1(), par.asPathNode1(), ret.asPathNode1(), out.asPathNode1())
      or
      Graph2::subpaths(arg.asPathNode2(), par.asPathNode2(), ret.asPathNode2(), out.asPathNode2())
    }
  }
}
|
|
|
|
/**
 * Constructs a `PathGraph` as the disjoint union of the three given `PathGraph`s.
 *
 * The union is built by applying `MergePathGraph` twice: first to the first
 * two graphs, and then to that result and the third graph.
 */
module MergePathGraph3<
  PathNodeSig PathNode1, PathNodeSig PathNode2, PathNodeSig PathNode3,
  PathGraphSig<PathNode1> Graph1, PathGraphSig<PathNode2> Graph2, PathGraphSig<PathNode3> Graph3>
{
  // The union of the first and the second graph.
  private module MergedInner = MergePathGraph<PathNode1, PathNode2, Graph1, Graph2>;

  // The union of `MergedInner` and the third graph.
  private module Merged =
    MergePathGraph<MergedInner::PathNode, PathNode3, MergedInner::PathGraph, Graph3>;

  /**
   * A node in the merged graph of path explanations, that is, a node of one
   * of the three given graphs.
   */
  class PathNode instanceof Merged::PathNode {
    /** Gets this node as a node of the first given `PathGraph`, if it is one. */
    PathNode1 asPathNode1() {
      exists(MergedInner::PathNode inner |
        inner = super.asPathNode1() and
        result = inner.asPathNode1()
      )
    }

    /** Gets this node as a node of the second given `PathGraph`, if it is one. */
    PathNode2 asPathNode2() {
      exists(MergedInner::PathNode inner |
        inner = super.asPathNode1() and
        result = inner.asPathNode2()
      )
    }

    /** Gets this node as a node of the third given `PathGraph`, if it is one. */
    PathNode3 asPathNode3() { super.asPathNode2() = result }

    /** Gets a textual representation of this element. */
    string toString() { super.toString() = result }

    /**
     * Holds if this element is at the specified location.
     *
     * The location spans column `startcolumn` of line `startline` to
     * column `endcolumn` of line `endline` in file `filepath`.
     * For more information, see
     * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
     */
    predicate hasLocationInfo(
      string filepath, int startline, int startcolumn, int endline, int endcolumn
    ) {
      super.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
    }

    /** Gets the underlying `Node`. */
    Node getNode() { super.getNode() = result }
  }

  /**
   * Provides the query predicates needed to include a graph in a path-problem query.
   *
   * Every predicate delegates to the predicate of the same name in
   * `Merged::PathGraph`.
   */
  module PathGraph implements PathGraphSig<PathNode> {
    /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */
    query predicate edges(PathNode a, PathNode b) { Merged::PathGraph::edges(a, b) }

    /** Holds if `n` is a node in the graph of data flow path explanations. */
    query predicate nodes(PathNode n, string key, string val) {
      Merged::PathGraph::nodes(n, key, val)
    }

    /**
     * Holds if `(arg, par, ret, out)` forms a subpath-tuple.
     *
     * That is, flow through a subpath between `par` and `ret` with the
     * connecting edges `arg -> par` and `ret -> out` is summarized as the edge
     * `arg -> out`.
     */
    query predicate subpaths(PathNode arg, PathNode par, PathNode ret, PathNode out) {
      Merged::PathGraph::subpaths(arg, par, ret, out)
    }
  }
}
|