mirror of
https://github.com/github/codeql.git
synced 2025-12-16 16:53:25 +01:00
Java: Add proper support for variable capture flow.
This commit is contained in:
4
java/ql/lib/change-notes/2023-06-16-initial-version.md
Normal file
4
java/ql/lib/change-notes/2023-06-16-initial-version.md
Normal file
@@ -0,0 +1,4 @@
|
||||
---
|
||||
category: majorAnalysis
|
||||
---
|
||||
* Improved support for flow through captured variables that properly adheres to inter-procedural control flow.
|
||||
@@ -176,7 +176,6 @@ extensions:
|
||||
- ["java.lang", "Object", "getClass", "()", "summary", "manual"]
|
||||
- ["java.lang", "Object", "hashCode", "()", "summary", "manual"]
|
||||
- ["java.lang", "Object", "toString", "()", "summary", "manual"]
|
||||
- ["java.lang", "Runnable", "run", "()", "summary", "manual"]
|
||||
- ["java.lang", "Runtime", "getRuntime", "()", "summary", "manual"]
|
||||
- ["java.lang", "String", "compareTo", "(String)", "summary", "manual"]
|
||||
- ["java.lang", "String", "contains", "(CharSequence)", "summary", "manual"]
|
||||
|
||||
@@ -156,7 +156,7 @@ private module DispatchImpl {
|
||||
|
||||
private module Unification = MkUnification<unificationTargetLeft/1, unificationTargetRight/1>;
|
||||
|
||||
private int parameterPosition() { result in [-1, any(Parameter p).getPosition()] }
|
||||
private int parameterPosition() { result in [-2, -1, any(Parameter p).getPosition()] }
|
||||
|
||||
/** A parameter position represented by an integer. */
|
||||
class ParameterPosition extends int {
|
||||
|
||||
@@ -55,7 +55,9 @@ private module Cached {
|
||||
)
|
||||
} or
|
||||
TFlowSummaryNode(FlowSummaryImpl::Private::SummaryNode sn) or
|
||||
TFieldValueNode(Field f)
|
||||
TFieldValueNode(Field f) or
|
||||
TParameterPostUpdate(CapturedParameter p) or
|
||||
TClosureNode(CaptureFlow::ClosureNode cn)
|
||||
|
||||
cached
|
||||
newtype TContent =
|
||||
@@ -64,6 +66,7 @@ private module Cached {
|
||||
TCollectionContent() or
|
||||
TMapKeyContent() or
|
||||
TMapValueContent() or
|
||||
TClosureContent(CapturedVariable v) or
|
||||
TSyntheticFieldContent(SyntheticField s)
|
||||
|
||||
cached
|
||||
@@ -73,6 +76,7 @@ private module Cached {
|
||||
TCollectionContentApprox() or
|
||||
TMapKeyContentApprox() or
|
||||
TMapValueContentApprox() or
|
||||
TClosureContentApprox(CapturedVariable v) or
|
||||
TSyntheticFieldApproxContent()
|
||||
}
|
||||
|
||||
@@ -127,6 +131,8 @@ module Public {
|
||||
or
|
||||
result = this.(ImplicitPostUpdateNode).getPreUpdateNode().getType()
|
||||
or
|
||||
result = this.(ClosureNode).getTypeImpl()
|
||||
or
|
||||
result = this.(FieldValueNode).getField().getType()
|
||||
}
|
||||
|
||||
@@ -359,6 +365,10 @@ private class ImplicitExprPostUpdate extends ImplicitPostUpdateNode, TImplicitEx
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * The implicit post-update node of a captured parameter. Its pre-update node
 * is the parameter node of that same parameter.
 */
private class ParameterPostUpdate extends ImplicitPostUpdateNode, TParameterPostUpdate {
|
||||
override Node getPreUpdateNode() { this = TParameterPostUpdate(result.asParameter()) }
|
||||
}
|
||||
|
||||
module Private {
|
||||
private import DataFlowDispatch
|
||||
|
||||
@@ -372,6 +382,7 @@ module Private {
|
||||
result.asCallable() = n.(MallocNode).getClassInstanceExpr().getEnclosingCallable() or
|
||||
result = nodeGetEnclosingCallable(n.(ImplicitPostUpdateNode).getPreUpdateNode()) or
|
||||
result.asSummarizedCallable() = n.(FlowSummaryNode).getSummarizedCallable() or
|
||||
result = n.(ClosureNode).getCaptureFlowNode().getEnclosingCallable() or
|
||||
result.asFieldScope() = n.(FieldValueNode).getField()
|
||||
}
|
||||
|
||||
@@ -400,6 +411,8 @@ module Private {
|
||||
this = getInstanceArgument(_)
|
||||
or
|
||||
this.(FlowSummaryNode).isArgumentOf(_, _)
|
||||
or
|
||||
this.(ClosureNode).isArgumentOf(_, _)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -417,6 +430,8 @@ module Private {
|
||||
pos = -1 and this = getInstanceArgument(call.asCall())
|
||||
or
|
||||
this.(FlowSummaryNode).isArgumentOf(call, pos)
|
||||
or
|
||||
this.(ClosureNode).isArgumentOf(call, pos)
|
||||
}
|
||||
|
||||
/** Gets the call in which this node is an argument. */
|
||||
@@ -491,6 +506,34 @@ module Private {
|
||||
c.asSummarizedCallable() = this.getSummarizedCallable() and pos = this.getPosition()
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A synthesized data flow node representing a closure object that tracks
|
||||
* captured variables.
|
||||
*/
|
||||
class ClosureNode extends Node, TClosureNode {
|
||||
/** Gets the `CaptureFlow::ClosureNode` that this data flow node wraps. */
CaptureFlow::ClosureNode getCaptureFlowNode() { this = TClosureNode(result) }
|
||||
|
||||
override Location getLocation() { result = this.getCaptureFlowNode().getLocation() }
|
||||
|
||||
override string toString() { result = this.getCaptureFlowNode().toString() }
|
||||
|
||||
/** Holds if the wrapped capture-flow node is the closure parameter of callable `c`. */
predicate isParameter(DataFlowCallable c) { this.getCaptureFlowNode().isParameter(c) }
|
||||
|
||||
/**
 * Holds if the wrapped capture-flow node is the closure argument of `call`.
 * The position `pos` is always `-2` for closure arguments.
 */
predicate isArgumentOf(DataFlowCall call, int pos) {
|
||||
this.getCaptureFlowNode().isArgument(call) and pos = -2
|
||||
}
|
||||
|
||||
/** Gets the type reported for this node, which is constrained only to be a `TypeObject`. */
Type getTypeImpl() { result instanceof TypeObject }
|
||||
}
|
||||
|
||||
/**
 * A closure node that acts as a parameter node. It is the parameter at
 * position `-2` of the callable for which `isParameter` holds.
 */
class ClosureParameterNode extends ParameterNode, ClosureNode {
|
||||
ClosureParameterNode() { this.isParameter(_) }
|
||||
|
||||
override predicate isParameterOf(DataFlowCallable c, int pos) {
|
||||
this.isParameter(c) and pos = -2
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private import Private
|
||||
@@ -520,3 +563,13 @@ private class SummaryPostUpdateNode extends FlowSummaryNode, PostUpdateNode {
|
||||
|
||||
override Node getPreUpdateNode() { result = pre }
|
||||
}
|
||||
|
||||
/**
 * A closure node that is the post-update node of another closure node, as
 * determined by `CaptureFlow::closurePostUpdateNode`.
 */
private class ClosurePostUpdateNode extends PostUpdateNode, ClosureNode {
|
||||
// The closure node whose updated state this node represents.
private ClosureNode pre;
|
||||
|
||||
ClosurePostUpdateNode() {
|
||||
CaptureFlow::closurePostUpdateNode(this.getCaptureFlowNode(), pre.getCaptureFlowNode())
|
||||
}
|
||||
|
||||
override Node getPreUpdateNode() { result = pre }
|
||||
}
|
||||
|
||||
@@ -10,6 +10,7 @@ private import semmle.code.java.dataflow.FlowSummary
|
||||
private import FlowSummaryImpl as FlowSummaryImpl
|
||||
private import DataFlowImplConsistency
|
||||
private import DataFlowNodes
|
||||
private import codeql.dataflow.VariableCapture as VariableCapture
|
||||
import DataFlowNodes::Private
|
||||
|
||||
private newtype TReturnKind = TNormalReturnKind()
|
||||
@@ -51,24 +52,110 @@ private predicate fieldStep(Node node1, Node node2) {
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if data can flow from `node1` to `node2` through variable capture.
|
||||
*/
|
||||
private predicate variableCaptureStep(Node node1, ExprNode node2) {
|
||||
exists(SsaImplicitInit closure, SsaVariable captured |
|
||||
closure.captures(captured) and
|
||||
node2.getExpr() = closure.getAFirstUse()
|
||||
private module CaptureInput implements VariableCapture::InputSig {
|
||||
private import java as J
|
||||
|
||||
class Location = J::Location;
|
||||
|
||||
class BasicBlock instanceof J::BasicBlock {
|
||||
string toString() { result = super.toString() }
|
||||
|
||||
DataFlowCallable getEnclosingCallable() { result.asCallable() = super.getEnclosingCallable() }
|
||||
}
|
||||
|
||||
BasicBlock getImmediateBasicBlockDominator(BasicBlock bb) { bbIDominates(result, bb) }
|
||||
|
||||
BasicBlock getABasicBlockSuccessor(BasicBlock bb) {
|
||||
result = bb.(J::BasicBlock).getABBSuccessor()
|
||||
}
|
||||
|
||||
//TODO: support capture of `this` in lambdas
|
||||
/**
 * A local-scope variable that counts as captured: the callable returned by
 * `getCallable()` together with the enclosing callables of its accesses make
 * up at least two distinct callables.
 */
class CapturedVariable instanceof LocalScopeVariable {
|
||||
CapturedVariable() {
|
||||
2 <=
|
||||
strictcount(J::Callable c |
|
||||
c = this.getCallable() or c = this.getAnAccess().getEnclosingCallable()
|
||||
)
|
||||
}
|
||||
|
||||
/** Gets a textual representation of this variable. */
string toString() { result = super.toString() }
|
||||
|
||||
/** Gets the callable of the underlying variable, as a `DataFlowCallable`. */
DataFlowCallable getCallable() { result.asCallable() = super.getCallable() }
|
||||
}
|
||||
|
||||
class CapturedParameter extends CapturedVariable instanceof Parameter { }
|
||||
|
||||
/**
 * Holds if the `i`th node of basic block `bb` is the variable update `upd`
 * and the destination variable of `upd` is the captured variable `v`.
 */
additional predicate capturedVarUpdate(
|
||||
J::BasicBlock bb, int i, CapturedVariable v, VariableUpdate upd
|
||||
) {
|
||||
upd.getDestVar() = v and bb.getNode(i) = upd
|
||||
}
|
||||
|
||||
/**
 * Holds if the `i`th node of basic block `bb` is `rv`, an rvalue access to
 * the captured variable `v`.
 */
additional predicate capturedVarRead(J::BasicBlock bb, int i, CapturedVariable v, RValue rv) {
|
||||
v.(LocalScopeVariable).getAnAccess() = rv and bb.getNode(i) = rv
|
||||
}
|
||||
|
||||
predicate variableWrite(BasicBlock bb, int i, CapturedVariable v, Location loc) {
|
||||
exists(VariableUpdate upd | capturedVarUpdate(bb, i, v, upd) and loc = upd.getLocation())
|
||||
}
|
||||
|
||||
predicate variableRead(BasicBlock bb, int i, CapturedVariable v, Location loc) {
|
||||
exists(RValue rv | capturedVarRead(bb, i, v, rv) and loc = rv.getLocation())
|
||||
}
|
||||
|
||||
class Callable = DataFlowCallable;
|
||||
|
||||
class Call instanceof DataFlowCall {
|
||||
string toString() { result = super.toString() }
|
||||
|
||||
Location getLocation() { result = super.getLocation() }
|
||||
|
||||
DataFlowCallable getEnclosingCallable() { result = super.getEnclosingCallable() }
|
||||
|
||||
predicate hasCfgNode(BasicBlock bb, int i) { super.asCall() = bb.(J::BasicBlock).getNode(i) }
|
||||
}
|
||||
}
|
||||
|
||||
class CapturedVariable = CaptureInput::CapturedVariable;
|
||||
|
||||
class CapturedParameter = CaptureInput::CapturedParameter;
|
||||
|
||||
module CaptureFlow = VariableCapture::Flow<CaptureInput>;
|
||||
|
||||
private predicate captureStoreStep(Node node1, ClosureContent c, Node node2) {
|
||||
exists(BasicBlock bb, int i, CaptureInput::CapturedVariable v, VariableUpdate upd |
|
||||
upd.(VariableAssign).getSource() = node1.asExpr() or
|
||||
upd.(AssignOp) = node1.asExpr()
|
||||
|
|
||||
node1.asExpr() = captured.getAUse()
|
||||
or
|
||||
not exists(captured.getAUse()) and
|
||||
exists(SsaVariable capturedDef | capturedDef = captured.getAnUltimateDefinition() |
|
||||
capturedDef.(SsaImplicitInit).isParameterDefinition(node1.asParameter()) or
|
||||
capturedDef.(SsaExplicitUpdate).getDefiningExpr().(VariableAssign).getSource() =
|
||||
node1.asExpr() or
|
||||
capturedDef.(SsaExplicitUpdate).getDefiningExpr().(AssignOp) = node1.asExpr()
|
||||
)
|
||||
CaptureInput::capturedVarUpdate(bb, i, v, upd) and
|
||||
c.getVariable() = v and
|
||||
CaptureFlow::storeStep(bb, i, v, node2.(ClosureNode).getCaptureFlowNode())
|
||||
)
|
||||
or
|
||||
exists(Parameter p |
|
||||
node1.asParameter() = p and
|
||||
c.getVariable() = p and
|
||||
CaptureFlow::parameterStoreStep(p, node2.(ClosureNode).getCaptureFlowNode())
|
||||
)
|
||||
}
|
||||
|
||||
/**
 * Holds if there is a read step of closure content `c` from the closure node
 * `node1` to `node2`. The target `node2` is either an expression that reads
 * the captured variable of `c`, or the post-update node of the captured
 * parameter of `c`.
 */
private predicate captureReadStep(Node node1, ClosureContent c, Node node2) {
|
||||
exists(BasicBlock bb, int i, CaptureInput::CapturedVariable v |
|
||||
CaptureFlow::readStep(node1.(ClosureNode).getCaptureFlowNode(), bb, i, v) and
|
||||
c.getVariable() = v and
|
||||
CaptureInput::capturedVarRead(bb, i, v, node2.asExpr())
|
||||
)
|
||||
or
|
||||
// Read the final value of a captured parameter back into the parameter's post-update node.
exists(Parameter p |
|
||||
CaptureFlow::parameterReadStep(node1.(ClosureNode).getCaptureFlowNode(), p) and
|
||||
c.getVariable() = p and
|
||||
node2.(PostUpdateNode).getPreUpdateNode().asParameter() = p
|
||||
)
|
||||
}
|
||||
|
||||
/**
 * Holds if `node1` and `node2` are both closure nodes and
 * `CaptureFlow::localFlowStep` holds between their underlying capture-flow
 * nodes.
 */
predicate captureValueStep(Node node1, Node node2) {
|
||||
CaptureFlow::localFlowStep(node1.(ClosureNode).getCaptureFlowNode(),
|
||||
node2.(ClosureNode).getCaptureFlowNode())
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -78,10 +165,6 @@ private predicate variableCaptureStep(Node node1, ExprNode node2) {
|
||||
predicate jumpStep(Node node1, Node node2) {
|
||||
fieldStep(node1, node2)
|
||||
or
|
||||
variableCaptureStep(node1, node2)
|
||||
or
|
||||
variableCaptureStep(node1.(PostUpdateNode).getPreUpdateNode(), node2)
|
||||
or
|
||||
any(AdditionalValueStep a).step(node1, node2) and
|
||||
node1.getEnclosingCallable() != node2.getEnclosingCallable()
|
||||
or
|
||||
@@ -117,6 +200,8 @@ predicate storeStep(Node node1, ContentSet f, Node node2) {
|
||||
or
|
||||
FlowSummaryImpl::Private::Steps::summaryStoreStep(node1.(FlowSummaryNode).getSummaryNode(), f,
|
||||
node2.(FlowSummaryNode).getSummaryNode())
|
||||
or
|
||||
captureStoreStep(node1, f, node2)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -149,6 +234,8 @@ predicate readStep(Node node1, ContentSet f, Node node2) {
|
||||
or
|
||||
FlowSummaryImpl::Private::Steps::summaryReadStep(node1.(FlowSummaryNode).getSummaryNode(), f,
|
||||
node2.(FlowSummaryNode).getSummaryNode())
|
||||
or
|
||||
captureReadStep(node1, f, node2)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -447,6 +534,8 @@ ContentApprox getContentApprox(Content c) {
|
||||
or
|
||||
c instanceof MapValueContent and result = TMapValueContentApprox()
|
||||
or
|
||||
exists(CapturedVariable v | c = TClosureContent(v) and result = TClosureContentApprox(v))
|
||||
or
|
||||
c instanceof SyntheticFieldContent and result = TSyntheticFieldApproxContent()
|
||||
}
|
||||
|
||||
|
||||
@@ -135,6 +135,10 @@ private module Cached {
|
||||
|
||||
import Cached
|
||||
|
||||
/**
 * Holds if `n` is an rvalue expression that accesses a `CapturedVariable`.
 * The SSA-based steps in `simpleLocalFlowStep0` exclude such nodes as targets.
 */
private predicate capturedVariableRead(Node n) {
|
||||
n.asExpr().(RValue).getVariable() instanceof CapturedVariable
|
||||
}
|
||||
|
||||
private predicate simpleLocalFlowStep0(Node node1, Node node2) {
|
||||
TaintTrackingUtil::forceCachingInSameStage() and
|
||||
// Variable flow steps through adjacent def-use and use-use pairs.
|
||||
@@ -142,23 +146,27 @@ private predicate simpleLocalFlowStep0(Node node1, Node node2) {
|
||||
upd.getDefiningExpr().(VariableAssign).getSource() = node1.asExpr() or
|
||||
upd.getDefiningExpr().(AssignOp) = node1.asExpr()
|
||||
|
|
||||
node2.asExpr() = upd.getAFirstUse()
|
||||
node2.asExpr() = upd.getAFirstUse() and
|
||||
not capturedVariableRead(node2)
|
||||
)
|
||||
or
|
||||
exists(SsaImplicitInit init |
|
||||
init.isParameterDefinition(node1.asParameter()) and
|
||||
node2.asExpr() = init.getAFirstUse()
|
||||
node2.asExpr() = init.getAFirstUse() and
|
||||
not capturedVariableRead(node2)
|
||||
)
|
||||
or
|
||||
adjacentUseUse(node1.asExpr(), node2.asExpr()) and
|
||||
not exists(FieldRead fr |
|
||||
hasNonlocalValue(fr) and fr.getField().isStatic() and fr = node1.asExpr()
|
||||
) and
|
||||
not FlowSummaryImpl::Private::Steps::prohibitsUseUseFlow(node1, _)
|
||||
not FlowSummaryImpl::Private::Steps::prohibitsUseUseFlow(node1, _) and
|
||||
not capturedVariableRead(node2)
|
||||
or
|
||||
ThisFlow::adjacentThisRefs(node1, node2)
|
||||
or
|
||||
adjacentUseUse(node1.(PostUpdateNode).getPreUpdateNode().asExpr(), node2.asExpr())
|
||||
adjacentUseUse(node1.(PostUpdateNode).getPreUpdateNode().asExpr(), node2.asExpr()) and
|
||||
not capturedVariableRead(node2)
|
||||
or
|
||||
ThisFlow::adjacentThisRefs(node1.(PostUpdateNode).getPreUpdateNode(), node2)
|
||||
or
|
||||
@@ -185,6 +193,8 @@ private predicate simpleLocalFlowStep0(Node node1, Node node2) {
|
||||
or
|
||||
FlowSummaryImpl::Private::Steps::summaryLocalStep(node1.(FlowSummaryNode).getSummaryNode(),
|
||||
node2.(FlowSummaryNode).getSummaryNode(), true)
|
||||
or
|
||||
captureValueStep(node1, node2)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -256,6 +266,19 @@ class MapValueContent extends Content, TMapValueContent {
|
||||
override string toString() { result = "<map.value>" }
|
||||
}
|
||||
|
||||
/** A reference to a captured variable. */
|
||||
class ClosureContent extends Content, TClosureContent {
|
||||
CapturedVariable v;
|
||||
|
||||
ClosureContent() { this = TClosureContent(v) }
|
||||
|
||||
CapturedVariable getVariable() { result = v }
|
||||
|
||||
override DataFlowType getType() { result = getErasedRepr(v.(Variable).getType()) }
|
||||
|
||||
override string toString() { result = v.toString() }
|
||||
}
|
||||
|
||||
/** A reference through a synthetic instance field. */
|
||||
class SyntheticFieldContent extends Content, TSyntheticFieldContent {
|
||||
SyntheticField s;
|
||||
|
||||
98
java/ql/test/library-tests/dataflow/capture/B.java
Normal file
98
java/ql/test/library-tests/dataflow/capture/B.java
Normal file
@@ -0,0 +1,98 @@
|
||||
import java.util.*;
|
||||
import java.util.function.*;
|
||||
|
||||
public class B {
|
||||
static String source(String label) { return null; }
|
||||
|
||||
static void sink(String s) { }
|
||||
|
||||
static void test1() {
|
||||
List<String> l1 = new ArrayList<>();
|
||||
l1.add(source("L"));
|
||||
List<String> l2 = new ArrayList<>();
|
||||
l1.forEach(e -> l2.add(e));
|
||||
sink(l2.get(0)); // $ hasValueFlow=L
|
||||
}
|
||||
|
||||
String bf1;
|
||||
String bf2;
|
||||
|
||||
void test2() {
|
||||
B other = new B();
|
||||
Consumer<String> f = x -> { this.bf1 = x; bf2 = x; other.bf1 = x; };
|
||||
|
||||
// no flow
|
||||
sink(bf1);
|
||||
sink(this.bf2);
|
||||
sink(other.bf1);
|
||||
sink(other.bf2);
|
||||
|
||||
f.accept(source("T"));
|
||||
|
||||
sink(bf1); // $ MISSING: hasValueFlow=T
|
||||
sink(this.bf2); // $ MISSING: hasValueFlow=T
|
||||
sink(other.bf1); // $ hasValueFlow=T
|
||||
sink(other.bf2);
|
||||
}
|
||||
|
||||
static void convert(Map<String, String> inp, Map<String, String> out) {
|
||||
inp.forEach((key, value) -> { out.put(key, value); });
|
||||
}
|
||||
|
||||
void test3() {
|
||||
HashMap<String,String> m1 = new HashMap<>();
|
||||
HashMap<String,String> m2 = new HashMap<>();
|
||||
m1.put(source("Key"), source("Value"));
|
||||
convert(m1, m2);
|
||||
m2.forEach((k, v) -> {
|
||||
sink(k); // $ hasValueFlow=Key
|
||||
sink(v); // $ hasValueFlow=Value
|
||||
});
|
||||
}
|
||||
|
||||
String elem;
|
||||
|
||||
void testParamIn1() {
|
||||
elem = source("pin.This.elem");
|
||||
testParamIn2(source("pin.Arg"));
|
||||
}
|
||||
|
||||
void testParamIn2(String param) {
|
||||
Runnable r = () -> {
|
||||
sink(elem); // $ MISSING: hasValueFlow=pin.This.elem
|
||||
sink(this.elem); // $ MISSING: hasValueFlow=pin.This.elem
|
||||
sink(param); // $ hasValueFlow=pin.Arg
|
||||
};
|
||||
r.run();
|
||||
}
|
||||
|
||||
void testParamOut1() {
|
||||
B other = new B();
|
||||
testParamOut2(other);
|
||||
sink(elem); // $ MISSING: hasValueFlow=pout.This.elem
|
||||
sink(this.elem); // $ MISSING: hasValueFlow=pout.This.elem
|
||||
sink(other.elem); // $ hasValueFlow=pout.param
|
||||
}
|
||||
|
||||
void testParamOut2(B param) {
|
||||
Runnable r = () -> {
|
||||
this.elem = source("pout.This.elem");
|
||||
param.elem = source("pout.param");
|
||||
};
|
||||
r.run();
|
||||
}
|
||||
|
||||
void testCrossLambda() {
|
||||
B b = new B();
|
||||
Runnable sink1 = () -> { sink(b.elem); };
|
||||
Runnable sink2 = () -> { sink(b.elem); }; // $ hasValueFlow=src
|
||||
Runnable src = () -> { b.elem = source("src"); };
|
||||
doRun(sink1);
|
||||
doRun(src);
|
||||
doRun(sink2);
|
||||
}
|
||||
|
||||
void doRun(Runnable r) {
|
||||
r.run();
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1 @@
|
||||
import TestUtilities.InlineFlowTest
|
||||
@@ -1,6 +1,7 @@
|
||||
| A.java:14:14:14:16 | "A" | A.java:14:14:14:16 | "A" |
|
||||
| A.java:14:14:14:16 | "A" | A.java:15:16:15:22 | get(...) |
|
||||
| A.java:14:14:14:16 | "A" | A.java:18:8:18:15 | p |
|
||||
| A.java:14:14:14:16 | "A" | A.java:18:8:18:15 | p [post update] |
|
||||
| A.java:14:14:14:16 | "A" | A.java:32:26:32:26 | p |
|
||||
| A.java:21:11:21:13 | "B" | A.java:15:16:15:22 | get(...) |
|
||||
| A.java:21:11:21:13 | "B" | A.java:21:7:21:13 | ...=... |
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
import java
|
||||
import semmle.code.java.dataflow.DataFlow
|
||||
|
||||
StringLiteral src() { result.getCompilationUnit().fromSource() }
|
||||
StringLiteral src() {
|
||||
result.getCompilationUnit().fromSource() and
|
||||
result.getFile().toString() = "A"
|
||||
}
|
||||
|
||||
module Config implements DataFlow::ConfigSig {
|
||||
predicate isSource(DataFlow::Node n) { n.asExpr() = src() }
|
||||
|
||||
@@ -0,0 +1,4 @@
|
||||
---
|
||||
category: majorAnalysis
|
||||
---
|
||||
* Initial release. Adds a library to implement flow through captured variables that properly adheres to inter-procedural control flow.
|
||||
439
shared/dataflow/codeql/dataflow/VariableCapture.qll
Normal file
439
shared/dataflow/codeql/dataflow/VariableCapture.qll
Normal file
@@ -0,0 +1,439 @@
|
||||
private import codeql.util.Boolean
|
||||
private import codeql.util.Unit
|
||||
private import codeql.ssa.Ssa as Ssa
|
||||
|
||||
signature module InputSig {
|
||||
class Location;
|
||||
|
||||
/**
|
||||
* A basic block, that is, a maximal straight-line sequence of control flow nodes
|
||||
* without branches or joins.
|
||||
*/
|
||||
class BasicBlock {
|
||||
/** Gets a textual representation of this basic block. */
|
||||
string toString();
|
||||
|
||||
/** Gets the enclosing callable. */
|
||||
Callable getEnclosingCallable();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the basic block that immediately dominates basic block `bb`, if any.
|
||||
*
|
||||
* That is, all paths reaching `bb` from some entry point basic block must go
|
||||
* through the result.
|
||||
*
|
||||
* Example:
|
||||
*
|
||||
* ```csharp
|
||||
* int M(string s) {
|
||||
* if (s == null)
|
||||
* throw new ArgumentNullException(nameof(s));
|
||||
* return s.Length;
|
||||
* }
|
||||
* ```
|
||||
*
|
||||
* The basic block starting on line 2 is an immediate dominator of
|
||||
* the basic block on line 4 (all paths from the entry point of `M`
|
||||
* to `return s.Length;` must go through the null check).
|
||||
*/
|
||||
BasicBlock getImmediateBasicBlockDominator(BasicBlock bb);
|
||||
|
||||
/** Gets an immediate successor of basic block `bb`, if any. */
|
||||
BasicBlock getABasicBlockSuccessor(BasicBlock bb);
|
||||
|
||||
/** Holds if `bb` is a control-flow entry point. */
|
||||
default predicate entryBlock(BasicBlock bb) { not exists(getImmediateBasicBlockDominator(bb)) }
|
||||
|
||||
/** Holds if `bb` is a control-flow exit point. */
|
||||
default predicate exitBlock(BasicBlock bb) { not exists(getABasicBlockSuccessor(bb)) }
|
||||
|
||||
/** A variable that is captured in a nested closure. */
|
||||
class CapturedVariable {
|
||||
/** Gets a textual representation of this variable. */
|
||||
string toString();
|
||||
|
||||
/** Gets the callable that defines this variable. */
|
||||
Callable getCallable();
|
||||
}
|
||||
|
||||
/** A parameter that is captured in a nested closure. */
|
||||
class CapturedParameter extends CapturedVariable;
|
||||
|
||||
/**
|
||||
* Holds if the `i`th node of basic block `bb` is a write to captured variable
|
||||
* `v`. This must include the initial assignment from the parameter in case
|
||||
* the captured variable is a parameter.
|
||||
*/
|
||||
predicate variableWrite(BasicBlock bb, int i, CapturedVariable v, Location loc);
|
||||
|
||||
/** Holds if the `i`th node of basic block `bb` reads captured variable `v`. */
|
||||
predicate variableRead(BasicBlock bb, int i, CapturedVariable v, Location loc);
|
||||
|
||||
class Callable {
|
||||
/** Gets a textual representation of this callable. */
|
||||
string toString();
|
||||
|
||||
/** Gets the location of this callable. */
|
||||
Location getLocation();
|
||||
}
|
||||
|
||||
class Call {
|
||||
/** Gets a textual representation of this call. */
|
||||
string toString();
|
||||
|
||||
/** Gets the location of this call. */
|
||||
Location getLocation();
|
||||
|
||||
/** Gets the enclosing callable. */
|
||||
Callable getEnclosingCallable();
|
||||
|
||||
/**
|
||||
* Holds if the `i`th node of basic block `bb` makes this call. Does not
|
||||
* hold if the call occurs in a CFG-less callable.
|
||||
*/
|
||||
predicate hasCfgNode(BasicBlock bb, int i);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a potential virtual dispatch call target. This may overapproximate the
|
||||
* final call graph.
|
||||
*
|
||||
* If this is empty, it is assumed that all calls might reach a callable that
|
||||
* accesses a captured variable, so slightly more `ClosureNode`s will be
|
||||
* generated.
|
||||
*/
|
||||
default Callable getACallTarget(Call call) { none() }
|
||||
}
|
||||
|
||||
signature module OutputSig<InputSig I> {
|
||||
/**
|
||||
* A synthesized data flow node representing a closure object that tracks
|
||||
* captured variables.
|
||||
*/
|
||||
class ClosureNode {
|
||||
/** Gets a textual representation of this node. */
|
||||
string toString();
|
||||
|
||||
/** Gets the location of this node. */
|
||||
I::Location getLocation();
|
||||
|
||||
/** Gets the enclosing callable. */
|
||||
I::Callable getEnclosingCallable();
|
||||
|
||||
/** Holds if this node is a parameter node. */
|
||||
predicate isParameter(I::Callable callable);
|
||||
|
||||
/** Holds if this node is an argument node. */
|
||||
predicate isArgument(I::Call call);
|
||||
}
|
||||
|
||||
/** Holds if `post` is a `PostUpdateNode` for `pre`. */
|
||||
predicate closurePostUpdateNode(ClosureNode post, ClosureNode pre);
|
||||
|
||||
/** Holds if there is a local flow step from `node1` to `node2`. */
|
||||
predicate localFlowStep(ClosureNode node1, ClosureNode node2);
|
||||
|
||||
/**
|
||||
* Holds if there is a store step from the `variableWrite(bb, i, v, _)` with
|
||||
* content `v` to `node`.
|
||||
*/
|
||||
predicate storeStep(I::BasicBlock bb, int i, I::CapturedVariable v, ClosureNode node);
|
||||
|
||||
/**
|
||||
* Holds if there is a read step from `node` to the `variableRead(bb, i, v, _)`
|
||||
* with content `v`.
|
||||
*/
|
||||
predicate readStep(ClosureNode node, I::BasicBlock bb, int i, I::CapturedVariable v);
|
||||
|
||||
/**
|
||||
* Holds if there is a store step from the `ParameterNode` for `p` with
|
||||
* content `p` to `node`.
|
||||
*/
|
||||
predicate parameterStoreStep(I::CapturedParameter p, ClosureNode node);
|
||||
|
||||
/**
|
||||
* Holds if there is a read step from `node` to the post-update of the
|
||||
* `ParameterNode` for `p` with content `p`.
|
||||
*/
|
||||
predicate parameterReadStep(ClosureNode node, I::CapturedParameter p);
|
||||
}
|
||||
|
||||
module Flow<InputSig Input> implements OutputSig<Input> {
|
||||
private import Input
|
||||
|
||||
/**
 * Holds if some call whose enclosing callable is `c1` has `c2` as a potential
 * target according to `getACallTarget`.
 */
private predicate callEdge(Callable c1, Callable c2) {
|
||||
exists(Call call | c1 = call.getEnclosingCallable() and c2 = getACallTarget(call))
|
||||
}
|
||||
|
||||
/** Holds if `getACallTarget` has no result for any call, that is, no call graph was supplied. */
private predicate noCallGraph() { not exists(Call call, Callable c | c = getACallTarget(call)) }
|
||||
|
||||
/** Holds if the `i`th node of basic block `bb` reads or writes some captured variable. */
private predicate readOrWrite(BasicBlock bb, int i) {
|
||||
variableRead(bb, i, _, _) or variableWrite(bb, i, _, _)
|
||||
}
|
||||
|
||||
/**
 * Holds if `c` contains a basic block with a read or write of a captured
 * variable, or if `c` is the callable of a captured parameter.
 */
private predicate readsOrWritesCapturedVar(Callable c) {
|
||||
exists(BasicBlock bb | readOrWrite(bb, _) and c = bb.getEnclosingCallable())
|
||||
or
|
||||
// Captured parameters have implicit reads and writes to connect the
|
||||
// parameter value to the captured value stored on the heap.
|
||||
exists(CapturedParameter p | p.getCallable() = c)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `c` either reads or writes a captured variable, or may
|
||||
* transitively call another callable that reads or writes a captured
|
||||
* variable.
|
||||
*/
|
||||
private predicate capturedVarsAreLive(Callable c) {
|
||||
readsOrWritesCapturedVar(c)
|
||||
or
|
||||
exists(Callable mid | capturedVarsAreLive(mid) and callEdge(c, mid))
|
||||
or
|
||||
exists(Call call |
|
||||
noCallGraph() and
|
||||
call.getEnclosingCallable() = c
|
||||
)
|
||||
}
|
||||
|
||||
/**
 * Holds if `bb` is an entry block whose enclosing callable satisfies
 * `capturedVarsAreLive`.
 */
private predicate liveEntryBlock(BasicBlock bb) {
|
||||
capturedVarsAreLive(bb.getEnclosingCallable()) and
|
||||
entryBlock(bb)
|
||||
}
|
||||
|
||||
/*
|
||||
* We introduce a variable `heap` and treat all captured variables as fields
|
||||
* on it. We then thread `heap` through the call graph so it is available
|
||||
* everywhere we need to access a captured variable.
|
||||
* We need parameter definitions `THeapVarParam` and argument accesses
|
||||
* `THeapVarArg` to thread it through the call graph, and we need qualifier
|
||||
* accesses `THeapVarCaptureQualifier` for each access to a captured variable.
|
||||
* We also need post-update nodes for all of the accesses to handle
|
||||
* side-effects.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Holds if `call` should get a `heap` argument. This holds if `call` might
|
||||
* target a callee that needs access to the `heap` state.
|
||||
*/
|
||||
private predicate hasHeapArg(Call call) {
|
||||
capturedVarsAreLive(call.getEnclosingCallable()) and capturedVarsAreLive(getACallTarget(call))
|
||||
or
|
||||
noCallGraph()
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `heap` is read in the `i`th node of `bb` as either the qualifier
|
||||
* of an access to a captured variable or as an argument to a call that needs
|
||||
* the `heap` state.
|
||||
*/
|
||||
private predicate heapRead(BasicBlock bb, int i) {
|
||||
readOrWrite(bb, i)
|
||||
or
|
||||
exists(Call call | hasHeapArg(call) and call.hasCfgNode(bb, i))
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if the initial store of a captured parameter into the `heap` should
|
||||
* occur in the `i`th node of `bb`.
|
||||
*/
|
||||
private predicate parameterHeapStore(BasicBlock bb, int i, CapturedParameter p) {
|
||||
entryBlock(bb) and
|
||||
bb.getEnclosingCallable() = p.getCallable() and
|
||||
i = -1 + min(int j | j = 2 or heapRead(bb, j))
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if a final read of the value of a captured parameter as it exists in
|
||||
* the `heap` should occur in the `i`th node of `bb` in order to update the
|
||||
* parameter, such that side-effects on the parameter are visible to the
|
||||
* caller.
|
||||
*/
|
||||
private predicate parameterHeapRead(BasicBlock bb, int i, CapturedParameter p) {
|
||||
exitBlock(bb) and
|
||||
bb.getEnclosingCallable() = p.getCallable() and
|
||||
i = 1 + max(int j | j = 0 or heapRead(bb, j))
|
||||
}
|
||||
|
||||
/**
 * Holds if the `i`th node of `bb` needs a `heap` qualifier access. This is
 * the case for a read or write of a captured variable, and for the positions
 * of the implicit store or read of a captured parameter.
 */
private predicate hasHeapQualifier(BasicBlock bb, int i) {
|
||||
readOrWrite(bb, i) or parameterHeapStore(bb, i, _) or parameterHeapRead(bb, i, _)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if the `i`th node of `bb` occurs before any read or write of a
|
||||
* captured variable, and that captured variables are live in the
|
||||
* callable containing `bb`.
|
||||
* This will be used as the position for the definition of the heap parameter.
|
||||
*/
|
||||
private predicate entryDef(BasicBlock bb, int i) {
|
||||
liveEntryBlock(bb) and
|
||||
i = -1 + min(int j | j = 1 or hasHeapQualifier(bb, j))
|
||||
}
|
||||
|
||||
private module HeapVariableSsaInput implements Ssa::InputSig {
|
||||
class BasicBlock instanceof Input::BasicBlock {
|
||||
string toString() { result = super.toString() }
|
||||
}
|
||||
|
||||
BasicBlock getImmediateBasicBlockDominator(BasicBlock bb) {
|
||||
result = Input::getImmediateBasicBlockDominator(bb)
|
||||
}
|
||||
|
||||
BasicBlock getABasicBlockSuccessor(BasicBlock bb) {
|
||||
result = Input::getABasicBlockSuccessor(bb)
|
||||
}
|
||||
|
||||
class ExitBasicBlock extends BasicBlock {
|
||||
ExitBasicBlock() { not exists(getABasicBlockSuccessor(this)) }
|
||||
}
|
||||
|
||||
// We need one heap variable per callable.
|
||||
class SourceVariable = Callable;
|
||||
|
||||
predicate variableWrite(BasicBlock bb, int i, SourceVariable v, boolean certain) {
|
||||
entryDef(bb, i) and v = bb.(Input::BasicBlock).getEnclosingCallable() and certain = true
|
||||
}
|
||||
|
||||
predicate variableRead(BasicBlock bb, int i, SourceVariable v, boolean certain) {
|
||||
(
|
||||
hasHeapQualifier(bb, i)
|
||||
or
|
||||
exists(Call call |
|
||||
hasHeapArg(call) and
|
||||
call.hasCfgNode(bb, i)
|
||||
)
|
||||
) and
|
||||
v = bb.(Input::BasicBlock).getEnclosingCallable() and
|
||||
certain = true
|
||||
}
|
||||
}
|
||||
|
||||
/** The SSA library instantiated with `HeapVariableSsaInput`, which has one `heap` source variable per callable. */
private module HeapVariableSsa = Ssa::Make<HeapVariableSsaInput>;
|
||||
|
||||
private newtype TClosureNode =
|
||||
THeapVarParam(Callable c) { capturedVarsAreLive(c) } or
|
||||
THeapVarArg(Call call, Boolean isPost) { hasHeapArg(call) } or
|
||||
THeapVarCaptureQualifier(BasicBlock bb, int i, Boolean isPost) { hasHeapQualifier(bb, i) } or
|
||||
THeapVarPhi(HeapVariableSsa::PhiReadNode phi)
|
||||
|
||||
/**
 * A node representing the heap variable of a callable at some program point:
 * the heap parameter of a callable, the heap argument of a call, the heap
 * qualifier of a captured-variable access (each of the latter two in a
 * pre-update and a post-update variant), or an SSA phi-read node.
 */
class ClosureNode extends TClosureNode {
  /** Gets a textual representation of this node. */
  string toString() {
    result = "parameter heap" and this = THeapVarParam(_)
    or
    result = "heap argument" and this = THeapVarArg(_, false)
    or
    result = "heap argument [post update]" and this = THeapVarArg(_, true)
    or
    result = "heap" and this = THeapVarCaptureQualifier(_, _, false)
    or
    result = "heap [post update]" and this = THeapVarCaptureQualifier(_, _, true)
    or
    result = "heap phi" and this = THeapVarPhi(_)
  }

  /** Gets the location of this node. */
  Location getLocation() {
    exists(Callable c | this = THeapVarParam(c) and result = c.getLocation())
    or
    exists(Call call | this = THeapVarArg(call, _) and result = call.getLocation())
    or
    exists(BasicBlock bb, int i | this = THeapVarCaptureQualifier(bb, i, _) |
      variableRead(bb, i, _, result)
      or
      variableWrite(bb, i, _, result)
      or
      // No read or write at this position: fall back to the enclosing callable.
      not readOrWrite(bb, i) and result = bb.getEnclosingCallable().getLocation()
    )
    or
    // Phi nodes were previously left without any location even though
    // `toString` and `getEnclosingCallable` handle them. They have no
    // syntactic counterpart, so use the location of the enclosing callable.
    exists(HeapVariableSsa::PhiReadNode phi |
      this = THeapVarPhi(phi) and
      result = phi.getBasicBlock().(BasicBlock).getEnclosingCallable().getLocation()
    )
  }

  /** Gets the callable that contains this node. */
  Callable getEnclosingCallable() {
    this = THeapVarParam(result)
    or
    exists(Call call | this = THeapVarArg(call, _) and result = call.getEnclosingCallable())
    or
    exists(BasicBlock bb |
      this = THeapVarCaptureQualifier(bb, _, _) and result = bb.getEnclosingCallable()
    )
    or
    exists(HeapVariableSsa::PhiReadNode phi |
      this = THeapVarPhi(phi) and result = phi.getBasicBlock().(BasicBlock).getEnclosingCallable()
    )
  }

  /** Holds if this node is the heap parameter of `c`. */
  predicate isParameter(Callable c) { this = THeapVarParam(c) }

  /** Holds if this node is the (pre-update) heap argument of `call`. */
  predicate isArgument(Call call) { this = THeapVarArg(call, false) }
}
|
||||
|
||||
/**
 * Holds if `post` is the post-update variant of `pre`, where both are either
 * the heap argument of the same call or the heap qualifier at the same
 * position.
 */
predicate closurePostUpdateNode(ClosureNode post, ClosureNode pre) {
  exists(BasicBlock block, int idx |
    post = THeapVarCaptureQualifier(block, idx, true) and
    pre = THeapVarCaptureQualifier(block, idx, false)
  )
  or
  exists(Call c | post = THeapVarArg(c, true) and pre = THeapVarArg(c, false))
}
|
||||
|
||||
/**
 * Holds if the heap-variable SSA relates position `i1` in `bb1` to position
 * `i2` in `bb2`, either as an adjacent reference pair or as a last reference
 * followed by a redefinition located at `bb2`, `i2`.
 */
private predicate step(BasicBlock bb1, int i1, BasicBlock bb2, int i2) {
  exists(HeapVariableSsa::DefinitionExt redef |
    redef.definesAt(_, bb2, i2, _) and
    HeapVariableSsa::lastRefRedefExt(_, _, bb1, i1, redef)
  )
  or
  HeapVariableSsa::adjacentDefReadExt(_, _, bb1, i1, bb2, i2)
}
|
||||
|
||||
/**
 * Holds if the closure node `n` is located at index `i` of basic block `bb`,
 * with `isPost` telling whether `n` is a post-update node. Heap parameters
 * and phi nodes are never post-update nodes.
 */
private predicate closureNodeAt(ClosureNode n, boolean isPost, BasicBlock bb, int i) {
  // Heap qualifier: the position is part of the node itself.
  n = THeapVarCaptureQualifier(bb, i, isPost)
  or
  // Heap argument: located at the CFG position of its call.
  exists(Call c | c.hasCfgNode(bb, i) and n = THeapVarArg(c, isPost))
  or
  // Heap parameter: located at the entry definition of its callable.
  isPost = false and
  entryDef(bb, i) and
  n = THeapVarParam(bb.getEnclosingCallable())
  or
  // Phi node: located where the SSA library places its definition.
  isPost = false and
  exists(HeapVariableSsa::PhiReadNode phiRead |
    phiRead.definesAt(_, bb, i, _) and n = THeapVarPhi(phiRead)
  )
}
|
||||
|
||||
/**
 * Holds if there is a local flow step from `node1` to `node2` between heap
 * nodes of the same callable.
 */
predicate localFlowStep(ClosureNode node1, ClosureNode node2) {
  // For a CFG-less callable (e.g. a MaD callable), the heap parameter has no
  // position, so it is connected directly to every heap argument inside it.
  exists(Call use |
    node1 = THeapVarParam(use.getEnclosingCallable()) and
    not closureNodeAt(node1, _, _, _) and
    node2 = THeapVarArg(use, false)
  )
  or
  // Otherwise follow the SSA adjacency; the target is always a pre-update node.
  exists(BasicBlock fromBb, int fromIdx, BasicBlock toBb, int toIdx |
    closureNodeAt(node1, _, fromBb, fromIdx) and
    closureNodeAt(node2, false, toBb, toIdx) and
    step(fromBb, fromIdx, toBb, toIdx)
  )
}
|
||||
|
||||
/**
 * Holds if the write to `v` at index `i` of `bb` stores into `node`, the
 * post-update heap qualifier at that position.
 */
predicate storeStep(BasicBlock bb, int i, CapturedVariable v, ClosureNode node) {
  node = THeapVarCaptureQualifier(bb, i, true) and
  variableWrite(bb, i, v, _)
}
|
||||
|
||||
/**
 * Holds if the read of `v` at index `i` of `bb` reads from `node`, the
 * pre-update heap qualifier at that position.
 */
predicate readStep(ClosureNode node, BasicBlock bb, int i, CapturedVariable v) {
  node = THeapVarCaptureQualifier(bb, i, false) and
  variableRead(bb, i, v, _)
}
|
||||
|
||||
/**
 * Holds if the captured parameter `p` is stored into `node`, the post-update
 * heap qualifier at a position given by `parameterHeapStore`.
 */
predicate parameterStoreStep(CapturedParameter p, ClosureNode node) {
  exists(BasicBlock block, int idx |
    node = THeapVarCaptureQualifier(block, idx, true) and
    parameterHeapStore(block, idx, p)
  )
}
|
||||
|
||||
/**
 * Holds if the captured parameter `p` is read from `node`, the pre-update
 * heap qualifier at a position given by `parameterHeapRead`.
 */
predicate parameterReadStep(ClosureNode node, CapturedParameter p) {
  exists(BasicBlock block, int idx |
    node = THeapVarCaptureQualifier(block, idx, false) and
    parameterHeapRead(block, idx, p)
  )
}
|
||||
}
|
||||
Reference in New Issue
Block a user