Merge pull request #1000 from jbj/dataflow-defbyref

C++: Support definition by reference in data flow library
This commit is contained in:
Robert Marsh
2019-03-01 13:54:37 -08:00
committed by GitHub
10 changed files with 246 additions and 26 deletions

View File

@@ -36,6 +36,10 @@
## Changes to QL libraries
* The `semmle.code.cpp.dataflow.DataFlow` library now supports _definition by reference_ via output parameters of known functions.
* Data flows through `memcpy` and `memmove` by default.
* Custom flow into or out of arguments assigned by reference can be modeled with the new class `DataFlow::DefinitionByReferenceNode`.
* The data flow library adds flow through library functions that are modeled in `semmle.code.cpp.models.interfaces.DataFlow`. Queries can add subclasses of `DataFlowFunction` to specify additional flow.
* There is a new `Namespace.isInline()` predicate, which holds if the namespace was declared as `inline namespace`.
* The `Expr.isConstant()` predicate now also holds for _address constant expressions_, which are addresses that will be constant after the program has been linked. These address constants do not have a result for `Expr.getValue()`.
* There are new `Function.isDeclaredConstexpr()` and `Function.isConstexpr()` predicates. They can be used to tell whether a function was declared as `constexpr`, and whether it actually is `constexpr`.

View File

@@ -3,10 +3,14 @@
*/
import cpp
private import semmle.code.cpp.dataflow.internal.FlowVar
private import semmle.code.cpp.models.interfaces.DataFlow
private newtype TNode =
TExprNode(Expr e) or
TParameterNode(Parameter p) { exists(p.getFunction().getBlock()) } or
TDefinitionByReferenceNode(VariableAccess va, Expr argument) {
definitionByReference(va, argument)
} or
TUninitializedNode(LocalVariable v) {
not v.hasInitializer()
}
@@ -20,13 +24,7 @@ private newtype TNode =
*/
class Node extends TNode {
/** Gets the function to which this node belongs. */
Function getFunction() {
result = this.asExpr().getEnclosingFunction()
or
result = this.asParameter().getFunction()
or
result = this.asUninitialized().getFunction()
}
Function getFunction() { none() } // overridden in subclasses
/**
* INTERNAL: Do not use. Alternative name for `getFunction`.
@@ -36,11 +34,7 @@ class Node extends TNode {
}
/** Gets the type of this node. */
Type getType() {
result = this.asExpr().getType()
or
result = asVariable(this).getType()
}
Type getType() { none() } // overridden in subclasses
/** Gets the expression corresponding to this node, if any. */
Expr asExpr() { result = this.(ExprNode).getExpr() }
@@ -48,6 +42,9 @@ class Node extends TNode {
/** Gets the parameter corresponding to this node, if any. */
Parameter asParameter() { result = this.(ParameterNode).getParameter() }
/** Gets the argument that defines this `DefinitionByReferenceNode`, if any. */
Expr asDefiningArgument() { result = this.(DefinitionByReferenceNode).getArgument() }
/**
* Gets the uninitialized local variable corresponding to this node, if
* any.
@@ -74,6 +71,8 @@ class Node extends TNode {
class ExprNode extends Node, TExprNode {
Expr expr;
ExprNode() { this = TExprNode(expr) }
override Function getFunction() { result = expr.getEnclosingFunction() }
override Type getType() { result = expr.getType() }
override string toString() { result = expr.toString() }
override Location getLocation() { result = expr.getLocation() }
/** Gets the expression corresponding to this node. */
@@ -87,6 +86,8 @@ class ExprNode extends Node, TExprNode {
class ParameterNode extends Node, TParameterNode {
Parameter param;
ParameterNode() { this = TParameterNode(param) }
override Function getFunction() { result = param.getFunction() }
override Type getType() { result = param.getType() }
override string toString() { result = param.toString() }
override Location getLocation() { result = param.getLocation() }
/** Gets the parameter corresponding to this node. */
@@ -100,6 +101,35 @@ class ParameterNode extends Node, TParameterNode {
}
}
/**
 * A data flow node standing for the value a variable holds once a call that
 * received the variable by reference has returned, since such a call may have
 * modified it.
 *
 * Example: for a call `f(&x)` there is flow into `x` from its earlier
 * definitions, and additionally a `DefinitionByReferenceNode` modelling the
 * value of `x` after the call. For that node, `getArgument()` is `&x`.
 */
class DefinitionByReferenceNode extends Node, TDefinitionByReferenceNode {
  /** The access to the variable that is passed by reference. */
  VariableAccess va;

  /** The call argument through which the variable is passed. */
  Expr argument;

  DefinitionByReferenceNode() { TDefinitionByReferenceNode(va, argument) = this }

  // The enclosing function and the type are taken from the variable access.
  override Function getFunction() { va.getEnclosingFunction() = result }

  override Type getType() { va.getType() = result }

  // The textual representation and the location are taken from the argument.
  override string toString() { result = "ref arg " + argument.toString() }

  override Location getLocation() { argument.getLocation() = result }

  /** Gets the call argument that this node was created for. */
  Expr getArgument() { argument = result }

  /**
   * Gets the parameter of the call target that sits at the same index as
   * this node's argument in a `FunctionCall`.
   */
  Parameter getParameter() {
    exists(FunctionCall fc, int index |
      fc.getArgument(index) = argument and
      fc.getTarget().getParameter(index) = result
    )
  }
}
/**
* The value of an uninitialized local variable, viewed as a node in a data
* flow graph.
@@ -107,6 +137,8 @@ class ParameterNode extends Node, TParameterNode {
class UninitializedNode extends Node, TUninitializedNode {
LocalVariable v;
UninitializedNode() { this = TUninitializedNode(v) }
override Function getFunction() { result = v.getFunction() }
override Type getType() { result = v.getType() }
override string toString() { result = v.toString() }
override Location getLocation() { result = v.getLocation() }
/** Gets the uninitialized local variable corresponding to this node. */
@@ -143,6 +175,14 @@ ExprNode exprNode(Expr e) { result.getExpr() = e }
*/
ParameterNode parameterNode(Parameter p) { result.getParameter() = p }
/**
 * Gets the `DefinitionByReferenceNode` whose `getArgument()` is `argument`,
 * that is, the node for the definition by reference of the variable passed
 * as `argument` of a call.
 */
DefinitionByReferenceNode definitionByReferenceNodeFromArgument(Expr argument) {
  argument = result.getArgument()
}
/**
* Gets the `Node` corresponding to the value of an uninitialized local
* variable `v`.
@@ -151,12 +191,6 @@ UninitializedNode uninitializedNode(LocalVariable v) {
result.getLocalVariable() = v
}
private Variable asVariable(Node node) {
result = node.asParameter()
or
result = node.asUninitialized()
}
/**
* Holds if data flows from `nodeFrom` to `nodeTo` in exactly one local
* (intra-procedural) step.
@@ -170,10 +204,17 @@ predicate localFlowStep(Node nodeFrom, Node nodeTo) {
(
exprToVarStep(nodeFrom.asExpr(), var)
or
varSourceBaseCase(var, asVariable(nodeFrom))
varSourceBaseCase(var, nodeFrom.asParameter())
or
varSourceBaseCase(var, nodeFrom.asUninitialized())
or
var.definedByReference(nodeFrom.asDefiningArgument())
) and
varToExprStep(var, nodeTo.asExpr())
)
or
// Expr -> DefinitionByReferenceNode
exprToDefinitionByReferenceStep(nodeFrom.asExpr(), nodeTo.asDefiningArgument())
}
/**
@@ -232,10 +273,31 @@ private predicate exprToExprStep_nocfg(Expr fromExpr, Expr toExpr) {
fromExpr = op.getOperand()
)
or
toExpr = any(FunctionCall moveCall |
moveCall.getTarget().getNamespace().getName() = "std" and
moveCall.getTarget().getName() = "move" and
fromExpr = moveCall.getArgument(0)
toExpr = any(Call call |
exists(DataFlowFunction f, FunctionInput inModel , FunctionOutput outModel, int iIn |
call.getTarget() = f and
f.hasDataFlow(inModel, outModel) and
outModel.isOutReturnValue() and
inModel.isInParameter(iIn) and
fromExpr = call.getArgument(iIn)
)
)
}
/**
 * Holds if `argOut` is an argument of a call to a modeled `DataFlowFunction`
 * whose model declares flow from some input to the pointer-output parameter
 * at the index of `argOut`, and `exprIn` is the matching input of that call.
 *
 * The input is matched in one of two ways:
 * - `isInParameterPointer`: `exprIn` is what the call passes by reference at
 *   the input index (`call.passesByReference`).
 * - `isInParameter`: `exprIn` is the argument at the input index.
 */
private predicate exprToDefinitionByReferenceStep(Expr exprIn, Expr argOut) {
  exists(
    Call c, DataFlowFunction callee, FunctionInput input, FunctionOutput output, int outIdx,
    int inIdx
  |
    callee = c.getTarget() and
    callee.hasDataFlow(input, output) and
    // Output side: a pointer parameter written by the callee.
    output.isOutParameterPointer(outIdx) and
    c.getArgument(outIdx) = argOut and
    // Input side: either the pointee of an argument or the argument itself.
    (
      input.isInParameterPointer(inIdx) and
      c.passesByReference(inIdx, exprIn)
      or
      input.isInParameter(inIdx) and
      c.getArgument(inIdx) = exprIn
    )
  )
}

View File

@@ -51,6 +51,12 @@ cached class FlowVar extends TFlowVar {
*/
cached abstract predicate definedByExpr(Expr e, ControlFlowNode node);
/**
* Holds if this `FlowVar` corresponds to the data written by a call that
* passes a variable as argument `arg`.
*/
cached abstract predicate definedByReference(Expr arg);
/**
* Holds if this `FlowVar` corresponds to the initial value of `v`. The following
* is an exhaustive list of cases where this may happen.
@@ -137,6 +143,8 @@ module FlowVar_internal {
or
assignmentLikeOperation(sbb, v, _)
or
blockVarDefinedByReference(sbb, v, _)
or
blockVarDefinedByVariable(sbb, v)
)
}
@@ -174,6 +182,11 @@ module FlowVar_internal {
else node = def.getDefinition())
}
override predicate definedByReference(Expr arg) {
  // `arg` must be the node of the underlying definition `def`, and it must
  // define some access to `v` by reference.
  def.getDefinition() = arg and
  definitionByReference(v.getAnAccess(), arg)
}
override predicate definedByInitialValue(LocalScopeVariable param) {
def.definedByParameter(param) and
param = v
@@ -191,6 +204,8 @@ module FlowVar_internal {
this.definedByExpr(_, _)
or
this.definedByInitialValue(_)
or
this.definedByReference(_)
}
/**
@@ -221,7 +236,17 @@ module FlowVar_internal {
BlockVar() { this = TBlockVar(sbb, v) }
override VariableAccess getAnAccess() {
variableAccessInSBB(v, getAReachedBlockVarSBB(this), result)
exists(SubBasicBlock reached |
reached = getAReachedBlockVarSBB(this)
|
variableAccessInSBB(v, reached, result)
or
// Allow flow into a `VariableAccess` that is used as definition by
// reference. This flow is blocked by `getAReachedBlockVarSBB` because
// flow should not propagate past that.
result = reached.getASuccessor().(VariableAccess) and
blockVarDefinedByReference(result, v, _)
)
}
override predicate definedByInitialValue(LocalScopeVariable lsv) {
@@ -237,6 +262,10 @@ module FlowVar_internal {
node = sbb.getANode()
}
override predicate definedByReference(Expr arg) {
blockVarDefinedByReference(sbb, v, arg)
}
override string toString() {
exists(Expr e |
this.definedByExpr(e, _) and
@@ -246,9 +275,15 @@ module FlowVar_internal {
this.definedByInitialValue(_) and
result = "initial value of "+ v
or
exists(Expr arg |
this.definedByReference(arg) and
result = "ref def: "+ arg
)
or
// impossible case
not this.definedByExpr(_, _) and
not this.definedByInitialValue(_) and
not this.definedByReference(_) and
result = "undefined "+ v
}
@@ -373,7 +408,8 @@ module FlowVar_internal {
mid = getAReachedBlockVarSBB(start) and
result = mid.getASuccessor() and
not skipLoop(mid, result, sbbDef, v) and
not assignmentLikeOperation(result, v, _)
not assignmentLikeOperation(result, v, _) and
not blockVarDefinedByReference(result, v, _)
)
}
@@ -481,6 +517,9 @@ module FlowVar_internal {
*/
predicate overwrite(VariableAccess va, ControlFlowNode node) {
  // Case 1: `va` is the left-hand side of the assignment `node`.
  node.(AssignExpr).getLValue() = va
  or
  // Case 2: `node` is itself the access `va`, and that access is defined by
  // reference through some call argument.
  definitionByReference(node, _) and
  node = va
}
/**
@@ -515,6 +554,11 @@ module FlowVar_internal {
)
}
/**
 * Holds if `node` is an access to `v` for which
 * `definitionByReference(node, argument)` holds.
 */
predicate blockVarDefinedByReference(ControlFlowNode node, Variable v, Expr argument) {
  definitionByReference(node, argument) and
  v.getAnAccess() = node
}
/**
* Holds if `v` is initialized by `init` to have value `assignedExpr`.
*/
@@ -534,8 +578,11 @@ module FlowVar_internal {
class DataFlowSubBasicBlockCutNode extends SubBasicBlockCutNode {
DataFlowSubBasicBlockCutNode() {
exists(Variable v |
not fullySupportedSsaVariable(v) and
not fullySupportedSsaVariable(v)
|
assignmentLikeOperation(this, v, _)
or
blockVarDefinedByReference(this, v, _)
// It is not necessary to cut the basic blocks at `Initializer` nodes
// because the affected variable can have no _other_ value before its
// initializer. It is not necessary to cut basic blocks at procedure

View File

@@ -12,6 +12,8 @@ class TestAllocationConfig extends DataFlow::Configuration {
or
source.asParameter().getName().matches("source%")
or
source.(DataFlow::DefinitionByReferenceNode).getParameter().getName().matches("ref_source%")
or
// Track uninitialized variables
exists(source.asUninitialized())
}

View File

@@ -5,6 +5,7 @@
| example.c:24:13:24:30 | ... = ... | example.c:24:2:24:30 | ... = ... |
| example.c:24:24:24:30 | ... + ... | example.c:24:13:24:30 | ... = ... |
| example.c:26:13:26:16 | call to getX | example.c:26:2:26:25 | ... = ... |
| example.c:26:18:26:24 | ref arg & ... | example.c:26:2:26:7 | coords |
| test.cpp:6:12:6:17 | call to source | test.cpp:7:8:7:9 | t1 |
| test.cpp:6:12:6:17 | call to source | test.cpp:8:8:8:9 | t1 |
| test.cpp:6:12:6:17 | call to source | test.cpp:9:8:9:9 | t1 |
@@ -28,3 +29,17 @@
| test.cpp:24:10:24:11 | t2 | test.cpp:23:23:23:24 | t1 |
| test.cpp:24:10:24:11 | t2 | test.cpp:24:5:24:11 | ... = ... |
| test.cpp:24:10:24:11 | t2 | test.cpp:26:8:26:9 | t1 |
| test.cpp:430:48:430:54 | source1 | test.cpp:432:17:432:23 | source1 |
| test.cpp:431:12:431:13 | 0 | test.cpp:432:11:432:13 | tmp |
| test.cpp:432:10:432:13 | & ... | test.cpp:432:3:432:8 | call to memcpy |
| test.cpp:432:10:432:13 | ref arg & ... | test.cpp:433:8:433:10 | tmp |
| test.cpp:432:17:432:23 | source1 | test.cpp:432:10:432:13 | ref arg & ... |
| test.cpp:436:53:436:59 | source1 | test.cpp:439:17:439:23 | source1 |
| test.cpp:436:66:436:66 | b | test.cpp:441:7:441:7 | b |
| test.cpp:437:12:437:13 | 0 | test.cpp:438:19:438:21 | tmp |
| test.cpp:437:12:437:13 | 0 | test.cpp:439:11:439:13 | tmp |
| test.cpp:439:10:439:13 | & ... | test.cpp:439:3:439:8 | call to memcpy |
| test.cpp:439:10:439:13 | ref arg & ... | test.cpp:439:33:439:35 | tmp |
| test.cpp:439:10:439:13 | ref arg & ... | test.cpp:440:8:440:10 | tmp |
| test.cpp:439:10:439:13 | ref arg & ... | test.cpp:442:10:442:12 | tmp |
| test.cpp:439:17:439:23 | source1 | test.cpp:439:10:439:13 | ref arg & ... |

View File

@@ -423,3 +423,71 @@ class FlowThroughFields {
sink(field); // tainted
}
};
typedef unsigned long size_t;
void *memcpy(void *dest, const void *src, size_t count);
void flowThroughMemcpy_ssa_with_local_flow(int source1) {
int tmp = 0;
memcpy(&tmp, &source1, sizeof tmp);
sink(tmp); // tainted
}
void flowThroughMemcpy_blockvar_with_local_flow(int source1, int b) {
int tmp = 0;
int *capture = &tmp;
memcpy(&tmp, &source1, sizeof tmp);
sink(tmp); // tainted
if (b) {
sink(tmp); // tainted
}
}
void cleanedByMemcpy_ssa(int clean1) {
int tmp;
memcpy(&tmp, &clean1, sizeof tmp);
sink(tmp); // clean
}
void cleanedByMemcpy_blockvar(int clean1) {
int tmp;
int *capture = &tmp;
memcpy(&tmp, &clean1, sizeof tmp);
sink(tmp); // clean
}
void intRefSource(int &ref_source);
void intPointerSource(int *ref_source);
void intArraySource(int ref_source[], size_t len);
void intRefSourceCaller() {
int local;
intRefSource(local);
sink(local); // tainted
}
void intPointerSourceCaller() {
int local;
intPointerSource(&local);
sink(local); // tainted
}
void intPointerSourceCaller2() {
int local[1];
intPointerSource(local);
sink(local); // tainted
sink(*local); // clean
}
void intArraySourceCaller() {
int local;
intArraySource(&local, 1);
sink(local); // tainted
}
void intArraySourceCaller2() {
int local[2];
intArraySource(local, 2);
sink(local); // tainted
sink(*local); // clean
}

View File

@@ -27,6 +27,14 @@
| test.cpp:366:7:366:7 | x | test.cpp:362:4:362:9 | call to source |
| test.cpp:397:10:397:18 | globalVar | test.cpp:395:17:395:22 | call to source |
| test.cpp:423:10:423:14 | field | test.cpp:421:13:421:18 | call to source |
| test.cpp:433:8:433:10 | tmp | test.cpp:430:48:430:54 | source1 |
| test.cpp:440:8:440:10 | tmp | test.cpp:436:53:436:59 | source1 |
| test.cpp:442:10:442:12 | tmp | test.cpp:436:53:436:59 | source1 |
| test.cpp:466:8:466:12 | local | test.cpp:465:16:465:20 | ref arg local |
| test.cpp:472:8:472:12 | local | test.cpp:471:20:471:25 | ref arg & ... |
| test.cpp:478:8:478:12 | local | test.cpp:477:20:477:24 | ref arg local |
| test.cpp:485:8:485:12 | local | test.cpp:484:18:484:23 | ref arg & ... |
| test.cpp:491:8:491:12 | local | test.cpp:490:18:490:22 | ref arg local |
| true_upon_entry.cpp:21:8:21:8 | x | true_upon_entry.cpp:17:11:17:16 | call to source |
| true_upon_entry.cpp:29:8:29:8 | x | true_upon_entry.cpp:27:9:27:14 | call to source |
| true_upon_entry.cpp:39:8:39:8 | x | true_upon_entry.cpp:33:11:33:16 | call to source |

View File

@@ -9,6 +9,14 @@
| test.cpp:136:27:136:32 | test.cpp:140:22:140:23 | AST only |
| test.cpp:395:17:395:22 | test.cpp:397:10:397:18 | AST only |
| test.cpp:421:13:421:18 | test.cpp:423:10:423:14 | AST only |
| test.cpp:430:48:430:54 | test.cpp:433:8:433:10 | AST only |
| test.cpp:436:53:436:59 | test.cpp:440:8:440:10 | AST only |
| test.cpp:436:53:436:59 | test.cpp:442:10:442:12 | AST only |
| test.cpp:465:16:465:20 | test.cpp:466:8:466:12 | AST only |
| test.cpp:471:20:471:25 | test.cpp:472:8:472:12 | AST only |
| test.cpp:477:20:477:24 | test.cpp:478:8:478:12 | AST only |
| test.cpp:484:18:484:23 | test.cpp:485:8:485:12 | AST only |
| test.cpp:490:18:490:22 | test.cpp:491:8:491:12 | AST only |
| true_upon_entry.cpp:9:11:9:16 | true_upon_entry.cpp:13:8:13:8 | IR only |
| true_upon_entry.cpp:62:11:62:16 | true_upon_entry.cpp:66:8:66:8 | IR only |
| true_upon_entry.cpp:98:11:98:16 | true_upon_entry.cpp:105:8:105:8 | IR only |

View File

@@ -1,3 +1,4 @@
| test.cpp:75:7:75:8 | u1 | test.cpp:76:8:76:9 | u1 |
| test.cpp:83:7:83:8 | u2 | test.cpp:84:13:84:14 | u2 |
| test.cpp:83:7:83:8 | u2 | test.cpp:85:8:85:9 | u2 |
| test.cpp:453:7:453:9 | tmp | test.cpp:454:19:454:21 | tmp |

View File

@@ -129,6 +129,11 @@
| taint.cpp:164:19:164:24 | call to source | taint.cpp:172:18:172:24 | tainted | |
| taint.cpp:165:22:165:25 | {...} | taint.cpp:170:10:170:15 | buffer | |
| taint.cpp:165:24:165:24 | 0 | taint.cpp:165:22:165:25 | {...} | TAINT |
| taint.cpp:170:10:170:15 | buffer | taint.cpp:170:3:170:8 | call to strcpy | |
| taint.cpp:170:10:170:15 | ref arg buffer | taint.cpp:171:8:171:13 | buffer | |
| taint.cpp:171:8:171:13 | ref arg buffer | taint.cpp:172:10:172:15 | buffer | |
| taint.cpp:172:10:172:15 | buffer | taint.cpp:172:3:172:8 | call to strcat | |
| taint.cpp:172:10:172:15 | ref arg buffer | taint.cpp:173:8:173:13 | buffer | |
| taint.cpp:180:19:180:19 | p | taint.cpp:181:9:181:9 | p | |
| taint.cpp:181:9:181:9 | p | taint.cpp:181:8:181:9 | * ... | TAINT |
| taint.cpp:185:11:185:16 | call to source | taint.cpp:186:11:186:11 | x | |