Merge pull request #2763 from jbj/ir-VariableNode

C++: DefaultTaintTracking perf fix for globals
This commit is contained in:
Robert Marsh
2020-02-06 18:54:14 -05:00
committed by GitHub
13 changed files with 215 additions and 74 deletions

View File

@@ -69,7 +69,7 @@ private class DefaultTaintTrackingCfg extends DataFlow::Configuration {
override predicate isSource(DataFlow::Node source) { source = getNodeForSource(_) }
override predicate isSink(DataFlow::Node sink) { any() }
override predicate isSink(DataFlow::Node sink) { exists(adjustedSink(sink)) }
override predicate isAdditionalFlowStep(DataFlow::Node n1, DataFlow::Node n2) {
instructionTaintStep(n1.asInstruction(), n2.asInstruction())
@@ -84,18 +84,15 @@ private class ToGlobalVarTaintTrackingCfg extends DataFlow::Configuration {
override predicate isSource(DataFlow::Node source) { source = getNodeForSource(_) }
override predicate isSink(DataFlow::Node sink) {
exists(GlobalOrNamespaceVariable gv | writesVariable(sink.asInstruction(), gv))
sink.asVariable() instanceof GlobalOrNamespaceVariable
}
override predicate isAdditionalFlowStep(DataFlow::Node n1, DataFlow::Node n2) {
instructionTaintStep(n1.asInstruction(), n2.asInstruction())
or
exists(StoreInstruction i1, LoadInstruction i2, GlobalOrNamespaceVariable gv |
writesVariable(i1, gv) and
readsVariable(i2, gv) and
i1 = n1.asInstruction() and
i2 = n2.asInstruction()
)
writesVariable(n1.asInstruction(), n2.asVariable().(GlobalOrNamespaceVariable))
or
readsVariable(n2.asInstruction(), n1.asVariable().(GlobalOrNamespaceVariable))
}
override predicate isBarrier(DataFlow::Node node) { nodeIsBarrier(node) }
@@ -105,19 +102,20 @@ private class FromGlobalVarTaintTrackingCfg extends DataFlow2::Configuration {
FromGlobalVarTaintTrackingCfg() { this = "FromGlobalVarTaintTrackingCfg" }
override predicate isSource(DataFlow::Node source) {
exists(
ToGlobalVarTaintTrackingCfg other, DataFlow::Node prevSink, GlobalOrNamespaceVariable gv
|
other.hasFlowTo(prevSink) and
writesVariable(prevSink.asInstruction(), gv) and
readsVariable(source.asInstruction(), gv)
)
// This set of sources should be reasonably small, which is good for
// performance since the set of sinks is very large.
exists(ToGlobalVarTaintTrackingCfg otherCfg | otherCfg.hasFlowTo(source))
}
override predicate isSink(DataFlow::Node sink) { any() }
override predicate isSink(DataFlow::Node sink) { exists(adjustedSink(sink)) }
override predicate isAdditionalFlowStep(DataFlow::Node n1, DataFlow::Node n2) {
instructionTaintStep(n1.asInstruction(), n2.asInstruction())
or
// Additional step for flow out of variables. There is no flow _into_
// variables in this configuration, so this step only serves to take flow
// out of a variable that's a source.
readsVariable(n2.asInstruction(), n1.asVariable())
}
override predicate isBarrier(DataFlow::Node node) { nodeIsBarrier(node) }
@@ -351,23 +349,12 @@ predicate taintedIncludingGlobalVars(Expr source, Element tainted, string global
globalVar = ""
or
exists(
ToGlobalVarTaintTrackingCfg toCfg, FromGlobalVarTaintTrackingCfg fromCfg, DataFlow::Node store,
GlobalOrNamespaceVariable global, DataFlow::Node load, DataFlow::Node sink
ToGlobalVarTaintTrackingCfg toCfg, FromGlobalVarTaintTrackingCfg fromCfg,
DataFlow::VariableNode variableNode, GlobalOrNamespaceVariable global, DataFlow::Node sink
|
toCfg.hasFlow(getNodeForSource(source), store) and
store
.asInstruction()
.(StoreInstruction)
.getDestinationAddress()
.(VariableAddressInstruction)
.getASTVariable() = global and
load
.asInstruction()
.(LoadInstruction)
.getSourceAddress()
.(VariableAddressInstruction)
.getASTVariable() = global and
fromCfg.hasFlow(load, sink) and
global = variableNode.getVariable() and
toCfg.hasFlow(getNodeForSource(source), variableNode) and
fromCfg.hasFlow(variableNode, sink) and
tainted = adjustedSink(sink) and
global = globalVarFromId(globalVar)
)

View File

@@ -7,17 +7,17 @@ private import DataFlowDispatch
* A data flow node that occurs as the argument of a call and is passed as-is
* to the callable. Instance arguments (`this` pointer) are also included.
*/
class ArgumentNode extends Node {
ArgumentNode() { exists(CallInstruction call | this.asInstruction() = call.getAnArgument()) }
class ArgumentNode extends InstructionNode {
ArgumentNode() { exists(CallInstruction call | this.getInstruction() = call.getAnArgument()) }
/**
* Holds if this argument occurs at the given position in the given call.
* The instance argument is considered to have index `-1`.
*/
predicate argumentOf(DataFlowCall call, int pos) {
this.asInstruction() = call.getPositionalArgument(pos)
this.getInstruction() = call.getPositionalArgument(pos)
or
this.asInstruction() = call.getThisArgument() and pos = -1
this.getInstruction() = call.getThisArgument() and pos = -1
}
/** Gets the call in which this node is an argument. */
@@ -36,15 +36,15 @@ class ReturnKind extends TReturnKind {
}
/** A data flow node that occurs as the result of a `ReturnStmt`. */
class ReturnNode extends Node {
ReturnNode() { exists(ReturnValueInstruction ret | this.asInstruction() = ret.getReturnValue()) }
class ReturnNode extends InstructionNode {
ReturnNode() { exists(ReturnValueInstruction ret | this.getInstruction() = ret.getReturnValue()) }
/** Gets the kind of this returned value. */
ReturnKind getKind() { result = TNormalReturnKind() }
}
/** A data flow node that represents the output of a call. */
class OutNode extends Node {
class OutNode extends InstructionNode {
override CallInstruction instr;
/** Gets the underlying call. */
@@ -181,11 +181,17 @@ private predicate suppressUnusedType(Type t) { any() }
// Java QL library compatibility wrappers
//////////////////////////////////////////////////////////////////////////////
/** A node that performs a type cast. */
class CastNode extends Node {
class CastNode extends InstructionNode {
CastNode() { none() } // stub implementation
}
class DataFlowCallable = Function;
/**
* A function that may contain code or a variable that may contain itself. When
* flow crosses from one _enclosing callable_ to another, the interprocedural
* data-flow library discards call contexts and inserts a node in the big-step
* relation used for human-readable path explanations.
*/
class DataFlowCallable = Declaration;
class DataFlowExpr = Expr;

View File

@@ -8,12 +8,9 @@ private import semmle.code.cpp.controlflow.IRGuards
private import semmle.code.cpp.ir.ValueNumbering
private import semmle.code.cpp.models.interfaces.DataFlow
/**
* A newtype wrapper to prevent accidental casts between `Node` and
* `Instruction`. This ensures we can add `Node`s that are not `Instruction`s
* in the future.
*/
private newtype TIRDataFlowNode = MkIRDataFlowNode(Instruction i)
private newtype TIRDataFlowNode =
TInstructionNode(Instruction i) or
TVariableNode(Variable var)
/**
* A node in a data flow graph.
@@ -23,21 +20,19 @@ private newtype TIRDataFlowNode = MkIRDataFlowNode(Instruction i)
* `DataFlow::parameterNode`, and `DataFlow::uninitializedNode` respectively.
*/
class Node extends TIRDataFlowNode {
Instruction instr;
Node() { this = MkIRDataFlowNode(instr) }
/**
* INTERNAL: Do not use. Alternative name for `getFunction`.
* INTERNAL: Do not use.
*/
Function getEnclosingCallable() { result = this.getFunction() }
Declaration getEnclosingCallable() { none() } // overridden in subclasses
Function getFunction() { result = instr.getEnclosingFunction() }
/** Gets the function to which this node belongs, if any. */
Function getFunction() { none() } // overridden in subclasses
/** Gets the type of this node. */
Type getType() { result = instr.getResultType() }
Type getType() { none() } // overridden in subclasses
Instruction asInstruction() { this = MkIRDataFlowNode(result) }
/** Gets the instruction corresponding to this node, if any. */
Instruction asInstruction() { result = this.(InstructionNode).getInstruction() }
/**
* Gets the non-conversion expression corresponding to this node, if any. If
@@ -45,22 +40,25 @@ class Node extends TIRDataFlowNode {
* `Conversion`, then the result is that `Conversion`'s non-`Conversion` base
* expression.
*/
Expr asExpr() {
result.getConversion*() = instr.getConvertedResultExpression() and
not result instanceof Conversion
}
Expr asExpr() { result = this.(ExprNode).getExpr() }
/**
* Gets the expression corresponding to this node, if any. The returned
* expression may be a `Conversion`.
*/
Expr asConvertedExpr() { result = instr.getConvertedResultExpression() }
Expr asConvertedExpr() { result = this.(ExprNode).getConvertedExpr() }
/** Gets the argument that defines this `DefinitionByReferenceNode`, if any. */
Expr asDefiningArgument() { result = this.(DefinitionByReferenceNode).getArgument() }
/** Gets the parameter corresponding to this node, if any. */
Parameter asParameter() { result = instr.(InitializeParameterInstruction).getParameter() }
Parameter asParameter() { result = this.(ParameterNode).getParameter() }
/**
* Gets the variable corresponding to this node, if any. This can be used for
* modelling flow in and out of global variables.
*/
Variable asVariable() { result = this.(VariableNode).getVariable() }
/**
* DEPRECATED: See UninitializedNode.
@@ -76,7 +74,7 @@ class Node extends TIRDataFlowNode {
Type getTypeBound() { result = getType() }
/** Gets the location of this element. */
Location getLocation() { result = instr.getLocation() }
Location getLocation() { none() } // overridden by subclasses
/**
* Holds if this element is at the specified location.
@@ -91,18 +89,38 @@ class Node extends TIRDataFlowNode {
this.getLocation().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
}
string toString() {
/** Gets a textual representation of this element. */
string toString() { none() } // overridden by subclasses
}
class InstructionNode extends Node, TInstructionNode {
Instruction instr;
InstructionNode() { this = TInstructionNode(instr) }
/** Gets the instruction corresponding to this node. */
Instruction getInstruction() { result = instr }
override Declaration getEnclosingCallable() { result = this.getFunction() }
override Function getFunction() { result = instr.getEnclosingFunction() }
override Type getType() { result = instr.getResultType() }
override Location getLocation() { result = instr.getLocation() }
override string toString() {
// This predicate is overridden in subclasses. This default implementation
// does not use `Instruction.toString` because that's expensive to compute.
result = this.asInstruction().getOpcode().toString()
result = this.getInstruction().getOpcode().toString()
}
}
/**
* An expression, viewed as a node in a data flow graph.
*/
class ExprNode extends Node {
ExprNode() { exists(this.asExpr()) }
class ExprNode extends InstructionNode {
ExprNode() { exists(instr.getConvertedResultExpression()) }
/**
* Gets the non-conversion expression corresponding to this node, if any. If
@@ -110,13 +128,16 @@ class ExprNode extends Node {
* `Conversion`, then the result is that `Conversion`'s non-`Conversion` base
* expression.
*/
Expr getExpr() { result = this.asExpr() }
Expr getExpr() {
result.getConversion*() = instr.getConvertedResultExpression() and
not result instanceof Conversion
}
/**
* Gets the expression corresponding to this node, if any. The returned
* expression may be a `Conversion`.
*/
Expr getConvertedExpr() { result = this.asConvertedExpr() }
Expr getConvertedExpr() { result = instr.getConvertedResultExpression() }
override string toString() { result = this.asConvertedExpr().toString() }
}
@@ -125,7 +146,7 @@ class ExprNode extends Node {
* The value of a parameter at function entry, viewed as a node in a data
* flow graph.
*/
class ParameterNode extends Node {
class ParameterNode extends InstructionNode {
override InitializeParameterInstruction instr;
/**
@@ -139,7 +160,7 @@ class ParameterNode extends Node {
override string toString() { result = instr.getParameter().toString() }
}
private class ThisParameterNode extends Node {
private class ThisParameterNode extends InstructionNode {
override InitializeThisInstruction instr;
override string toString() { result = "this" }
@@ -176,7 +197,7 @@ deprecated class UninitializedNode extends Node {
* This class exists to match the interface used by Java. There are currently no non-abstract
* classes that extend it. When we implement field flow, we can revisit this.
*/
abstract class PostUpdateNode extends Node {
abstract class PostUpdateNode extends InstructionNode {
/**
* Gets the node before the state update.
*/
@@ -193,7 +214,7 @@ abstract class PostUpdateNode extends Node {
* returned. This node will have its `getArgument()` equal to `&x` and its
* `getVariableAccess()` equal to `x`.
*/
class DefinitionByReferenceNode extends Node {
class DefinitionByReferenceNode extends InstructionNode {
override WriteSideEffectInstruction instr;
/** Gets the argument corresponding to this node. */
@@ -220,10 +241,41 @@ class DefinitionByReferenceNode extends Node {
}
}
/**
* A `Node` corresponding to a variable in the program, as opposed to the
* value of that variable at some particular point. This can be used for
* modelling flow in and out of global variables.
*
* There is one such node per `Variable`: the node is defined by
* `TVariableNode(v)` and exposes `v` through `getVariable()`.
*/
class VariableNode extends Node, TVariableNode {
// The variable that this node stands for; bound by the characteristic predicate.
Variable v;
VariableNode() { this = TVariableNode(v) }
/** Gets the variable corresponding to this node. */
Variable getVariable() { result = v }
/**
* Never has a result for a `VariableNode`: the node represents the variable
* itself rather than an instruction inside some function.
*/
override Function getFunction() { none() }
/** Gets the enclosing callable of this node, which is the variable itself. */
override Declaration getEnclosingCallable() {
// When flow crosses from one _enclosing callable_ to another, the
// interprocedural data-flow library discards call contexts and inserts a
// node in the big-step relation used for human-readable path explanations.
// Therefore we want a distinct enclosing callable for each `VariableNode`,
// and that can be the `Variable` itself.
result = v
}
/** Gets the type of the underlying variable. */
override Type getType() { result = v.getType() }
/** Gets the location reported by the underlying variable. */
override Location getLocation() { result = v.getLocation() }
/** Gets the textual representation of the underlying variable. */
override string toString() { result = v.toString() }
}
/**
* Gets the node corresponding to `instr`.
*/
Node instructionNode(Instruction instr) { result.asInstruction() = instr }
InstructionNode instructionNode(Instruction instr) { result.getInstruction() = instr }
DefinitionByReferenceNode definitionByReferenceNode(Expr e) { result.getArgument() = e }
@@ -244,6 +296,9 @@ ExprNode convertedExprNode(Expr e) { result.getExpr() = e }
*/
ParameterNode parameterNode(Parameter p) { result.getParameter() = p }
/**
 * Gets the `VariableNode` that stands for the variable `v` itself, as opposed
 * to the value of `v` at some particular point.
 */
VariableNode variableNode(Variable v) { v = result.getVariable() }
/**
* Gets the `Node` corresponding to the value of an uninitialized local
* variable `v`.

View File

@@ -0,0 +1,4 @@
| globals.cpp:13:15:13:20 | call to getenv | globals.cpp:2:17:2:25 | sinkParam | global1 |
| globals.cpp:13:15:13:20 | call to getenv | globals.cpp:12:10:12:16 | global1 | global1 |
| globals.cpp:23:15:23:20 | call to getenv | globals.cpp:2:17:2:25 | sinkParam | global2 |
| globals.cpp:23:15:23:20 | call to getenv | globals.cpp:19:10:19:16 | global2 | global2 |

View File

@@ -0,0 +1,7 @@
import semmle.code.cpp.ir.dataflow.DefaultTaintTracking
// Reports every (source, tainted element, global variable name) triple for
// which `taintedIncludingGlobalVars` holds with a non-empty variable name.
from Expr source, Element tainted, string globalVar
where
taintedIncludingGlobalVars(source, tainted, globalVar) and
// `taintedIncludingGlobalVars` also has a case with `globalVar = ""`; exclude
// it so that only results naming a global variable are selected.
globalVar != ""
select source, tainted, globalVar

View File

@@ -0,0 +1,24 @@
char * getenv(const char *); // declared only; calls to it are the sources listed in the expected results
void sink(char *sinkParam); // declared only; `sinkParam` is listed as a tainted element in the expected results
void throughLocal() {
char * local = getenv("VAR"); // result of `getenv` stored in a local variable
sink(local); // flow: via `local`, no global variable involved
}
char * global1 = 0; // non-static global
void readWriteGlobal1() {
sink(global1); // flow: this read comes textually before the write on the next line
global1 = getenv("VAR"); // write of the `getenv` result to the global
}
static char * global2 = 0; // static global; written in `writeGlobal2`, read in `readGlobal2`
void readGlobal2() {
sink(global2); // flow: the only write of a `getenv` result to `global2` is in `writeGlobal2`
}
void writeGlobal2() {
global2 = getenv("VAR"); // write of the `getenv` result to the global read by `readGlobal2`
}

View File

@@ -101,6 +101,14 @@
| defaulttainttracking.cpp:88:18:88:23 | call to getenv | defaulttainttracking.cpp:88:18:88:23 | call to getenv |
| defaulttainttracking.cpp:88:18:88:23 | call to getenv | defaulttainttracking.cpp:88:18:88:30 | (reference to) |
| defaulttainttracking.cpp:88:18:88:23 | call to getenv | test_diff.cpp:1:11:1:20 | p#0 |
| globals.cpp:5:20:5:25 | call to getenv | globals.cpp:2:17:2:25 | sinkParam |
| globals.cpp:5:20:5:25 | call to getenv | globals.cpp:5:12:5:16 | local |
| globals.cpp:5:20:5:25 | call to getenv | globals.cpp:5:20:5:25 | call to getenv |
| globals.cpp:5:20:5:25 | call to getenv | globals.cpp:6:10:6:14 | local |
| globals.cpp:13:15:13:20 | call to getenv | globals.cpp:9:8:9:14 | global1 |
| globals.cpp:13:15:13:20 | call to getenv | globals.cpp:13:15:13:20 | call to getenv |
| globals.cpp:23:15:23:20 | call to getenv | globals.cpp:16:15:16:21 | global2 |
| globals.cpp:23:15:23:20 | call to getenv | globals.cpp:23:15:23:20 | call to getenv |
| test_diff.cpp:92:10:92:13 | argv | defaulttainttracking.cpp:9:11:9:20 | p#0 |
| test_diff.cpp:92:10:92:13 | argv | test_diff.cpp:1:11:1:20 | p#0 |
| test_diff.cpp:92:10:92:13 | argv | test_diff.cpp:92:10:92:13 | argv |

View File

@@ -15,6 +15,8 @@
| defaulttainttracking.cpp:88:18:88:23 | call to getenv | defaulttainttracking.cpp:88:8:88:32 | (reference dereference) | IR only |
| defaulttainttracking.cpp:88:18:88:23 | call to getenv | defaulttainttracking.cpp:88:18:88:30 | (reference to) | IR only |
| defaulttainttracking.cpp:88:18:88:23 | call to getenv | test_diff.cpp:1:11:1:20 | p#0 | IR only |
| globals.cpp:13:15:13:20 | call to getenv | globals.cpp:13:5:13:11 | global1 | AST only |
| globals.cpp:23:15:23:20 | call to getenv | globals.cpp:23:5:23:11 | global2 | AST only |
| test_diff.cpp:104:12:104:15 | argv | test_diff.cpp:104:11:104:20 | (...) | IR only |
| test_diff.cpp:108:10:108:13 | argv | test_diff.cpp:36:24:36:24 | p | AST only |
| test_diff.cpp:111:10:111:13 | argv | defaulttainttracking.cpp:9:11:9:20 | p#0 | AST only |

View File

@@ -36,9 +36,27 @@ class TestAllocationConfig extends DataFlow::Configuration {
)
}
/**
 * Holds if there is a flow step from `n1` to `n2` through a global or
 * namespace variable whose name matches `flowTestGlobal%`: either from a
 * store to the variable into the variable's node, or from the variable's
 * node to a load of the variable.
 */
override predicate isAdditionalFlowStep(DataFlow::Node n1, DataFlow::Node n2) {
  exists(GlobalOrNamespaceVariable global |
    global.getName().matches("flowTestGlobal%") and
    (
      // Step into the variable node from an instruction that writes it.
      n2.asVariable() = global and
      writesVariable(n1.asInstruction(), global)
      or
      // Step out of the variable node to an instruction that reads it.
      n1.asVariable() = global and
      readsVariable(n2.asInstruction(), global)
    )
  )
}
override predicate isBarrier(DataFlow::Node barrier) {
barrier.asExpr().(VariableAccess).getTarget().hasName("barrier")
}
override predicate isBarrierGuard(DataFlow::BarrierGuard bg) { bg instanceof TestBarrierGuard }
}
/**
 * Holds if the source address of `load` is a `VariableAddressInstruction`
 * whose AST variable is `var`.
 */
private predicate readsVariable(LoadInstruction load, Variable var) {
  exists(VariableAddressInstruction address |
    address = load.getSourceAddress() and
    var = address.getASTVariable()
  )
}
/**
 * Holds if the destination address of `store` is a
 * `VariableAddressInstruction` whose AST variable is `var`.
 */
private predicate writesVariable(StoreInstruction store, Variable var) {
  exists(VariableAddressInstruction address |
    address = store.getDestinationAddress() and
    var = address.getASTVariable()
  )
}

View File

@@ -0,0 +1,24 @@
int source(); // declared only; calls to it are listed as sources in the expected results
void sink(int); // declared only; its arguments are listed as sinks in the expected results
void throughLocal() {
int local = source(); // result of `source` stored in a local variable
sink(local); // flow: via `local`, no global variable involved
}
int flowTestGlobal1 = 0; // non-static global; name matches the `flowTestGlobal%` pattern used by the test configuration
void readWriteGlobal1() {
sink(flowTestGlobal1); // flow: this read comes textually before the write on the next line
flowTestGlobal1 = source(); // write of the `source` result to the global
}
static int flowTestGlobal2 = 0; // static global; name matches the `flowTestGlobal%` pattern used by the test configuration
void readGlobal2() {
sink(flowTestGlobal2); // flow: the only write of a `source` result to this global is in `writeGlobal2`
}
void writeGlobal2() {
flowTestGlobal2 = source(); // write of the `source` result to the global read by `readGlobal2`
}

View File

@@ -22,6 +22,7 @@
| dispatch.cpp:36:16:36:25 | call to notSource2 | dispatch.cpp:10:37:10:42 | call to source |
| dispatch.cpp:43:15:43:24 | call to notSource1 | dispatch.cpp:9:37:9:42 | call to source |
| dispatch.cpp:44:15:44:24 | call to notSource2 | dispatch.cpp:10:37:10:42 | call to source |
| globals.cpp:6:10:6:14 | local | globals.cpp:5:17:5:22 | call to source |
| lambdas.cpp:14:3:14:6 | t | lambdas.cpp:8:10:8:15 | call to source |
| lambdas.cpp:18:8:18:8 | call to operator() | lambdas.cpp:8:10:8:15 | call to source |
| lambdas.cpp:21:3:21:6 | t | lambdas.cpp:8:10:8:15 | call to source |

View File

@@ -17,6 +17,8 @@
| dispatch.cpp:107:17:107:22 | dispatch.cpp:96:8:96:8 | IR only |
| dispatch.cpp:140:8:140:13 | dispatch.cpp:96:8:96:8 | IR only |
| dispatch.cpp:144:8:144:13 | dispatch.cpp:96:8:96:8 | IR only |
| globals.cpp:13:23:13:28 | globals.cpp:12:10:12:24 | IR only |
| globals.cpp:23:23:23:28 | globals.cpp:19:10:19:24 | IR only |
| lambdas.cpp:8:10:8:15 | lambdas.cpp:14:3:14:6 | AST only |
| lambdas.cpp:8:10:8:15 | lambdas.cpp:18:8:18:8 | AST only |
| lambdas.cpp:8:10:8:15 | lambdas.cpp:21:3:21:6 | AST only |

View File

@@ -35,6 +35,9 @@
| dispatch.cpp:96:8:96:8 | x | dispatch.cpp:107:17:107:22 | call to source |
| dispatch.cpp:96:8:96:8 | x | dispatch.cpp:140:8:140:13 | call to source |
| dispatch.cpp:96:8:96:8 | x | dispatch.cpp:144:8:144:13 | call to source |
| globals.cpp:6:10:6:14 | local | globals.cpp:5:17:5:22 | call to source |
| globals.cpp:12:10:12:24 | flowTestGlobal1 | globals.cpp:13:23:13:28 | call to source |
| globals.cpp:19:10:19:24 | flowTestGlobal2 | globals.cpp:23:23:23:28 | call to source |
| lambdas.cpp:35:8:35:8 | a | lambdas.cpp:8:10:8:15 | call to source |
| test.cpp:7:8:7:9 | t1 | test.cpp:6:12:6:17 | call to source |
| test.cpp:9:8:9:9 | t1 | test.cpp:6:12:6:17 | call to source |