diff --git a/change-notes/2021-02-02-constant-comparison-sanitizer-guard.md b/change-notes/2021-02-02-constant-comparison-sanitizer-guard.md new file mode 100644 index 00000000000..0442a7f9ccd --- /dev/null +++ b/change-notes/2021-02-02-constant-comparison-sanitizer-guard.md @@ -0,0 +1,2 @@ +lgtm,codescanning +* A function which compares a value with a list of constants now acts as a sanitizer guard. This should lead to fewer false positive results. diff --git a/ql/src/semmle/go/controlflow/ControlFlowGraph.qll b/ql/src/semmle/go/controlflow/ControlFlowGraph.qll index 63225ca78b0..71bd7f1c74f 100644 --- a/ql/src/semmle/go/controlflow/ControlFlowGraph.qll +++ b/ql/src/semmle/go/controlflow/ControlFlowGraph.qll @@ -274,6 +274,17 @@ module ControlFlow { * cannot return normally, but never fails to hold of a function that can return normally. */ predicate mayReturnNormally(FuncDecl f) { CFG::mayReturnNormally(f.getBody()) } + + /** + * Holds if `pred` is the node for the case `testExpr` in an expression + * switch statement which is switching on `switchExpr`, and `succ` is the + * node to be executed next if the case test succeeds. + */ + predicate isSwitchCaseTestPassingEdge( + ControlFlow::Node pred, ControlFlow::Node succ, Expr switchExpr, Expr testExpr + ) { + CFG::isSwitchCaseTestPassingEdge(pred, succ, switchExpr, testExpr) + } } class Write = ControlFlow::WriteNode; diff --git a/ql/src/semmle/go/controlflow/ControlFlowGraphImpl.qll b/ql/src/semmle/go/controlflow/ControlFlowGraphImpl.qll index 51c976dc911..bf1d1aef15e 100644 --- a/ql/src/semmle/go/controlflow/ControlFlowGraphImpl.qll +++ b/ql/src/semmle/go/controlflow/ControlFlowGraphImpl.qll @@ -1058,11 +1058,16 @@ module CFG { pred = getExprEnd(i, false) and succ = getExprStart(i + 1) or - pred = getExprEnd(i, true) and - succ = getBodyStart() + isPassingEdge(i, pred, succ, _) ) } + predicate isPassingEdge(int i, ControlFlow::Node pred, ControlFlow::Node succ, Expr testExpr) { + pred = getExprEnd(i, true) and + succ = getBodyStart() and + testExpr = getExpr(i) + } + override ControlFlowTree getChildTree(int i) { result = getStmt(i) } } @@ -1971,6 +1976,20 @@ module CFG { exists(ControlFlow::Node last, Completion cmpl | lastNode(root, last, cmpl) and cmpl != Panic()) } + /** + * Holds if `pred` is the node for the case `testExpr` in an expression + * switch statement which is switching on `switchExpr`, and `succ` is the + * node to be executed next if the case test succeeds. + */ + cached + predicate isSwitchCaseTestPassingEdge( + ControlFlow::Node pred, ControlFlow::Node succ, Expr switchExpr, Expr testExpr + ) { + exists(ExpressionSwitchStmt ess | ess.getExpr() = switchExpr | + ess.getACase().(CaseClauseTree).isPassingEdge(_, pred, succ, testExpr) + ) + } + /** Gets a successor of `nd`, that is, a node that is executed after `nd`. */ cached ControlFlow::Node succ(ControlFlow::Node nd) { any(ControlFlowTree tree).succ(nd, result) } diff --git a/ql/src/semmle/go/controlflow/IR.qll b/ql/src/semmle/go/controlflow/IR.qll index 1b1e7df9425..95270055d18 100644 --- a/ql/src/semmle/go/controlflow/IR.qll +++ b/ql/src/semmle/go/controlflow/IR.qll @@ -905,6 +905,9 @@ module IR { ReturnInstruction() { this = MkReturnNode(ret) } + /** Gets the corresponding `ReturnStmt`. */ + ReturnStmt getReturnStmt() { result = ret } + /** Holds if this statement returns multiple results. */ predicate returnsMultipleResults() { exists(MkExtractNode(ret, _)) or ret.getNumExpr() > 1 } diff --git a/ql/src/semmle/go/dataflow/internal/DataFlowUtil.qll b/ql/src/semmle/go/dataflow/internal/DataFlowUtil.qll index f99365d1706..9efbbd4e1e3 100644 --- a/ql/src/semmle/go/dataflow/internal/DataFlowUtil.qll +++ b/ql/src/semmle/go/dataflow/internal/DataFlowUtil.qll @@ -331,6 +331,20 @@ class FuncLitNode extends FunctionNode::Range, ExprNode { override ResultNode getAResult() { result.getRoot() = getExpr() } } +/** + * Gets a possible target of call `cn`.class + * + * This is written explicitly like this instead of using `getCalleeNode().getAPredecessor*()` + * or `result.getASuccessor*() = cn.getCalleeNode()` because the explicit form inhibits the + * optimizer from combining this with other uses of `getASuccessor*()`, which can lead to + * recursion through a magic side-condition if those other users call `getACallee()` and thus + * pointless recomputation of `getACallee()` each recursive iteration. + */ +private DataFlow::Node getACalleeSource(DataFlow::CallNode cn) { + result = cn.getCalleeNode() or + result.getASuccessor() = getACalleeSource(cn) +} + /** A data flow node that represents a call. */ class CallNode extends ExprNode { override CallExpr expr; @@ -338,7 +352,7 @@ class CallNode extends ExprNode { /** Gets the declared target of this call */ Function getTarget() { result = expr.getTarget() } - private DataFlow::Node getACalleeSource() { result.getASuccessor*() = getCalleeNode() } + private DataFlow::Node getACalleeSource() { result = getACalleeSource(this) } /** * Gets the definition of a possible target of this call. @@ -1184,10 +1198,10 @@ abstract class BarrierGuard extends Node { fd.getFunction() = f and localFlow(inp.getExitNode(fd), arg) and ret = outp.getEntryNode(fd) and - // Case: a function like "if someBarrierGuard(arg) { return true } else { return false }" ( + // Case: a function like "if someBarrierGuard(arg) { return true } else { return false }" exists(ControlFlow::ConditionGuardNode guard | - guards(guard, arg) and + this.guards(guard, arg) and guard.dominates(ret.getBasicBlock()) | exists(boolean b | @@ -1221,7 +1235,7 @@ abstract class BarrierGuard extends Node { DataFlow::Property outpProp | not exists(DataFlow::Node otherRet | otherRet = outp.getEntryNode(fd) | otherRet != ret) and - guardingFunction(f2, inp2, outp2, outpProp) and + this.guardingFunction(f2, inp2, outp2, outpProp) and c = f2.getACall() and arg = inp2.getNode(c) and ( @@ -1242,7 +1256,7 @@ abstract class BarrierGuard extends Node { * Holds if `ret` is a data-flow node whose value contributes to the output `res` of `fd`, * and that node may have Boolean value `b`. */ -private predicate possiblyReturnsBool(FuncDecl fd, FunctionOutput res, Node ret, Boolean b) { +predicate possiblyReturnsBool(FuncDecl fd, FunctionOutput res, Node ret, Boolean b) { ret = res.getEntryNode(fd) and ret.getType().getUnderlyingType() instanceof BoolType and not ret.getBoolValue() != b @@ -1264,7 +1278,7 @@ private predicate onlyPossibleReturnOfBool(FuncDecl fd, FunctionOutput res, Node * Holds if `ret` is a data-flow node whose value contributes to the output `res` of `fd`, * and that node may evaluate to a value other than `nil`. */ -private predicate possiblyReturnsNonNil(FuncDecl fd, FunctionOutput res, Node ret) { +predicate possiblyReturnsNonNil(FuncDecl fd, FunctionOutput res, Node ret) { ret = res.getEntryNode(fd) and not ret.asExpr() = Builtin::nil().getAReference() } diff --git a/ql/src/semmle/go/dataflow/internal/TaintTrackingUtil.qll b/ql/src/semmle/go/dataflow/internal/TaintTrackingUtil.qll index 4627bbf3159..d060ee25bea 100644 --- a/ql/src/semmle/go/dataflow/internal/TaintTrackingUtil.qll +++ b/ql/src/semmle/go/dataflow/internal/TaintTrackingUtil.qll @@ -223,3 +223,162 @@ class EqualityTestGuard extends DefaultTaintSanitizerGuard, DataFlow::EqualityTe outcome = this.getPolarity() } } + +/** + * Holds if data flows from `node` to `switchExprNode`, which is the expression + * of a switch statement. + */ +private predicate flowsToSwitchExpression(DataFlow::Node node, DataFlow::Node switchExprNode) { + switchExprNode.asExpr() = any(ExpressionSwitchStmt ess).getExpr() and + DataFlow::localFlow(node, switchExprNode) +} + +/** + * Holds if `inputNode` is the exit node of a parameter to `fd` and data flows + * from `inputNode` to the expression of a switch statement. + */ +private predicate isPossibleInputNode(DataFlow::Node inputNode, FuncDef fd) { + inputNode = any(FunctionInput inp | inp.isParameter(_)).getExitNode(fd) and + flowsToSwitchExpression(inputNode, _) +} + +/** + * Gets a predecessor of `succ` without following edges corresponding to + * passing a constant case test in a switch statement which is switching on + * an expression which data flows to from `inputNode`. + */ +private ControlFlow::Node getANonTestPassingPredecessor( + ControlFlow::Node succ, DataFlow::Node inputNode +) { + isPossibleInputNode(inputNode, succ.getRoot().(FuncDef)) and + result = succ.getAPredecessor() and + not exists(Expr testExpr, DataFlow::Node switchExprNode | + flowsToSwitchExpression(inputNode, switchExprNode) and + ControlFlow::isSwitchCaseTestPassingEdge(result, succ, switchExprNode.asExpr(), testExpr) and + testExpr.isConst() + ) +} + +private ControlFlow::Node getANonTestPassingReachingNodeRecursive( + ControlFlow::Node n, DataFlow::Node inputNode +) { + isPossibleInputNode(inputNode, n.getRoot().(FuncDef)) and + ( + result = n or + result = + getANonTestPassingReachingNodeRecursive(getANonTestPassingPredecessor(n, inputNode), inputNode) + ) +} + +/** + * Gets a node by following predecessors from `ret` without following edges + * corresponding to passing a constant case test in a switch statement which is + * switching on an expression which data flows to from `inputNode`. + */ +private ControlFlow::Node getANonTestPassingReachingNodeBase( + IR::ReturnInstruction ret, DataFlow::Node inputNode +) { + result = getANonTestPassingReachingNodeRecursive(ret, inputNode) +} + +/** + * Holds if every way to get from the entry node of the function to `ret` + * involves passing a constant test case in a switch statement which is + * switching on an expression which data flows to from `inputNode`. + */ +private predicate mustPassConstantCaseTestToReach( + IR::ReturnInstruction ret, DataFlow::Node inputNode +) { + isPossibleInputNode(inputNode, ret.getRoot().(FuncDef)) and + not exists(ControlFlow::Node entry | entry = ret.getRoot().getEntryNode() | + entry = getANonTestPassingReachingNodeBase(ret, inputNode) + ) +} + +/** + * Holds if whenever `outp` of function `f` satisfies `p`, the input `inp` of + * `f` matched a constant in a case clause of a switch statement. + * + * We check this by looking for guards on `inp` that collectively dominate all + * the `return` statements in `f` that can return `true`. This means that if + * `f` returns `true`, one of the guards must have been satisfied. (Similar + * reasoning is applied for statements returning `false`, `nil` or a non-`nil` + * value.) + */ +predicate functionEnsuresInputIsConstant( + Function f, FunctionInput inp, FunctionOutput outp, DataFlow::Property p +) { + exists(FuncDecl fd | fd.getFunction() = f | + exists(boolean b | + p.isBoolean(b) and + forex(DataFlow::Node ret, IR::ReturnInstruction ri | + ret = outp.getEntryNode(fd) and + ri.getReturnStmt().getAnExpr() = ret.asExpr() and + DataFlow::possiblyReturnsBool(fd, outp, ret, b) + | + mustPassConstantCaseTestToReach(ri, inp.getExitNode(fd)) + ) + ) + or + p.isNonNil() and + forex(DataFlow::Node ret, IR::ReturnInstruction ri | + ret = outp.getEntryNode(fd) and + ri.getReturnStmt().getAnExpr() = ret.asExpr() and + DataFlow::possiblyReturnsNonNil(fd, outp, ret) + | + mustPassConstantCaseTestToReach(ri, inp.getExitNode(fd)) + ) + or + p.isNil() and + forex(DataFlow::Node ret, IR::ReturnInstruction ri | + ret = outp.getEntryNode(fd) and + ri.getReturnStmt().getAnExpr() = ret.asExpr() and + ret.asExpr() = Builtin::nil().getAReference() + | + exists(DataFlow::Node exprNode | + DataFlow::localFlow(inp.getExitNode(fd), exprNode) and + mustPassConstantCaseTestToReach(ri, inp.getExitNode(fd)) + ) + ) + ) +} + +/** + * Holds if whenever `outputNode` satisfies `p`, `inputNode` matched a constant + * in a case clause of a switch statement. + */ +pragma[noinline] +predicate inputIsConstantIfOutputHasProperty( + DataFlow::Node inputNode, DataFlow::Node outputNode, DataFlow::Property p +) { + exists(Function f, FunctionInput inp, FunctionOutput outp, DataFlow::CallNode call | + functionEnsuresInputIsConstant(f, inp, outp, p) and + call = f.getACall() and + inputNode = inp.getNode(call) and + DataFlow::localFlow(outp.getNode(call), outputNode) + ) +} + +/** + * A comparison against a list of constants, acting as a sanitizer guard for + * `guardedExpr` by restricting it to a known value. + * + * Currently this only looks for functions containing a switch statement, but + * it could equally look for a check for membership of a constant map or + * constant array, which does not need to be in its own function. + */ +class ListOfConstantsComparisonSanitizerGuard extends TaintTracking::DefaultTaintSanitizerGuard { + DataFlow::Node guardedExpr; + boolean outcome; + + ListOfConstantsComparisonSanitizerGuard() { + exists(DataFlow::Node outputNode, DataFlow::Property p | + inputIsConstantIfOutputHasProperty(guardedExpr, outputNode, p) and + p.checkOn(this, outcome, outputNode) + ) + } + + override predicate checks(Expr e, boolean branch) { + e = guardedExpr.asExpr() and branch = outcome + } +} diff --git a/ql/test/library-tests/semmle/go/dataflow/ListOfConstantsSanitizerGuards/test.expected b/ql/test/library-tests/semmle/go/dataflow/ListOfConstantsSanitizerGuards/test.expected new file mode 100644 index 00000000000..e69de29bb2d diff --git a/ql/test/library-tests/semmle/go/dataflow/ListOfConstantsSanitizerGuards/test.go b/ql/test/library-tests/semmle/go/dataflow/ListOfConstantsSanitizerGuards/test.go new file mode 100644 index 00000000000..ceab5488627 --- /dev/null +++ b/ql/test/library-tests/semmle/go/dataflow/ListOfConstantsSanitizerGuards/test.go @@ -0,0 +1,200 @@ +package main + +import ( + "crypto/rand" + "fmt" +) + +const constantGlobalVariable string = "constant global variable" + +// Utilities: a source, a sink and an error struct: + +func source() string { + return "tainted" +} + +func sink(s string) {} + +func getRandomString() string { + buff := make([]byte, 10) + rand.Read(buff) + return fmt.Sprintf("%x", buff) +} + +func getConstantString() string { + return "constant return value" +} + +type errorString struct { + s string +} + +func (e *errorString) Error() string { + return e.s +} + +// Candidate functions which compare the input against a list of constants: + +func switchStatementReturningTrueOnlyWhenConstant(s string) bool { + switch s { + case constantGlobalVariable, "string literal": + return true + case getRandomString(): + return false + case "another string literal": + return false + default: + return false + } +} + +func switchStatementReturningFalseOnlyWhenConstant(r string, s string) bool { + switch s { + case "string literal": + return false + case constantGlobalVariable: + return false + case "another string literal": + return true + } + return true +} + +func switchStatementReturningNonNilOnlyWhenConstant(s string) (string, error) { + switch s { + case constantGlobalVariable, "string literal": + return "error", &errorString{"a"} + case getRandomString(): + return "no error", nil + case "another string literal": + fallthrough + default: + return "no error", nil + } +} + +func switchStatementReturningNilOnlyWhenConstant(s string) *string { + t := s + switch t { + case "string literal": + fallthrough + case constantGlobalVariable: + return nil + case "another string literal": + str := "matches random string" + return &str + } + str := "no matches" + return &str +} + +func multipleSwitchStatementReturningTrueOnlyWhenConstant(s string, t string) bool { + switch s { + case constantGlobalVariable, "string literal": + return true + case getRandomString(): + return false + } + switch s { + case "another string literal": + return true + } + switch t { + case "another string literal": + return false + default: + return false + } +} + +func switchStatementWithoutUsefulInfo(s string) bool { + switch s { + case constantGlobalVariable, "string literal": + return false + case getRandomString(): + return true + default: + return false + } +} + +func switchStatementOverRandomString(s string) bool { + switch getRandomString() { + case "string literal": + return true + default: + return false + } +} + +// Tests + +func main() { + + // Switch statements in functions + + { + s := source() + if switchStatementReturningTrueOnlyWhenConstant(s) { + sink(s) + } else { + sink(s) // $dataflow=s + } + } + + { + s := source() + if switchStatementReturningFalseOnlyWhenConstant("", s) { + sink(s) // $dataflow=s + } else { + sink(s) + } + } + + { + s := source() + _, err := switchStatementReturningNonNilOnlyWhenConstant(s) + if err != nil { + sink(s) + } else { + sink(s) // $dataflow=s + } + } + + { + s := source() + if switchStatementReturningNilOnlyWhenConstant(s) == nil { + sink(s) + } else { + sink(s) // $dataflow=s + } + } + + { + s := source() + if multipleSwitchStatementReturningTrueOnlyWhenConstant(s, getRandomString()) { + sink(s) + } else { + sink(s) // $dataflow=s + } + } + + { + s := source() + if switchStatementWithoutUsefulInfo(s) { + sink(s) // $dataflow=s + } else { + sink(s) // $dataflow=s + } + } + + { + s := source() + if switchStatementOverRandomString(s) { + sink(s) // $dataflow=s + } else { + sink(s) // $dataflow=s + } + } + +} diff --git a/ql/test/library-tests/semmle/go/dataflow/ListOfConstantsSanitizerGuards/test.ql b/ql/test/library-tests/semmle/go/dataflow/ListOfConstantsSanitizerGuards/test.ql new file mode 100644 index 00000000000..2b6bddff7f9 --- /dev/null +++ b/ql/test/library-tests/semmle/go/dataflow/ListOfConstantsSanitizerGuards/test.ql @@ -0,0 +1,29 @@ +import go +import TestUtilities.InlineExpectationsTest + +class TestConfig extends TaintTracking::Configuration { + TestConfig() { this = "test config" } + + override predicate isSource(DataFlow::Node source) { + source.(DataFlow::CallNode).getTarget().getName() = "source" + } + + override predicate isSink(DataFlow::Node sink) { + sink = any(DataFlow::CallNode c | c.getTarget().getName() = "sink").getAnArgument() + } +} + +class DataFlowTest extends InlineExpectationsTest { + DataFlowTest() { this = "DataFlowTest" } + + override string getARelevantTag() { result = "dataflow" } + + override predicate hasActualResult(string file, int line, string element, string tag, string value) { + tag = "dataflow" and + exists(DataFlow::Node sink | any(TestConfig c).hasFlow(_, sink) | + element = sink.toString() and + value = sink.toString() and + sink.hasLocationInfo(file, line, _, _, _) + ) + } +}