From 97461d1f119389dee7271ee4619557ad4bbce6c8 Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Thu, 18 Nov 2021 12:18:48 +0100 Subject: [PATCH] add var unused in disjunct query --- .../performance/VarUnusedInDisjunct.ql | 212 ++++++++++++++++++ .../performance/VarUnusedInDisjunct/Test.qll | 167 ++++++++++++++ .../VarUnusedInDisjunct.expected | 8 + .../VarUnusedInDisjunct.qlref | 1 + 4 files changed, 388 insertions(+) create mode 100644 ql/src/queries/performance/VarUnusedInDisjunct.ql create mode 100644 ql/test/queries/performance/VarUnusedInDisjunct/Test.qll create mode 100644 ql/test/queries/performance/VarUnusedInDisjunct/VarUnusedInDisjunct.expected create mode 100644 ql/test/queries/performance/VarUnusedInDisjunct/VarUnusedInDisjunct.qlref diff --git a/ql/src/queries/performance/VarUnusedInDisjunct.ql b/ql/src/queries/performance/VarUnusedInDisjunct.ql new file mode 100644 index 00000000000..4311038df50 --- /dev/null +++ b/ql/src/queries/performance/VarUnusedInDisjunct.ql @@ -0,0 +1,212 @@ +/** + * @name Var only used in one side of disjunct. + * @description Only using a variable on one side of a disjunction can cause a cartesian product. + * @kind problem + * @problem.severity warning + * @id ql/var-unused-in-disjunct + * @tags maintainability + * performance + * @precision high + */ + +import ql + +/** + * Holds if `node` bind `var` in a (transitive) child node. + * Is a practical approximation that ignores `not` and many other features. + */ +pragma[noinline] +predicate alwaysBindsVar(VarDef var, AstNode node) { + // base case + node.(VarAccess).getDeclaration() = var and + not isSmallType(var.getType()) // <- early pruning + or + // recursive cases + alwaysBindsVar(var, node.getAChild(_)) and // the recursive step, go one step up to the parent. + not node.(FullAggregate).getAnArgument() = var and // except if the parent defines the variable, then we stop. + not node.(Quantifier).getAnArgument() = var and + not node instanceof EffectiveDisjunction // for disjunctions, we need to check both sides. + or + exists(EffectiveDisjunction disj | disj = node | + alwaysBindsVar(var, disj.getLeft()) and + alwaysBindsVar(var, disj.getRight()) + ) + or + exists(EffectiveDisjunction disj | disj = node | + alwaysBindsVar(var, disj.getAnOperand()) and + disj.getAnOperand() instanceof NoneCall + ) + or + exists(IfFormula ifForm | ifForm = node | alwaysBindsVar(var, ifForm.getCondition())) +} + +/** + * Holds if we assume `t` is a small type, and + * variables of this type are therefore not an issue in cartesian products. + */ +predicate isSmallType(Type t) { + t.getName() = "string" // DataFlow::Configuration and the like + or + exists(NewType newType | newType = t.getDeclaration() | + forex(NewTypeBranch branch | branch = newType.getABranch() | branch.getArity() = 0) + ) + or + t.getName() = "boolean" + or + exists(NewType newType | newType = t.getDeclaration() | + forex(NewTypeBranch branch | branch = newType.getABranch() | + isSmallType(branch.getReturnType()) + ) + ) + or + exists(NewTypeBranch branch | t = branch.getReturnType() | + forall(Type param | param = branch.getParameterType(_) | isSmallType(param)) + ) + or + isSmallType(t.getASuperType()) +} + +/** + * Holds if `pred` is inlined. + */ +predicate isInlined(Predicate pred) { + exists(Annotation inline | + inline = pred.getAnAnnotation() and + inline.getName() = "pragma" and + inline.getArgs(0).getValue() = "inline" + ) + or + pred.getAnAnnotation().getName() = "bindingset" +} + +/** + * An AstNode that acts like a disjunction. + */ +class EffectiveDisjunction extends AstNode { + EffectiveDisjunction() { + this instanceof IfFormula + or + this instanceof Disjunction + } + + /** Gets the left operand of this disjunction. */ + AstNode getLeft() { + result = this.(IfFormula).getThenPart() + or + result = this.(Disjunction).getLeft() + } + + /** Gets the right operand of this disjunction. */ + AstNode getRight() { + result = this.(IfFormula).getElsePart() + or + result = this.(Disjunction).getRight() + } + + /** Gets any of the operands of this disjunction. */ + AstNode getAnOperand() { result = [this.getLeft(), this.getRight()] } +} + +/** + * Holds if `disj` only uses `var` in one of its branches. + */ +pragma[noinline] +predicate onlyUseInOneBranch(EffectiveDisjunction disj, VarDef var) { + alwaysBindsVar(var, disj.getLeft()) and + not alwaysBindsVar(var, disj.getRight()) + or + not alwaysBindsVar(var, disj.getLeft()) and + alwaysBindsVar(var, disj.getRight()) +} + +/** + * Holds if `comp` is an equality comparison that has a low number of results. + */ +predicate isTinyAssignment(ComparisonFormula comp) { + comp.getOperator() = "=" and + ( + isSmallType(comp.getAnOperand().getType()) + or + comp.getAnOperand() instanceof Literal + ) +} + +/** + * An AstNode that acts like a conjunction. + */ +class EffectiveConjunction extends AstNode { + EffectiveConjunction() { + this instanceof Conjunction + or + this instanceof Exists + } + + /** Gets the left operand of this conjunction */ + Formula getLeft() { + result = this.(Conjunction).getLeft() + or + result = this.(Exists).getRange() + } + + /** Gets the right operand of this conjunction */ + Formula getRight() { + result = this.(Conjunction).getRight() + or + result = this.(Exists).getFormula() + } +} + +/** + * Holds if `node` is a sub-node of a node that always binds `var`. + */ +predicate varIsAlwaysBound(VarDef var, AstNode node) { + // base case + alwaysBindsVar(var, node) and + onlyUseInOneBranch(_, var) // <- manual magic + or + // recursive cases + exists(AstNode parent | node.getParent() = parent | varIsAlwaysBound(var, parent)) + or + exists(EffectiveConjunction parent | + varIsAlwaysBound(var, parent.getLeft()) and + node = parent.getRight() + or + varIsAlwaysBound(var, parent.getRight()) and + node = parent.getLeft() + ) + or + exists(IfFormula ifForm | varIsAlwaysBound(var, ifForm.getCondition()) | + node = [ifForm.getThenPart(), ifForm.getElsePart()] + ) + or + exists(Forall for | varIsAlwaysBound(var, for.getRange()) | node = for.getFormula()) +} + +/** + * Holds if `disj` only uses `var` in one of its branches. + * And we should report it as being a bad thing. + */ +predicate badDisjunction(EffectiveDisjunction disj, VarDef var) { + onlyUseInOneBranch(disj, var) and + // it's fine if it's always bound further up + not varIsAlwaysBound(var, disj) and + // none() on one side makes everything fine. (this happens, it's a type-system hack) + not disj.getAnOperand() instanceof NoneCall and + // inlined predicates might bind unused variables in the context they are used in. + not ( + isInlined(disj.getEnclosingPredicate()) and + var = disj.getEnclosingPredicate().getParameter(_) + ) and + // recursion prevention never runs, it's a compile-time check, so we remove those results here + not disj.getEnclosingPredicate().getParent().(Class).getName().matches("%RecursionPrevention") and // these are by design + // not a small type + not isSmallType(var.getType()) and + // one of the branches is a tiny assignment. These are usually intentional cartesian products (and not too big). + not isTinyAssignment(disj.getAnOperand()) +} + +from EffectiveDisjunction disj, VarDef var +where + badDisjunction(disj, var) and + not badDisjunction(disj.getParent(), var) // avoid duplicate reporting of the same error +select disj, "The variable " + var.getName() + " is only used in one side of disjunct." diff --git a/ql/test/queries/performance/VarUnusedInDisjunct/Test.qll b/ql/test/queries/performance/VarUnusedInDisjunct/Test.qll new file mode 100644 index 00000000000..9fde97a3544 --- /dev/null +++ b/ql/test/queries/performance/VarUnusedInDisjunct/Test.qll @@ -0,0 +1,167 @@ +import ql + +class Big = Expr; + +class Small extends boolean { + Small() { this = [true, false] } +} + +class MyStr extends string { + MyStr() { this = ["foo", "bar"] } +} + +predicate bad1(Big b) { + b.toString().matches("%foo") + or + any() +} + +int bad2() { + exists(Big big, Small small | + result = big.toString().toInt() + or + result = small.toString().toInt() + ) +} + +float bad3(Big t) { + result = [1 .. 10].toString().toFloat() or + result = [11 .. 20].toString().toFloat() or + result = t.toString().toFloat() or + result = [21 .. 30].toString().toFloat() +} + +string good1(Big t) { + ( + result = t.toString() + or + result instanceof MyStr // <- t unused here, but that's ok because of the conjunct that binds t. + ) and + t.toString().regexpMatch(".*foo") +} + +predicate helper(Big a, Big b) { + a = b and + a.toString().matches("%foo") +} + +predicate bad4(Big fromType, Big toType) { + helper(fromType, toType) + or + fromType.toString().matches("%foo") + or + helper(toType, fromType) +} + +predicate good2(Big t) { + exists(Small other | + t.toString().matches("%foo") + or + other.toString().matches("%foo") // <- t unused here, but that's ok because of the conjunct (exists) that binds t. + | + t.toString().regexpMatch(".*foo") + ) +} + +predicate mixed1(Big good, Small small) { + good.toString().matches("%foo") + or + good = + any(Big bad | + small.toString().matches("%foo") and + // the use of good is fine, the comparison futher up binds it. + // the same is not true for bad. + (bad.toString().matches("%foo") or good.toString().regexpMatch("foo.*")) and + small.toString().regexpMatch(".*foo") + ) +} + +newtype OtherSmall = + Small1() or + Small2(boolean b) { b = true } or + Small3(boolean b, Small o) { + b = true and + o.toString().matches("%foo") + } + +predicate good3(OtherSmall small) { + small = Small1() + or + 1 = 1 +} + +predicate good4(Big big, Small small) { + big.toString().matches("%foo") + or + // assignment to small type, intentional cartesian product + small = any(Small s | s.toString().matches("%foo")) +} + +predicate good5(Big bb, Big v, boolean certain) { + exists(Big read | + read = bb and + read = v and + certain = true + ) + or + v = + any(Big lsv | + lsv.getEnclosingPredicate() = bb.(Expr).getEnclosingPredicate() and + (lsv.toString().matches("%foo") or v.toString().matches("%foo")) and + certain = false + ) +} + +predicate bad5(Big bb) { if none() then bb.toString().matches("%foo") else any() } + +pragma[inline] +predicate good5(Big a, Big b) { + // fine. Assumes it's used somewhere where `a` and `b` are bound. + b = any(Big bb | bb.toString().matches("%foo")) + or + a = any(Big bb | bb.toString().matches("%foo")) +} + +predicate bad6(Big a) { + ( + a.toString().matches("%foo") // bad + or + any() + ) and + ( + a.toString().matches("%foo") // also bad + or + any() + ) +} + +predicate good6(Big a) { + a.toString().matches("%foo") and + ( + a.toString().matches("%foo") // good, `a` is bound on the branch of the conjunction. + or + any() + ) +} + +predicate good7() { + exists(Big l, Big r | + l.toString().matches("%foo1") and + r.toString().matches("%foo2") + or + l.toString().matches("%foo3") and + r.toString().matches("%foo4") + | + not (l.toString().regexpMatch("%foo5") or r.toString().regexpMatch("%foo6")) and + (l.toString().regexpMatch("%foo7") or r.toString().regexpMatch("%foo8")) + ) +} + +// TOOD: Next test, this one is +string good8(int bitSize) { + if bitSize != 0 + then bitSize = 1 and result = bitSize.toString() + else ( + if 1 = 0 then result = "foo" else result = "bar" + ) +} diff --git a/ql/test/queries/performance/VarUnusedInDisjunct/VarUnusedInDisjunct.expected b/ql/test/queries/performance/VarUnusedInDisjunct/VarUnusedInDisjunct.expected new file mode 100644 index 00000000000..088c3a3b505 --- /dev/null +++ b/ql/test/queries/performance/VarUnusedInDisjunct/VarUnusedInDisjunct.expected @@ -0,0 +1,8 @@ +| Test.qll:14:3:16:7 | Disjunction | The variable b is only used in one side of disjunct. | +| Test.qll:21:5:23:37 | Disjunction | The variable big is only used in one side of disjunct. | +| Test.qll:28:3:30:33 | Disjunction | The variable t is only used in one side of disjunct. | +| Test.qll:49:3:53:26 | Disjunction | The variable toType is only used in one side of disjunct. | +| Test.qll:74:8:74:77 | Disjunction | The variable bad is only used in one side of disjunct. | +| Test.qll:115:26:115:80 | IfFormula | The variable bb is only used in one side of disjunct. | +| Test.qll:127:5:129:9 | Disjunction | The variable a is only used in one side of disjunct. | +| Test.qll:132:5:134:9 | Disjunction | The variable a is only used in one side of disjunct. | diff --git a/ql/test/queries/performance/VarUnusedInDisjunct/VarUnusedInDisjunct.qlref b/ql/test/queries/performance/VarUnusedInDisjunct/VarUnusedInDisjunct.qlref new file mode 100644 index 00000000000..28f0c0d938a --- /dev/null +++ b/ql/test/queries/performance/VarUnusedInDisjunct/VarUnusedInDisjunct.qlref @@ -0,0 +1 @@ +queries/performance/VarUnusedInDisjunct.ql \ No newline at end of file