Merge branch 'master' into python-cwe-312

This commit is contained in:
Mark Shannon
2019-08-29 15:57:49 +01:00
359 changed files with 39895 additions and 17202 deletions

View File

@@ -3,7 +3,6 @@ import semmle.python.Operations
import semmle.python.Variables
import semmle.python.AstGenerated
import semmle.python.AstExtended
import semmle.python.AST
import semmle.python.Function
import semmle.python.Module
import semmle.python.Class
@@ -12,7 +11,6 @@ import semmle.python.Stmts
import semmle.python.Exprs
import semmle.python.Keywords
import semmle.python.Comprehensions
import semmle.python.Lists
import semmle.python.Flow
import semmle.python.Metrics
import semmle.python.Constants
@@ -28,7 +26,6 @@ import semmle.python.types.Version
import semmle.python.types.Descriptors
import semmle.python.protocols
import semmle.python.SSA
import semmle.python.Assigns
import semmle.python.SelfAttribute
import semmle.python.types.Properties
import semmle.python.xml.XML

View File

@@ -1,57 +0,0 @@
import python
/** A syntactic node (Class, Function, Module, Expr, Stmt or Comprehension) that may correspond to a flow node */
abstract class AstNode extends AstNode_ {

    /** Gets the scope in which this node occurs */
    abstract Scope getScope();

    /**
     * Gets a flow node corresponding directly to this node.
     * NOTE: Some statements and other purely syntactic elements
     * may have no `ControlFlowNode`, in which case there is no result.
     */
    ControlFlowNode getAFlowNode() {
        py_flow_bb_node(result, this, _, _)
    }

    /** Gets the location of this AST node. This base implementation has no result. */
    Location getLocation() {
        none()
    }

    /**
     * Holds if this syntactic element is artificial, that is, it is generated
     * by the compiler and is not present in the source.
     * This base implementation never holds.
     */
    predicate isArtificial() {
        none()
    }

    /**
     * Gets a child node of this node in the AST. This predicate exists to aid
     * exploration of the AST and other experiments; the child-parent relation
     * may not be meaningful. For a more meaningful relation in terms of
     * dependency use Expr.getASubExpression(), Stmt.getASubStatement(),
     * Stmt.getASubExpression() or Scope.getAStmt().
     */
    abstract AstNode getAChildNode();

    /**
     * Gets the parent node of this node in the AST, i.e. a node that has this
     * node as a child. This predicate exists to aid exploration of the AST and
     * other experiments; the child-parent relation may not be meaningful.
     * For a more meaningful relation in terms of dependency use
     * Expr.getASubExpression(), Stmt.getASubStatement(), Stmt.getASubExpression()
     * or Scope.getAStmt() applied to the parent.
     */
    AstNode getParentNode() {
        this = result.getAChildNode()
    }

    /** Holds if `inner` is a transitive child of this node, i.e. this node contains `inner` syntactically */
    predicate contains(AstNode inner) {
        inner = this.getAChildNode+()
    }

    /** Holds if this node contains `inner` syntactically, `inner` is not itself a `Scope`, and both have the same scope */
    predicate containsInScope(AstNode inner) {
        not inner instanceof Scope and
        inner.getScope() = this.getScope() and
        this.contains(inner)
    }

}

View File

@@ -1,19 +0,0 @@
/**
* In order to handle data flow and other analyses efficiently, the extractor transforms various statements which perform binding into assignments.
* These classes provide a wrapper to provide a more 'natural' interface to the syntactic elements transformed to assignments.
*/
import python
/** An assignment statement: an `Assign` that is neither a `FunctionDef` nor a `ClassDef` */
class AssignStmt extends Assign {

    AssignStmt() {
        not (this instanceof FunctionDef or this instanceof ClassDef)
    }

    /** Gets the fixed textual representation "AssignStmt" */
    override string toString() { result = "AssignStmt" }

}

View File

@@ -1,5 +1,61 @@
import python
/** A syntactic node (Class, Function, Module, Expr, Stmt or Comprehension) that may correspond to a flow node */
abstract class AstNode extends AstNode_ {

    /** Gets the scope in which this node occurs */
    abstract Scope getScope();

    /**
     * Gets a flow node corresponding directly to this node.
     * NOTE: Some statements and other purely syntactic elements
     * may have no `ControlFlowNode`, in which case there is no result.
     */
    ControlFlowNode getAFlowNode() {
        py_flow_bb_node(result, this, _, _)
    }

    /** Gets the location of this AST node. This base implementation has no result. */
    Location getLocation() {
        none()
    }

    /**
     * Holds if this syntactic element is artificial, that is, it is generated
     * by the compiler and is not present in the source.
     * This base implementation never holds.
     */
    predicate isArtificial() {
        none()
    }

    /**
     * Gets a child node of this node in the AST. This predicate exists to aid
     * exploration of the AST and other experiments; the child-parent relation
     * may not be meaningful. For a more meaningful relation in terms of
     * dependency use Expr.getASubExpression(), Stmt.getASubStatement(),
     * Stmt.getASubExpression() or Scope.getAStmt().
     */
    abstract AstNode getAChildNode();

    /**
     * Gets the parent node of this node in the AST, i.e. a node that has this
     * node as a child. This predicate exists to aid exploration of the AST and
     * other experiments; the child-parent relation may not be meaningful.
     * For a more meaningful relation in terms of dependency use
     * Expr.getASubExpression(), Stmt.getASubStatement(), Stmt.getASubExpression()
     * or Scope.getAStmt() applied to the parent.
     */
    AstNode getParentNode() {
        this = result.getAChildNode()
    }

    /** Holds if `inner` is a transitive child of this node, i.e. this node contains `inner` syntactically */
    predicate contains(AstNode inner) {
        inner = this.getAChildNode+()
    }

    /** Holds if this node contains `inner` syntactically, `inner` is not itself a `Scope`, and both have the same scope */
    predicate containsInScope(AstNode inner) {
        not inner instanceof Scope and
        inner.getScope() = this.getScope() and
        this.contains(inner)
    }

}
/* Parents */
/** Internal implementation class */
@@ -116,3 +172,59 @@ class StringPartList extends StringPartList_ {
}
/* **** Lists ***/
/** A parameter list */
class ParameterList extends @py_parameter_list {

    /** Gets the function related to this parameter list by `py_parameter_lists` */
    Function getParent() { py_parameter_lists(this, result) }

    /** Gets the nth parameter */
    Parameter getItem(int index) {
        /* Item can be a Name or a Tuple, both of which are expressions */
        py_exprs(result, _, this, index)
    }

    /** Gets a parameter, at any index */
    Parameter getAnItem() { result = this.getItem(_) }

    /** Gets the fixed textual representation "ParameterList" */
    string toString() { result = "ParameterList" }

}
/** A list of Comprehensions (for generating parts of a set, list or dictionary comprehension) */
class ComprehensionList extends ComprehensionList_ {
}
/** A list of expressions */
class ExprList extends ExprList_ {
}
/** A list of dictionary items. NOTE(review): description inferred from the name only; confirm against `DictItemList_`. */
library class DictItemList extends DictItemList_ {
}
/** A parent of a dictionary item list. NOTE(review): description inferred from the name only; confirm against `DictItemListParent_`. */
library class DictItemListParent extends DictItemListParent_ {
}
/** A list of strings (the primitive type string not Bytes or Unicode) */
class StringList extends StringList_ {
}
/** A list of aliases in an import statement */
class AliasList extends AliasList_ {
}

View File

@@ -1,7 +1,5 @@
import python
import semmle.python.flow.NameNode
private import semmle.python.pointsto.PointsTo
private import semmle.python.Pruning
/* Note about matching parent and child nodes and CFG splitting:
*
@@ -33,10 +31,6 @@ private AstNode toAst(ControlFlowNode n) {
*/
class ControlFlowNode extends @py_flow_node {
cached ControlFlowNode() {
Pruner::reachable(this)
}
/** Whether this control flow node is a load (including those in augmented assignments) */
predicate isLoad() {
exists(Expr e | e = toAst(this) | py_expr_contexts(_, 3, e) and not augstore(_, this))
@@ -180,8 +174,7 @@ class ControlFlowNode extends @py_flow_node {
/** Gets a successor of this flow node */
ControlFlowNode getASuccessor() {
py_successors(this, result) and
not Pruner::unreachableEdge(this, result)
py_successors(this, result)
}
/** Gets the immediate dominator of this flow node */
@@ -967,71 +960,147 @@ class RaiseStmtNode extends ControlFlowNode {
}
/** Holds if `def` defines `v`, or if `def` is a successor of a `NameNode` `p`
 * for which `defined_by(p, v)` holds although `p` does not itself define `v`.
 */
private
predicate defined_by(NameNode def, Variable v) {
def.defines(v) or
exists(NameNode p | defined_by(p, v) and p.getASuccessor() = def and not p.defines(v))
}
/** A control flow node corresponding to a (plain variable) name expression, such as `var`.
* `None`, `True` and `False` are excluded.
*/
class NameNode extends ControlFlowNode {
/* Combine extractor-generated basic block after pruning */
private class BasicBlockPart extends @py_flow_node {
string toString() { result = "Basic block part" }
BasicBlockPart() {
py_flow_bb_node(_, _, this, _) and
Pruner::reachable(this)
}
predicate isHead() {
count(this.(ControlFlowNode).getAPredecessor()) != 1
NameNode() {
exists(Name n | py_flow_bb_node(this, n, _, _))
or
exists(ControlFlowNode pred | pred = this.(ControlFlowNode).getAPredecessor() | strictcount(pred.getASuccessor()) > 1)
exists(PlaceHolder p | py_flow_bb_node(this, p, _, _))
}
private BasicBlockPart previous() {
not this.isHead() and
py_flow_bb_node(this.(ControlFlowNode).getAPredecessor(), _, result, _)
/** Whether this flow node defines the variable `v`. */
predicate defines(Variable v) {
exists(Name d | this.getNode() = d and d.defines(v))
and not this.isLoad()
}
BasicBlockPart getHead() {
this.isHead() and result = this
/** Whether this flow node deletes the variable `v`. */
predicate deletes(Variable v) {
exists(Name d | this.getNode() = d and d.deletes(v))
}
/** Whether this flow node uses the variable `v`. */
predicate uses(Variable v) {
this.isLoad() and exists(Name u | this.getNode() = u and u.uses(v))
or
result = this.previous().getHead()
}
predicate isLast() {
not exists(BasicBlockPart part | part.previous() = this)
}
int length() {
result = max(int j | py_flow_bb_node(_, _, this, j)) + 1
}
int startIndex() {
this.isHead() and result = 0
exists(PlaceHolder u | this.getNode() = u and u.getVariable() = v and u.getCtx() instanceof Load)
or
exists(BasicBlockPart prev |
prev = this.previous() and
result = prev.startIndex() + prev.length()
Scopes::use_of_global_variable(this, v.getScope(), v.getId())
}
string getId() {
result = this.getNode().(Name).getId()
or
result = this.getNode().(PlaceHolder).getId()
}
/** Whether this is a use of a local variable. */
predicate isLocal() {
Scopes::local(this)
}
/** Whether this is a use of a non-local variable. */
predicate isNonLocal() {
Scopes::non_local(this)
}
/** Whether this is a use of a global (including builtin) variable. */
predicate isGlobal() {
Scopes::use_of_global_variable(this, _, _)
}
predicate isSelf() {
exists(SsaVariable selfvar |
selfvar.isSelf() and selfvar.getAUse() = this
)
}
predicate contains(ControlFlowNode node) {
py_flow_bb_node(node, _, this, _)
}
/** A control flow node corresponding to a named constant, one of `None`, `True` or `False`. */
class NameConstantNode extends NameNode {
NameConstantNode() {
exists(NameConstant n | py_flow_bb_node(this, n, _, _))
}
int indexOf(ControlFlowNode node) {
py_flow_bb_node(node, _, this, result)
override deprecated predicate defines(Variable v) { none() }
override deprecated predicate deletes(Variable v) { none() }
/* We ought to override uses as well, but that has
* a serious performance impact.
deprecated predicate uses(Variable v) { none() }
*/
}
private module Scopes {
private predicate fast_local(NameNode n) {
exists(FastLocalVariable v |
n.uses(v) and
v.getScope() = n.getScope()
)
}
ControlFlowNode lastNode() {
this.indexOf(result) = max(this.indexOf(_))
predicate local(NameNode n) {
fast_local(n)
or
exists(SsaVariable var |
var.getAUse() = n and
n.getScope() instanceof Class and
exists(var.getDefinition())
)
}
BasicBlockPart getImmediateDominator() {
result.contains(this.(ControlFlowNode).getImmediateDominator())
predicate non_local(NameNode n) {
exists(FastLocalVariable flv |
flv.getALoad() = n.getNode() and
not flv.getScope() = n.getScope()
)
}
// magic is fine, but we get questionable join-ordering of it
pragma [nomagic]
predicate use_of_global_variable(NameNode n, Module scope, string name) {
n.isLoad() and
not non_local(n)
and
not exists(SsaVariable var |
var.getAUse() = n |
var.getVariable() instanceof FastLocalVariable
or
n.getScope() instanceof Class and
not maybe_undefined(var)
)
and name = n.getId()
and scope = n.getEnclosingModule()
}
private predicate maybe_defined(SsaVariable var) {
exists(var.getDefinition()) and not py_ssa_phi(var, _) and not var.getDefinition().isDelete()
or
exists(SsaVariable input |
input = var.getAPhiInput() |
maybe_defined(input)
)
}
private predicate maybe_undefined(SsaVariable var) {
not exists(var.getDefinition()) and not py_ssa_phi(var, _)
or
var.getDefinition().isDelete()
or
maybe_undefined(var.getAPhiInput())
or
exists(BasicBlock incoming |
exists(var.getAPhiInput()) and
incoming.getASuccessor() = var.getDefinition().getBasicBlock() and
not var.getAPhiInput().getDefinition().getBasicBlock().dominates(incoming)
)
}
}
@@ -1040,24 +1109,17 @@ private class BasicBlockPart extends @py_flow_node {
class BasicBlock extends @py_flow_node {
BasicBlock() {
this.(BasicBlockPart).isHead()
}
private BasicBlockPart getAPart() {
result.getHead() = this
py_flow_bb_node(_, _, this, _)
}
/** Whether this basic block contains the specified node */
predicate contains(ControlFlowNode node) {
this.getAPart().contains(node)
py_flow_bb_node(node, _, this, _)
}
/** Gets the nth node in this basic block */
ControlFlowNode getNode(int n) {
exists(BasicBlockPart part |
part = this.getAPart() and
n = part.startIndex() + part.indexOf(result)
)
py_flow_bb_node(result, _, this, n)
}
string toString() {
@@ -1077,7 +1139,7 @@ class BasicBlock extends @py_flow_node {
}
cached BasicBlock getImmediateDominator() {
this.getAPart().getImmediateDominator() = result.getAPart()
this.firstNode().getImmediateDominator().getBasicBlock() = result
}
/** Dominance frontier of a node x is the set of all nodes `other` such that `this` dominates a predecessor
@@ -1093,10 +1155,9 @@ class BasicBlock extends @py_flow_node {
/** Gets the last node in this basic block */
ControlFlowNode getLastNode() {
exists(BasicBlockPart part |
part = this.getAPart() and
part.isLast() and
result = part.lastNode()
exists(int i |
this.getNode(i) = result and
i = max(int j | py_flow_bb_node(_, _, this, j))
)
}

View File

@@ -1,55 +0,0 @@
import python
/** A parameter list */
class ParameterList extends @py_parameter_list {

    /** Gets the function related to this parameter list by `py_parameter_lists` */
    Function getParent() { py_parameter_lists(this, result) }

    /** Gets the nth parameter */
    Parameter getItem(int index) {
        /* Item can be a Name or a Tuple, both of which are expressions */
        py_exprs(result, _, this, index)
    }

    /** Gets a parameter, at any index */
    Parameter getAnItem() { result = this.getItem(_) }

    /** Gets the fixed textual representation "ParameterList" */
    string toString() { result = "ParameterList" }

}
/** A list of Comprehensions (for generating parts of a set, list or dictionary comprehension) */
class ComprehensionList extends ComprehensionList_ {
}
/** A list of expressions */
class ExprList extends ExprList_ {
}
/** A list of dictionary items. NOTE(review): description inferred from the name only; confirm against `DictItemList_`. */
library class DictItemList extends DictItemList_ {
}
/** A parent of a dictionary item list. NOTE(review): description inferred from the name only; confirm against `DictItemListParent_`. */
library class DictItemListParent extends DictItemListParent_ {
}
/** A list of strings (the primitive type string not Bytes or Unicode) */
class StringList extends StringList_ {
}
/** A list of aliases in an import statement */
class AliasList extends AliasList_ {
}

View File

@@ -1,640 +0,0 @@
private import AST
private import Exprs
private import Stmts
private import Import
private import Operations
module Pruner {
/** A control flow node before pruning */
class UnprunedCfgNode extends @py_flow_node {
/** Has no result: unpruned nodes deliberately get no textual representation. */
string toString() { none() }
/** Gets a predecessor of this flow node */
UnprunedCfgNode getAPredecessor() {
py_successors(result, this)
}
/** Gets a successor of this flow node */
UnprunedCfgNode getASuccessor() {
py_successors(this, result)
}
/** Gets the immediate dominator of this flow node */
UnprunedCfgNode getImmediateDominator() {
py_idoms(this, result)
}
/** Holds if this CFG node is a branch, that is, it has a true successor or a false successor */
predicate isBranch() {
py_true_successors(this, _) or py_false_successors(this, _)
}
/** Gets the syntactic element corresponding to this flow node */
AstNode getNode() {
py_flow_bb_node(this, result, _, _)
}
/** Gets the (unpruned) basic block that contains this flow node */
UnprunedBasicBlock getBasicBlock() {
py_flow_bb_node(this, _, result, _)
}
/** Gets a successor for this node if the relevant condition is True. */
UnprunedCfgNode getATrueSuccessor() {
py_true_successors(this, result)
}
/** Gets a successor for this node if the relevant condition is False. */
UnprunedCfgNode getAFalseSuccessor() {
py_false_successors(this, result)
}
}
/** A control flow node (before pruning) corresponding to a comparison operation, such as `x<y` */
class UnprunedCompareNode extends UnprunedCfgNode {
UnprunedCompareNode() {
py_flow_bb_node(this, any(Compare c), _, _)
}
/** Whether left and right are a pair of operands for this comparison.
 * `op` is the operator applied between them. Both operand nodes must lie in
 * basic blocks that dominate the block of this comparison.
 */
predicate operands(UnprunedCfgNode left, Cmpop op, UnprunedCfgNode right) {
exists(Compare c, Expr eleft, Expr eright |
this.getNode() = c and left.getNode() = eleft and right.getNode() = eright |
// First pair: the left-hand expression against comparator 0.
eleft = c.getLeft() and eright = c.getComparator(0) and op = c.getOp(0)
or
// Chained comparison (`a < b < c`): comparator i-1 against comparator i.
exists(int i | eleft = c.getComparator(i-1) and eright = c.getComparator(i) and op = c.getOp(i))
) and
left.getBasicBlock().dominates(this.getBasicBlock()) and
right.getBasicBlock().dominates(this.getBasicBlock())
}
}
/** A control flow node (before pruning) corresponding to a unary not expression: (`not x`) */
class UnprunedNot extends UnprunedCfgNode {
UnprunedNot() {
exists(UnaryExpr unary |
py_flow_bb_node(this, unary, _, _) and
unary.getOp() instanceof Not
)
}
/** Gets the control flow node corresponding to the operand of this `not` expression.
 * The operand node must lie in a basic block that dominates the block of this node.
 */
UnprunedCfgNode getOperand() {
exists(UnaryExpr u | this.getNode() = u and result.getNode() = u.getOperand()) and
result.getBasicBlock().dominates(this.getBasicBlock())
}
}
/** A basic block before pruning */
class UnprunedBasicBlock extends @py_flow_node {
UnprunedBasicBlock() {
py_flow_bb_node(_, _, this, _)
}
/** Whether this basic block contains the specified node */
predicate contains(UnprunedCfgNode node) {
py_flow_bb_node(node, _, this, _)
}
/** Has no result: unpruned basic blocks deliberately get no textual representation. */
string toString() { none() }
/** Whether this basic block strictly dominates the other */
pragma[nomagic] predicate strictlyDominates(UnprunedBasicBlock other) {
other.getImmediateDominator+() = this
}
/** Whether this basic block dominates the other */
pragma[nomagic] predicate dominates(UnprunedBasicBlock other) {
this = other
or
this.strictlyDominates(other)
}
/** Gets the basic block containing the immediate dominator of the first node of this basic block */
UnprunedBasicBlock getImmediateDominator() {
this.first().getImmediateDominator().getBasicBlock() = result
}
/** Gets a successor of this basic block: a block whose first node is a successor of the last node of this block */
UnprunedBasicBlock getASuccessor() {
this.last().getASuccessor() = result.first()
}
/** Gets the first node of this basic block (the node at index 0) */
UnprunedCfgNode first() {
py_flow_bb_node(result, _, this, 0)
}
/** Gets the last node of this basic block (the node at the greatest index) */
UnprunedCfgNode last() {
py_flow_bb_node(result, _, this, max(int i | py_flow_bb_node(_, _, this, i)))
}
/** Gets a successor for this basic block if the relevant condition is True. */
UnprunedBasicBlock getATrueSuccessor() {
this.last().getATrueSuccessor() = result.first()
}
/** Gets a successor for this basic block if the relevant condition is False. */
UnprunedBasicBlock getAFalseSuccessor() {
this.last().getAFalseSuccessor() = result.first()
}
/** Whether this BB is the first in its scope */
predicate isEntry() {
py_scope_flow(this.first(), _, -1)
}
/** Gets a node contained in this basic block */
UnprunedCfgNode getANode() {
py_flow_bb_node(result, _, this, _)
}
}
private import Comparisons
private import SSA
/** Gets the integer value of `lit`, if `lit` is an integer literal or a negative integer literal */
private int intValue(ImmutableLiteral lit) {
    exists(IntegerLiteral positive | positive = lit | result = positive.getValue())
    or
    exists(NegativeIntegerLiteral negative | negative = lit | result = negative.getValue())
}
/** The algebraic type underlying `Constraint`; one branch per kind of constraint. */
newtype TConstraint =
// `bool(var)` is `b`
TTruthy(boolean b) { b = true or b = false }
or
// `(var is None)` is `b`
TIsNone(boolean b) { b = true or b = false }
or
// `var op k`; only materialized for (op, k) pairs that occur in an integer
// test, plus `== k` for every integer `k` that is the value of an assignment.
TConstrainedByConstant(CompareOp op, int k) {
int_test(_, _, op, k)
or
exists(Assign a | intValue(a.getValue()) = k) and op = eq()
}
/** A constraint that may be applied to an SSA variable.
* Used for computing unreachable edges
*/
abstract class Constraint extends TConstraint {
/** Gets a textual representation of this constraint */
abstract string toString();
/** Gets the inverse of this constraint; used for the constraint that holds on the false edge of a test */
abstract Constraint invert();
/** Holds if this constraint constrains the "truthiness" of the variable.
* That is, for a variable `var` constrained by this constraint
* `bool(var) is value`
*/
abstract predicate constrainsVariableToBe(boolean value);
/** Holds if the value constrained by this constraint cannot be `None` */
abstract predicate cannotBeNone();
}
/** A basic block ending in a test (and branch), that is, whose last node is a branch. */
class UnprunedConditionBlock extends UnprunedBasicBlock {
UnprunedConditionBlock() { this.last().isBranch() }
/** Holds if `controlled` is only reachable if the test in this block evaluates to `testIsTrue` */
predicate controls(UnprunedBasicBlock controlled, boolean testIsTrue) {
/* For this block to control the block 'controlled' with 'testIsTrue' the following must be true:
Execution must have passed through the test i.e. 'this' must strictly dominate 'controlled'.
Execution must have passed through the 'testIsTrue' edge leaving 'this'.
Although "passed through the true edge" implies that this.getATrueSuccessor() dominates 'controlled',
the reverse is not true, as flow may have passed through another edge to get to this.getATrueSuccessor()
so we need to assert that this.getATrueSuccessor() dominates 'controlled' *and* that
all predecessors of this.getATrueSuccessor() are either this or dominated by this.getATrueSuccessor().
For example, in the following python snippet:
<code>
if x:
controlled
false_successor
uncontrolled
</code>
false_successor dominates uncontrolled, but not all of its predecessors are this (if x)
or dominated by itself. Whereas in the following code:
<code>
if x:
while controlled:
also_controlled
false_successor
uncontrolled
</code>
the block 'while controlled' is controlled because all of its predecessors are this (if x)
or (in the case of 'also_controlled') dominated by itself.
The additional constraint on the predecessors of the test successor implies
that `this` strictly dominates `controlled` so that isn't necessary to check
directly.
*/
exists(UnprunedBasicBlock succ |
// Pick the successor on the edge matching `testIsTrue`.
testIsTrue = true and succ = this.getATrueSuccessor()
or
testIsTrue = false and succ = this.getAFalseSuccessor()
|
succ.dominates(controlled) and
// Every way into `succ` is either the test edge itself or a back edge from within `succ`'s region.
forall(UnprunedBasicBlock pred | pred.getASuccessor() = succ |
pred = this or succ.dominates(pred)
)
)
}
/** Holds if the edge `pred->succ` is reachable only if the test in this block evaluates to `testIsTrue` */
predicate controlsEdge(UnprunedBasicBlock pred, UnprunedBasicBlock succ, boolean testIsTrue) {
// Any edge leaving a controlled block is controlled.
this.controls(pred, testIsTrue) and succ = pred.getASuccessor()
or
// The test's own outgoing edge for `testIsTrue` is controlled.
pred = this and (
testIsTrue = true and succ = this.getATrueSuccessor()
or
testIsTrue = false and succ = this.getAFalseSuccessor()
)
}
}
/** A constraint on the truthiness of the variable: `bool(var) is True` (truthy) or `bool(var) is False` (falsey) */
class Truthy extends Constraint, TTruthy {

    /** Gets the boolean that `bool(var)` is constrained to be */
    private boolean booleanValue() { this = TTruthy(result) }

    override string toString() {
        this.booleanValue() = true and result = "Truthy"
        or
        this.booleanValue() = false and result = "Falsey"
    }

    /** Gets the truthiness constraint with the opposite boolean value */
    override Constraint invert() {
        exists(boolean b | b = this.booleanValue() | result = TTruthy(b.booleanNot()))
    }

    override predicate constrainsVariableToBe(boolean value) { this = TTruthy(value) }

    /** Holds only for the truthy constraint */
    override predicate cannotBeNone() { this = TTruthy(true) }

}
/** A constraint that the variable is None, `(var is None) is True`, or is not None, `(var is None) is False`.
 * This includes the `is not` operator, `x is not None` being equivalent to `not x is None`.
 */
class IsNone extends Constraint, TIsNone {

    /** Gets the boolean that `var is None` is constrained to be */
    private boolean isNone() { this = TIsNone(result) }

    override string toString() {
        this.isNone() = true and result = "Is None"
        or
        this.isNone() = false and result = "Is not None"
    }

    /** Gets the None-ness constraint with the opposite boolean value */
    override Constraint invert() {
        exists(boolean b | b = this.isNone() | result = TIsNone(b.booleanNot()))
    }

    /** Holds only for `value = false`, and only when the variable is constrained to be None */
    override predicate constrainsVariableToBe(boolean value) {
        this.isNone() = true and value = false
    }

    /** Holds only for the "is not None" constraint */
    override predicate cannotBeNone() { this.isNone() = false }

}
/** A constraint that the variable fulfils some equality or inequality to an integral constant.
* `(var op k) is True` where `op` is an equality or inequality operator and `k` is an integer constant
*/
class ConstrainedByConstant extends Constraint, TConstrainedByConstant {
/** Gets the integer constant `k` of this constraint */
private int intValue() {
this = TConstrainedByConstant(_, result)
}
/** Gets the comparison operator `op` of this constraint */
private CompareOp getOp() {
this = TConstrainedByConstant(result, _)
}
override string toString() {
result = this.getOp().repr() + " " + this.intValue().toString()
}
/** Gets the constraint with the same constant and the inverted operator */
override Constraint invert() {
result = TConstrainedByConstant(this.getOp().invert(), this.intValue())
}
override predicate constrainsVariableToBe(boolean value) {
// `var == 0` is the only case treated as falsey.
this.getOp() = eq() and this.intValue() = 0 and value = false
or
// The cases below all exclude zero from the possible values, so the variable is truthy.
value = true and (
this.getOp() = eq() and this.intValue() != 0
or
this.getOp() = lt() and this.intValue() <= 0
or
this.getOp() = le() and this.intValue() < 0
or
this.getOp() = gt() and this.intValue() >= 0
or
this.getOp() = ge() and this.intValue() > 0
)
}
/** Holds if this constraint is `var == val` */
predicate eq(int val) {
this = TConstrainedByConstant(eq(), val)
}
/** Holds if this constraint is `var != val` */
predicate ne(int val) {
this = TConstrainedByConstant(ne(), val)
}
/** Holds only for equality constraints */
override predicate cannotBeNone() {
this.getOp() = eq()
}
/** The minimum value that a variable fulfilling this constraint may hold
* within the bounds of a signed 32 bit number.
* Has no result for the `ne()` operator.
*/
int minValue() {
this.getOp() = eq() and result = this.intValue()
or
this.getOp() = lt() and result = -2147483648
or
this.getOp() = le() and result = -2147483648
or
this.getOp() = gt() and result = this.intValue()+1
or
this.getOp() = ge() and result = this.intValue()
}
/** The maximum value that a variable fulfilling this constraint may hold
* within the bounds of a signed 32 bit number.
* Has no result for the `ne()` operator.
*/
int maxValue() {
this.getOp() = eq() and result = this.intValue()
or
this.getOp() = gt() and result = 2147483647
or
this.getOp() = ge() and result = 2147483647
or
this.getOp() = lt() and result = this.intValue()-1
or
this.getOp() = le() and result = this.intValue()
}
}
/** Holds if the control flow node `n` is unreachable due to
 * one or more constraints, that is, a basic block containing it is unreachable.
 */
predicate unreachable(UnprunedCfgNode n) {
    unreachableBB(n.getBasicBlock())
}
/** Holds if the control flow node `n` is reachable, that is,
 * a basic block containing it is reachable despite constraints.
 */
predicate reachable(UnprunedCfgNode n) {
    reachableBB(n.getBasicBlock())
}
/** Holds if the basic block `bb` is unreachable due to
* one or more constraints.
* This is exactly the complement of `reachableBB`.
*/
predicate unreachableBB(UnprunedBasicBlock bb) {
not reachableBB(bb)
}
/** Holds if the basic block `bb` is reachable despite
* constraints: it is either the entry block of its scope
* or the target of some reachable edge.
*/
predicate reachableBB(UnprunedBasicBlock bb) {
bb.isEntry() or
reachableEdge(_, bb)
}
/** Gets a constraint on `var` expressed by the expression at `node`.
 * `constraintOnBranch` applies the result on the true edge of a test and its
 * inverse on the false edge.
 */
Constraint constraintFromExpr(SsaVariable var, UnprunedCfgNode node) {
// A plain use of the variable: `var`
py_ssa_use(node, var) and result = TTruthy(true)
or
// `var is None` / `var is not None`
exists(boolean b |
none_test(node, var, b) and result = TIsNone(b)
)
or
// `var op k` for an integer constant `k`
exists(CompareOp op, int k |
int_test(node, var, op, k) and
result = TConstrainedByConstant(op, k)
)
or
// `not <expr>`: the inverse of the operand's constraint
result = constraintFromExpr(var, node.(UnprunedNot).getOperand()).invert()
}
/** Gets a constraint on `var` expressed by `node`, where `node` is a branch (a test) */
Constraint constraintFromTest(SsaVariable var, UnprunedCfgNode node) {
    node.isBranch() and
    result = constraintFromExpr(var, node)
}
/** Holds if `test` compares a use of `var` (as the left operand) with `None` (as the right operand),
 * using `is` (then `is = true`) or `is not` (then `is = false`).
 */
predicate none_test(UnprunedCompareNode test, SsaVariable var, boolean is) {
exists(UnprunedCfgNode left, Cmpop op, UnprunedCfgNode right |
py_ssa_use(left, var) and
test.operands(left, op, right) and
right.getNode() instanceof None
|
op instanceof Is and is = true
or
op instanceof IsNot and is = false
)
}
/** Holds if `test` is a comparison equivalent to `var op k`, where `k` is an integer literal.
 * The variable may be on either side of the comparison; when it is on the right,
 * `op` is the reverse of the operator that appears in the source.
 */
predicate int_test(UnprunedCfgNode test, SsaVariable var, CompareOp op, int k) {
exists(UnprunedCfgNode left, UnprunedCfgNode right, Cmpop cop |
test.(UnprunedCompareNode).operands(left, cop, right)
|
// `var cop k`
op.forOp(cop) and
py_ssa_use(left, var) and
intValue(right.getNode()) = k
or
// `k cop var`
op.reverse().forOp(cop) and
py_ssa_use(right, var) and
intValue(left.getNode()) = k
)
}
/** Holds if `e` is the value of an assignment one of whose targets is an SSA definition,
 * or the operand of a `not` expression that is itself such a value.
 * Restricts the expressions for which `constraintFromValue` is computed.
 */
private predicate constrainingValue(Expr e) {
exists(Assign a, UnprunedCfgNode asgn |
a.getValue() = e and a.getATarget() = asgn.getNode() and py_ssa_defn(_, asgn)
)
or
exists(UnaryExpr n | constrainingValue(n) and n.getOp() instanceof Not and e = n.getOperand())
}
/** Gets the constraint that a variable fulfils after being assigned the value `e`,
 * for the values `constrainingValue` selects.
 */
private Constraint constraintFromValue(Expr e) {
constrainingValue(e) and
(
// An integer literal `k`: the variable equals `k`.
result = TConstrainedByConstant(eq(), intValue(e))
or
e instanceof True and result = TTruthy(true)
or
e instanceof False and result = TTruthy(false)
or
e instanceof None and result = TIsNone(true)
or
// A unary expression: the inverse of the operand's constraint.
// The operand only has a constraint when `constrainingValue` holds for it, i.e. for `not`.
result = constraintFromValue(e.(UnaryExpr).getOperand()).invert()
)
}
/** Gets the constraint on `var` resulting from the assignment in `asgn`.
 * A variable defined by a module import is treated as truthy.
 */
Constraint constraintFromAssignment(SsaVariable var, UnprunedCfgNode asgn) {
exists(Assign a |
a.getATarget() = asgn.getNode() and
py_ssa_defn(var, asgn) and
result = constraintFromValue(a.getValue())
)
or
module_import(asgn, var) and result = TTruthy(true)
}
/** Holds if the constraint `preval` holds for `var` on edge `pred` -> `succ` as a result of a prior test or assignment.
 * Truthiness constraints are ignored for blacklisted (possibly mutated) variables,
 * and no constraint is derived for variables that escape.
 */
pragma [nomagic]
predicate priorConstraint(UnprunedBasicBlock pred, UnprunedBasicBlock succ, Constraint preval, SsaVariable var) {
not (blacklisted(var) and preval = TTruthy(_))
and
not var.getVariable().escapes()
and
exists(UnprunedBasicBlock first |
// An earlier test (in a different block) whose true edge controls this edge.
not first = pred and
first.(UnprunedConditionBlock).controlsEdge(pred, succ, true) and
preval = constraintFromTest(var, first.last())
or
// An earlier test (in a different block) whose false edge controls this edge.
not first = pred and
first.(UnprunedConditionBlock).controlsEdge(pred, succ, false) and
preval = constraintFromTest(var, first.last()).invert()
or
// An assignment in a dominating block (possibly `pred` itself); only conditional edges are considered.
preval = constraintFromAssignment(var, first.getANode()) and
first.dominates(pred) and
(succ = pred.getAFalseSuccessor() or succ = pred.getATrueSuccessor())
)
}
/** Holds if `cond` holds for `var` on conditional edge `pred` -> `succ` as a result of the test for that edge.
 * The constraint from the test applies on the true edge; its inverse applies on the false edge.
 */
pragma [nomagic]
predicate constraintOnBranch(UnprunedBasicBlock pred, UnprunedBasicBlock succ, Constraint cond, SsaVariable var) {
cond = constraintFromTest(var, pred.last()) and
succ = pred.getATrueSuccessor()
or
cond = constraintFromTest(var, pred.last()).invert() and
succ = pred.getAFalseSuccessor()
}
/** Holds if the pair of constraints (`preval`, `postcond`) holds on the edge `pred` -> `succ` for some SSA variable.
 * `preval` comes from a prior test or assignment and `postcond` from the test on the edge itself.
 */
predicate controllingConditions(UnprunedBasicBlock pred, UnprunedBasicBlock succ, Constraint preval, Constraint postcond) {
exists(SsaVariable var |
priorConstraint(pred, succ, preval, var) and
constraintOnBranch(pred, succ, postcond, var)
)
}
/** Holds if the edge `pred` -> `succ` should be pruned as it cannot be reached.
 * `pred` is the last node of a basic block and `succ` the first node of a
 * successor block, where the edge between the two blocks is not reachable.
 */
predicate unreachableEdge(UnprunedCfgNode pred, UnprunedCfgNode succ) {
exists(UnprunedBasicBlock predBB, UnprunedBasicBlock succBB |
succBB = predBB.getASuccessor() and
not reachableEdge(predBB, succBB) and
pred = predBB.last() and succ = succBB.first()
)
}
/** Holds if the edge `pred` -> `succ` is reachable as a result of
* `pred` being reachable and this edge not being pruned.
* An edge is pruned if it is contradictory (see `contradictoryEdge`)
* or simply dead (see `simplyDead`). */
predicate reachableEdge(UnprunedBasicBlock pred, UnprunedBasicBlock succ) {
reachableBB(pred) and succ = pred.getASuccessor() and
not contradictoryEdge(pred, succ) and
not simplyDead(pred, succ)
}
/** Holds if, for some variable, the prior constraint on the edge `pred` -> `succ`
 * contradicts the constraint imposed by the test for that edge.
 */
predicate contradictoryEdge(UnprunedBasicBlock pred, UnprunedBasicBlock succ) {
exists(Constraint pre, Constraint cond |
controllingConditions(pred, succ, pre, cond) and
contradicts(pre, cond)
)
}
/* Helper for `contradictoryEdge`, deal with inequalities here to avoid blow up.
 * Holds if the constraints `a` and `b` cannot both hold for the same variable.
 */
pragma [inline]
private predicate contradicts(Constraint a, Constraint b) {
// One says "is None", the other rules None out.
a = TIsNone(true) and b.cannotBeNone()
or
a.cannotBeNone() and b = TIsNone(true)
or
// Opposite truthiness.
a.constrainsVariableToBe(true) and b.constrainsVariableToBe(false)
or
a.constrainsVariableToBe(false) and b.constrainsVariableToBe(true)
or
// Disjoint integer ranges.
a.(ConstrainedByConstant).minValue() > b.(ConstrainedByConstant).maxValue()
or
a.(ConstrainedByConstant).maxValue() < b.(ConstrainedByConstant).minValue()
or
// `== val` against `!= val`.
exists(int val |
a.(ConstrainedByConstant).eq(val) and b.(ConstrainedByConstant).ne(val)
or
a.(ConstrainedByConstant).ne(val) and b.(ConstrainedByConstant).eq(val)
)
}
/** Holds if edge is simply dead. Stuff like `if False: ...`
 * That is, the test ending `pred` is a constant and `succ` is the successor
 * for the opposite outcome.
 */
predicate simplyDead(UnprunedBasicBlock pred, UnprunedBasicBlock succ) {
constTest(pred.last()) = true and pred.getAFalseSuccessor() = succ
or
constTest(pred.last()) = false and pred.getATrueSuccessor() = succ
}
/* Helper for simplyDead.
 * Gets the boolean value of `node` if it is an immutable literal,
 * or a `not` applied (possibly repeatedly) to one.
 */
private boolean constTest(UnprunedCfgNode node) {
exists(ImmutableLiteral lit |
result = lit.booleanValue() and lit = node.getNode()
)
or
result = constTest(node.(UnprunedNot).getOperand()).booleanNot()
}
/** Holds if `var` is blacklisted as having possibly been mutated,
 * that is, it is possibly mutated and not whitelisted.
 */
predicate blacklisted(SsaVariable var) {
possibly_mutated(var) and not whitelisted(var)
}
/** Holds if some use of `var` is the object of a subscript or of an attribute access */
predicate possibly_mutated(SsaVariable var) {
    exists(UnprunedCfgNode use, Expr object |
        py_ssa_use(use, var) and
        object = use.getNode()
    |
        object = any(Subscript subscr).getObject()
        or
        object = any(Attribute attr).getObject()
    )
}
/** If SSA variable is defined by an import, then it should
* be whitelisted as taking an attribute cannot change its
* truthiness.
* Holds if `var` is defined by a module import.
*/
predicate whitelisted(SsaVariable var) {
module_import(_, var)
}
/* Holds if `asgn` is the definition of `var` and is the `as` name of an
 * alias whose value is an import expression. */
private predicate module_import(UnprunedCfgNode asgn, SsaVariable var) {
    py_ssa_defn(var, asgn) and
    exists(Alias imported |
        imported.getAsname() = asgn.getNode() and
        imported.getValue() instanceof ImportExpr
    )
}
}

View File

@@ -95,6 +95,18 @@ class Assign extends Assign_ {
}
}
/** An assignment statement: an `Assign` that is neither a `FunctionDef`
 * nor a `ClassDef`. */
class AssignStmt extends Assign {

    AssignStmt() {
        not (this instanceof FunctionDef or this instanceof ClassDef)
    }

    /** Gets the fixed textual representation "AssignStmt" */
    override string toString() {
        result = "AssignStmt"
    }

}
/** An augmented assignment statement, such as `x += y` */
class AugAssign extends AugAssign_ {

View File

@@ -1,143 +0,0 @@
import python
private import semmle.python.pointsto.Base
/** A control flow node corresponding to a (plain variable) name expression, such as `var`.
 * `None`, `True` and `False` are excluded.
 * Placeholders (`PlaceHolder`) are included as well as `Name`s.
 * NOTE(review): `NameConstantNode` extends this class, so the exclusion of
 * `None`, `True` and `False` stated above may not hold - confirm.
 */
class NameNode extends ControlFlowNode {
NameNode() {
exists(Name n | py_flow_bb_node(this, n, _, _))
or
exists(PlaceHolder p | py_flow_bb_node(this, p, _, _))
}
/** Whether this flow node defines the variable `v`. */
predicate defines(Variable v) {
exists(Name d | this.getNode() = d and d.defines(v))
and not this.isLoad()
}
/** Whether this flow node deletes the variable `v`. */
predicate deletes(Variable v) {
exists(Name d | this.getNode() = d and d.deletes(v))
}
/** Whether this flow node uses the variable `v`. */
predicate uses(Variable v) {
this.isLoad() and exists(Name u | this.getNode() = u and u.uses(v))
or
exists(PlaceHolder u | this.getNode() = u and u.getVariable() = v and u.getCtx() instanceof Load)
or
// A use of a global variable, matched on the scope and identifier of `v`.
use_of_global_variable(this, v.getScope(), v.getId())
}
/** Gets the identifier of the underlying `Name` or `PlaceHolder`. */
string getId() {
result = this.getNode().(Name).getId()
or
result = this.getNode().(PlaceHolder).getId()
}
/** Whether this is a use of a local variable. */
predicate isLocal() {
local(this)
}
/** Whether this is a use of a non-local variable. */
predicate isNonLocal() {
non_local(this)
}
/** Whether this is a use of a global (including builtin) variable. */
predicate isGlobal() {
use_of_global_variable(this, _, _)
}
/** Whether this is a use of an SSA variable for which `isSelf()` holds. */
predicate isSelf() {
exists(SsaVariable selfvar |
selfvar.isSelf() and selfvar.getAUse() = this
)
}
}
/* Holds if `n` uses a fast local variable that belongs to the same scope as `n`. */
private predicate fast_local(NameNode n) {
    exists(FastLocalVariable fast |
        fast.getScope() = n.getScope() and
        n.uses(fast)
    )
}
/* Holds if `n` is a use of a local variable: either a fast local of its own
 * scope, or, within a class scope, a use of an SSA variable that has a
 * definition. */
private predicate local(NameNode n) {
    exists(SsaVariable ssa |
        n.getScope() instanceof Class and
        exists(ssa.getDefinition()) and
        ssa.getAUse() = n
    )
    or
    fast_local(n)
}
/* Holds if the node of `n` is a load of a fast local variable whose scope is
 * not the scope of `n`. */
private predicate non_local(NameNode n) {
    exists(FastLocalVariable outer |
        not outer.getScope() = n.getScope() and
        outer.getALoad() = n.getNode()
    )
}
// magic is fine, but we get questionable join-ordering of it
/* Holds if `n` is a load of the identifier `name` in module `scope` that is
 * treated as a use of a global variable: it is not a `non_local` use, and no
 * SSA variable used at `n` is a fast local or (when `n` is in a class scope)
 * a variable that is not `maybe_undefined`.
 */
pragma [nomagic]
private predicate use_of_global_variable(NameNode n, Module scope, string name) {
n.isLoad() and
not non_local(n)
and
// Rule out uses that resolve to a fast local or to a class-level variable
// that is not `maybe_undefined`.
not exists(SsaVariable var |
var.getAUse() = n |
var.getVariable() instanceof FastLocalVariable
or
n.getScope() instanceof Class and
not maybe_undefined(var)
)
and name = n.getId()
and scope = n.getEnclosingModule()
}
/* Holds if `var` may be defined: it has a definition that is neither a phi
 * node nor a deletion, or one of its phi inputs may be defined. */
private predicate maybe_defined(SsaVariable var) {
    maybe_defined(var.getAPhiInput())
    or
    not py_ssa_phi(var, _) and
    exists(var.getDefinition()) and
    not var.getDefinition().isDelete()
}
/* Holds if `var` may be undefined at its uses. */
private predicate maybe_undefined(SsaVariable var) {
// No definition at all, and not a phi node.
not exists(var.getDefinition()) and not py_ssa_phi(var, _)
or
// The definition is a deletion.
var.getDefinition().isDelete()
or
// Some phi input may itself be undefined.
maybe_undefined(var.getAPhiInput())
or
// `var` has a phi input, and some predecessor block of its definition is
// not dominated by the defining block of any of the phi inputs.
exists(BasicBlock incoming |
exists(var.getAPhiInput()) and
incoming.getASuccessor() = var.getDefinition().getBasicBlock() and
not var.getAPhiInput().getDefinition().getBasicBlock().dominates(incoming)
)
}
/** A control flow node corresponding to a named constant, one of `None`, `True` or `False`. */
class NameConstantNode extends NameNode {
NameConstantNode() {
exists(NameConstant n | py_flow_bb_node(this, n, _, _))
}
// Named constants never define or delete a variable, so both predicates are empty.
override deprecated predicate defines(Variable v) { none() }
override deprecated predicate deletes(Variable v) { none() }
/* We ought to override uses as well, but that has
* a serious performance impact.
deprecated predicate uses(Variable v) { none() }
*/
}

View File

@@ -97,6 +97,12 @@ abstract class ClassObjectInternal extends ObjectInternal {
/* Classes aren't usually iterable, but they can be (e.g. Enums), so the
 * result is the unknown object rather than nothing. */
override ObjectInternal getIterNext() { result = ObjectInternal::unknown() }
/** Holds if this class has the attribute `name`, either because one of its
 * bases has it or because its own class declaration declares it. */
override predicate hasAttribute(string name) {
    Types::getBase(this, _).hasAttribute(name)
    or
    this.getClassDeclaration().declaresAttribute(name)
}
}
/** Class representing Python source classes */

View File

@@ -78,6 +78,7 @@ abstract class ConstantObjectInternal extends ObjectInternal {
}
pragma[nomagic]
private boolean callToBool(CallNode call, PointsToContext context) {
PointsToInternal::pointsTo(call.getFunction(), context, ClassValue::bool(), _) and
exists(ObjectInternal arg |

View File

@@ -59,6 +59,18 @@ abstract class ModuleObjectInternal extends ObjectInternal {
/* Modules aren't iterable, so iterating one never yields a value. */
override ObjectInternal getIterNext() { none() }
/** Holds if this module "exports" `name`.
 * That is, either `py_exports` records `name` for the source module, or the
 * module has no `__all__` attribute and `name` is an attribute of the module
 * that does not start with "_".
 * This is the set of names imported by `from ... import *`.
 */
predicate exports(string name) {
    py_exports(this.getSourceModule(), name)
    or
    (
        not this.attribute("__all__", _, _) and
        not name.charAt(0) = "_" and
        this.hasAttribute(name)
    )
}
}
/** A class representing built-in modules */
@@ -209,6 +221,13 @@ class PackageObjectInternal extends ModuleObjectInternal, TPackageObject {
)
}
/** Holds if this package has the attribute `name`: either `name` is a
 * submodule of the package, or its init module has that attribute. */
override predicate hasAttribute(string name) {
    exists(this.submodule(name))
    or
    this.getInitModule().hasAttribute(name)
}
}
/** A class representing Python modules */
@@ -261,6 +280,24 @@ class PythonModuleObjectInternal extends ModuleObjectInternal, TPythonModule {
result = this.getSourceModule().getEntryNode()
}
/** Holds if this module has the attribute `name` */
override predicate hasAttribute(string name) {
    // Treated as always present.
    name = "__name__"
    or
    // Defined by the module's own import-time scope.
    this.getSourceModule().(ImportTimeScope).definesName(name)
    or
    // Exported by a module that an `ImportStarNode` in this module's scope points to.
    exists(ImportStarNode star, ModuleObjectInternal imported |
        star.getScope() = this.getSourceModule() and
        PointsToInternal::pointsTo(star, _, imported, _) and
        imported.exports(name)
    )
    or
    // An attribute whose value is not the "undefined" object.
    exists(ObjectInternal attr |
        not attr instanceof UndefinedInternal and
        this.attribute(name, attr, _)
    )
}
}
/** A class representing a module that is missing from the DB, but inferred to exist from imports. */

View File

@@ -94,6 +94,11 @@ class Value extends TObject {
result = this.(ObjectInternal).getName()
}
/** Holds if this value has the attribute `name`.
 * Delegates to `ObjectInternal.hasAttribute`.
 */
predicate hasAttribute(string name) {
this.(ObjectInternal).hasAttribute(name)
}
}
/** Class representing modules in the Python program
@@ -111,10 +116,7 @@ class ModuleValue extends Value {
* This is the set of names imported by `from ... import *`.
*/
predicate exports(string name) {
    // Delegate to the cached points-to predicate. The earlier inline
    // computation (`__all__` check plus `py_exports`) is superseded by it and
    // must not be left alongside the delegation.
    PointsTo::moduleExports(this, name)
}
/** Gets the scope for this module, provided that it is a Python module. */

View File

@@ -182,6 +182,11 @@ class ObjectInternal extends TObject {
*/
abstract ObjectInternal getIterNext();
/** Holds if this object has the attribute `name`, that is, `attribute`
 * holds for `name` with some value and origin. */
predicate hasAttribute(string name) {
    this.attribute(name, _, _)
}
}

View File

@@ -166,6 +166,10 @@ module PointsTo {
)
}
/** Holds if `InterModulePointsTo::moduleExportsBoolean(mod, name)` is `true`. */
cached predicate moduleExports(ModuleObjectInternal mod, string name) {
InterModulePointsTo::moduleExportsBoolean(mod, name) = true
}
}
cached module PointsToInternal {
@@ -1244,39 +1248,63 @@ module Expressions {
index = subscr.getIndex()
}
/** Tracking too many binary expressions is likely to kill performance, so just
 * say that the result of anything other than addition or bitwise or is 'unknown'.
 * Addition and bitwise or are handled by `addPointsTo` and `bitOrPointsTo`.
 */
pragma [noinline]
predicate binaryPointsTo(BinaryExprNode b, PointsToContext context, ObjectInternal value, ControlFlowNode origin, ControlFlowNode operand, ObjectInternal opvalue) {
    origin = b and
    operand = genericBinaryOperand(b) and
    PointsToInternal::pointsTo(operand, context, opvalue, _) and
    value = ObjectInternal::unknown()
}

/** Gets either operand of `b`, provided that the operator of `b` is neither
 * bitwise or nor addition. */
private ControlFlowNode genericBinaryOperand(BinaryExprNode b) {
    exists(Operator op |
        b.operands(result, op, _)
        or
        b.operands(_, op, result)
        |
        not op instanceof BitOr and
        not op instanceof Add
    )
}
/** Holds if `b` is an addition with `operand` as one of its operands, where
 * `operand` points to `opvalue` in `context`; `value` is then an unknown
 * instance of the class of `opvalue` and `origin` is `b` itself. */
pragma [noinline]
predicate addPointsTo(BinaryExprNode b, PointsToContext context, ObjectInternal value, ControlFlowNode origin, ControlFlowNode operand, ObjectInternal opvalue) {
    exists(Operator add |
        add instanceof Add
        |
        (
            b.operands(_, add, operand)
            or
            b.operands(operand, add, _)
        ) and
        value = TUnknownInstance(opvalue.getClass()) and
        PointsToInternal::pointsTo(operand, context, opvalue, _)
    ) and
    origin = b
}
/** Holds if `b` is a bitwise or with `operand` as one of its operands, both
 * operands point to objects with integer values in `context`, and `value` is
 * the integer object for the bitwise or of those two values. `opvalue` is the
 * object that `operand` points to and `origin` is `b` itself.
 */
pragma [noinline]
predicate bitOrPointsTo(BinaryExprNode b, PointsToContext context, ObjectInternal value, ControlFlowNode origin, ControlFlowNode operand, ObjectInternal opvalue) {
    origin = b and
    exists(Operator op, ControlFlowNode other |
        // `operand` may be on either side; `other` is the opposite operand.
        b.operands(operand, op, other)
        or
        b.operands(other, op, operand)
        |
        op instanceof BitOr and
        exists(ObjectInternal obj, int i1, int i2 |
            pointsToInt(operand, context, opvalue, i1) and
            pointsToInt(other, context, obj, i2) and
            value = TInt(i1.bitOr(i2))
        )
    )
}
/** Holds if `n` points to `obj` in `context` and `obj` has the integer value `value`. */
predicate pointsToInt(ControlFlowNode n, PointsToContext context, ObjectInternal obj, int value) {
    value = obj.intValue() and
    PointsToInternal::pointsTo(n, context, obj, _)
}
pragma [noinline]
predicate unaryPointsTo(UnaryExprNode u, PointsToContext context, ObjectInternal value, ControlFlowNode origin, ControlFlowNode operand, ObjectInternal opvalue) {
exists(Unaryop op |
@@ -1518,6 +1546,10 @@ module Expressions {
or
subscriptPointsTo(expr, context, value, origin, subexpr, subvalue)
or
addPointsTo(expr, context, value, origin, subexpr, subvalue)
or
bitOrPointsTo(expr, context, value, origin, subexpr, subvalue)
or
binaryPointsTo(expr, context, value, origin, subexpr, subvalue)
or
unaryPointsTo(expr, context, value, origin, subexpr, subvalue)

View File

@@ -151,7 +151,7 @@ class ClassObject extends Object {
/** Whether this class has an attribute named `name`, either declared or inherited. */
predicate hasAttribute(string name) {
    // Delegate to the class value. The earlier explicit walk over the MRO is
    // superseded by this call and must not be left alongside it.
    theClass().hasAttribute(name)
}
/** Whether it is impossible to know all the attributes of this class. Usually because it is

View File

@@ -128,6 +128,20 @@ class BottleRoutePointToExtension extends PointsToExtension {
/* Python 3.6+ regex module constants */
/** Gets the one-letter name corresponding to the long flag name `flag`,
 * for example "I" for "IGNORECASE" or "S" for "DOTALL". */
string short_flag(string flag) {
    flag = "ASCII" and result = "A"
    or
    flag = "IGNORECASE" and result = "I"
    or
    flag = "LOCALE" and result = "L"
    or
    flag = "UNICODE" and result = "U"
    or
    flag = "MULTILINE" and result = "M"
    or
    flag = "TEMPLATE" and result = "T"
    or
    // These two do not abbreviate to their first letter.
    flag = "DOTALL" and result = "S"
    or
    flag = "VERBOSE" and result = "X"
}
class ReModulePointToExtension extends PointsToExtension {
string name;
@@ -139,9 +153,10 @@ class ReModulePointToExtension extends PointsToExtension {
}
override predicate pointsTo(Context context, ObjectInternal value, ControlFlowNode origin) {
exists(ModuleObjectInternal sre_constants, CfgOrigin orig |
exists(ModuleObjectInternal sre_constants, CfgOrigin orig, string flag |
(name = flag or name = short_flag(flag)) and
sre_constants.getName() = "sre_constants" and
sre_constants.attribute("SRE_FLAG_" + name, value, orig) and
sre_constants.attribute("SRE_FLAG_" + flag, value, orig) and
origin = orig.asCfgNodeOrHere(this)
)
and pointsTo_helper(context)

View File

@@ -51,9 +51,8 @@ abstract class ModuleObject extends Object {
result = this.getAttribute(name)
}
/** Holds if this module has the attribute `name`. */
predicate hasAttribute(string name) {
    // Delegate to the module value. The earlier `exists(theModule().attr(name))`
    // form is superseded by this call and must not be left alongside it.
    theModule().hasAttribute(name)
}
predicate attributeRefersTo(string name, Object obj, ControlFlowNode origin) {