Python: Dataflow: Remove IterationDefinition ESSA definition and add iteration assignment to ESSA assignment definition.

Enhance points-to and taint-tracking to add an operational step from the sequence to `next(iter(seq))` in a `for` statement.
This commit is contained in:
Mark Shannon
2019-06-21 09:45:50 +01:00
parent 927d72414b
commit 9d6df78d44
32 changed files with 394 additions and 66 deletions

View File

@@ -741,6 +741,8 @@ class DefinitionNode extends ControlFlowNode {
exists(Assign a | a.getATarget().(Tuple).getAnElt().getAFlowNode() = this)
or
exists(Assign a | a.getATarget().(List).getAnElt().getAFlowNode() = this)
or
exists(For for | for.getTarget().getAFlowNode() = this)
}
/** flow node corresponding to the value assigned for the definition corresponding to this flow node */
@@ -860,7 +862,7 @@ class DictNode extends ControlFlowNode {
}
private Expr assigned_value(Expr lhs) {
private AstNode assigned_value(Expr lhs) {
/* lhs = result */
exists(Assign a | a.getATarget() = lhs and result = a.getValue())
or
@@ -877,6 +879,8 @@ private Expr assigned_value(Expr lhs) {
lhs = target.getElt(index) and
result = values.getElt(index)
)
or
result.(For).getTarget() = lhs
}
/** A flow node for a `for` statement. */

View File

@@ -58,12 +58,10 @@ abstract class PythonSsaSourceVariable extends SsaSourceVariable {
or
SsaSource::assignment_definition(this, def, _)
or
SsaSource::multi_assignment_definition(this, def)
SsaSource::multi_assignment_definition(this, def, _, _)
or
SsaSource::deletion_definition(this, def)
or
SsaSource::iteration_defined_variable(this, def, _)
or
SsaSource::init_module_submodule_defn(this, def)
or
SsaSource::parameter_definition(this, def)
@@ -381,10 +379,11 @@ cached module SsaSource {
}
/** Holds if `v` is defined by multiple assignment at `defn`. */
cached predicate multi_assignment_definition(Variable v, ControlFlowNode defn) {
cached predicate multi_assignment_definition(Variable v, ControlFlowNode defn, int n, SequenceNode lhs) {
defn.(NameNode).defines(v) and
not exists(defn.(DefinitionNode).getValue()) and
exists(SequenceNode s | s.getAnElement() = defn)
lhs.getElement(n) = defn and
lhs.getBasicBlock().dominates(defn.getBasicBlock())
}
/** Holds if `v` is defined by a `for` statement, the definition being `defn` */

View File

@@ -48,6 +48,9 @@ abstract class CallableObjectInternal extends ObjectInternal {
override string strValue() { none() }
/* Callables aren't iterable */
override ObjectInternal getIterNext() { none() }
}
/** Class representing Python functions */

View File

@@ -89,6 +89,10 @@ abstract class ClassObjectInternal extends ObjectInternal {
}
override predicate subscriptUnknown() { none() }
/* Classes aren't usually iterable, but some can be, e.g. Enums */
override ObjectInternal getIterNext() { result = ObjectInternal::unknown() }
}
/** Class representing Python source classes */

View File

@@ -83,6 +83,9 @@ private abstract class BooleanObjectInternal extends ConstantObjectInternal {
none()
}
/* Booleans aren't iterable */
override ObjectInternal getIterNext() { none() }
}
private class TrueObjectInternal extends BooleanObjectInternal, TTrue {
@@ -165,6 +168,9 @@ private class NoneObjectInternal extends ConstantObjectInternal, TNone {
override int length() { none() }
/* None isn't iterable */
override ObjectInternal getIterNext() { none() }
}
@@ -203,6 +209,9 @@ private class IntObjectInternal extends ConstantObjectInternal, TInt {
override int length() { none() }
/* ints aren't iterable */
override ObjectInternal getIterNext() { none() }
}
private class FloatObjectInternal extends ConstantObjectInternal, TFloat {
@@ -248,6 +257,9 @@ private class FloatObjectInternal extends ConstantObjectInternal, TFloat {
override int length() { none() }
/* floats aren't iterable */
override ObjectInternal getIterNext() { none() }
}
@@ -290,6 +302,10 @@ private class UnicodeObjectInternal extends ConstantObjectInternal, TUnicode {
result = this.strValue().length()
}
override ObjectInternal getIterNext() {
result = TUnknownInstance(this.getClass())
}
}
private class BytesObjectInternal extends ConstantObjectInternal, TBytes {
@@ -331,6 +347,10 @@ private class BytesObjectInternal extends ConstantObjectInternal, TBytes {
result = this.strValue().length()
}
override ObjectInternal getIterNext() {
result = TUnknownInstance(this.getClass())
}
}

View File

@@ -91,6 +91,9 @@ class PropertyInternal extends ObjectInternal, TProperty {
)
}
/* Properties aren't iterable */
override ObjectInternal getIterNext() { none() }
}
/** A class representing classmethods in Python */
@@ -176,6 +179,9 @@ class ClassMethodObjectInternal extends ObjectInternal, TClassMethod {
result = this.getFunction().getName()
}
/* Classmethods aren't iterable */
override ObjectInternal getIterNext() { none() }
}
class StaticMethodObjectInternal extends ObjectInternal, TStaticMethod {
@@ -247,4 +253,7 @@ class StaticMethodObjectInternal extends ObjectInternal, TStaticMethod {
result = this.getFunction().getName()
}
/* Staticmethods aren't iterable */
override ObjectInternal getIterNext() { none() }
}

View File

@@ -51,6 +51,8 @@ abstract class InstanceObject extends ObjectInternal {
override string getName() { none() }
override ObjectInternal getIterNext() { result = ObjectInternal::unknown() }
}
private predicate self_variable_reaching_init_exit(EssaVariable self) {
@@ -366,6 +368,8 @@ class UnknownInstanceInternal extends TUnknownInstance, ObjectInternal {
override string getName() { none() }
override ObjectInternal getIterNext() { result = ObjectInternal::unknown() }
}
private int lengthFromClass(ClassObjectInternal cls) {
@@ -472,5 +476,7 @@ class SuperInstance extends TSuperInstance, ObjectInternal {
override string getName() { none() }
override ObjectInternal getIterNext() { result = ObjectInternal::unknown() }
}

View File

@@ -52,6 +52,9 @@ abstract class ModuleObjectInternal extends ObjectInternal {
any(PackageObjectInternal package).getInitModule() = this
}
/* Modules aren't iterable */
override ObjectInternal getIterNext() { none() }
}
/** A class representing built-in modules */
@@ -408,5 +411,8 @@ class AbsentModuleAttributeObjectInternal extends ObjectInternal, TAbsentModuleA
/* We know what this is called, but not its innate name */
override string getName() { none() }
/* Modules aren't iterable */
override ObjectInternal getIterNext() { none() }
}

View File

@@ -167,6 +167,12 @@ class ObjectInternal extends TObject {
*/
abstract string getName();
/** Gets the 'object' resulting from iterating over this object.
* Used in the context `for i in this:`. The result is the 'object'
* assigned to `i`.
*/
abstract ObjectInternal getIterNext();
}
@@ -249,6 +255,9 @@ class BuiltinOpaqueObjectInternal extends ObjectInternal, TBuiltinOpaqueObject {
override string getName() {
result = this.getBuiltin().getName()
}
override ObjectInternal getIterNext() { result = ObjectInternal::unknown() }
}
@@ -326,6 +335,8 @@ class UnknownInternal extends ObjectInternal, TUnknown {
override string getName() { none() }
override ObjectInternal getIterNext() { result = ObjectInternal::unknown() }
}
class UndefinedInternal extends ObjectInternal, TUndefined {
@@ -404,6 +415,8 @@ class UndefinedInternal extends ObjectInternal, TUndefined {
override string getName() { none() }
override ObjectInternal getIterNext() { none() }
}
module ObjectInternal {

View File

@@ -32,6 +32,8 @@ abstract class SequenceObjectInternal extends ObjectInternal {
override string getName() { none() }
override ObjectInternal getIterNext() { result = this.getItem(_) }
}
abstract class TupleObjectInternal extends SequenceObjectInternal {

View File

@@ -234,13 +234,21 @@ class ExceptionCapture extends PyNodeDefinition {
class MultiAssignmentDefinition extends PyNodeDefinition {
MultiAssignmentDefinition() {
SsaSource::multi_assignment_definition(this.getSourceVariable(), this.getDefiningNode())
SsaSource::multi_assignment_definition(this.getSourceVariable(), this.getDefiningNode(), _, _)
}
override string getRepresentation() {
result = "..."
exists(ControlFlowNode value, int n |
this.indexOf(n, value) and
result = value.(DefinitionNode).getValue().getNode().toString() + "[" + n + "]"
)
}
predicate indexOf(int index, SequenceNode lhs) {
SsaSource::multi_assignment_definition(this.getSourceVariable(), this.getDefiningNode(), index, lhs)
}
}
@@ -294,21 +302,27 @@ class ParameterDefinition extends PyNodeDefinition {
}
/** A definition of a variable in a for loop `for v in ...:` */
class IterationDefinition extends PyNodeDefinition {
ControlFlowNode sequence;
private newtype TIterationDefinition =
TIterationDefinition_(SsaSourceVariable var, ControlFlowNode def, ControlFlowNode sequence) {
SsaSource::iteration_defined_variable(var, def, sequence)
}
IterationDefinition() {
SsaSource::iteration_defined_variable(this.getSourceVariable(), this.getDefiningNode(), sequence)
/** DEPRECATED. For backwards compatibility only.
* A definition of a variable in a for loop `for v in ...:` */
deprecated class IterationDefinition extends TIterationDefinition {
string toString() {
result = "IterationDefinition"
}
ControlFlowNode getSequence() {
result = sequence
this = TIterationDefinition_(_, _, result)
}
}
/** A deletion of a variable `del v` */
class DeletionDefinition extends PyNodeDefinition {

View File

@@ -208,6 +208,8 @@ cached module PointsToInternal {
AttributePointsTo::pointsTo(f, context, value, origin)
or
f.(PointsToExtension).pointsTo(context, value, origin)
or
iteration_points_to(f, context, value, origin)
}
/** Holds if the attribute `name` is required for `obj`
@@ -368,6 +370,20 @@ cached module PointsToInternal {
//)
}
/* Treat `ForNode` as an intermediate step between the sequence and the iteration variable.
* In other words, treat `for i in x:` as being equivalent to `i = next(iter(x))`,
* attaching the value of `next(iter(x))` to the `ForNode`.
*
* Holds if `for` iterates over a node that points to some object `seq` in `context`,
* `value` is `seq.getIterNext()`, and `origin` is the `ForNode` itself.
*/
pragma [noinline]
private predicate iteration_points_to(ForNode for, PointsToContext context, ObjectInternal value, ControlFlowNode origin) {
exists(ControlFlowNode seqNode, ObjectInternal seq |
// Only the second argument of `iterates` (the node being iterated over) is used; the first is ignored.
for.iterates(_, seqNode) and
pointsTo(seqNode, context, seq, _) and
// If `seq.getIterNext()` has no result, this predicate has no result for that `seq` either.
value = seq.getIterNext() and
// The `for` node itself is reported as the origin of the value.
origin = for
)
}
/** Holds if the ESSA definition `def` refers to `(value, origin)` given the context `context`. */
private predicate ssa_definition_points_to(EssaDefinition def, PointsToContext context, ObjectInternal value, CfgOrigin origin) {
ssa_phi_points_to(def, context, value, origin)
@@ -394,6 +410,8 @@ cached module PointsToInternal {
or
assignment_points_to(def, context, value, origin)
or
multi_assignment_points_to(def, context, value, origin)
or
self_parameter_points_to(def, context, value, origin)
or
delete_points_to(def, context, value, origin)
@@ -403,8 +421,6 @@ cached module PointsToInternal {
scope_entry_points_to(def, context, value, origin)
or
InterModulePointsTo::implicit_submodule_points_to(def, value, origin) and context.isImport()
or
iteration_definition_points_to(def, context, value, origin)
/*
* No points-to for non-local function entry definitions yet.
*/
@@ -484,6 +500,16 @@ cached module PointsToInternal {
pointsTo(def.getValue(), context, value, origin)
}
/* Points-to for a variable defined by a `MultiAssignmentDefinition`.
* Holds if `def.indexOf` relates `def` to position `index` of some node, that node
* points to a `SequenceObjectInternal` in `context`, `value` is the item of that
* sequence at `index`, and `origin` is the defining node of `def`.
*/
pragma [noinline]
private predicate multi_assignment_points_to(MultiAssignmentDefinition def, PointsToContext context, ObjectInternal value, ControlFlowNode origin) {
exists(int index, ControlFlowNode rhs, SequenceObjectInternal sequence |
// NOTE(review): `MultiAssignmentDefinition.indexOf` names its second parameter `lhs`,
// while this local is called `rhs` - confirm that the node passed to `pointsTo` below is the intended one.
def.indexOf(index, rhs) and
pointsTo(rhs, context, sequence, _) and
// Only objects typed as `SequenceObjectInternal` contribute; `getItem(index)` selects the element.
value = sequence.getItem(index) and
origin = def.getDefiningNode()
)
}
/** Points-to for deletion: `del name`. */
pragma [noinline]
private predicate delete_points_to(DeletionDefinition def, PointsToContext context, ObjectInternal value, ControlFlowNode origin) {
@@ -563,11 +589,6 @@ cached module PointsToInternal {
)
}
private predicate iteration_definition_points_to(IterationDefinition def, PointsToContext context, ObjectInternal value, ControlFlowNode origin) {
pointsTo(def.getSequence(), context, ObjectInternal::unknown(), _) and
value = ObjectInternal::unknown() and origin = def.getDefiningNode()
}
/** Holds if `f` is an expression node `tval if cond else fval` and points to `(value, origin)`. */
private predicate if_exp_points_to(IfExprNode f, PointsToContext context, ObjectInternal value, ControlFlowNode origin) {
pointsTo(f.getAnOperand(), context, value, origin)

View File

@@ -1102,8 +1102,8 @@ library module TaintFlowImplementation {
}
/** Holds if iterating over the tainted sequence `fromnode` in a `for` statement yields the taint `totaint` in the context `tocontext`. */
cached predicate iteration_step(TaintedNode fromnode, TrackedValue totaint, CallContext tocontext, ControlFlowNode iter) {
exists(ForNode for | for.iterates(iter, fromnode.getNode())) and
cached predicate iteration_step(TaintedNode fromnode, TrackedValue totaint, CallContext tocontext, ForNode for) {
for.iterates(_, fromnode.getNode()) and
totaint = TTrackedTaint(fromnode.getTaintKind().getTaintForIteration()) and
tocontext = fromnode.getContext()
}
@@ -1202,9 +1202,6 @@ library module TaintFlowImplementation {
tainted_with(def, context, origin)
or
tainted_exception_capture(def, context, origin)
or
tainted_iteration(def, context, origin)
}
predicate tainted_scope_entry(ScopeEntryDefinition def, CallContext context, TaintedNode origin) {
@@ -1407,11 +1404,6 @@ library module TaintFlowImplementation {
context = fromnode.getContext()
}
pragma [noinline]
private predicate tainted_iteration(IterationDefinition def, CallContext context, TaintedNode fromnode) {
def.getDefiningNode() = fromnode.getNode() and
context = fromnode.getContext()
}
/* A call that returns a copy (or similar) of the argument */
predicate copyCall(ControlFlowNode fromnode, CallNode tonode) {