Merge pull request #4423 from tausbn/python-add-attribute-access-interface

Approved by RasmusWL
This commit is contained in:
CodeQL CI
2020-10-13 02:56:21 -07:00
committed by GitHub
9 changed files with 426 additions and 10 deletions

View File

@@ -6,7 +6,7 @@ private import internal.DataFlowPrivate
/** Any string that may appear as the name of an attribute or access path. */
class AttributeName extends string {
AttributeName() { this = any(Attribute a).getName() }
AttributeName() { this = any(AttrRef a).getAttributeName() }
}
/** Either an attribute name, or the empty string (representing no attribute). */
@@ -115,11 +115,10 @@ predicate returnStep(ReturnNode nodeFrom, Node nodeTo) {
* assignment to `z` inside `bar`, even though this attribute write happens _after_ `bar` is called.
*/
predicate basicStoreStep(Node nodeFrom, Node nodeTo, string attr) {
exists(AttributeAssignment a, Node var |
a.getName() = attr and
simpleLocalFlowStep*(nodeTo, var) and
var.asVar() = a.getInput() and
nodeFrom.asCfgNode() = a.getValue()
exists(AttrWrite a |
a.mayHaveAttributeName(attr) and
nodeFrom = a.getValue() and
simpleLocalFlowStep*(nodeTo, a.getObject())
)
}
@@ -127,7 +126,11 @@ predicate basicStoreStep(Node nodeFrom, Node nodeTo, string attr) {
* Holds if `nodeTo` is the result of accessing the `attr` attribute of `nodeFrom`.
*/
predicate basicLoadStep(Node nodeFrom, Node nodeTo, string attr) {
exists(AttrNode s | nodeTo.asCfgNode() = s and s.getObject(attr) = nodeFrom.asCfgNode())
exists(AttrRead a |
a.mayHaveAttributeName(attr) and
nodeFrom = a.getObject() and
nodeTo = a
)
}
/**

View File

@@ -0,0 +1,256 @@
/** This module provides an API for attribute reads and writes. */
import DataFlowUtil
import DataFlowPublic
private import DataFlowPrivate
/**
* A data flow node that reads or writes an attribute of an object.
*
* This abstract base class only knows about the base object on which the attribute is being
* accessed, and the attribute itself, if it is statically inferrable.
*/
abstract class AttrRef extends Node {
/**
* Gets the data flow node corresponding to the object whose attribute is being read or written.
*/
abstract Node getObject();
/**
* Gets the expression node that defines the attribute being accessed, if any. This is
* usually an identifier or literal.
*/
abstract ExprNode getAttributeNameExpr();
/**
* Holds if this attribute reference may access an attribute named `attrName`.
* Uses local data flow to track potential attribute names, which may lead to imprecision. If more
* precision is needed, consider using `getAttributeName` instead.
*/
predicate mayHaveAttributeName(string attrName) {
attrName = this.getAttributeName()
or
exists(Node nodeFrom |
localFlow(nodeFrom, this.getAttributeNameExpr()) and
attrName = nodeFrom.asExpr().(StrConst).getText()
)
}
/**
* Gets the name of the attribute being read or written. For dynamic attribute accesses, this
* method is not guaranteed to return a result. For such cases, using `mayHaveAttributeName` may yield
* better results.
*/
abstract string getAttributeName();
}
/**
* A data flow node that writes an attribute of an object. This includes
* - Simple attribute writes: `object.attr = value`
* - Dynamic attribute writes: `setattr(object, attr, value)`
* - Fields written during class initialization: `class MyClass: attr = value`
*/
abstract class AttrWrite extends AttrRef {
/** Gets the data flow node corresponding to the value that is written to the attribute. */
abstract Node getValue();
}
/**
* Represents a control flow node for a simple attribute assignment. That is,
* ```python
* object.attr = value
* ```
* Also gives access to the `value` being written, by extending `DefinitionNode`.
*/
private class AttributeAssignmentNode extends DefinitionNode, AttrNode, DataFlowCfgNode {
override ControlFlowNode getValue() { result = DefinitionNode.super.getValue() }
}
/** A simple attribute assignment: `object.attr = value`. */
private class AttributeAssignmentAsAttrWrite extends AttrWrite, CfgNode {
override AttributeAssignmentNode node;
override Node getValue() { result.asCfgNode() = node.getValue() }
override Node getObject() { result.asCfgNode() = node.getObject() }
override ExprNode getAttributeNameExpr() {
// Attribute names don't exist as `Node`s in the control flow graph, as they can only ever be
// identifiers, and are therefore represented directly as strings.
// Use `getAttributeName` to access the name of the attribute.
none()
}
override string getAttributeName() { result = node.getName() }
}
import semmle.python.types.Builtins
/** Represents `CallNode`s that may refer to calls to built-in functions or classes. */
private class BuiltInCallNode extends CallNode, DataFlowCfgNode {
string name;
BuiltInCallNode() {
// TODO disallow instances where the name of the built-in may refer to an in-scope variable of that name.
exists(NameNode id | this.getFunction() = id and id.getId() = name and id.isGlobal()) and
name = any(Builtin b).getName()
}
/** Gets the name of the built-in function that is called at this `CallNode` */
string getBuiltinName() { result = name }
}
/**
* Represents a call to the built-ins that handle dynamic inspection and modification of
* attributes: `getattr`, `setattr`, `hasattr`, and `delattr`.
*/
private class BuiltinAttrCallNode extends BuiltInCallNode {
BuiltinAttrCallNode() { name in ["setattr", "getattr", "hasattr", "delattr"] }
/** Gets the control flow node for object on which the attribute is accessed. */
ControlFlowNode getObject() { result in [this.getArg(0), this.getArgByName("object")] }
/**
* Gets the control flow node for the value that is being written to the attribute.
* Only relevant for `setattr` calls.
*/
ControlFlowNode getValue() {
// only valid for `setattr`
name = "setattr" and
result in [this.getArg(2), this.getArgByName("value")]
}
/** Gets the control flow node that defines the name of the attribute being accessed. */
ControlFlowNode getName() { result in [this.getArg(1), this.getArgByName("name")] }
}
/** Represents calls to the built-in `setattr`. */
private class SetAttrCallNode extends BuiltinAttrCallNode {
SetAttrCallNode() { name = "setattr" }
}
/** Represents calls to the built-in `getattr`. */
private class GetAttrCallNode extends BuiltinAttrCallNode {
GetAttrCallNode() { name = "getattr" }
}
/** An attribute assignment using `setattr`, e.g. `setattr(object, attr, value)` */
private class SetAttrCallAsAttrWrite extends AttrWrite, CfgNode {
override SetAttrCallNode node;
override Node getValue() { result.asCfgNode() = node.getValue() }
override Node getObject() { result.asCfgNode() = node.getObject() }
override ExprNode getAttributeNameExpr() { result.asCfgNode() = node.getName() }
override string getAttributeName() {
result = this.getAttributeNameExpr().asExpr().(StrConst).getText()
}
}
/**
* Represents an attribute of a class that is assigned statically during class definition. For instance
* ```python
* class MyClass:
* attr = value
* ...
* ```
* Instances of this class correspond to the `NameNode` for `attr`, and also gives access to `value` by
* virtue of being a `DefinitionNode`.
*/
private class ClassAttributeAssignmentNode extends DefinitionNode, NameNode, DataFlowCfgNode { }
/**
* An attribute assignment via a class field, e.g.
* ```python
* class MyClass:
* attr = value
* ```
* is treated as equivalent to `MyClass.attr = value`.
*/
private class ClassDefinitionAsAttrWrite extends AttrWrite, CfgNode {
ClassExpr cls;
override ClassAttributeAssignmentNode node;
ClassDefinitionAsAttrWrite() { node.getScope() = cls.getInnerScope() }
override Node getValue() { result.asCfgNode() = node.getValue() }
override Node getObject() { result.asCfgNode() = cls.getAFlowNode() }
override ExprNode getAttributeNameExpr() { none() }
override string getAttributeName() { result = node.getId() }
}
/**
* A read of an attribute on an object. This includes
* - Simple attribute reads: `object.attr`
* - Dynamic attribute reads using `getattr`: `getattr(object, attr)`
* - Qualified imports: `from module import attr as name`
*/
abstract class AttrRead extends AttrRef, Node { }
/**
* A convenience class for embedding `AttrNode` into `DataFlowCfgNode`, as the former is not
* obviously a subtype of the latter.
*/
private class DataFlowAttrNode extends AttrNode, DataFlowCfgNode { }
/** A simple attribute read, e.g. `object.attr` */
private class AttributeReadAsAttrRead extends AttrRead, CfgNode {
override DataFlowAttrNode node;
override Node getObject() { result.asCfgNode() = node.getObject() }
override ExprNode getAttributeNameExpr() {
// Attribute names don't exist as `Node`s in the control flow graph, as they can only ever be
// identifiers, and are therefore represented directly as strings.
// Use `getAttributeName` to access the name of the attribute.
none()
}
override string getAttributeName() { result = node.getName() }
}
/** An attribute read using `getattr`: `getattr(object, attr)` */
private class GetAttrCallAsAttrRead extends AttrRead, CfgNode {
override GetAttrCallNode node;
override Node getObject() { result.asCfgNode() = node.getObject() }
override ExprNode getAttributeNameExpr() { result.asCfgNode() = node.getName() }
override string getAttributeName() {
result = this.getAttributeNameExpr().asExpr().(StrConst).getText()
}
}
/**
* A convenience class for embedding `ImportMemberNode` into `DataFlowCfgNode`, as the former is not
* obviously a subtype of the latter.
*/
private class DataFlowImportMemberNode extends ImportMemberNode, DataFlowCfgNode { }
/**
* Represents a named import as an attribute read. That is,
* ```python
* from module import attr as attr_ref
* ```
* is treated as if it is a read of the attribute `module.attr`, even if `module` is not imported directly.
*/
private class ModuleAttributeImportAsAttrRead extends AttrRead, CfgNode {
override DataFlowImportMemberNode node;
override Node getObject() { result.asCfgNode() = node.getModule(_) }
override ExprNode getAttributeNameExpr() {
// The name of an imported attribute doesn't exist as a `Node` in the control flow graph, as it
// can only ever be an identifier, and is therefore represented directly as a string.
// Use `getAttributeName` to access the name of the attribute.
none()
}
override string getAttributeName() { exists(node.getModule(result)) }
}

View File

@@ -5,6 +5,7 @@
private import python
private import DataFlowPrivate
import experimental.dataflow.TypeTracker
import Attributes
private import semmle.python.essa.SsaCompute
/**

View File

@@ -18,7 +18,7 @@ predicate localFlowStep(Node nodeFrom, Node nodeTo) { simpleLocalFlowStep(nodeFr
predicate localFlow(Node source, Node sink) { localFlowStep*(source, sink) }
/**
* Gets an EssaNode that holds the module imported by `name`.
* Gets a `Node` that refers to the module referenced by `name`.
* Note that for the statement `import pkg.mod`, the new variable introduced is `pkg` that is a
* reference to the module `pkg`.
*
@@ -27,6 +27,9 @@ predicate localFlow(Node source, Node sink) { localFlowStep*(source, sink) }
* 2. `from <package> import <module>` when `<name> = <package> + "." + <module>`
* 3. `from <module> import <member>` when `<name> = <module> + "." + <member>`
*
* Finally, in `from <module> import <member>` we consider the `ImportExpr` corresponding to
* `<module>` to be a reference to that module.
*
* Note:
* While it is technically possible that `import mypkg.foo` and `from mypkg import foo` can give different values,
* it's highly unlikely that this will be a problem in production level code.
@@ -36,7 +39,7 @@ predicate localFlow(Node source, Node sink) { localFlowStep*(source, sink) }
*
* Also see `DataFlow::importMember`
*/
EssaNode importModule(string name) {
Node importModule(string name) {
exists(Variable var, Import imp, Alias alias |
alias = imp.getAName() and
alias.getAsname() = var.getAStore() and
@@ -45,8 +48,29 @@ EssaNode importModule(string name) {
or
name = alias.getValue().(ImportExpr).getImportedModuleName()
) and
result.getVar().(AssignmentDefinition).getSourceVariable() = var
result.(EssaNode).getVar().(AssignmentDefinition).getSourceVariable() = var
)
or
// Although it may seem superfluous to consider the `foo` part of `from foo import bar as baz` to
// be a reference to a module (since that reference only makes sense locally within the `import`
// statement), it's important for our use of type trackers to consider this local reference to
// also refer to the `foo` module. That way, if one wants to track references to the `bar`
// attribute using a type tracker, one can simply write
//
// ```ql
// DataFlow::Node bar_attr_tracker(TypeTracker t) {
// t.startInAttr("bar") and
// result = foo_module_tracker()
// or
// exists(TypeTracker t2 | result = bar_attr_tracker(t2).track(t2, t))
// }
// ```
//
// Where `foo_module_tracker` is a type tracker that tracks references to the `foo` module.
// Because named imports are modelled as `AttrRead`s, the statement `from foo import bar as baz`
// is interpreted as if it was an assignment `baz = foo.bar`, which means `baz` gets tracked as a
// reference to `foo.bar`, as desired.
result.asCfgNode().getNode() = any(ImportExpr i | i.getAnImportedModuleName() = name)
}
/**

View File

@@ -1,16 +1,36 @@
importModule
| test1.py:1:8:1:12 | ControlFlowNode for ImportExpr | mypkg |
| test1.py:1:8:1:12 | GSSA Variable mypkg | mypkg |
| test2.py:1:6:1:10 | ControlFlowNode for ImportExpr | mypkg |
| test2.py:1:6:1:10 | ControlFlowNode for ImportExpr | mypkg |
| test2.py:1:19:1:21 | GSSA Variable foo | mypkg.foo |
| test2.py:1:24:1:26 | GSSA Variable bar | mypkg.bar |
| test3.py:1:8:1:16 | ControlFlowNode for ImportExpr | mypkg |
| test3.py:1:8:1:16 | ControlFlowNode for ImportExpr | mypkg.foo |
| test3.py:2:8:2:16 | ControlFlowNode for ImportExpr | mypkg |
| test3.py:2:8:2:16 | ControlFlowNode for ImportExpr | mypkg.bar |
| test3.py:2:8:2:16 | GSSA Variable mypkg | mypkg |
| test4.py:1:8:1:16 | ControlFlowNode for ImportExpr | mypkg |
| test4.py:1:8:1:16 | ControlFlowNode for ImportExpr | mypkg.foo |
| test4.py:1:21:1:24 | GSSA Variable _foo | mypkg.foo |
| test4.py:2:8:2:16 | ControlFlowNode for ImportExpr | mypkg |
| test4.py:2:8:2:16 | ControlFlowNode for ImportExpr | mypkg.bar |
| test4.py:2:21:2:24 | GSSA Variable _bar | mypkg.bar |
| test5.py:1:8:1:12 | ControlFlowNode for ImportExpr | mypkg |
| test5.py:1:8:1:12 | GSSA Variable mypkg | mypkg |
| test5.py:9:6:9:10 | ControlFlowNode for ImportExpr | mypkg |
| test5.py:9:26:9:29 | GSSA Variable _bar | mypkg.bar |
| test6.py:1:8:1:12 | ControlFlowNode for ImportExpr | mypkg |
| test6.py:1:8:1:12 | GSSA Variable mypkg | mypkg |
| test6.py:5:8:5:16 | ControlFlowNode for ImportExpr | mypkg |
| test6.py:5:8:5:16 | ControlFlowNode for ImportExpr | mypkg.foo |
| test6.py:5:8:5:16 | GSSA Variable mypkg | mypkg |
| test7.py:1:6:1:10 | ControlFlowNode for ImportExpr | mypkg |
| test7.py:1:19:1:21 | GSSA Variable foo | mypkg.foo |
| test7.py:5:8:5:16 | ControlFlowNode for ImportExpr | mypkg |
| test7.py:5:8:5:16 | ControlFlowNode for ImportExpr | mypkg.foo |
| test7.py:5:8:5:16 | GSSA Variable mypkg | mypkg |
| test7.py:9:6:9:10 | ControlFlowNode for ImportExpr | mypkg |
| test7.py:9:19:9:21 | GSSA Variable foo | mypkg.foo |
importMember
| test2.py:1:19:1:21 | GSSA Variable foo | mypkg | foo |

View File

@@ -29,3 +29,73 @@ def test_incompatible_types():
expects_int(x) # $int=field $f+:str=field
x.field = str("Hello") # $f+:int=field $str=field $f+:int $str
expects_string(x) # $f+:int=field $str=field
# Attributes assigned statically to a class
class MyClass: # $tracked=field
field = tracked # $tracked
lookup = MyClass.field # $tracked $tracked=field
instance = MyClass() # $tracked=field
lookup2 = instance.field # $f-:tracked
## Dynamic attribute access
# Via `getattr`/`setattr`
def setattr_immediate_write():
x = SomeClass() # $tracked=foo
setattr(x,"foo", tracked) # $tracked $tracked=foo
y = x.foo # $tracked $tracked=foo
do_stuff(y) # $tracked
def getattr_immediate_read():
x = SomeClass() # $tracked=foo
x.foo = tracked # $tracked $tracked=foo
y = getattr(x,"foo") # $tracked $tracked=foo
do_stuff(y) # $tracked
def setattr_indirect_write():
attr = "foo"
x = SomeClass() # $tracked=foo
setattr(x, attr, tracked) # $tracked $tracked=foo
y = x.foo # $tracked $tracked=foo
do_stuff(y) # $tracked
def getattr_indirect_read():
attr = "foo"
x = SomeClass() # $tracked=foo
x.foo = tracked # $tracked $tracked=foo
y = getattr(x, attr) #$tracked $tracked=foo
do_stuff(y) # $tracked
# Via `__dict__` -- not currently implemented.
def dunder_dict_immediate_write():
x = SomeClass() # $f-:tracked=foo
x.__dict__["foo"] = tracked # $tracked $f-:tracked=foo
y = x.foo # $f-:tracked $f-:tracked=foo
do_stuff(y) # $f-:tracked
def dunder_dict_immediate_read():
x = SomeClass() # $tracked=foo
x.foo = tracked # $tracked $tracked=foo
y = x.__dict__["foo"] # $f-:tracked $tracked=foo
do_stuff(y) # $f-:tracked
def dunder_dict_indirect_write():
attr = "foo"
x = SomeClass() # $f-:tracked=foo
x.__dict__[attr] = tracked # $tracked $f-:tracked=foo
y = x.foo # $f-:tracked $f-:tracked=foo
do_stuff(y) # $f-:tracked
def dunder_dict_indirect_read():
attr = "foo"
x = SomeClass() # $tracked=foo
x.foo = tracked # $tracked $tracked=foo
y = x.__dict__[attr] # $f-:tracked $tracked=foo
do_stuff(y) # $f-:tracked

View File

@@ -0,0 +1,9 @@
from module import attr as attr_ref
x = attr_ref
def fun():
y = attr_ref
# The following should _not_ be a reference to the above module, since we don't actually import it.
z = module

View File

@@ -0,0 +1,10 @@
module_tracker
| import_as_attr.py:1:6:1:11 | ControlFlowNode for ImportExpr |
module_attr_tracker
| import_as_attr.py:0:0:0:0 | ModuleVariableNode for Global Variable attr_ref in Module import_as_attr |
| import_as_attr.py:1:20:1:35 | ControlFlowNode for ImportMember |
| import_as_attr.py:1:28:1:35 | GSSA Variable attr_ref |
| import_as_attr.py:3:1:3:1 | GSSA Variable x |
| import_as_attr.py:3:5:3:12 | ControlFlowNode for attr_ref |
| import_as_attr.py:6:5:6:5 | SSA variable y |
| import_as_attr.py:6:9:6:16 | ControlFlowNode for attr_ref |

View File

@@ -0,0 +1,23 @@
import python
import experimental.dataflow.DataFlow
import experimental.dataflow.TypeTracker
DataFlow::Node module_tracker(TypeTracker t) {
t.start() and
result = DataFlow::importModule("module")
or
exists(TypeTracker t2 | result = module_tracker(t2).track(t2, t))
}
query DataFlow::Node module_tracker() { result = module_tracker(DataFlow::TypeTracker::end()) }
DataFlow::Node module_attr_tracker(TypeTracker t) {
t.startInAttr("attr") and
result = module_tracker()
or
exists(TypeTracker t2 | result = module_attr_tracker(t2).track(t2, t))
}
query DataFlow::Node module_attr_tracker() {
result = module_attr_tracker(DataFlow::TypeTracker::end())
}