mirror of
https://github.com/github/codeql.git
synced 2025-12-20 18:56:32 +01:00
Merge branch 'main' into python-model-invoke
This commit is contained in:
@@ -6,7 +6,7 @@ private import internal.DataFlowPrivate
|
||||
|
||||
/** Any string that may appear as the name of an attribute or access path. */
|
||||
class AttributeName extends string {
|
||||
AttributeName() { this = any(Attribute a).getName() }
|
||||
AttributeName() { this = any(AttrRef a).getAttributeName() }
|
||||
}
|
||||
|
||||
/** Either an attribute name, or the empty string (representing no attribute). */
|
||||
@@ -115,11 +115,10 @@ predicate returnStep(ReturnNode nodeFrom, Node nodeTo) {
|
||||
* assignment to `z` inside `bar`, even though this attribute write happens _after_ `bar` is called.
|
||||
*/
|
||||
predicate basicStoreStep(Node nodeFrom, Node nodeTo, string attr) {
|
||||
exists(AttributeAssignment a, Node var |
|
||||
a.getName() = attr and
|
||||
simpleLocalFlowStep*(nodeTo, var) and
|
||||
var.asVar() = a.getInput() and
|
||||
nodeFrom.asCfgNode() = a.getValue()
|
||||
exists(AttrWrite a |
|
||||
a.mayHaveAttributeName(attr) and
|
||||
nodeFrom = a.getValue() and
|
||||
simpleLocalFlowStep*(nodeTo, a.getObject())
|
||||
)
|
||||
}
|
||||
|
||||
@@ -127,7 +126,11 @@ predicate basicStoreStep(Node nodeFrom, Node nodeTo, string attr) {
|
||||
* Holds if `nodeTo` is the result of accessing the `attr` attribute of `nodeFrom`.
|
||||
*/
|
||||
predicate basicLoadStep(Node nodeFrom, Node nodeTo, string attr) {
|
||||
exists(AttrNode s | nodeTo.asCfgNode() = s and s.getObject(attr) = nodeFrom.asCfgNode())
|
||||
exists(AttrRead a |
|
||||
a.mayHaveAttributeName(attr) and
|
||||
nodeFrom = a.getObject() and
|
||||
nodeTo = a
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
244
python/ql/src/experimental/dataflow/internal/Attributes.qll
Normal file
244
python/ql/src/experimental/dataflow/internal/Attributes.qll
Normal file
@@ -0,0 +1,244 @@
|
||||
/** This module provides an API for attribute reads and writes. */
|
||||
|
||||
import DataFlowUtil
|
||||
import DataFlowPublic
|
||||
private import DataFlowPrivate
|
||||
|
||||
/**
|
||||
* A data flow node that reads or writes an attribute of an object.
|
||||
*
|
||||
* This abstract base class only knows about the base object on which the attribute is being
|
||||
* accessed, and the attribute itself, if it is statically inferrable.
|
||||
*/
|
||||
abstract class AttrRef extends Node {
|
||||
/**
|
||||
* Gets the data flow node corresponding to the object whose attribute is being read or written.
|
||||
*/
|
||||
abstract Node getObject();
|
||||
|
||||
/**
|
||||
* Gets the expression node that defines the attribute being accessed, if any. This is
|
||||
* usually an identifier or literal.
|
||||
*/
|
||||
abstract ExprNode getAttributeNameExpr();
|
||||
|
||||
/**
|
||||
* Holds if this attribute reference may access an attribute named `attrName`.
|
||||
* Uses local data flow to track potential attribute names, which may lead to imprecision. If more
|
||||
* precision is needed, consider using `getAttributeName` instead.
|
||||
*/
|
||||
predicate mayHaveAttributeName(string attrName) {
|
||||
attrName = this.getAttributeName()
|
||||
or
|
||||
exists(Node nodeFrom |
|
||||
localFlow(nodeFrom, this.getAttributeNameExpr()) and
|
||||
attrName = nodeFrom.asExpr().(StrConst).getText()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the name of the attribute being read or written. For dynamic attribute accesses, this
|
||||
* method is not guaranteed to return a result. For such cases, using `mayHaveAttributeName` may yield
|
||||
* better results.
|
||||
*/
|
||||
abstract string getAttributeName();
|
||||
}
|
||||
|
||||
/**
|
||||
* A data flow node that writes an attribute of an object. This includes
|
||||
* - Simple attribute writes: `object.attr = value`
|
||||
* - Dynamic attribute writes: `setattr(object, attr, value)`
|
||||
* - Fields written during class initialization: `class MyClass: attr = value`
|
||||
*/
|
||||
abstract class AttrWrite extends AttrRef {
|
||||
/** Gets the data flow node corresponding to the value that is written to the attribute. */
|
||||
abstract Node getValue();
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents a control flow node for a simple attribute assignment. That is,
|
||||
* ```python
|
||||
* object.attr = value
|
||||
* ```
|
||||
* Also gives access to the `value` being written, by extending `DefinitionNode`.
|
||||
*/
|
||||
private class AttributeAssignmentNode extends DefinitionNode, AttrNode {
|
||||
// Delegates explicitly to the `DefinitionNode` supertype. Presumably this is needed to
// disambiguate `getValue` between the two supertypes -- TODO confirm against `AttrNode`.
/** Gets the control flow node for the value being assigned, as reported by `DefinitionNode`. */
override ControlFlowNode getValue() { result = DefinitionNode.super.getValue() }
|
||||
}
|
||||
|
||||
/** A simple attribute assignment: `object.attr = value`. */
|
||||
private class AttributeAssignmentAsAttrWrite extends AttrWrite, CfgNode {
|
||||
override AttributeAssignmentNode node;
|
||||
|
||||
override Node getValue() { result.asCfgNode() = node.getValue() }
|
||||
|
||||
override Node getObject() { result.asCfgNode() = node.getObject() }
|
||||
|
||||
override ExprNode getAttributeNameExpr() {
|
||||
// Attribute names don't exist as `Node`s in the control flow graph, as they can only ever be
|
||||
// identifiers, and are therefore represented directly as strings.
|
||||
// Use `getAttributeName` to access the name of the attribute.
|
||||
none()
|
||||
}
|
||||
|
||||
override string getAttributeName() { result = node.getName() }
|
||||
}
|
||||
|
||||
import semmle.python.types.Builtins
|
||||
|
||||
/** Represents `CallNode`s that may refer to calls to built-in functions or classes. */
|
||||
private class BuiltInCallNode extends CallNode {
|
||||
// The identifier used as the callee of this call. The characteristic predicate below
// restricts it to identifiers that are also the name of some `Builtin`.
string name;
|
||||
|
||||
BuiltInCallNode() {
|
||||
// TODO disallow instances where the name of the built-in may refer to an in-scope variable of that name.
|
||||
// The callee must be a global name reference whose identifier is `name` ...
exists(NameNode id | this.getFunction() = id and id.getId() = name and id.isGlobal()) and
|
||||
// ... and `name` must coincide with the name of a built-in. This is a purely name-based
// match; see the TODO above regarding shadowing.
name = any(Builtin b).getName()
|
||||
}
|
||||
|
||||
/** Gets the name of the built-in function or class that is called at this `CallNode`. */
|
||||
string getBuiltinName() { result = name }
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents a call to the built-ins that handle dynamic inspection and modification of
|
||||
* attributes: `getattr`, `setattr`, `hasattr`, and `delattr`.
|
||||
*/
|
||||
private class BuiltinAttrCallNode extends BuiltInCallNode {
|
||||
BuiltinAttrCallNode() { name in ["setattr", "getattr", "hasattr", "delattr"] }
|
||||
|
||||
/** Gets the control flow node for the object on which the attribute is accessed. */
|
||||
ControlFlowNode getObject() { result in [this.getArg(0), this.getArgByName("object")] }
|
||||
|
||||
/**
|
||||
* Gets the control flow node for the value that is being written to the attribute.
|
||||
* Only relevant for `setattr` calls.
|
||||
*/
|
||||
ControlFlowNode getValue() {
|
||||
// only valid for `setattr`
|
||||
name = "setattr" and
|
||||
result in [this.getArg(2), this.getArgByName("value")]
|
||||
}
|
||||
|
||||
/** Gets the control flow node that defines the name of the attribute being accessed. */
|
||||
ControlFlowNode getName() { result in [this.getArg(1), this.getArgByName("name")] }
|
||||
}
|
||||
|
||||
/** Represents calls to the built-in `setattr`. */
|
||||
private class SetAttrCallNode extends BuiltinAttrCallNode {
|
||||
SetAttrCallNode() { name = "setattr" }
|
||||
}
|
||||
|
||||
/** Represents calls to the built-in `getattr`. */
|
||||
private class GetAttrCallNode extends BuiltinAttrCallNode {
|
||||
GetAttrCallNode() { name = "getattr" }
|
||||
}
|
||||
|
||||
/** An attribute assignment using `setattr`, e.g. `setattr(object, attr, value)` */
|
||||
private class SetAttrCallAsAttrWrite extends AttrWrite, CfgNode {
|
||||
// Narrows the underlying control flow node to calls of the built-in `setattr`.
override SetAttrCallNode node;
|
||||
|
||||
/**
 * Gets the data flow node for the value being written, that is, the third positional
 * argument (or the `value` keyword argument) of the `setattr` call.
 */
override Node getValue() { result.asCfgNode() = node.getValue() }
|
||||
|
||||
/**
 * Gets the data flow node for the object being written to, that is, the first positional
 * argument (or the `object` keyword argument) of the `setattr` call.
 */
override Node getObject() { result.asCfgNode() = node.getObject() }
|
||||
|
||||
/**
 * Gets the expression node that supplies the attribute name, that is, the second positional
 * argument (or the `name` keyword argument) of the `setattr` call.
 */
override ExprNode getAttributeNameExpr() { result.asCfgNode() = node.getName() }
|
||||
|
||||
/**
 * Gets the attribute name, if the name argument is itself a string constant.
 * Has no result when the name argument is any other kind of expression; in that case
 * `mayHaveAttributeName` may still find a name via local data flow.
 */
override string getAttributeName() {
|
||||
result = this.getAttributeNameExpr().asExpr().(StrConst).getText()
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents an attribute of a class that is assigned statically during class definition. For instance
|
||||
* ```python
|
||||
* class MyClass:
|
||||
* attr = value
|
||||
* ...
|
||||
* ```
|
||||
* Instances of this class correspond to the `NameNode` for `attr`, and also give access to `value` by
|
||||
* virtue of being a `DefinitionNode`.
|
||||
*/
|
||||
private class ClassAttributeAssignmentNode extends DefinitionNode, NameNode { }
|
||||
|
||||
/**
|
||||
* An attribute assignment via a class field, e.g.
|
||||
* ```python
|
||||
* class MyClass:
|
||||
* attr = value
|
||||
* ```
|
||||
* is treated as equivalent to `MyClass.attr = value`.
|
||||
*/
|
||||
private class ClassDefinitionAsAttrWrite extends AttrWrite, CfgNode {
|
||||
// The class expression whose body contains the assignment represented by `node`.
ClassExpr cls;
|
||||
// Narrows the underlying control flow node to a name that is being defined.
override ClassAttributeAssignmentNode node;
|
||||
|
||||
// Only name definitions whose scope is the inner scope of a class expression qualify.
ClassDefinitionAsAttrWrite() { node.getScope() = cls.getInnerScope() }
|
||||
|
||||
/** Gets the data flow node for the value assigned to the class field. */
override Node getValue() { result.asCfgNode() = node.getValue() }
|
||||
|
||||
/**
 * Gets the data flow node for the object being written to, which is taken to be a flow node
 * of the enclosing class expression itself.
 */
override Node getObject() { result.asCfgNode() = cls.getAFlowNode() }
|
||||
|
||||
/**
 * Has no result: the attribute name is the identifier of the defined name (see
 * `getAttributeName`), so there is no separate expression that computes it.
 */
override ExprNode getAttributeNameExpr() { none() }
|
||||
|
||||
/** Gets the attribute name, which is the identifier of the name being defined. */
override string getAttributeName() { result = node.getId() }
|
||||
}
|
||||
|
||||
/**
|
||||
* A read of an attribute on an object. This includes
|
||||
* - Simple attribute reads: `object.attr`
|
||||
* - Dynamic attribute reads using `getattr`: `getattr(object, attr)`
|
||||
* - Qualified imports: `from module import attr as name`
|
||||
*/
|
||||
abstract class AttrRead extends AttrRef, Node { }
|
||||
|
||||
/** A simple attribute read, e.g. `object.attr` */
|
||||
private class AttributeReadAsAttrRead extends AttrRead, CfgNode {
|
||||
override AttrNode node;
|
||||
|
||||
override Node getObject() { result.asCfgNode() = node.getObject() }
|
||||
|
||||
override ExprNode getAttributeNameExpr() {
|
||||
// Attribute names don't exist as `Node`s in the control flow graph, as they can only ever be
|
||||
// identifiers, and are therefore represented directly as strings.
|
||||
// Use `getAttributeName` to access the name of the attribute.
|
||||
none()
|
||||
}
|
||||
|
||||
override string getAttributeName() { result = node.getName() }
|
||||
}
|
||||
|
||||
/** An attribute read using `getattr`: `getattr(object, attr)` */
|
||||
private class GetAttrCallAsAttrRead extends AttrRead, CfgNode {
|
||||
// Narrows the underlying control flow node to calls of the built-in `getattr`.
override GetAttrCallNode node;
|
||||
|
||||
/**
 * Gets the data flow node for the object being read from, that is, the first positional
 * argument (or the `object` keyword argument) of the `getattr` call.
 */
override Node getObject() { result.asCfgNode() = node.getObject() }
|
||||
|
||||
/**
 * Gets the expression node that supplies the attribute name, that is, the second positional
 * argument (or the `name` keyword argument) of the `getattr` call.
 */
override ExprNode getAttributeNameExpr() { result.asCfgNode() = node.getName() }
|
||||
|
||||
/**
 * Gets the attribute name, if the name argument is itself a string constant.
 * Has no result when the name argument is any other kind of expression; in that case
 * `mayHaveAttributeName` may still find a name via local data flow.
 */
override string getAttributeName() {
|
||||
result = this.getAttributeNameExpr().asExpr().(StrConst).getText()
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents a named import as an attribute read. That is,
|
||||
* ```python
|
||||
* from module import attr as attr_ref
|
||||
* ```
|
||||
* is treated as if it is a read of the attribute `module.attr`, even if `module` is not imported directly.
|
||||
*/
|
||||
private class ModuleAttributeImportAsAttrRead extends AttrRead, CfgNode {
|
||||
// Narrows the underlying control flow node to an import-member node.
override ImportMemberNode node;
|
||||
|
||||
/**
 * Gets the data flow node for the module that the member is imported from. The member name
 * is deliberately left unconstrained (`_`) here; it is exposed by `getAttributeName`.
 */
override Node getObject() { result.asCfgNode() = node.getModule(_) }
|
||||
|
||||
/** Has no result, for the reason given in the body. */
override ExprNode getAttributeNameExpr() {
|
||||
// The name of an imported attribute doesn't exist as a `Node` in the control flow graph, as it
|
||||
// can only ever be an identifier, and is therefore represented directly as a string.
|
||||
// Use `getAttributeName` to access the name of the attribute.
|
||||
none()
|
||||
}
|
||||
|
||||
/**
 * Gets the name of the imported member. `result` is bound to the string argument for which
 * `node.getModule` has a result; the module node itself is ignored here.
 */
override string getAttributeName() { exists(node.getModule(result)) }
|
||||
}
|
||||
@@ -167,4 +167,9 @@ module Consistency {
|
||||
not isImmutableOrUnobservable(n) and
|
||||
msg = "ArgumentNode is missing PostUpdateNode."
|
||||
}
|
||||
|
||||
query predicate postWithInFlow(PostUpdateNode n, string msg) {
|
||||
simpleLocalFlowStep(_, n) and
|
||||
msg = "PostUpdateNode should not be the target of local flow."
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,11 +11,6 @@ private import semmle.python.essa.SsaCompute
|
||||
//--------
|
||||
predicate isExpressionNode(ControlFlowNode node) { node.getNode() instanceof Expr }
|
||||
|
||||
/** A control flow node which is also a dataflow node */
|
||||
class DataFlowCfgNode extends ControlFlowNode {
|
||||
DataFlowCfgNode() { isExpressionNode(this) }
|
||||
}
|
||||
|
||||
/** A data flow node for which we should synthesise an associated pre-update node. */
|
||||
abstract class NeedsSyntheticPreUpdateNode extends Node {
|
||||
/** A label for this kind of node. This will figure in the textual representation of the synthesized pre-update node. */
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
private import python
|
||||
private import DataFlowPrivate
|
||||
import experimental.dataflow.TypeTracker
|
||||
import Attributes
|
||||
private import semmle.python.essa.SsaCompute
|
||||
|
||||
/**
|
||||
@@ -22,7 +23,7 @@ newtype TNode =
|
||||
/** A node corresponding to an SSA variable. */
|
||||
TEssaNode(EssaVariable var) or
|
||||
/** A node corresponding to a control flow node. */
|
||||
TCfgNode(DataFlowCfgNode node) or
|
||||
TCfgNode(ControlFlowNode node) { isExpressionNode(node) } or
|
||||
/** A synthetic node representing the value of an object before a state change */
|
||||
TSyntheticPreUpdateNode(NeedsSyntheticPreUpdateNode post) or
|
||||
/** A synthetic node representing the value of an object after a state change */
|
||||
@@ -103,7 +104,7 @@ class EssaNode extends Node, TEssaNode {
|
||||
}
|
||||
|
||||
class CfgNode extends Node, TCfgNode {
|
||||
DataFlowCfgNode node;
|
||||
ControlFlowNode node;
|
||||
|
||||
CfgNode() { this = TCfgNode(node) }
|
||||
|
||||
|
||||
@@ -18,7 +18,7 @@ predicate localFlowStep(Node nodeFrom, Node nodeTo) { simpleLocalFlowStep(nodeFr
|
||||
predicate localFlow(Node source, Node sink) { localFlowStep*(source, sink) }
|
||||
|
||||
/**
|
||||
* Gets an EssaNode that holds the module imported by `name`.
|
||||
* Gets a `Node` that refers to the module referenced by `name`.
|
||||
* Note that for the statement `import pkg.mod`, the new variable introduced is `pkg` that is a
|
||||
* reference to the module `pkg`.
|
||||
*
|
||||
@@ -27,16 +27,17 @@ predicate localFlow(Node source, Node sink) { localFlowStep*(source, sink) }
|
||||
* 2. `from <package> import <module>` when `<name> = <package> + "." + <module>`
|
||||
* 3. `from <module> import <member>` when `<name> = <module> + "." + <member>`
|
||||
*
|
||||
* Finally, in `from <module> import <member>` we consider the `ImportExpr` corresponding to
|
||||
* `<module>` to be a reference to that module.
|
||||
*
|
||||
* Note:
|
||||
* While it is technically possible that `import mypkg.foo` and `from mypkg import foo` can give different values,
|
||||
* it's highly unlikely that this will be a problem in production level code.
|
||||
* Example: If `mypkg/__init__.py` contains `foo = 42`, then `from mypkg import foo` will not import the module
|
||||
* `mypkg/foo.py` but the variable `foo` containing `42` -- however, `import mypkg.foo` will always cause `mypkg.foo`
|
||||
* to refer to the module.
|
||||
*
|
||||
* Also see `DataFlow::importMember`
|
||||
*/
|
||||
EssaNode importModule(string name) {
|
||||
Node importNode(string name) {
|
||||
exists(Variable var, Import imp, Alias alias |
|
||||
alias = imp.getAName() and
|
||||
alias.getAsname() = var.getAStore() and
|
||||
@@ -45,23 +46,27 @@ EssaNode importModule(string name) {
|
||||
or
|
||||
name = alias.getValue().(ImportExpr).getImportedModuleName()
|
||||
) and
|
||||
result.getVar().(AssignmentDefinition).getSourceVariable() = var
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets an `EssaNode` that holds the value imported by using the fully qualified name in
|
||||
*`from <moduleName> import <memberName>`.
|
||||
*
|
||||
* Also see `DataFlow::importModule`.
|
||||
*/
|
||||
EssaNode importMember(string moduleName, string memberName) {
|
||||
exists(Variable var, Import imp, Alias alias, ImportMember member |
|
||||
alias = imp.getAName() and
|
||||
member = alias.getValue() and
|
||||
moduleName = member.getModule().(ImportExpr).getImportedModuleName() and
|
||||
memberName = member.getName() and
|
||||
alias.getAsname() = var.getAStore() and
|
||||
result.getVar().(AssignmentDefinition).getSourceVariable() = var
|
||||
result.(EssaNode).getVar().(AssignmentDefinition).getSourceVariable() = var
|
||||
)
|
||||
or
|
||||
// Although it may seem superfluous to consider the `foo` part of `from foo import bar as baz` to
|
||||
// be a reference to a module (since that reference only makes sense locally within the `import`
|
||||
// statement), it's important for our use of type trackers to consider this local reference to
|
||||
// also refer to the `foo` module. That way, if one wants to track references to the `bar`
|
||||
// attribute using a type tracker, one can simply write
|
||||
//
|
||||
// ```ql
|
||||
// DataFlow::Node bar_attr_tracker(TypeTracker t) {
|
||||
// t.startInAttr("bar") and
|
||||
// result = foo_module_tracker()
|
||||
// or
|
||||
// exists(TypeTracker t2 | result = bar_attr_tracker(t2).track(t2, t))
|
||||
// }
|
||||
// ```
|
||||
//
|
||||
// Where `foo_module_tracker` is a type tracker that tracks references to the `foo` module.
|
||||
// Because named imports are modelled as `AttrRead`s, the statement `from foo import bar as baz`
|
||||
// is interpreted as if it was an assignment `baz = foo.bar`, which means `baz` gets tracked as a
|
||||
// reference to `foo.bar`, as desired.
|
||||
result.asCfgNode().getNode() = any(ImportExpr i | i.getAnImportedModuleName() = name)
|
||||
}
|
||||
|
||||
@@ -15,7 +15,7 @@ private module Flask {
|
||||
/** Gets a reference to the `flask` module. */
|
||||
DataFlow::Node flask(DataFlow::TypeTracker t) {
|
||||
t.start() and
|
||||
result = DataFlow::importModule("flask")
|
||||
result = DataFlow::importNode("flask")
|
||||
or
|
||||
exists(DataFlow::TypeTracker t2 | result = flask(t2).track(t2, t))
|
||||
}
|
||||
@@ -27,7 +27,7 @@ private module Flask {
|
||||
/** Gets a reference to the `flask.request` object. */
|
||||
DataFlow::Node request(DataFlow::TypeTracker t) {
|
||||
t.start() and
|
||||
result = DataFlow::importMember("flask", "request")
|
||||
result = DataFlow::importNode("flask.request")
|
||||
or
|
||||
t.startInAttr("request") and
|
||||
result = flask()
|
||||
|
||||
@@ -17,7 +17,7 @@ private module Stdlib {
|
||||
/** Gets a reference to the `os` module. */
|
||||
private DataFlow::Node os(DataFlow::TypeTracker t) {
|
||||
t.start() and
|
||||
result = DataFlow::importModule("os")
|
||||
result = DataFlow::importNode("os")
|
||||
or
|
||||
exists(DataFlow::TypeTracker t2 | result = os(t2).track(t2, t))
|
||||
}
|
||||
@@ -42,10 +42,10 @@ private module Stdlib {
|
||||
"path"] and
|
||||
(
|
||||
t.start() and
|
||||
result = DataFlow::importMember("os", attr_name)
|
||||
result = DataFlow::importNode("os." + attr_name)
|
||||
or
|
||||
t.startInAttr(attr_name) and
|
||||
result = DataFlow::importModule("os")
|
||||
result = DataFlow::importNode("os")
|
||||
)
|
||||
or
|
||||
// Due to bad performance when using normal setup with `os_attr(t2, attr_name).track(t2, t)`
|
||||
@@ -85,7 +85,7 @@ private module Stdlib {
|
||||
/** Gets a reference to the `os.path.join` function. */
|
||||
private DataFlow::Node join(DataFlow::TypeTracker t) {
|
||||
t.start() and
|
||||
result = DataFlow::importMember("os.path", "join")
|
||||
result = DataFlow::importNode("os.path.join")
|
||||
or
|
||||
t.startInAttr("join") and
|
||||
result = os::path()
|
||||
@@ -190,7 +190,7 @@ private module Stdlib {
|
||||
/** Gets a reference to the `subprocess` module. */
|
||||
private DataFlow::Node subprocess(DataFlow::TypeTracker t) {
|
||||
t.start() and
|
||||
result = DataFlow::importModule("subprocess")
|
||||
result = DataFlow::importNode("subprocess")
|
||||
or
|
||||
exists(DataFlow::TypeTracker t2 | result = subprocess(t2).track(t2, t))
|
||||
}
|
||||
@@ -208,10 +208,10 @@ private module Stdlib {
|
||||
attr_name in ["Popen", "call", "check_call", "check_output", "run"] and
|
||||
(
|
||||
t.start() and
|
||||
result = DataFlow::importMember("subprocess", attr_name)
|
||||
result = DataFlow::importNode("subprocess." + attr_name)
|
||||
or
|
||||
t.startInAttr(attr_name) and
|
||||
result = DataFlow::importModule("subprocess")
|
||||
result = subprocess()
|
||||
)
|
||||
or
|
||||
// Due to bad performance when using normal setup with `subprocess_attr(t2, attr_name).track(t2, t)`
|
||||
|
||||
Reference in New Issue
Block a user