mirror of
https://github.com/github/codeql.git
synced 2026-06-12 08:21:09 +02:00
Model instance-attribute type flow
Use a field level step like JS and Ruby.
This commit is contained in:
committed by
Owen Mansel-Chan
parent
a4585d8d94
commit
73bc2d70ae
@@ -172,6 +172,8 @@ module TypeTrackingInput implements Shared::TypeTrackingInput<Location> {
|
||||
/** Holds if there is a level step from `nodeFrom` to `nodeTo`, which does not depend on the call graph. */
|
||||
predicate levelStepNoCall(Node nodeFrom, LocalSourceNode nodeTo) {
|
||||
TypeTrackerSummaryFlow::levelStepNoCall(nodeFrom, nodeTo)
|
||||
or
|
||||
localFieldStep(nodeFrom, nodeTo)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -317,6 +319,51 @@ module TypeTrackingInput implements Shared::TypeTrackingInput<Location> {
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `ref` accesses attribute `attr` of `self`, where `self` is the first
|
||||
* parameter of an instance method of `cls` (i.e. an access of the form `self.attr`).
|
||||
*
|
||||
* Static methods and class methods are excluded, since their first parameter is not a
|
||||
* `self` instance reference.
|
||||
*/
|
||||
private predicate selfAttrRef(Class cls, string attr, DataFlowPublic::AttrRef ref) {
|
||||
exists(Function method, Name selfUse |
|
||||
method = cls.getAMethod() and
|
||||
not DataFlowDispatch::isStaticmethod(method) and
|
||||
not DataFlowDispatch::isClassmethod(method) and
|
||||
selfUse.getVariable() = method.getArg(0).(Name).getVariable() and
|
||||
ref.getObject().asCfgNode().getNode() = selfUse and
|
||||
ref.mayHaveAttributeName(attr)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `nodeFrom` is written to attribute `self.attr` in some instance method of a
|
||||
* class, and `nodeTo` reads attribute `self.attr` in some (possibly different) instance
|
||||
* method of the same class.
|
||||
*
|
||||
* This models flow through instance attributes (`self.foo`): a value stored into
|
||||
* `self.foo` in one method can be read from `self.foo` in another method. Type-tracking
|
||||
* handles the store and read steps via `AttrWrite`/`AttrRead`, but on its own it cannot
|
||||
* relate the `self` of the writing method to the `self` of the reading method. Following
|
||||
* the approach used for Ruby and JavaScript, we model this directly as a level step from
|
||||
* the written value to the read reference, for any pair of methods on the class (not
|
||||
* just from `__init__`).
|
||||
*
|
||||
* This is an over-approximation: it is instance-insensitive (it does not distinguish
|
||||
* between different instances of the same class) and order-insensitive (it does not
|
||||
* require the write to happen before the read), matching the precision of
|
||||
* instance-attribute handling for Ruby and JavaScript.
|
||||
*/
|
||||
private predicate localFieldStep(Node nodeFrom, LocalSourceNode nodeTo) {
|
||||
exists(Class cls, string attr, DataFlowPublic::AttrWrite write, DataFlowPublic::AttrRead read |
|
||||
selfAttrRef(cls, attr, write) and
|
||||
nodeFrom = write.getValue() and
|
||||
selfAttrRef(cls, attr, read) and
|
||||
nodeTo = read
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if data can flow from `node1` to `node2` in a way that discards call contexts.
|
||||
*/
|
||||
|
||||
@@ -151,10 +151,10 @@ class MyClass2(object):
|
||||
self.foo = tracked # $ tracked=foo tracked
|
||||
|
||||
def print_foo(self): # $ MISSING: tracked=foo
|
||||
print(self.foo) # $ MISSING: tracked=foo tracked
|
||||
print(self.foo) # $ tracked MISSING: tracked=foo
|
||||
|
||||
def possibly_uncalled_method(self): # $ MISSING: tracked=foo
|
||||
print(self.foo) # $ MISSING: tracked=foo tracked
|
||||
print(self.foo) # $ tracked MISSING: tracked=foo
|
||||
|
||||
instance = MyClass2()
|
||||
print(instance.foo) # $ MISSING: tracked=foo tracked
|
||||
|
||||
@@ -11,11 +11,10 @@ cursor.close()
|
||||
|
||||
# Connection stored in a class attribute (`self._conn`) and used in another method.
|
||||
#
|
||||
# This is currently NOT detected: the `Connection::instance()`/`execute()` predicates in
|
||||
# PEP249.qll are based on type tracking, which cannot follow a value that is stored into a
|
||||
# `self` attribute in one method and read from a `self` attribute in another method (see the
|
||||
# `MISSING` markers below). Regular (global) data flow handles this case correctly, so the
|
||||
# limitation is specific to the type-tracking-based modeling.
|
||||
# This is detected because type tracking includes a level step modelling flow through
|
||||
# instance attributes: a value written to `self._conn` in one method (here `__init__`) can
|
||||
# be read back from `self._conn` (directly or via a getter) in any other method on the same
|
||||
# class. This follows the same approach used for instance fields in Ruby and JavaScript.
|
||||
class Database:
|
||||
def __init__(self):
|
||||
self._conn = dbapi.connect(address="hostname", port=300, user="username")
|
||||
@@ -26,10 +25,10 @@ class Database:
|
||||
def run_via_getter(self):
|
||||
conn = self.get_connection()
|
||||
cursor = conn.cursor()
|
||||
cursor.execute("getter sql") # $ MISSING: getSql="getter sql"
|
||||
cursor.execute("getter sql") # $ getSql="getter sql"
|
||||
|
||||
def run_direct(self):
|
||||
self._conn.execute("direct sql") # $ MISSING: getSql="direct sql"
|
||||
self._conn.execute("direct sql") # $ getSql="direct sql"
|
||||
|
||||
|
||||
db = Database()
|
||||
|
||||
Reference in New Issue
Block a user