diff --git a/python/ql/src/Classes/ConflictingAttributesInBaseClasses.py b/python/ql/src/Classes/ConflictingAttributesInBaseClasses.py new file mode 100644 index 00000000000..295e06e86e9 --- /dev/null +++ b/python/ql/src/Classes/ConflictingAttributesInBaseClasses.py @@ -0,0 +1,18 @@ + +class TCPServer(object): + + def process_request(self, request, client_address): + self.do_work(request, client_address) + self.shutdown_request(request) + + +class ThreadingMixIn: + """Mix-in class to handle each request in a new thread.""" + + def process_request(self, request, client_address): + """Start a new thread to process the request.""" + t = threading.Thread(target = self.do_work, args = (request, client_address)) + t.daemon = self.daemon_threads + t.start() + +class ThreadingTCPServer(ThreadingMixIn, TCPServer): pass diff --git a/python/ql/src/Classes/ConflictingAttributesInBaseClasses.qhelp b/python/ql/src/Classes/ConflictingAttributesInBaseClasses.qhelp new file mode 100644 index 00000000000..315ffae585f --- /dev/null +++ b/python/ql/src/Classes/ConflictingAttributesInBaseClasses.qhelp @@ -0,0 +1,59 @@ + + + + +

+When a class subclasses multiple base classes, attribute lookup is performed from left to right amongst the base classes. +This form of attribute lookup is called "method resolution order" and is a solution to the +diamond inheritance problem where several base classes +override a method in a shared superclass. +

+

+Unfortunately, this means that if more than one base class defines the same attribute, the leftmost base class will effectively override +the attribute of the rightmost base class, even though the leftmost base class is not a subclass of the rightmost base class. +Unless the methods in question are designed for inheritance, using super, this implicit overriding may not be the desired behavior. +Even if it is the desired behavior, it makes the code hard to understand and maintain. +

+ +
+ +

There are a number of ways that might be used to address this issue: +

+ +
+ + +

+In this example the class ThreadingTCPServer inherits from ThreadingMixIn and from TCPServer. +However, both these classes implement process_request which means that ThreadingTCPServer will inherit +process_request from ThreadingMixIn. Consequently, the implementation of process_request in TCPServer +will be ignored, which may not be the correct behavior. +

+ + +

+This can be fixed either by overriding the method, as shown in class ThreadingTCPServerOverriding +or by ensuring that the +functionality provided by the two base classes does not overlap, as shown in class ThreadingTCPServerChangedHierarchy. +

+ + + +
+ + +
  • Python Language Reference: Data model.
  • +
  • Python releases: The Python 2.3 Method Resolution Order.
  • +
  • Wikipedia: C3 linearization.
  • +
  • Wikipedia: Composition over inheritance.
  • + + +
    +
    diff --git a/python/ql/src/Classes/ConflictingAttributesInBaseClasses.ql b/python/ql/src/Classes/ConflictingAttributesInBaseClasses.ql new file mode 100644 index 00000000000..44be7ac9157 --- /dev/null +++ b/python/ql/src/Classes/ConflictingAttributesInBaseClasses.ql @@ -0,0 +1,57 @@ +/** + * @name Conflicting attributes in base classes + * @description When a class subclasses multiple base classes and more than one base class defines the same attribute, attribute overriding may result in unexpected behavior by instances of this class. + * @kind problem + * @tags reliability + * maintainability + * modularity + * @problem.severity warning + * @sub-severity low + * @precision high + * @id py/conflicting-attributes + */ + +import python + +predicate does_nothing(PyFunctionObject f) { + not exists(Stmt s | s.getScope() = f.getFunction() | + not s instanceof Pass and not ((ExprStmt)s).getValue() = f.getFunction().getDocString() + ) +} + +/* If a method performs a super() call then it is OK as the 'overridden' method will get called */ +predicate calls_super(FunctionObject f) { + exists(Call sup, Call meth, Attribute attr, GlobalVariable v | + meth.getScope() = f.getFunction() and + meth.getFunc() = attr and + attr.getObject() = sup and + attr.getName() = f.getName() and + sup.getFunc() = v.getAnAccess() and + v.getId() = "super" + ) +} + +/** Holds if the given name is white-listed for some reason */ +predicate whitelisted(string name) { + /* The standard library specifically recommends this :( + * See https://docs.python.org/3/library/socketserver.html#asynchronous-mixins */ + name = "process_request" +} + +from ClassObject c, ClassObject b1, ClassObject b2, string name, +int i1, int i2, Object o1, Object o2 +where c.getBaseType(i1) = b1 and +c.getBaseType(i2) = b2 and +i1 < i2 and o1 != o2 and +o1 = b1.lookupAttribute(name) and +o2 = b2.lookupAttribute(name) and +not name.matches("\\_\\_%\\_\\_") and +not calls_super(o1) and +not does_nothing(o2) and +not 
whitelisted(name) and +not o1.overrides(o2) and +not o2.overrides(o1) and +not c.declaresAttribute(name) + +select c, "Base classes have conflicting values for attribute '" + name + "': $@ and $@.", o1, o1.toString(), o2, o2.toString() + diff --git a/python/ql/src/Classes/ConflictingAttributesInBaseClasses_Fixed.py b/python/ql/src/Classes/ConflictingAttributesInBaseClasses_Fixed.py new file mode 100644 index 00000000000..c106064f2a8 --- /dev/null +++ b/python/ql/src/Classes/ConflictingAttributesInBaseClasses_Fixed.py @@ -0,0 +1,24 @@ + +#Fixed by overriding. This does not change behavior, but makes it explicit and comprehensible. +class ThreadingTCPServerOverriding(ThreadingMixIn, TCPServer): + + def process_request(self, request, client_address): + #process_request forwards to do_work, so it is OK to call ThreadingMixIn.process_request directly + ThreadingMixIn.process_request(self, request, client_address) + + +#Fixed by separating threading functionality from request handling. +class ThreadingMixIn: + """Mix-in class to help with threads.""" + + def do_job_in_thread(self, job, args): + """Start a new thread to do the job""" + t = threading.Thread(target = job, args = args) + t.start() + +class ThreadingTCPServerChangedHierarchy(ThreadingMixIn, TCPServer): + + def process_request(self, request, client_address): + """Start a new thread to process the request.""" + self.do_job_in_thread(self.do_work, (request, client_address)) + diff --git a/python/ql/src/Classes/DefineEqualsWhenAddingAttributes.py b/python/ql/src/Classes/DefineEqualsWhenAddingAttributes.py new file mode 100644 index 00000000000..e8ae058309f --- /dev/null +++ b/python/ql/src/Classes/DefineEqualsWhenAddingAttributes.py @@ -0,0 +1,40 @@ +class Point(object): + + def __init__(self, x, y): + self._x = x + self._y = y + + def __repr__(self): + return 'Point(%r, %r)' % (self._x, self._y) + + def __eq__(self, other): + if not isinstance(other, Point): + return False + return self._x == other._x and 
self._y == other._y + +class ColorPoint(Point): + + def __init__(self, x, y, color): + Point.__init__(self, x, y) + self._color = color + + def __repr__(self): + return 'ColorPoint(%r, %r, %r)' % (self._x, self._y, self._color) + +#ColorPoint(0, 0, Red) == ColorPoint(0, 0, Green) should be False, but is True. + +#Fixed version +class ColorPoint(Point): + + def __init__(self, x, y, color): + Point.__init__(self, x, y) + self._color = color + + def __repr__(self): + return 'ColorPoint(%r, %r, %r)' % (self._x, self._y, self._color) + + def __eq__(self, other): + if not isinstance(other, ColorPoint): + return False + return Point.__eq__(self, other) and self._color == other._color + diff --git a/python/ql/src/Classes/DefineEqualsWhenAddingAttributes.qhelp b/python/ql/src/Classes/DefineEqualsWhenAddingAttributes.qhelp new file mode 100644 index 00000000000..0260c6456e6 --- /dev/null +++ b/python/ql/src/Classes/DefineEqualsWhenAddingAttributes.qhelp @@ -0,0 +1,37 @@ + + + + +

    A class that defines attributes that are not present in its superclasses +may need to override the __eq__() method (__ne__() +should also be defined).

    + +

    Adding additional attributes without overriding __eq__() means +that the additional attributes will not be accounted for in equality tests.

    + + +
    + + +

    Override the __eq__ method.

    + + +
    + +

    In the following example the ColorPoint +class subclasses the Point class and adds a new attribute, +but does not override the __eq__ method. +

    + + + + +
    + + +
  • Peter Grogono, Philip Santas: Equality in Object Oriented Languages
  • + +
    +
    diff --git a/python/ql/src/Classes/DefineEqualsWhenAddingAttributes.ql b/python/ql/src/Classes/DefineEqualsWhenAddingAttributes.ql new file mode 100644 index 00000000000..5b80f2fd7bf --- /dev/null +++ b/python/ql/src/Classes/DefineEqualsWhenAddingAttributes.ql @@ -0,0 +1,52 @@ +/** + * @name __eq__ not overridden when adding attributes + * @description When adding new attributes to instances of a class, equality for that class needs to be defined. + * @kind problem + * @tags reliability + * correctness + * @problem.severity warning + * @sub-severity high + * @precision high + * @id py/missing-equals + */ + +import python +import semmle.python.SelfAttribute +import Equality + +predicate class_stores_to_attribute(ClassObject cls, SelfAttributeStore store, string name) { + exists(FunctionObject f | f = cls.declaredAttribute(_) and store.getScope() = f.getFunction() and store.getName() = name) and + /* Exclude classes used as metaclasses */ + not cls.getASuperType() = theTypeType() +} + +predicate should_override_eq(ClassObject cls, Object base_eq) { + not cls.declaresAttribute("__eq__") and + exists(ClassObject sup | sup = cls.getABaseType() and sup.declaredAttribute("__eq__") = base_eq | + not exists(GenericEqMethod eq | eq.getScope() = sup.getPyClass()) and + not exists(IdentityEqMethod eq | eq.getScope() = sup.getPyClass()) and + not base_eq.(FunctionObject).getFunction() instanceof IdentityEqMethod and + not base_eq = theObjectType().declaredAttribute("__eq__") + ) +} + +/** Does the non-overridden __eq__ method access the attribute, + * which implies that the __eq__ method does not need to be overridden. 
+ */ +predicate superclassEqExpectsAttribute(ClassObject cls, PyFunctionObject base_eq, string attrname) { + not cls.declaresAttribute("__eq__") and + exists(ClassObject sup | sup = cls.getABaseType() and sup.declaredAttribute("__eq__") = base_eq | + exists(SelfAttributeRead store | + store.getName() = attrname | + store.getScope() = base_eq.getFunction() + ) + ) +} + +from ClassObject cls, SelfAttributeStore store, Object base_eq +where class_stores_to_attribute(cls, store, _) and should_override_eq(cls, base_eq) and +/* Don't report overridden unittest.TestCase. -- TestCase overrides __eq__, but subclasses do not really need to. */ +not cls.getASuperType().getName() = "TestCase" and +not superclassEqExpectsAttribute(cls, base_eq, store.getName()) + +select cls, "The class '" + cls.getName() + "' does not override $@, but adds the new attribute $@.", base_eq, "'__eq__'", store, store.getName() diff --git a/python/ql/src/Classes/Equality.qll b/python/ql/src/Classes/Equality.qll new file mode 100644 index 00000000000..5f7648fafc4 --- /dev/null +++ b/python/ql/src/Classes/Equality.qll @@ -0,0 +1,71 @@ +import python + + +private Attribute dictAccess(LocalVariable var) { + result.getName() = "__dict__" and + result.getObject() = var.getAnAccess() +} + +private Call getattr(LocalVariable obj, LocalVariable attr) { + result.getFunc().(Name).getId() = "getattr" and + result.getArg(0) = obj.getAnAccess() and + result.getArg(1) = attr.getAnAccess() +} + +/** A generic equality method that compares all attributes in its dict, + * or compares attributes using `getattr`. 
*/ +class GenericEqMethod extends Function { + + GenericEqMethod() { + this.getName() = "__eq__" and + exists(LocalVariable self, LocalVariable other | + self.getAnAccess() = this.getArg(0) and self.getId() = "self" and + other.getAnAccess() = this.getArg(1) and + exists(Compare eq | + eq.getOp(0) instanceof Eq or + eq.getOp(0) instanceof NotEq | + // `self.__dict__ == other.__dict__` + eq.getAChildNode() = dictAccess(self) and + eq.getAChildNode() = dictAccess(other) + or + // `getattr(self, var) == getattr(other, var)` + exists(Variable var | + eq.getAChildNode() = getattr(self, var) and + eq.getAChildNode() = getattr(other, var) + ) + ) + ) + } +} + +/** An __eq__ method that just does self is other */ +class IdentityEqMethod extends Function { + + IdentityEqMethod() { + this.getName() = "__eq__" and + exists(LocalVariable self, LocalVariable other | + self.getAnAccess() = this.getArg(0) and self.getId() = "self" and + other.getAnAccess() = this.getArg(1) and + exists(Compare eq | eq.getOp(0) instanceof Is | + eq.getAChildNode() = self.getAnAccess() and + eq.getAChildNode() = other.getAnAccess() + ) + ) + } + +} + +/** An (in)equality method that delegates to its complement */ +class DelegatingEqualityMethod extends Function { + + DelegatingEqualityMethod() { + exists(Return ret, UnaryExpr not_, Compare comp, Cmpop op, Parameter p0, Parameter p1 | + ret.getScope() = this and + ret.getValue() = not_ and + not_.getOp() instanceof Not and not_.getOperand() = comp and + comp.compares(p0.getVariable().getAnAccess(), op, p1.getVariable().getAnAccess()) | + this.getName() = "__eq__" and op instanceof NotEq or + this.getName() = "__ne__" and op instanceof Eq + ) + } +} diff --git a/python/ql/src/Classes/EqualsOrHash.py b/python/ql/src/Classes/EqualsOrHash.py new file mode 100644 index 00000000000..e89c75b30ad --- /dev/null +++ b/python/ql/src/Classes/EqualsOrHash.py @@ -0,0 +1,52 @@ +# Incorrect: equality method defined but class contains no hash method +class 
Point(object): + + def __init__(self, x, y): + self._x = x + self._y = y + + def __repr__(self): + return 'Point(%r, %r)' % (self._x, self._y) + + def __eq__(self, other): + if not isinstance(other, Point): + return False + return self._x == other._x and self._y == other._y + + +# Improved: equality and hash method defined (inequality method still missing) +class PointUpdated(object): + + def __init__(self, x, y): + self._x = x + self._y = y + + def __repr__(self): + return 'PointUpdated(%r, %r)' % (self._x, self._y) + + def __eq__(self, other): + if not isinstance(other, PointUpdated): + return False + return self._x == other._x and self._y == other._y + + def __hash__(self): + return hash(self._x) ^ hash(self._y) + +# Improved: equality method defined and class instances made unhashable +class UnhashablePoint(object): + + def __init__(self, x, y): + self._x = x + self._y = y + + def __repr__(self): + return 'UnhashablePoint(%r, %r)' % (self._x, self._y) + + def __eq__(self, other): + if not isinstance(other, UnhashablePoint): + return False + return self._x == other._x and self._y == other._y + + #Tell the interpreter that instances of this class cannot be hashed + __hash__ = None + diff --git a/python/ql/src/Classes/EqualsOrHash.qhelp b/python/ql/src/Classes/EqualsOrHash.qhelp new file mode 100644 index 00000000000..28579a095f7 --- /dev/null +++ b/python/ql/src/Classes/EqualsOrHash.qhelp @@ -0,0 +1,46 @@ + + + + +

    In order to conform to the object model, classes that define their own equality method should also +define their own hash method, or be unhashable. If the hash method is not defined then the hash of the +super class is used. This is unlikely to result in the expected behavior.

    + +

    A class can be made unhashable by setting its __hash__ attribute to None.

    + +

    In Python 3, if you define a class-level equality method and omit a __hash__ method +then the class is automatically marked as unhashable.

    + +
    + + +

    When you define an __eq__ method for a class, remember to implement a __hash__ method or set +__hash__ = None.

    + +
    + +

    In the following example the Point class defines an equality method but +no hash method. If hash is called on an instance of this class then the hash method defined for object +is used. This is unlikely to give the required behavior. The PointUpdated class +is better as it defines both an equality and a hash method. +If Point is not intended to be used in dicts or sets, then it could be defined as +UnhashablePoint below. +

    +

    +To comply fully with the object model this class should also define an inequality method (identified +by a separate rule).

    + + + +
    + + + +
  • Python Language Reference: object.__hash__.
  • +
  • Python Glossary: hashable.
  • + + +
    +
    diff --git a/python/ql/src/Classes/EqualsOrHash.ql b/python/ql/src/Classes/EqualsOrHash.ql new file mode 100644 index 00000000000..a0ff96c6eae --- /dev/null +++ b/python/ql/src/Classes/EqualsOrHash.ql @@ -0,0 +1,46 @@ +/** + * @name Inconsistent equality and hashing + * @description Defining equality for a class without also defining hashability (or vice-versa) violates the object model. + * @kind problem + * @tags reliability + * correctness + * external/cwe/cwe-581 + * @problem.severity warning + * @sub-severity high + * @precision very-high + * @id py/equals-hash-mismatch + */ + +import python + +FunctionObject defines_equality(ClassObject c, string name) { + (name = "__eq__" or major_version() = 2 and name = "__cmp__") + and + result = c.declaredAttribute(name) +} + +FunctionObject implemented_method(ClassObject c, string name) { + result = defines_equality(c, name) + or + result = c.declaredAttribute("__hash__") and name = "__hash__" +} + +string unimplemented_method(ClassObject c) { + not exists(defines_equality(c, _)) and + (result = "__eq__" and major_version() = 3 or major_version() = 2 and result = "__eq__ or __cmp__") + or + /* Python 3 automatically makes classes unhashable if __eq__ is defined, but __hash__ is not */ + not c.declaresAttribute(result) and result = "__hash__" and major_version() = 2 +} + +predicate violates_hash_contract(ClassObject c, string present, string missing, Object method) { + not c.unhashable() and + missing = unimplemented_method(c) and + method = implemented_method(c, present) and + not c.unknowableAttributes() +} + +from ClassObject c, string present, string missing, FunctionObject method +where violates_hash_contract(c, present, missing, method) and +exists(c.getPyClass()) // Suppress results that aren't from source +select method, "Class $@ implements " + present + " but does not define " + missing + ".", c, c.getName() diff --git a/python/ql/src/Classes/EqualsOrNotEquals.py b/python/ql/src/Classes/EqualsOrNotEquals.py 
new file mode 100644 index 00000000000..7e1ece7685c --- /dev/null +++ b/python/ql/src/Classes/EqualsOrNotEquals.py @@ -0,0 +1,32 @@ +class PointOriginal(object): + + def __init__(self, x, y): + self._x = x + self._y = y + + def __repr__(self): + return 'PointOriginal(%r, %r)' % (self._x, self._y) + + def __eq__(self, other): # Incorrect: equality is defined but inequality is not + if not isinstance(other, PointOriginal): + return False + return self._x == other._x and self._y == other._y + + +class PointUpdated(object): + + def __init__(self, x, y): + self._x = x + self._y = y + + def __repr__(self): + return 'PointUpdated(%r, %r)' % (self._x, self._y) + + def __eq__(self, other): + if not isinstance(other, PointUpdated): + return False + return self._x == other._x and self._y == other._y + + def __ne__(self, other): # Improved: equality and inequality method defined (hash method still missing) + return not self == other + diff --git a/python/ql/src/Classes/EqualsOrNotEquals.qhelp b/python/ql/src/Classes/EqualsOrNotEquals.qhelp new file mode 100644 index 00000000000..c49f3d2529e --- /dev/null +++ b/python/ql/src/Classes/EqualsOrNotEquals.qhelp @@ -0,0 +1,37 @@ + + + + +

    In order to conform to the object model, classes should define either no equality methods, or both +an equality and an inequality method. If only one of __eq__ or __ne__ is +defined then the method from the super class is used. This is unlikely to result in the expected +behavior.

    + +
    + + +

    When you define an equality or an inequality method for a class, remember to implement both an +__eq__ method and an __ne__ method.

    + +
    + +

    In the following example the PointOriginal class defines an equality method but +no inequality method. If this class is tested for inequality in Python 2 then the default identity comparison is used, so two points that are equal according to __eq__ can still compare as unequal. The +PointUpdated class is better as it defines both an equality and an inequality method. To +comply fully with the object model this class should also define a hash method (identified by +a separate rule).

    + + + +
    + + + +
  • Python Language Reference: object.__ne__, +Comparisons.
  • + + +
    +
    diff --git a/python/ql/src/Classes/EqualsOrNotEquals.ql b/python/ql/src/Classes/EqualsOrNotEquals.ql new file mode 100644 index 00000000000..9674030a64c --- /dev/null +++ b/python/ql/src/Classes/EqualsOrNotEquals.ql @@ -0,0 +1,50 @@ +/** + * @name Inconsistent equality and inequality + * @description Defining only an equality method or an inequality method for a class violates the object model. + * @kind problem + * @tags reliability + * correctness + * @problem.severity warning + * @sub-severity high + * @precision very-high + * @id py/inconsistent-equality + */ + +import python +import Equality + +string equals_or_ne() { + result = "__eq__" or result = "__ne__" +} + +predicate total_ordering(Class cls) { + exists(Attribute a | a = cls.getADecorator() | + a.getName() = "total_ordering") + or + exists(Name n | n = cls.getADecorator() | + n.getId() = "total_ordering") +} + +FunctionObject implemented_method(ClassObject c, string name) { + result = c.declaredAttribute(name) and name = equals_or_ne() +} + +string unimplemented_method(ClassObject c) { + not c.declaresAttribute(result) and result = equals_or_ne() +} + +predicate violates_equality_contract(ClassObject c, string present, string missing, FunctionObject method) { + missing = unimplemented_method(c) and + method = implemented_method(c, present) and + not c.unknowableAttributes() and + not total_ordering(c.getPyClass()) and + /* Python 3 automatically implements __ne__ if __eq__ is defined, but not vice-versa */ + not (major_version() = 3 and present = "__eq__" and missing = "__ne__") and + not method.getFunction() instanceof DelegatingEqualityMethod and + not c.lookupAttribute(missing).(FunctionObject).getFunction() instanceof DelegatingEqualityMethod +} + +from ClassObject c, string present, string missing, FunctionObject method +where violates_equality_contract(c, present, missing, method) + +select method, "Class $@ implements " + present + " but does not implement " + missing + ".", c, c.getName() 
diff --git a/python/ql/src/Classes/IncompleteOrdering.py b/python/ql/src/Classes/IncompleteOrdering.py new file mode 100644 index 00000000000..78b306880b0 --- /dev/null +++ b/python/ql/src/Classes/IncompleteOrdering.py @@ -0,0 +1,6 @@ +class IncompleteOrdering(object): + def __init__(self, i): + self.i = i + + def __lt__(self, other): + return self.i < other.i \ No newline at end of file diff --git a/python/ql/src/Classes/IncompleteOrdering.qhelp b/python/ql/src/Classes/IncompleteOrdering.qhelp new file mode 100644 index 00000000000..7983985ccee --- /dev/null +++ b/python/ql/src/Classes/IncompleteOrdering.qhelp @@ -0,0 +1,35 @@ + + + +

    A class that implements an ordering operator +(__lt__, __gt__, __le__ or __ge__) should implement +all four in order that ordering between two objects is consistent and obeys the usual mathematical rules. +If the ordering is inconsistent with default equality, then __eq__ and __ne__ +should also be implemented. +

    + +
    + +

    Ensure that all four ordering comparisons are implemented as well as __eq__ and +__ne__ if required.

    + +

    It is not necessary to manually implement all four comparisons; +the functools.total_ordering class decorator can be used instead.

    + +
    + +

    In this example only the __lt__ operator has been implemented which could lead to +inconsistent behavior. __gt__, __le__, __ge__, and in this case, +__eq__ and __ne__ should be implemented.

    + + +
    + + +
  • Python Language Reference: Rich comparisons in Python.
  • + + +
    +
    diff --git a/python/ql/src/Classes/IncompleteOrdering.ql b/python/ql/src/Classes/IncompleteOrdering.ql new file mode 100644 index 00000000000..03c2ddf390c --- /dev/null +++ b/python/ql/src/Classes/IncompleteOrdering.ql @@ -0,0 +1,75 @@ +/** + * @name Incomplete ordering + * @description Class defines one or more ordering method but does not define all 4 ordering comparison methods + * @kind problem + * @tags reliability + * correctness + * @problem.severity warning + * @sub-severity low + * @precision very-high + * @id py/incomplete-ordering + */ + +import python + +predicate total_ordering(Class cls) { + exists(Attribute a | a = cls.getADecorator() | + a.getName() = "total_ordering") + or + exists(Name n | n = cls.getADecorator() | + n.getId() = "total_ordering") +} + +string ordering_name(int n) { + result = "__lt__" and n = 1 or + result = "__le__" and n = 2 or + result = "__gt__" and n = 3 or + result = "__ge__" and n = 4 +} + +predicate overrides_ordering_method(ClassObject c, string name) { + name = ordering_name(_) and + ( + c.declaresAttribute(name) + or + exists(ClassObject sup | + sup = c.getASuperType() and not sup = theObjectType() | + sup.declaresAttribute(name) + ) + ) +} + +string unimplemented_ordering(ClassObject c, int n) { + not c = theObjectType() and + not overrides_ordering_method(c, result) and + result = ordering_name(n) +} + +string unimplemented_ordering_methods(ClassObject c, int n) { + n = 0 and result = "" and exists(unimplemented_ordering(c, _)) + or + exists(string prefix, int nm1 | + n = nm1 + 1 and prefix = unimplemented_ordering_methods(c, nm1) | + prefix = "" and result = unimplemented_ordering(c, n) + or + result = prefix and not exists(unimplemented_ordering(c, n)) and n < 5 + or + prefix != "" and result = prefix + " or " + unimplemented_ordering(c, n) + ) +} + +Object ordering_method(ClassObject c, string name) { + /* If class doesn't declare a method then don't blame this class (the superclass will be blamed). 
*/ + name = ordering_name(_) and result = c.declaredAttribute(name) +} + +from ClassObject c, Object ordering, string name +where not c.unknowableAttributes() and +not total_ordering(c.getPyClass()) +and ordering = ordering_method(c, name) and +exists(unimplemented_ordering(c, _)) + +select c, "Class " + c.getName() + " implements $@, but does not implement " + unimplemented_ordering_methods(c, 4) + ".", +ordering, name + + diff --git a/python/ql/src/Classes/InconsistentMRO.py b/python/ql/src/Classes/InconsistentMRO.py new file mode 100644 index 00000000000..767782b25c7 --- /dev/null +++ b/python/ql/src/Classes/InconsistentMRO.py @@ -0,0 +1,6 @@ +class X(object): + def __init__(self): + print("X") +class Y(object,X): + def __init__(self): + print("Y") \ No newline at end of file diff --git a/python/ql/src/Classes/InconsistentMRO.qhelp b/python/ql/src/Classes/InconsistentMRO.qhelp new file mode 100644 index 00000000000..4c06a058a16 --- /dev/null +++ b/python/ql/src/Classes/InconsistentMRO.qhelp @@ -0,0 +1,30 @@ + + + +

    Python 2.2 introduced new-style classes (classes inheriting from object). Since Python 2.3, new-style classes use +the C3 linearization method to determine a method resolution ordering (MRO) for each class. The C3 +linearization method ensures that for a class C, if a class C1 precedes a class C2 in the MRO of C +then C1 also precedes C2 in the MRO of all subclasses of C. It is possible to create a +situation where it is impossible to achieve this consistency, in which case a +TypeError is raised at runtime when the class is defined.

    + +
    + +

    Use a class hierarchy that is not ambiguous.

    + +
    + +

    The MRO of class X is just X, object. The program will fail when the MRO +of class Y needs to be calculated because object precedes X in +the definition of Y but the opposite is true in the MRO of X.

    + + +
    + + +
  • Python: The Python 2.3 Method Resolution Order.
  • + +
    +
    diff --git a/python/ql/src/Classes/InconsistentMRO.ql b/python/ql/src/Classes/InconsistentMRO.ql new file mode 100644 index 00000000000..08b1016086c --- /dev/null +++ b/python/ql/src/Classes/InconsistentMRO.ql @@ -0,0 +1,27 @@ +/** + * @name Inconsistent method resolution order + * @description Class definition will raise a type error at runtime due to inconsistent method resolution order(MRO) + * @kind problem + * @tags reliability + * correctness + * @problem.severity error + * @sub-severity high + * @precision very-high + * @id py/inconsistent-mro + */ + +import python + +ClassObject left_base(ClassObject type, ClassObject base) { + exists(int i | i > 0 and type.getBaseType(i) = base and result = type.getBaseType(i-1)) +} + +predicate invalid_mro(ClassObject t, ClassObject left, ClassObject right) { + t.isNewStyle() and + left = left_base(t, right) and left = right.getAnImproperSuperType() +} + +from ClassObject t, ClassObject left, ClassObject right +where invalid_mro(t, left, right) +select t, "Construction of class " + t.getName() + " can fail due to invalid method resolution order(MRO) for bases $@ and $@.", +left, left.getName(), right, right.getName() \ No newline at end of file diff --git a/python/ql/src/Classes/InitCallsSubclassMethod.py b/python/ql/src/Classes/InitCallsSubclassMethod.py new file mode 100644 index 00000000000..6e0dedb0142 --- /dev/null +++ b/python/ql/src/Classes/InitCallsSubclassMethod.py @@ -0,0 +1,48 @@ +#Superclass __init__ calls subclass method + +class Super(object): + + def __init__(self, arg): + self._state = "Not OK" + self.set_up(arg) + self._state = "OK" + + def set_up(self, arg): + "Do some set up" + +class Sub(Super): + + def __init__(self, arg): + Super.__init__(self, arg) + self.important_state = "OK" + + def set_up(self, arg): + Super.set_up(self, arg) + "Do some more set up" # Dangerous as self._state is "Not OK" + + +#Improved version with inheritance: + +class Super(object): + + def __init__(self, arg): + 
self._state = "Not OK" + self.super_set_up(arg) + self._state = "OK" + + def super_set_up(self, arg): + "Do some set up" + + +class Sub(Super): + + def __init__(self, arg): + Super.__init__(self, arg) + self.sub_set_up(arg) + self.important_state = "OK" + + + def sub_set_up(self, arg): + "Do some more set up" + + diff --git a/python/ql/src/Classes/InitCallsSubclassMethod.qhelp b/python/ql/src/Classes/InitCallsSubclassMethod.qhelp new file mode 100644 index 00000000000..72904a0bd29 --- /dev/null +++ b/python/ql/src/Classes/InitCallsSubclassMethod.qhelp @@ -0,0 +1,42 @@ + + + +

    +When an instance of a class is initialized, the super-class state should be +fully initialized before it becomes visible to the subclass. +Calling methods of the subclass in the superclass' __init__ +method violates this important invariant. +

    + +
    + + +

    Do not call methods that can be overridden by a subclass during the construction of an object. +For simpler cases move the initialization into the superclass' __init__ method, +preventing it from being overridden. Additional initialization of the subclass should +be done in the __init__ method of the subclass. +For more complex cases, it is advisable to use a static method or function to manage +object creation. +

    + +

    Alternatively, avoid inheritance altogether using composition instead.

    + +
    + + + + + + + +
  • CERT Secure Coding: +Rule MET05-J. Although this is a Java rule it applies to most object-oriented languages.
  • +
  • Python Standard Library: Static methods.
  • +
  • Wikipedia: Composition over inheritance.
  • + + + +
    +
    diff --git a/python/ql/src/Classes/InitCallsSubclassMethod.ql b/python/ql/src/Classes/InitCallsSubclassMethod.ql new file mode 100644 index 00000000000..5a191d861bf --- /dev/null +++ b/python/ql/src/Classes/InitCallsSubclassMethod.ql @@ -0,0 +1,35 @@ +/** + * @name __init__ method calls overridden method + * @description Calling a method from __init__ that is overridden by a subclass may result in a partially + * initialized instance being observed. + * @kind problem + * @tags reliability + * correctness + * @problem.severity warning + * @sub-severity low + * @precision high + * @id py/init-calls-subclass + */ + +import python + + +from ClassObject supercls, string method, Call call, + FunctionObject overriding, FunctionObject overridden + +where +exists(FunctionObject init, SelfAttribute sa | + supercls.declaredAttribute("__init__") = init and + call.getScope() = init.getFunction() and call.getFunc() = sa | + sa.getName() = method and + overridden = supercls.declaredAttribute(method) and + overriding.overrides(overridden) +) + +select call, "Call to self.$@ in __init__ method, which is overridden by $@.", + overridden, method, + overriding, overriding.descriptiveString() + + + + diff --git a/python/ql/src/Classes/MaybeUndefinedClassAttribute.py b/python/ql/src/Classes/MaybeUndefinedClassAttribute.py new file mode 100644 index 00000000000..56ef9805196 --- /dev/null +++ b/python/ql/src/Classes/MaybeUndefinedClassAttribute.py @@ -0,0 +1,25 @@ +class Spam: + + def __init__(self): + self.spam = 'spam, spam, spam' + + def set_eggs(self, eggs): + self.eggs = eggs + + def __str__(self): + return '%s and %s' % (self.spam, self.eggs) # Maybe uninitialized attribute 'eggs' + +#Fixed version + +class Spam: + + def __init__(self): + self.spam = 'spam, spam, spam' + self.eggs = None + + def set_eggs(self, eggs): + self.eggs = eggs + + def __str__(self): + return '%s and %s' % (self.spam, self.eggs) # OK + diff --git a/python/ql/src/Classes/MaybeUndefinedClassAttribute.qhelp 
b/python/ql/src/Classes/MaybeUndefinedClassAttribute.qhelp new file mode 100644 index 00000000000..d4be086c535 --- /dev/null +++ b/python/ql/src/Classes/MaybeUndefinedClassAttribute.qhelp @@ -0,0 +1,32 @@ + + + + + +

    A possibly non-existent attribute of self is accessed in a method. +The attribute is set in another method of the class, but may be uninitialized if the +method that uses the attribute is called before the one that sets it. +This may result in an AttributeError at run time. +

    + +
    + + +

    Ensure that all attributes are initialized in the __init__ method.

    + + +
    + + + + + + + +
  • Python Standard Library: exception AttributeError. +
  • + +
    +
    diff --git a/python/ql/src/Classes/MaybeUndefinedClassAttribute.ql b/python/ql/src/Classes/MaybeUndefinedClassAttribute.ql new file mode 100644 index 00000000000..234cf9e2973 --- /dev/null +++ b/python/ql/src/Classes/MaybeUndefinedClassAttribute.ql @@ -0,0 +1,40 @@ +/** + * @name Maybe undefined class attribute + * @description Accessing an attribute of 'self' that is not initialized in the __init__ method may cause an AttributeError at runtime + * @kind problem + * @tags reliability + * correctness + * @problem.severity error + * @sub-severity low + * @precision low + * @id py/maybe-undefined-attribute + */ + +import python +import semmle.python.SelfAttribute + +predicate guarded_by_other_attribute(SelfAttributeRead a, CheckClass c) { + c.sometimesDefines(a.getName()) and + exists(SelfAttributeRead guard, If i | + i.contains(a) and + c.assignedInInit(guard.getName()) | + i.getTest() = guard + or + i.getTest().contains(guard) + ) +} + + +predicate maybe_undefined_class_attribute(SelfAttributeRead a, CheckClass c) { + c.sometimesDefines(a.getName()) and + not c.alwaysDefines(a.getName()) and + c.interestingUndefined(a) and + not guarded_by_other_attribute(a, c) +} + +from Attribute a, ClassObject c, SelfAttributeStore sa +where maybe_undefined_class_attribute(a, c) and +sa.getClass() = c.getPyClass() and sa.getName() = a.getName() +select a, "Attribute '" + a.getName() + +"' is not defined in the class body nor in the __init__() method, but it is defined $@", sa, "here" + diff --git a/python/ql/src/Classes/MethodCallOrder.qll b/python/ql/src/Classes/MethodCallOrder.qll new file mode 100644 index 00000000000..fe6ef07266a --- /dev/null +++ b/python/ql/src/Classes/MethodCallOrder.qll @@ -0,0 +1,67 @@ +import python + +// Helper predicates for multiple call to __init__/__del__ queries. 
+ +pragma [noinline] +private predicate multiple_invocation_paths(FunctionInvocation top, FunctionInvocation i1, FunctionInvocation i2, FunctionObject multi) { + i1 != i2 and + i1 = top.getACallee+() and + i2 = top.getACallee+() and + i1.getFunction() = multi and + i2.getFunction() = multi +} + +/** Holds if `self.name` calls `multi` by multiple paths, and thus calls it more than once. */ +predicate multiple_calls_to_superclass_method(ClassObject self, FunctionObject multi, string name) { + exists(FunctionInvocation top, FunctionInvocation i1, FunctionInvocation i2 | + multiple_invocation_paths(top, i1, i2, multi) and + top.runtime(self.declaredAttribute(name)) and + self.getASuperType().declaredAttribute(name) = multi | + /* Only called twice if called from different functions, + * or if one call-site can reach the other */ + i1.getCall().getScope() != i2.getCall().getScope() + or + i1.getCall().strictlyReaches(i2.getCall()) + ) +} + +/** Holds if `cls` declares an attribute called `name` that is not a `FunctionObject`, so it cannot be inferred to be a method. */ +private predicate named_attributes_not_method(ClassObject cls, string name) { + cls.declaresAttribute(name) and not cls.declaredAttribute(name) instanceof FunctionObject +} + +/** Holds if `f` actually does something. */ +private predicate does_something(FunctionObject f) { + f.isBuiltin() and not f = theObjectType().lookupAttribute("__init__") + or + exists(Stmt s | s = f.getFunction().getAStmt() and not s instanceof Pass) +} + +/** Holds if `meth` contains a call to an attribute called `name` that no `FunctionObject` has as one of its calls (presumably an unresolved call). */ +private predicate missing_call(FunctionObject meth, string name) { + exists(CallNode call, AttrNode attr | + call.getScope() = meth.getFunction() and + call.getFunction() = attr and + attr.getName() = name and + not exists(FunctionObject f | f.getACall() = call) + ) +} + +/** Holds if `self.name` does not call `missing`, even though it is expected to. 
*/ +predicate missing_call_to_superclass_method(ClassObject self, FunctionObject top, FunctionObject missing, string name) { + missing = self.getASuperType().declaredAttribute(name) and + top = self.lookupAttribute(name) and + /* There is no call to missing originating from top */ + not top.getACallee*() = missing and + /* Make sure that all named 'methods' are objects that we can understand. */ + not exists(ClassObject sup | + sup = self.getAnImproperSuperType() and + named_attributes_not_method(sup, name) + ) and + not self.isAbstract() + and + does_something(missing) + and + not missing_call(top, name) +} + diff --git a/python/ql/src/Classes/MissingCallToDel.py b/python/ql/src/Classes/MissingCallToDel.py new file mode 100644 index 00000000000..37520071b3a --- /dev/null +++ b/python/ql/src/Classes/MissingCallToDel.py @@ -0,0 +1,26 @@ + +class Vehicle(object): + + def __del__(self): + recycle(self.base_parts) + +class Car(Vehicle): + + def __del__(self): + recycle(self.car_parts) + Vehicle.__del__(self) + +#Car.__del__ is missed out. +class SportsCar(Car, Vehicle): + + def __del__(self): + recycle(self.sports_car_parts) + Vehicle.__del__(self) + +#Fix SportsCar by calling Car.__del__ +class FixedSportsCar(Car, Vehicle): + + def __del__(self): + recycle(self.sports_car_parts) + Car.__del__(self) + diff --git a/python/ql/src/Classes/MissingCallToDel.qhelp b/python/ql/src/Classes/MissingCallToDel.qhelp new file mode 100644 index 00000000000..864ddd1b56b --- /dev/null +++ b/python/ql/src/Classes/MissingCallToDel.qhelp @@ -0,0 +1,50 @@ + + + + +

    Python, unlike statically typed languages such as Java, allows complete freedom when calling methods during object destruction. +However, standard object-oriented principles apply to Python classes using deep inheritance hierarchies. +Therefore the developer has responsibility for ensuring that objects are properly cleaned up when +there are multiple __del__ methods that need to be called. +

    +

    +If the __del__ method of a superclass is not called during object destruction it is likely that +resources may be leaked. +

    + +

    A call to the __del__ method of a superclass during object destruction may be omitted: +

    + + + +
    + +

    Either be careful to explicitly call the __del__ of the correct base class, or +use super() throughout the inheritance hierarchy.

    + +

    Alternatively refactor one or more of the classes to use composition rather than inheritance.

    + + +
    + +

    In this example, explicit calls to __del__ are used, but SportsCar erroneously calls +Vehicle.__del__. This is fixed in FixedSportsCar by calling Car.__del__. +

    + + + +
    + + +
  • Python Tutorial: Classes.
  • +
  • Python Standard Library: super.
  • +
  • Artima Developer: Things to Know About Python Super.
  • +
  • Wikipedia: Composition over inheritance.
  • + +
    +
    diff --git a/python/ql/src/Classes/MissingCallToDel.ql b/python/ql/src/Classes/MissingCallToDel.ql new file mode 100644 index 00000000000..d08c8399669 --- /dev/null +++ b/python/ql/src/Classes/MissingCallToDel.ql @@ -0,0 +1,26 @@ +/** + * @name Missing call to __del__ during object destruction + * @description An omitted call to a super-class __del__ method may lead to class instances not being cleaned up properly. + * @kind problem + * @tags efficiency + * correctness + * @problem.severity error + * @sub-severity low + * @precision high + * @id py/missing-call-to-delete + */ + +import python +import MethodCallOrder + + +from ClassObject self, FunctionObject missing + +where + missing_call_to_superclass_method(self, _, missing, "__del__") and + not missing.neverReturns() and + not self.failedInference() and + not missing.isBuiltin() +select self, "Class " + self.getName() + " may not be cleaned up properly as $@ is not called during deletion.", +missing, missing.descriptiveString() + diff --git a/python/ql/src/Classes/MissingCallToInit.py b/python/ql/src/Classes/MissingCallToInit.py new file mode 100644 index 00000000000..1b3e0e3aee5 --- /dev/null +++ b/python/ql/src/Classes/MissingCallToInit.py @@ -0,0 +1,26 @@ + +class Vehicle(object): + + def __init__(self): + self.mobile = True + +class Car(Vehicle): + + def __init__(self): + Vehicle.__init__(self) + self.car_init() + +#Car.__init__ is missed out. +class SportsCar(Car, Vehicle): + + def __init__(self): + Vehicle.__init__(self) + self.sports_car_init() + +#Fix SportsCar by calling Car.__init__ +class FixedSportsCar(Car, Vehicle): + + def __init__(self): + Car.__init__(self) + self.sports_car_init() + diff --git a/python/ql/src/Classes/MissingCallToInit.qhelp b/python/ql/src/Classes/MissingCallToInit.qhelp new file mode 100644 index 00000000000..31ad3d693a5 --- /dev/null +++ b/python/ql/src/Classes/MissingCallToInit.qhelp @@ -0,0 +1,52 @@ + + + + +

    Python, unlike statically typed languages such as Java, allows complete freedom when calling methods during object initialization. +However, standard object-oriented principles apply to Python classes using deep inheritance hierarchies. +Therefore the developer has responsibility for ensuring that objects are properly initialized when +there are multiple __init__ methods that need to be called. +

    +

    +If the __init__ method of a superclass is not called during object initialization it is likely that +the object will end up in an incorrect state. +

    + +

    A call to the __init__ method of a superclass during object initialization may be omitted: +

    + + + +
    + +

    Either be careful to explicitly call the __init__ of the correct base class, or +use super() throughout the inheritance hierarchy.

    + +

    Alternatively refactor one or more of the classes to use composition rather than inheritance.

    + + +
    + +

    In this example, explicit calls to __init__ are used, but SportsCar erroneously calls +Vehicle.__init__. This is fixed in FixedSportsCar by calling Car.__init__. +

    + + + +
    + + +
  • Python Tutorial: Classes.
  • +
  • Python Standard Library: super.
  • +
  • Artima Developer: Things to Know About Python Super.
  • +
  • Wikipedia: Composition over inheritance.
  • + +
    +
    diff --git a/python/ql/src/Classes/MissingCallToInit.ql b/python/ql/src/Classes/MissingCallToInit.ql new file mode 100644 index 00000000000..ad137f817f4 --- /dev/null +++ b/python/ql/src/Classes/MissingCallToInit.ql @@ -0,0 +1,28 @@ +/** + * @name Missing call to __init__ during object initialization + * @description An omitted call to a super-class __init__ method may lead to objects of this class not being fully initialized. + * @kind problem + * @tags reliability + * correctness + * @problem.severity error + * @sub-severity low + * @precision high + * @id py/missing-call-to-init + */ + +import python +import MethodCallOrder + +from ClassObject self, FunctionObject initializer, FunctionObject missing + +where + self.lookupAttribute("__init__") = initializer and + missing_call_to_superclass_method(self, initializer, missing, "__init__") and + // If a superclass is incorrect, don't flag this class as well. + not missing_call_to_superclass_method(self.getASuperType(), _, missing, "__init__") and + not missing.neverReturns() and + not self.failedInference() and + not missing.isBuiltin() and + not self.isAbstract() +select self, "Class " + self.getName() + " may not be initialized properly as $@ is not called from its $@.", +missing, missing.descriptiveString(), initializer, "__init__ method" \ No newline at end of file diff --git a/python/ql/src/Classes/MutatingDescriptor.py b/python/ql/src/Classes/MutatingDescriptor.py new file mode 100644 index 00000000000..eb73aace928 --- /dev/null +++ b/python/ql/src/Classes/MutatingDescriptor.py @@ -0,0 +1,40 @@ + +#This is prone to strange side effects and race conditions. +class MutatingDescriptor(object): + + def __init__(self, func): + self.my_func = func + + def __get__(self, obj, obj_type): + #Modified state is visible to all instances of C that might call "show". 
+ self.my_obj = obj + return self + + def __call__(self, *args): + return self.my_func(self.my_obj, *args) + +def show(obj): + print (obj) + +class C(object): + + def __init__(self, value): + self.value = value + + def __str__(self): + return ("C: " + str(self.value)) + + show = MutatingDescriptor(show) + +c1 = C(1) +c1.show() +c2 = C(2) +c2.show() +c1_show = c1.show +c2.show +c1_show() + +#Outputs: +#C: 1 +#C: 2 +#C: 2 \ No newline at end of file diff --git a/python/ql/src/Classes/MutatingDescriptor.qhelp b/python/ql/src/Classes/MutatingDescriptor.qhelp new file mode 100644 index 00000000000..a424c47dfb0 --- /dev/null +++ b/python/ql/src/Classes/MutatingDescriptor.qhelp @@ -0,0 +1,42 @@ + + + + +

    The descriptor protocol allows user programmable attribute access. +The descriptor protocol is what enables class methods, static methods, properties and super(). +

    + +

    +Descriptor objects are class attributes which control the behavior of instance attributes. Consequently, a single descriptor +is common to all instances of a class and should not be mutated when instance attributes are accessed. +

    + + +
    + + +

    Do not mutate the descriptor object, rather create a new object that contains the necessary state.

    + + +
    + +

    In this example the descriptor class MutatingDescriptor stores a reference to obj in an attribute. +

    + +

    In the following example, the descriptor class NonMutatingDescriptor returns a new object every time __get__ +is called. + +

    + +
    + + +
  • Python Language Reference: Implementing Descriptors.
  • +
  • Mark Lutz. Learning Python, Section 30.6: Methods Are Objects: Bound or Unbound. O'Reilly 2013.
  • +
  • A real world example: NumPy Issue 5247.
  • + + +
    +
    diff --git a/python/ql/src/Classes/MutatingDescriptor.ql b/python/ql/src/Classes/MutatingDescriptor.ql new file mode 100644 index 00000000000..328c1fe86ab --- /dev/null +++ b/python/ql/src/Classes/MutatingDescriptor.ql @@ -0,0 +1,28 @@ +/** + * @name Mutation of descriptor in __get__ or __set__ method. + * @description Descriptor objects can be shared across many instances. Mutating them can cause strange side effects or race conditions. + * @kind problem + * @tags reliability + * correctness + * @problem.severity error + * @sub-severity low + * @precision very-high + * @id py/mutable-descriptor + */ + +import python + +predicate mutates_descriptor(ClassObject cls, SelfAttributeStore s) { + cls.isDescriptorType() and + exists(PyFunctionObject f | + cls.lookupAttribute(_) = f and + not f.getName() = "__init__" and + s.getScope() = f.getFunction() + ) +} + +from ClassObject cls, SelfAttributeStore s +where +mutates_descriptor(cls, s) + +select s, "Mutation of descriptor $@ object may lead to action-at-a-distance effects or race conditions for properties.", cls, cls.getName() \ No newline at end of file diff --git a/python/ql/src/Classes/MutatingDescriptorFixed.py b/python/ql/src/Classes/MutatingDescriptorFixed.py new file mode 100644 index 00000000000..113a3e69d35 --- /dev/null +++ b/python/ql/src/Classes/MutatingDescriptorFixed.py @@ -0,0 +1,37 @@ +import types + +#Immutable version, which is safe to share. +class NonMutatingDescriptor(object): + + def __init__(self, func): + self.my_func = func + + def __get__(self, obj, obj_type): + #Return a new object to each access. 
+ return types.MethodType(self.my_func, obj) + +def show(obj): + print (obj) + +class C(object): + + def __init__(self, value): + self.value = value + + def __str__(self): + return ("C: " + str(self.value)) + + show = NonMutatingDescriptor(show) + +c1 = C(1) +c1.show() +c2 = C(2) +c2.show() +c1_show = c1.show +c2.show +c1_show() + +#Outputs: +#C: 1 +#C: 2 +#C: 1 \ No newline at end of file diff --git a/python/ql/src/Classes/OverwritingAttributeInSuperClass.py b/python/ql/src/Classes/OverwritingAttributeInSuperClass.py new file mode 100644 index 00000000000..4322093634e --- /dev/null +++ b/python/ql/src/Classes/OverwritingAttributeInSuperClass.py @@ -0,0 +1,35 @@ + +#Attribute set in both superclass and subclass +class C(object): + + def __init__(self): + self.var = 0 + +class D(C): + + def __init__(self): + self.var = 1 # self.var will be overwritten + C.__init__(self) + +class E(object): + + def __init__(self): + self.var = 0 # self.var will be overwritten + +class F(E): + + def __init__(self): + E.__init__(self) + self.var = 1 + +#Fixed version -- Pass explicitly as a parameter +class G(object): + + def __init__(self, var = 0): + self.var = var + +class H(G): + + def __init__(self): + G.__init__(self, 1) + diff --git a/python/ql/src/Classes/OverwritingAttributeInSuperClass.qhelp b/python/ql/src/Classes/OverwritingAttributeInSuperClass.qhelp new file mode 100644 index 00000000000..667ad35e02d --- /dev/null +++ b/python/ql/src/Classes/OverwritingAttributeInSuperClass.qhelp @@ -0,0 +1,29 @@ + + + +

    +Subclasses should not set attributes that are set in the superclass. +Doing so may violate invariants in the superclass.

    + +
    + + +

    +If you did not intend to override the attribute value set in the superclass, +then rename the subclass attribute. +If you do want to be able to set a new value for the attribute of the superclass, +then convert the superclass attribute to a property. +Otherwise the value should be passed as a parameter to the superclass +__init__ method. +

    + +
    + + + + + + +
    diff --git a/python/ql/src/Classes/OverwritingAttributeInSuperClass.ql b/python/ql/src/Classes/OverwritingAttributeInSuperClass.ql new file mode 100644 index 00000000000..bd2be2ed379 --- /dev/null +++ b/python/ql/src/Classes/OverwritingAttributeInSuperClass.ql @@ -0,0 +1,71 @@ +/** + * @name Overwriting attribute in super-class or sub-class + * @description Assignment to self attribute overwrites attribute previously defined in subclass or superclass __init__ method. + * @kind problem + * @tags reliability + * maintainability + * modularity + * @problem.severity warning + * @sub-severity low + * @precision medium + * @id py/overwritten-inherited-attribute + */ + +import python + + +class InitCallStmt extends ExprStmt { + + InitCallStmt() { + exists(Call call, Attribute attr | call = this.getValue() and attr = call.getFunc() | + attr.getName() = "__init__") + } + +} + +predicate overwrites_which(Function subinit, AssignStmt write_attr, string which) { + write_attr.getScope() = subinit and self_write_stmt(write_attr, _) and + exists(Stmt top | top.contains(write_attr) or top = write_attr | + (exists(int i, int j, InitCallStmt call | call.getScope() = subinit | i > j and top = subinit.getStmt(i) and call = subinit.getStmt(j) and which = "superclass") + or + exists(int i, int j, InitCallStmt call | call.getScope() = subinit | i < j and top = subinit.getStmt(i) and call = subinit.getStmt(j) and which = "subclass") + ) + ) +} + +predicate self_write_stmt(Stmt s, string attr) { + exists(Attribute a, Name self | self = a.getObject() and s.contains(a) and self.getId() = "self" and a.getCtx() instanceof Store and a.getName() = attr) +} + +predicate both_assign_attribute(Stmt s1, Stmt s2, Function f1, Function f2) { + exists(string name | s1.getScope() = f1 and s2.getScope() = f2 and self_write_stmt(s1, name) and self_write_stmt(s2, name)) +} + +predicate attribute_overwritten(AssignStmt overwrites, AssignStmt overwritten, string name, string classtype, string classname) 
+{ + exists(FunctionObject superinit, FunctionObject subinit, ClassObject superclass, ClassObject subclass, AssignStmt subattr, AssignStmt superattr | + (classtype = "superclass" and classname = superclass.getName() and overwrites = subattr and overwritten = superattr or + classtype = "subclass" and classname = subclass.getName() and overwrites = superattr and overwritten = subattr) + and + /* OK if overwritten in subclass and is a class attribute */ + (not exists(superclass.declaredAttribute(name)) or classtype = "subclass") + and + superclass.declaredAttribute("__init__") = superinit + and + subclass.declaredAttribute("__init__") = subinit + and + superclass = subclass.getASuperType() + and + overwrites_which(subinit.getFunction(), subattr, classtype) + and + both_assign_attribute(subattr, superattr, subinit.getFunction(), superinit.getFunction()) + and + self_write_stmt(superattr, name) + ) +} + + +from string classtype, AssignStmt overwrites, AssignStmt overwritten, string name, string classname +where attribute_overwritten(overwrites, overwritten, name, classtype, classname) + +select overwrites, "Assignment overwrites attribute " + name + ", which was previously defined in " + classtype + " $@.", overwritten, classname diff --git a/python/ql/src/Classes/PropertyInOldStyleClass.py b/python/ql/src/Classes/PropertyInOldStyleClass.py new file mode 100644 index 00000000000..73ab9ac7ae2 --- /dev/null +++ b/python/ql/src/Classes/PropertyInOldStyleClass.py @@ -0,0 +1,42 @@ + +class OldStyle: + + def __init__(self, x): + self._x = x + + # Incorrect: 'OldStyle' is not a new-style class and '@property' is not supported + @property + def x(self): + return self._x + + +class InheritOldStyle(OldStyle): + + def __init__(self, x): + self._x = x + + # Incorrect: 'InheritOldStyle' is not a new-style class and '@property' is not supported + @property + def x(self): + return self._x + + +class NewStyle(object): + + def __init__(self, x): + self._x = x + + # Correct: 'NewStyle' 
is a new-style class and '@property' is supported + @property + def x(self): + return self._x + +class InheritNewStyle(NewStyle): + + def __init__(self, x): + self._x = x + + # Correct: 'InheritNewStyle' inherits from a new-style class and '@property' is supported + @property + def x(self): + return self._x diff --git a/python/ql/src/Classes/PropertyInOldStyleClass.qhelp b/python/ql/src/Classes/PropertyInOldStyleClass.qhelp new file mode 100644 index 00000000000..1236cb785c9 --- /dev/null +++ b/python/ql/src/Classes/PropertyInOldStyleClass.qhelp @@ -0,0 +1,43 @@ + + + + + +

    Property descriptors are only supported for the new-style classes that were introduced in Python +2.2. Property descriptors should only be used in new-style classes.

    + +
    + + +

    If you want to define properties in a class, then ensure that the class is a new-style class. You can +convert an old-style class to a new-style class by inheriting from object.

    + +
    + +

    In the following example all the classes attempt to set a property for x. However, only +the third and fourth classes are new-style classes. Consequently, the x +property is only available for the NewStyle and InheritNewStyle classes.

    + +

    If you define the OldStyle class as inheriting from a new-style class, then the x + property would be available for both the OldStyle and InheritOldStyle classes.

    + + + + +
    + + +
  • Python Glossary: New-style class.
  • +
  • Python Language Reference: New-style and classic +classes, + +Descriptors.
  • +
  • Python Standard Library: Property.
  • +
  • The History of Python: +Inside story on new-style classes.
  • + + +
    +
    diff --git a/python/ql/src/Classes/PropertyInOldStyleClass.ql b/python/ql/src/Classes/PropertyInOldStyleClass.ql new file mode 100644 index 00000000000..fb2c822a573 --- /dev/null +++ b/python/ql/src/Classes/PropertyInOldStyleClass.ql @@ -0,0 +1,17 @@ +/** + * @name Property in old-style class + * @description Using property descriptors in old-style classes does not work from Python 2.1 onward. + * @kind problem + * @tags portability + * correctness + * @problem.severity error + * @sub-severity low + * @precision very-high + * @id py/property-in-old-style-class + */ + +import python + +from PropertyObject prop, ClassObject cls +where cls.declaredAttribute(_) = prop and not cls.failedInference() and not cls.isNewStyle() +select prop, "Property " + prop.getName() + " will not work properly, as class " + cls.getName() + " is an old-style class." diff --git a/python/ql/src/Classes/ShouldBeContextManager.py b/python/ql/src/Classes/ShouldBeContextManager.py new file mode 100644 index 00000000000..94e890192e3 --- /dev/null +++ b/python/ql/src/Classes/ShouldBeContextManager.py @@ -0,0 +1,33 @@ +class remotelock(object): # Resources can be released using __del__ + + def __init__(self, repo): + self.repo = repo + + def release(self): + self.repo.unlock() + self.repo = None + + def __del__(self): + if self.repo: + self.release() + + +class remotelock2(object): # Resources can be released using a with statement + + def __init__(self, repo): + self.repo = repo + + def __enter__(self): + return self + + def release(self): + self.repo.unlock() + self.repo = None + + def __del__(self): + if self.repo: + self.release() + + def __exit__(self, exct_type, exce_value, traceback): + if self.repo: + self.release() diff --git a/python/ql/src/Classes/ShouldBeContextManager.qhelp b/python/ql/src/Classes/ShouldBeContextManager.qhelp new file mode 100644 index 00000000000..7c497688499 --- /dev/null +++ b/python/ql/src/Classes/ShouldBeContextManager.qhelp @@ -0,0 +1,42 @@ + + + +

    If a class has a close() or similar method to release resources, then it +should be made a context manager. Using a context manager allows instances of the class to be used +in the with statement, improving code size and readability. This is a simpler and more +reliable method than implementing just a __del__ method. +

    + +
    + +

    The context manager requires an __enter__ and an __exit__ method:

    + + +
    + +

    The following example shows how a class definition that implements __del__ can be +updated to use a context manager.

    + + + + +
    + + +
  • Effbot: Python with statement.
  • +
  • Python Standard Library: Context manager +.
  • +
  • Python Language Reference: +With Statement Context Managers.
  • +
  • Python PEP 343: The "with" Statement.
  • + +
    +
    diff --git a/python/ql/src/Classes/ShouldBeContextManager.ql b/python/ql/src/Classes/ShouldBeContextManager.ql new file mode 100644 index 00000000000..d7394728935 --- /dev/null +++ b/python/ql/src/Classes/ShouldBeContextManager.ql @@ -0,0 +1,19 @@ +/** + * @name Class should be a context manager + * @description Making a class a context manager allows instances to be used in a 'with' statement. + * This improves resource handling and code readability. + * @kind problem + * @tags maintainability + * readability + * convention + * @problem.severity recommendation + * @sub-severity high + * @precision medium + * @id py/should-be-context-manager + */ + +import python + +from ClassObject c +where not c.isC() and not c.isContextManager() and exists(c.declaredAttribute("__del__")) +select c, "Class " + c.getName() + " implements __del__ (presumably to release some resource). Consider making it a context manager." diff --git a/python/ql/src/Classes/SlotsInOldStyleClass.py b/python/ql/src/Classes/SlotsInOldStyleClass.py new file mode 100644 index 00000000000..67858d9fd4e --- /dev/null +++ b/python/ql/src/Classes/SlotsInOldStyleClass.py @@ -0,0 +1,20 @@ +class Point: + + __slots__ = [ '_x', '_y' ] # Incorrect: 'Point' is an old-style class. + # No slots are created. + # Instances of Point have an attribute dictionary. + + def __init__(self, x, y): + self._x = x + self._y = y + + +class Point2(object): + + __slots__ = [ '_x', '_y' ] # Correct: 'Point2' is an new-style class + # Two slots '_x' and '_y' are created. + # Instances of Point2 have no attribute dictionary. + + def __init__(self, x, y): + self._x = x + self._y = y diff --git a/python/ql/src/Classes/SlotsInOldStyleClass.qhelp b/python/ql/src/Classes/SlotsInOldStyleClass.qhelp new file mode 100644 index 00000000000..eb7208d6257 --- /dev/null +++ b/python/ql/src/Classes/SlotsInOldStyleClass.qhelp @@ -0,0 +1,37 @@ + + + + +

    The ability to override the class dictionary using a __slots__ declaration +is supported only by new-style classes. When you add a __slots__ declaration to an +old-style class it just creates a class attribute called '__slots__'.

    + +
    + + +

    If you want to override the dictionary for a class, then ensure that the class is a new-style class. +You can convert an old-style class to a new-style class by inheriting from object.

    + +
    + +

    In the following example the Point class is an old-style class (no inheritance). The +__slots__ declaration in this class creates a class attribute called '__slots__'; the class +dictionary is unaffected. The Point2 class is a new-style class so the +__slots__ declaration causes special compact attributes to be created for each name in +the slots list and saves space by not creating attribute dictionaries.

    + + + +
    + + +
  • Python Glossary: New-style class.
  • +
  • Python Language Reference: New-style and classic +classes, + __slots__.
  • + + +
    +
    diff --git a/python/ql/src/Classes/SlotsInOldStyleClass.ql b/python/ql/src/Classes/SlotsInOldStyleClass.ql new file mode 100644 index 00000000000..78b9bd18f29 --- /dev/null +++ b/python/ql/src/Classes/SlotsInOldStyleClass.ql @@ -0,0 +1,18 @@ +/** + * @name '__slots__' in old-style class + * @description Overriding the class dictionary by declaring '__slots__' is not supported by old-style + * classes. + * @kind problem + * @problem.severity error + * @tags portability + * correctness + * @sub-severity low + * @precision very-high + * @id py/slots-in-old-style-class + */ + +import python + +from ClassObject c +where not c.isNewStyle() and c.declaresAttribute("__slots__") and not c.failedInference() +select c, "Using __slots__ in an old style class just creates a class attribute called '__slots__'" diff --git a/python/ql/src/Classes/SubclassShadowing.py b/python/ql/src/Classes/SubclassShadowing.py new file mode 100644 index 00000000000..617db3c58e0 --- /dev/null +++ b/python/ql/src/Classes/SubclassShadowing.py @@ -0,0 +1,17 @@ +class Mammal(object): + + def __init__(self, milk = 0): + self.milk = milk + + +class Cow(Mammal): + + def __init__(self): + Mammal.__init__(self) + + def milk(self): + return "Milk" + +#Cow().milk() will raise an error as Cow().milk is the 'milk' attribute +#set in Mammal.__init__, not the 'milk' method defined on Cow. + diff --git a/python/ql/src/Classes/SubclassShadowing.qhelp b/python/ql/src/Classes/SubclassShadowing.qhelp new file mode 100644 index 00000000000..90daa9a992a --- /dev/null +++ b/python/ql/src/Classes/SubclassShadowing.qhelp @@ -0,0 +1,27 @@ + + + +

    Subclass shadowing occurs when an instance attribute of a superclass has the +same name as a method of a subclass, or vice-versa. +The semantics of Python attribute look-up mean that the instance attribute of +the superclass hides the method in the subclass. +

    + +
    + + +

    Rename the method in the subclass or rename the attribute in the superclass.

    + +
    + +

    The following code includes an example of subclass shadowing. When you call Cow().milk() +an error is raised because Cow().milk is interpreted as the 'milk' attribute set in +Mammal.__init__, not the 'milk' method defined within Cow. This can be fixed +by changing the name of either the 'milk' attribute or the 'milk' method.

    + + + +
    +
    diff --git a/python/ql/src/Classes/SubclassShadowing.ql b/python/ql/src/Classes/SubclassShadowing.ql new file mode 100644 index 00000000000..6cdd9edf01d --- /dev/null +++ b/python/ql/src/Classes/SubclassShadowing.ql @@ -0,0 +1,40 @@ +/** + * @name Superclass attribute shadows subclass method + * @description Defining an attribute in a superclass method with a name that matches a subclass + * method, hides the method in the subclass. + * @kind problem + * @problem.severity error + * @tags maintainability + * correctness + * @sub-severity low + * @precision high + * @id py/attribute-shadows-method + */ + +/* Determine if a class defines a method that is shadowed by an attribute + defined in a super-class +*/ + +/* Need to find attributes defined in superclass (only in __init__?) */ + +import python + +predicate shadowed_by_super_class(ClassObject c, ClassObject supercls, Assign assign, FunctionObject f) +{ + c.getASuperType() = supercls and c.declaredAttribute(_) = f and + exists(FunctionObject init, Attribute attr | + supercls.declaredAttribute("__init__") = init and + attr = assign.getATarget() and + ((Name)attr.getObject()).getId() = "self" and + attr.getName() = f.getName() and + assign.getScope() = ((FunctionExpr)init.getOrigin()).getInnerScope() + ) and + /* It's OK if the super class defines the method as well. + * We assume that the original method must have been defined for a reason. 
*/ + not supercls.hasAttribute(f.getName()) +} + +from ClassObject c, ClassObject supercls, Assign assign, FunctionObject shadowed +where shadowed_by_super_class(c, supercls, assign, shadowed) +select shadowed.getOrigin(), "Method " + shadowed.getName() + " is shadowed by $@ in super class '"+ supercls.getName() + "'.", assign, "an attribute" + diff --git a/python/ql/src/Classes/SuperInOldStyleClass.py b/python/ql/src/Classes/SuperInOldStyleClass.py new file mode 100644 index 00000000000..815b9816735 --- /dev/null +++ b/python/ql/src/Classes/SuperInOldStyleClass.py @@ -0,0 +1,18 @@ +class PythonModule(_ModuleIteratorHelper): # '_ModuleIteratorHelper' and 'PythonModule' are old-style classes + + # class definitions .... + + def walkModules(self, importPackages=False): + if importPackages and self.isPackage(): + self.load() + return super(PythonModule, self).walkModules(importPackages=importPackages) # super() will fail + + +class PythonModule2(_ModuleIteratorHelper): # call to super replaced with direct call to class + + # class definitions .... + + def walkModules(self, importPackages=False): + if importPackages and self.isPackage(): + self.load() + return _ModuleIteratorHelper.walkModules(self, importPackages=importPackages) diff --git a/python/ql/src/Classes/SuperInOldStyleClass.qhelp b/python/ql/src/Classes/SuperInOldStyleClass.qhelp new file mode 100644 index 00000000000..6899c08dc21 --- /dev/null +++ b/python/ql/src/Classes/SuperInOldStyleClass.qhelp @@ -0,0 +1,38 @@ + + + + +

    The ability to access inherited methods that have been overridden in a class using super() +is supported only by new-style classes. When you use the super() function in an old-style +class it fails.

    + +
    + +

    If you want to access inherited methods using the super() built-in, then ensure that +the class is a new-style class. You can convert an old-style class to a new-style class by inheriting +from object. Alternatively, you can call the __init__ method of the superclass +directly from an old-style class using: BaseClass.__init__(...).

    + +
    + +

    In the following example, PythonModule is an old-style class as it inherits from another +old-style class. If the _ModuleIteratorHelper class cannot be converted into a new-style +class, then the call to super() must be replaced. The PythonModule2 class +demonstrates the correct way to call a superclass from an old-style class.

    + + + + +
    + + +
  • Python Glossary: New-style class.
  • +
  • Python Language Reference: New-style and classic +classes.
  • +
  • Python Standard Library: super.
  • + + +
    +
    diff --git a/python/ql/src/Classes/SuperInOldStyleClass.ql b/python/ql/src/Classes/SuperInOldStyleClass.ql new file mode 100644 index 00000000000..b6c7649a1ca --- /dev/null +++ b/python/ql/src/Classes/SuperInOldStyleClass.ql @@ -0,0 +1,22 @@ +/** + * @name 'super' in old style class + * @description Using super() to access inherited methods is not supported by old-style classes. + * @kind problem + * @tags portability + * correctness + * @problem.severity error + * @sub-severity low + * @precision very-high + * @id py/super-in-old-style + */ + +import python + +predicate uses_of_super_in_old_style_class(Call s) { + exists(Function f, ClassObject c | s.getScope() = f and f.getScope() = c.getPyClass() and not c.failedInference() and + not c.isNewStyle() and ((Name)s.getFunc()).getId() = "super") +} + +from Call c +where uses_of_super_in_old_style_class(c) +select c, "super() will not work in old-style classes" \ No newline at end of file diff --git a/python/ql/src/Classes/SuperclassDelCalledMultipleTimes.py b/python/ql/src/Classes/SuperclassDelCalledMultipleTimes.py new file mode 100644 index 00000000000..0ee6e61bcb1 --- /dev/null +++ b/python/ql/src/Classes/SuperclassDelCalledMultipleTimes.py @@ -0,0 +1,29 @@ +#Calling a method multiple times by using explicit calls when a base inherits from other base +class Vehicle(object): + + def __del__(self): + recycle(self.base_parts) + + +class Car(Vehicle): + + def __del__(self): + recycle(self.car_parts) + Vehicle.__del__(self) + + +class SportsCar(Car, Vehicle): + + # Vehicle.__del__ will get called twice + def __del__(self): + recycle(self.sports_car_parts) + Car.__del__(self) + Vehicle.__del__(self) + + +#Fix SportsCar by only calling Car.__del__ +class FixedSportsCar(Car, Vehicle): + + def __del__(self): + recycle(self.sports_car_parts) + Car.__del__(self) diff --git a/python/ql/src/Classes/SuperclassDelCalledMultipleTimes.qhelp b/python/ql/src/Classes/SuperclassDelCalledMultipleTimes.qhelp new file mode 100644 
index 00000000000..d9514b2c68c --- /dev/null +++ b/python/ql/src/Classes/SuperclassDelCalledMultipleTimes.qhelp @@ -0,0 +1,58 @@ + + + + +

    Python, unlike statically typed languages such as Java, allows complete freedom when calling methods during object destruction. +However, standard object-oriented principles apply to Python classes using deep inheritance hierarchies. +Therefore the developer has responsibility for ensuring that objects are properly cleaned up when +there are multiple __del__ methods that need to be called. +

    + +

    +Calling a __del__ method more than once during object destruction risks resources being released multiple +times. The relevant __del__ method may not be designed to be called more than once. +

    + +

    There are a number of ways that a __del__ method may be called more than once.

    + + + +
    + +

    Either be careful not to explicitly call a __del__ method more than once, or +use super() throughout the inheritance hierarchy.

    + +

    Alternatively refactor one or more of the classes to use composition rather than inheritance.

    + +
    + +

    In the first example, explicit calls to __del__ are used, but SportsCar erroneously calls +both Vehicle.__del__ and Car.__del__. +This can be fixed by removing the call to Vehicle.__del__, as shown in FixedSportsCar. +

    + + + +

    In the second example, there is a mixture of explicit calls to __del__ and calls using super(). +To fix this example, super() should be used throughout. +

    + + + +
    + + +
  • Python Tutorial: Classes.
  • +
  • Python Standard Library: super.
  • +
  • Artima Developer: Things to Know About Python Super.
  • +
  • Wikipedia: Composition over inheritance.
  • + + +
    +
    diff --git a/python/ql/src/Classes/SuperclassDelCalledMultipleTimes.ql b/python/ql/src/Classes/SuperclassDelCalledMultipleTimes.ql new file mode 100644 index 00000000000..b0e4a13469d --- /dev/null +++ b/python/ql/src/Classes/SuperclassDelCalledMultipleTimes.ql @@ -0,0 +1,27 @@ +/** + * @name Multiple calls to __del__ during object destruction + * @description A duplicated call to a super-class __del__ method may lead to class instances not be cleaned up properly. + * @kind problem + * @tags efficiency + * correctness + * @problem.severity warning + * @sub-severity high + * @precision very-high + * @id py/multiple-calls-to-delete + */ + +import python +import MethodCallOrder + + +from ClassObject self, FunctionObject multi +where +multiple_calls_to_superclass_method(self, multi, "__del__") and +not multiple_calls_to_superclass_method(self.getABaseType(), multi, "__del__") and +not exists(FunctionObject better | + multiple_calls_to_superclass_method(self, better, "__del__") and + better.overrides(multi) +) and +not self.failedInference() +select self, "Class " + self.getName() + " may not be cleaned up properly as $@ may be called multiple times during destruction.", +multi, multi.descriptiveString() diff --git a/python/ql/src/Classes/SuperclassDelCalledMultipleTimes2.py b/python/ql/src/Classes/SuperclassDelCalledMultipleTimes2.py new file mode 100644 index 00000000000..8cb1433ac0c --- /dev/null +++ b/python/ql/src/Classes/SuperclassDelCalledMultipleTimes2.py @@ -0,0 +1,32 @@ + +#Calling a method multiple times by using explicit calls when a base uses super() +class Vehicle(object): + + def __del__(self): + recycle(self.base_parts) + super(Vehicle, self).__del__() + +class Car(Vehicle): + + def __del__(self): + recycle(self.car_parts) + super(Car, self).__del__() + + +class SportsCar(Car, Vehicle): + + # Vehicle.__del__ will get called twice + def __del__(self): + recycle(self.sports_car_parts) + Car.__del__(self) + Vehicle.__del__(self) + + +#Fix SportsCar by 
using super() +class FixedSportsCar(Car, Vehicle): + + def __del__(self): + recycle(self.sports_car_parts) + super(FixedSportsCar, self).__del__() + + diff --git a/python/ql/src/Classes/SuperclassInitCalledMultipleTimes.py b/python/ql/src/Classes/SuperclassInitCalledMultipleTimes.py new file mode 100644 index 00000000000..050d5d389d6 --- /dev/null +++ b/python/ql/src/Classes/SuperclassInitCalledMultipleTimes.py @@ -0,0 +1,36 @@ +#Calling a method multiple times by using explicit calls when a base inherits from other base +class Vehicle(object): + + def __init__(self): + self.mobile = True + +class Car(Vehicle): + + def __init__(self): + Vehicle.__init__(self) + self.car_init() + + def car_init(self): + pass + +class SportsCar(Car, Vehicle): + + # Vehicle.__init__ will get called twice + def __init__(self): + Vehicle.__init__(self) + Car.__init__(self) + self.sports_car_init() + + def sports_car_init(self): + pass + +#Fix SportsCar by only calling Car.__init__ +class FixedSportsCar(Car, Vehicle): + + def __init__(self): + Car.__init__(self) + self.sports_car_init() + + def sports_car_init(self): + pass + diff --git a/python/ql/src/Classes/SuperclassInitCalledMultipleTimes.qhelp b/python/ql/src/Classes/SuperclassInitCalledMultipleTimes.qhelp new file mode 100644 index 00000000000..f1140d68b89 --- /dev/null +++ b/python/ql/src/Classes/SuperclassInitCalledMultipleTimes.qhelp @@ -0,0 +1,58 @@ + + + + +

    Python, unlike statically typed languages such as Java, allows complete freedom when calling methods during object initialization. +However, standard object-oriented principles apply to Python classes using deep inheritance hierarchies. +Therefore the developer has responsibility for ensuring that objects are properly initialized when +there are multiple __init__ methods that need to be called. +

    + +

    +Calling an __init__ method more than once during object initialization risks the object being incorrectly initialized. +It is unlikely that the relevant __init__ method is designed to be called more than once. +

    + +

    There are a number of ways that an __init__ method may be called more than once.

    + + + +
    + +

    Either be careful not to explicitly call an __init__ method more than once, or +use super() throughout the inheritance hierarchy.

    + +

    Alternatively refactor one or more of the classes to use composition rather than inheritance.

    + +
    + +

    In the first example, explicit calls to __init__ are used, but SportsCar erroneously calls +both Vehicle.__init__ and Car.__init__. +This can be fixed by removing the call to Vehicle.__init__, as shown in FixedSportsCar. +

    + + + +

    In the second example, there is a mixture of explicit calls to __init__ and calls using super(). +To fix this example, super() should be used throughout. +

    + + + +
    + + +
  • Python Tutorial: Classes.
  • +
  • Python Standard Library: super.
  • +
  • Artima Developer: Things to Know About Python Super.
  • +
  • Wikipedia: Composition over inheritance.
  • + + +
    +
    diff --git a/python/ql/src/Classes/SuperclassInitCalledMultipleTimes.ql b/python/ql/src/Classes/SuperclassInitCalledMultipleTimes.ql new file mode 100644 index 00000000000..723527e1de8 --- /dev/null +++ b/python/ql/src/Classes/SuperclassInitCalledMultipleTimes.ql @@ -0,0 +1,26 @@ +/** + * @name Multiple calls to __init__ during object initialization + * @description A duplicated call to a super-class __init__ method may lead to objects of this class not being properly initialized. + * @kind problem + * @tags reliability + * correctness + * @problem.severity warning + * @sub-severity high + * @precision very-high + * @id py/multiple-calls-to-init + */ + +import python +import MethodCallOrder + +from ClassObject self, FunctionObject multi +where multi != theObjectType().lookupAttribute("__init__") and +multiple_calls_to_superclass_method(self, multi, "__init__") and +not multiple_calls_to_superclass_method(self.getABaseType(), multi, "__init__") and +not exists(FunctionObject better | + multiple_calls_to_superclass_method(self, better, "__init__") and + better.overrides(multi) +) and +not self.failedInference() +select self, "Class " + self.getName() + " may not be initialized properly as $@ may be called multiple times during initialization.", +multi, multi.descriptiveString() diff --git a/python/ql/src/Classes/SuperclassInitCalledMultipleTimes2.py b/python/ql/src/Classes/SuperclassInitCalledMultipleTimes2.py new file mode 100644 index 00000000000..e12e86a079e --- /dev/null +++ b/python/ql/src/Classes/SuperclassInitCalledMultipleTimes2.py @@ -0,0 +1,38 @@ + +#Calling a method multiple times by using explicit calls when a base uses super() +class Vehicle(object): + + def __init__(self): + super(Vehicle, self).__init__() + self.mobile = True + +class Car(Vehicle): + + def __init__(self): + super(Car, self).__init__() + self.car_init() + + def car_init(self): + pass + +class SportsCar(Car, Vehicle): + + # Vehicle.__init__ will get called twice + def __init__(self): 
+ Vehicle.__init__(self) + Car.__init__(self) + self.sports_car_init() + + def sports_car_init(self): + pass + +#Fix SportsCar by using super() +class FixedSportsCar(Car, Vehicle): + + def __init__(self): + super(FixedSportsCar, self).__init__() + self.sports_car_init() + + def sports_car_init(self): + pass + diff --git a/python/ql/src/Classes/UndefinedClassAttribute.py b/python/ql/src/Classes/UndefinedClassAttribute.py new file mode 100644 index 00000000000..f94753eed4f --- /dev/null +++ b/python/ql/src/Classes/UndefinedClassAttribute.py @@ -0,0 +1,19 @@ +class Spam: + + def __init__(self): + self.spam = 'spam, spam, spam' + + def __str__(self): + return '%s and %s' % (self.spam, self.eggs) # Uninitialized attribute 'eggs' + +#Fixed version + +class Spam: + + def __init__(self): + self.spam = 'spam, spam, spam' + self.eggs = None + + def __str__(self): + return '%s and %s' % (self.spam, self.eggs) # OK + diff --git a/python/ql/src/Classes/UndefinedClassAttribute.qhelp b/python/ql/src/Classes/UndefinedClassAttribute.qhelp new file mode 100644 index 00000000000..46804576d6c --- /dev/null +++ b/python/ql/src/Classes/UndefinedClassAttribute.qhelp @@ -0,0 +1,33 @@ + + + + + +

    A non-existent attribute of self is accessed in a method. +An attribute is treated as non-existent if it is not a class attribute +and it is not set in any method of the class. +This may result in an AttributeError at run time. + +

    + +
    + + +

    Ensure that all attributes are initialized in the __init__ method.

    + + +
    + + + + + + + +
  • Python Standard Library: exception AttributeError.
  • + + +
    +
    diff --git a/python/ql/src/Classes/UndefinedClassAttribute.ql b/python/ql/src/Classes/UndefinedClassAttribute.ql new file mode 100644 index 00000000000..6b5a8c87804 --- /dev/null +++ b/python/ql/src/Classes/UndefinedClassAttribute.ql @@ -0,0 +1,34 @@ +/** + * @name Undefined class attribute + * @description Accessing an attribute of 'self' that is not initialized anywhere in the class in the __init__ method may cause an AttributeError at runtime + * @kind problem + * @tags reliability + * correctness + * @problem.severity error + * @sub-severity low + * @precision low + * @id py/undefined-attribute + */ + +import python +import semmle.python.SelfAttribute + +predicate undefined_class_attribute(SelfAttributeRead a, CheckClass c, int line, string name) { + name = a.getName() and + not c.sometimesDefines(name) and + c.interestingUndefined(a) and + line = a.getLocation().getStartLine() and + not attribute_assigned_in_method(c.getAMethodCalledFromInit(), name) +} + +predicate report_undefined_class_attribute(Attribute a, ClassObject c, string name) { + exists(int line | + undefined_class_attribute(a, c, line, name) and + line = min(int x | undefined_class_attribute(_, c, x, name)) + ) +} + +from Attribute a, ClassObject c, string name +where report_undefined_class_attribute(a, c, name) +select a, "Attribute '" + name + "' is not defined in either the class body or in any method" + diff --git a/python/ql/src/Classes/UselessClass.py b/python/ql/src/Classes/UselessClass.py new file mode 100644 index 00000000000..81683917610 --- /dev/null +++ b/python/ql/src/Classes/UselessClass.py @@ -0,0 +1,16 @@ +class GCDFinder(object): + def __init__(self, a, b): + self.a = a + self.b = b + + def calculate(self): + a = self.a + b = self.b + while a != 0 and b != 0: + if a > b: + a = a % b + else: + b = b % a + if a == 0: + return b + return a \ No newline at end of file diff --git a/python/ql/src/Classes/UselessClass.qhelp b/python/ql/src/Classes/UselessClass.qhelp new file mode 
100644 index 00000000000..8881aea59ef --- /dev/null +++ b/python/ql/src/Classes/UselessClass.qhelp @@ -0,0 +1,35 @@ + + + + + +

    If a class has only one public method (other than its __init__) +it should be replaced with a function. +

    + +
    + + +

    Convert the single public method into a function. +If there is an __init__ and it sets attributes on self, +then rename the __init__ method and remove the self parameter. +Make the public method an inner function and return that.

    + +

    Delete the class.

    + +
    + +

    In this example the class only has a single public method. This method does not need to be in its own +class. It should be a function on its own that takes a and b as parameters. +

    + + +
    + + +
  • Python: Classes.
  • + +
    +
    diff --git a/python/ql/src/Classes/UselessClass.ql b/python/ql/src/Classes/UselessClass.ql new file mode 100644 index 00000000000..e04ea103ad6 --- /dev/null +++ b/python/ql/src/Classes/UselessClass.ql @@ -0,0 +1,83 @@ +/** + * @name Useless class + * @description Class only defines one public method (apart from __init__ or __new__) and should be replaced by a function + * @kind problem + * @tags maintainability + * useless-code + * @problem.severity recommendation + * @sub-severity low + * @precision medium + * @id py/useless-class + */ + +import python + +predicate fewer_than_two_public_methods(Class cls, int methods) { + (methods = 0 or methods = 1) and + methods = count(Function f | f = cls.getAMethod() and not f = cls.getInitMethod()) +} + +predicate does_not_define_special_method(Class cls) { + not exists(Function f | f = cls.getAMethod() and f.isSpecialMethod()) +} + + +predicate no_inheritance(Class c) { + not exists(ClassObject cls, ClassObject other | + cls.getPyClass() = c and + other != theObjectType() | + other.getABaseType() = cls or + cls.getABaseType() = other + ) + and + not exists(Expr base | base = c.getABase() | + not base instanceof Name or ((Name)base).getId() != "object" + ) +} + +predicate is_decorated(Class c) { + exists(c.getADecorator()) +} + +predicate is_stateful(Class c) { + exists(Function method, ExprContext ctx | + method.getScope() = c and (ctx instanceof Store or ctx instanceof AugStore) | + exists(Subscript s | s.getScope() = method and s.getCtx() = ctx) + or + exists(Attribute a | a.getScope() = method and a.getCtx() = ctx) + ) + or + exists(Function method, Call call, Attribute a, string name | + method.getScope() = c and call.getScope() = method and + call.getFunc() = a and a.getName() = name | + name = "pop" or name = "remove" or name = "discard" or + name = "extend" or name = "append" + ) + +} + +predicate useless_class(Class c, int methods) { + c.isTopLevel() + and + c.isPublic() + and + no_inheritance(c) + and + 
fewer_than_two_public_methods(c, methods) + and + does_not_define_special_method(c) + and + not c.isProbableMixin() + and + not is_decorated(c) + and + not is_stateful(c) +} + +from Class c, int methods, string msg +where useless_class(c, methods) and +(methods = 1 and msg = "Class " + c.getName() + " defines only one public method, which should be replaced by a function." + or + methods = 0 and msg = "Class " + c.getName() + " defines no public methods and could be replaced with a namedtuple or dictionary." +) +select c, msg diff --git a/python/ql/src/Classes/WrongNameForArgumentInClassInstantiation.py b/python/ql/src/Classes/WrongNameForArgumentInClassInstantiation.py new file mode 100644 index 00000000000..89b909f3cc2 --- /dev/null +++ b/python/ql/src/Classes/WrongNameForArgumentInClassInstantiation.py @@ -0,0 +1,6 @@ +class Point(object): + def __init__(self, x, y): + self.x = x + self.y = y + +p = Point(x=1, yy=2) # TypeError: 'yy' is not a valid keyword argument diff --git a/python/ql/src/Classes/WrongNameForArgumentInClassInstantiation.qhelp b/python/ql/src/Classes/WrongNameForArgumentInClassInstantiation.qhelp new file mode 100644 index 00000000000..ff21b557e4b --- /dev/null +++ b/python/ql/src/Classes/WrongNameForArgumentInClassInstantiation.qhelp @@ -0,0 +1,35 @@ + + + +

    +Using a named argument whose name does not correspond to a parameter of the __init__ method of the class being instantiated, will result in a +TypeError at runtime. +

    + +
    + + +

    Check for typos in the name of the arguments and fix those. +If the name is clearly different, then this suggests a logical error. +The change required to correct the error will depend on whether the wrong argument has been +specified or whether the wrong class has been specified. +

    + +
    + + + + + + +
  • Python Glossary: Arguments.
  • +
  • Python Glossary: Parameters.
  • +
  • Python Programming FAQ: + What is the difference between arguments and parameters?.
  • +
  • The Python Language Reference: Data model: object.__init__
  • +
  • The Python Tutorial: Classes
  • + +
    +
    diff --git a/python/ql/src/Classes/WrongNameForArgumentInClassInstantiation.ql b/python/ql/src/Classes/WrongNameForArgumentInClassInstantiation.ql new file mode 100644 index 00000000000..33bf524ae94 --- /dev/null +++ b/python/ql/src/Classes/WrongNameForArgumentInClassInstantiation.ql @@ -0,0 +1,27 @@ +/** + * @name Wrong name for an argument in a class instantiation + * @description Using a named argument whose name does not correspond to a + * parameter of the __init__ method of the class being + * instantiated, will result in a TypeError at runtime. + * @kind problem + * @tags reliability + * correctness + * external/cwe/cwe-628 + * @problem.severity error + * @sub-severity low + * @precision very-high + * @id py/call/wrong-named-class-argument + */ + +import python + +import Expressions.CallArgs + + +from Call call, ClassObject cls, string name, FunctionObject init +where + illegally_named_parameter(call, cls, name) + and init = get_function_or_initializer(cls) +select + call, "Keyword argument '" + name + "' is not a supported parameter name of $@.", init, init.getQualifiedName() + diff --git a/python/ql/src/Classes/WrongNumberArgumentsInClassInstantiation.py b/python/ql/src/Classes/WrongNumberArgumentsInClassInstantiation.py new file mode 100644 index 00000000000..8e1350e4b1d --- /dev/null +++ b/python/ql/src/Classes/WrongNumberArgumentsInClassInstantiation.py @@ -0,0 +1,7 @@ +class Point(object): + def __init__(self, x, y): + self.x = x + self.y = y + +p = Point(1) # TypeError: too few arguments +p = Point(1,2,3) # TypeError: too many arguments diff --git a/python/ql/src/Classes/WrongNumberArgumentsInClassInstantiation.qhelp b/python/ql/src/Classes/WrongNumberArgumentsInClassInstantiation.qhelp new file mode 100644 index 00000000000..ca311912a78 --- /dev/null +++ b/python/ql/src/Classes/WrongNumberArgumentsInClassInstantiation.qhelp @@ -0,0 +1,45 @@ + + + +

    + A call to the __init__ method of a class must supply an argument + for each parameter that does not have a default value defined, so: +

    + +
    + +

    If there are too few arguments then check to see which arguments have been omitted and supply values for those.

    + +

    If there are too many arguments then check to see if any have been added by mistake and remove those.

    + +

    + Also check where a comma has been inserted instead of an operator or a dot. + For example, the code is obj,attr when it should be obj.attr. +

    + +

    If it is not clear which are the missing or surplus arguments, then this suggests a logical error. +The fix will then depend on the nature of the error. +

    + +
    + + + + + + +
  • Python Glossary: Arguments.
  • +
  • Python Glossary: Parameters.
  • +
  • Python Programming FAQ: + What is the difference between arguments and parameters?.
  • +
  • The Python Language Reference: Data model: object.__init__
  • +
  • The Python Tutorial: Classes
  • +
    +
    diff --git a/python/ql/src/Classes/WrongNumberArgumentsInClassInstantiation.ql b/python/ql/src/Classes/WrongNumberArgumentsInClassInstantiation.ql new file mode 100644 index 00000000000..915856319e0 --- /dev/null +++ b/python/ql/src/Classes/WrongNumberArgumentsInClassInstantiation.ql @@ -0,0 +1,25 @@ +/** + * @name Wrong number of arguments in a class instantiation + * @description Using too many or too few arguments in a call to the __init__ + * method of a class will result in a TypeError at runtime. + * @kind problem + * @tags reliability + * correctness + * external/cwe/cwe-685 + * @problem.severity error + * @sub-severity low + * @precision very-high + * @id py/call/wrong-number-class-arguments + */ + +import python +import Expressions.CallArgs + +from Call call, ClassObject cls, string too, string should, int limit, FunctionObject init +where +( + too_many_args(call, cls, limit) and too = "too many arguments" and should = "no more than " + or + too_few_args(call, cls, limit) and too = "too few arguments" and should = "no fewer than " +) and init = get_function_or_initializer(cls) +select call, "Call to $@ with " + too + "; should be " + should + limit.toString() + ".", init, init.getQualifiedName() diff --git a/python/ql/src/Exceptions/CatchingBaseException.py b/python/ql/src/Exceptions/CatchingBaseException.py new file mode 100644 index 00000000000..b51dc5f4a2e --- /dev/null +++ b/python/ql/src/Exceptions/CatchingBaseException.py @@ -0,0 +1,27 @@ + +def call_main_program_implicit_handle_base_exception(): + try: + #application.main calls sys.exit() when done. + application.main() + except Exception as ex: + log(ex) + except: + pass + +def call_main_program_explicit_handle_base_exception(): + try: + #application.main calls sys.exit() when done. + application.main() + except Exception as ex: + log(ex) + except BaseException: + pass + +def call_main_program_fixed(): + try: + #application.main calls sys.exit() when done. 
+ application.main() + except Exception as ex: + log(ex) + except SystemExit: + pass diff --git a/python/ql/src/Exceptions/CatchingBaseException.qhelp b/python/ql/src/Exceptions/CatchingBaseException.qhelp new file mode 100644 index 00000000000..725f9ce465b --- /dev/null +++ b/python/ql/src/Exceptions/CatchingBaseException.qhelp @@ -0,0 +1,55 @@ + + + +

    +All exception classes in Python derive from BaseException. BaseException has three important subclasses, +Exception from which all errors and normal exceptions derive, KeyboardInterrupt which is raised when the +user interrupts the program from the keyboard and SystemExit which is raised by the sys.exit() function to +terminate the program. +

    + +

    +Since KeyboardInterrupt and SystemExit are special they should not be grouped together with other +Exception classes. +

    + +

    +Catching BaseException, rather than its subclasses may prevent proper handling of +KeyboardInterrupt or SystemExit. It is easy to catch BaseException +accidentally as it is caught implicitly by an empty except: statement. +

    + +
    + + +

    +Handle Exception, KeyboardInterrupt and SystemExit separately. Do not use the plain except: form. +

    + +
    + +

    +In these examples, a function application.main() is called that might raise SystemExit. +In the first two functions, BaseException is caught, but this will discard KeyboardInterrupt. +In the third function, call_main_program_fixed, only SystemExit is caught, +leaving KeyboardInterrupt to propagate. +

    + + +

    In these examples KeyboardInterrupt is accidentally ignored.

    + + + +
    + + +
  • Python Language Reference: The try statement, +Exceptions.
  • +
  • M. Lutz, Learning Python, Section 35.3: Exception Design Tips and Gotchas, O'Reilly Media, 2013.
  • +
  • Python Tutorial: Errors and Exceptions.
  • + + +
    +
    diff --git a/python/ql/src/Exceptions/CatchingBaseException.ql b/python/ql/src/Exceptions/CatchingBaseException.ql new file mode 100644 index 00000000000..4d5be501ecf --- /dev/null +++ b/python/ql/src/Exceptions/CatchingBaseException.ql @@ -0,0 +1,30 @@ +/** + * @name Except block handles 'BaseException' + * @description Handling 'BaseException' means that system exits and keyboard interrupts may be mis-handled. + * @kind problem + * @tags reliability + * readability + * convention + * external/cwe/cwe-396 + * @problem.severity recommendation + * @sub-severity high + * @precision very-high + * @id py/catch-base-exception + */ + +import python + +predicate doesnt_reraise(ExceptStmt ex) { + ex.getAFlowNode().getBasicBlock().reachesExit() +} + +predicate catches_base_exception(ExceptStmt ex) { + ex.getType().refersTo(theBaseExceptionType()) + or + not exists(ex.getType()) +} + +from ExceptStmt ex +where catches_base_exception(ex) and +doesnt_reraise(ex) +select ex, "Except block directly handles BaseException." diff --git a/python/ql/src/Exceptions/EmptyExcept.py b/python/ql/src/Exceptions/EmptyExcept.py new file mode 100644 index 00000000000..c01c00a0593 --- /dev/null +++ b/python/ql/src/Exceptions/EmptyExcept.py @@ -0,0 +1,6 @@ +# ... +try: + security_manager.drop_privileges() +except SecurityError: + pass +# ... \ No newline at end of file diff --git a/python/ql/src/Exceptions/EmptyExcept.qhelp b/python/ql/src/Exceptions/EmptyExcept.qhelp new file mode 100644 index 00000000000..9b7ef09643f --- /dev/null +++ b/python/ql/src/Exceptions/EmptyExcept.qhelp @@ -0,0 +1,27 @@ + + + +

    Ignoring exceptions that should be dealt with in some way is almost always a bad idea. +The loss of information can lead to hard-to-debug errors and incomplete log files. +It is even possible that ignoring an exception can cause a security vulnerability. +An empty except block may be an indication that the programmer intended to +handle the exception but never wrote the code to do so.

    + +
    + +

    Ensure all exceptions are handled correctly.

    + +
    + +

    In this example the program keeps running with the same privileges if it fails to drop to lower +privileges.

    + + +
    + + + + +
    diff --git a/python/ql/src/Exceptions/EmptyExcept.ql b/python/ql/src/Exceptions/EmptyExcept.ql new file mode 100755 index 00000000000..592aec421bf --- /dev/null +++ b/python/ql/src/Exceptions/EmptyExcept.ql @@ -0,0 +1,106 @@ +/** + * @name Empty except + * @description Except doesn't do anything and has no comment + * @kind problem + * @tags reliability + * maintainability + * external/cwe/cwe-390 + * @problem.severity recommendation + * @sub-severity high + * @precision high + * @id py/empty-except + */ + +import python + +predicate +empty_except(ExceptStmt ex) { + not exists(Stmt s | s = ex.getAStmt() and not s instanceof Pass) +} + +predicate no_else(ExceptStmt ex) { + not exists(ex.getTry().getOrelse()) +} + +predicate no_comment(ExceptStmt ex) { + not exists(Comment c | + c.getLocation().getFile() = ex.getLocation().getFile() and + c.getLocation().getStartLine() >= ex.getLocation().getStartLine() and + c.getLocation().getEndLine() <= ex.getBody().getLastItem().getLocation().getEndLine() + ) +} + +predicate non_local_control_flow(ExceptStmt ex) { + ex.getType().refersTo(theStopIterationType()) +} + +predicate try_has_normal_exit(Try try) { + exists(ControlFlowNode pred, ControlFlowNode succ | + /* Exists a non-exception predecessor, successor pair */ + pred.getASuccessor() = succ and + not pred.getAnExceptionalSuccessor() = succ | + /* Successor is either a normal flow node or a fall-through exit */ + not exists(Scope s | s.getReturnNode() = succ) and + /* Predecessor is in try body and successor is not */ + pred.getNode().getParentNode*() = try.getAStmt() and + not succ.getNode().getParentNode*() = try.getAStmt() + ) +} + +predicate attribute_access(Stmt s) { + s.(ExprStmt).getValue() instanceof Attribute + or + exists(string name | + s.(ExprStmt).getValue().(Call).getFunc().(Name).getId() = name | + name = "getattr" or name = "setattr" or name = "delattr" + ) + or + s.(Delete).getATarget() instanceof Attribute +} + +predicate subscript(Stmt s) { + 
s.(ExprStmt).getValue() instanceof Subscript + or + s.(Delete).getATarget() instanceof Subscript +} + +predicate encode_decode(Expr ex, ClassObject type) { + exists(string name | + ex.(Call).getFunc().(Attribute).getName() = name | + name = "encode" and type = builtin_object("UnicodeEncodeError") + or + name = "decode" and type = builtin_object("UnicodeDecodeError") + ) +} + +predicate small_handler(ExceptStmt ex, Stmt s, ClassObject type) { + not exists(ex.getTry().getStmt(1)) and + s = ex.getTry().getStmt(0) and + ex.getType().refersTo(type) +} + +/** Holds if this exception handler is sufficiently small in scope to not need a comment + * as to what it is doing. + */ +predicate focussed_handler(ExceptStmt ex) { + exists(Stmt s, ClassObject type | + small_handler(ex, s, type) | + subscript(s) and type.getAnImproperSuperType() = theLookupErrorType() + or + attribute_access(s) and type = theAttributeErrorType() + or + s.(ExprStmt).getValue() instanceof Name and type = theNameErrorType() + or + encode_decode(s.(ExprStmt).getValue(), type) + ) +} + +Try try_return() { + not exists(result.getStmt(1)) and result.getStmt(0) instanceof Return +} + +from ExceptStmt ex +where empty_except(ex) and no_else(ex) and no_comment(ex) and not non_local_control_flow(ex) + and not ex.getTry() = try_return() and try_has_normal_exit(ex.getTry()) and + not focussed_handler(ex) +select ex, "'except' clause does nothing but pass and there is no explanatory comment." 
diff --git a/python/ql/src/Exceptions/IllegalExceptionHandlerType.py b/python/ql/src/Exceptions/IllegalExceptionHandlerType.py new file mode 100644 index 00000000000..e6b79af0e1d --- /dev/null +++ b/python/ql/src/Exceptions/IllegalExceptionHandlerType.py @@ -0,0 +1,7 @@ +def handle_int(): + try: + raise_int() + #This will not cause an exception, but it will be ignored + except int: + print("This will never be printed") + diff --git a/python/ql/src/Exceptions/IllegalExceptionHandlerType.qhelp b/python/ql/src/Exceptions/IllegalExceptionHandlerType.qhelp new file mode 100644 index 00000000000..8788b8800d0 --- /dev/null +++ b/python/ql/src/Exceptions/IllegalExceptionHandlerType.qhelp @@ -0,0 +1,40 @@ + + + +

    If the class specified in an except handler (within a try statement) is +not a legal exception class, then it will never match a raised exception and the handler will never be executed. +

    + +

    Legal exception classes are:

    + +

    +However, it is recommended that you only use subclasses of the builtin class +Exception (which is itself a subclass of BaseException). +

    + +
    + +

    Ensure that the specified class is the one intended. If it is not then replace it with +the correct one. Otherwise the entire except block can be deleted. +

    + +
    + + + + + + + + +
  • Python Language Reference: Exceptions.
  • +
  • Python Tutorial: Handling Exceptions.
  • + + +
    +
    diff --git a/python/ql/src/Exceptions/IllegalExceptionHandlerType.ql b/python/ql/src/Exceptions/IllegalExceptionHandlerType.ql new file mode 100644 index 00000000000..24a15198f18 --- /dev/null +++ b/python/ql/src/Exceptions/IllegalExceptionHandlerType.ql @@ -0,0 +1,30 @@ +/** + * @name Non-exception in 'except' clause + * @description An exception handler specifying a non-exception type will never handle any exception. + * @kind problem + * @tags reliability + * correctness + * types + * @problem.severity error + * @sub-severity low + * @precision very-high + * @id py/useless-except + */ + +import python + +from ExceptFlowNode ex, Object t, ClassObject c, ControlFlowNode origin, string what +where ex.handledException(t, c, origin) and +( + exists(ClassObject x | x = t | + not x.isLegalExceptionType() and + not x.failedInference() and + what = "class '" + x.getName() + "'" + ) + or + not t instanceof ClassObject and + what = "instance of '" + c.getName() + "'" +) + +select ex.getNode(), "Non-exception $@ in exception handler which will never match raised exception.", origin, what + diff --git a/python/ql/src/Exceptions/IllegalRaise.py b/python/ql/src/Exceptions/IllegalRaise.py new file mode 100644 index 00000000000..5f50d3db901 --- /dev/null +++ b/python/ql/src/Exceptions/IllegalRaise.py @@ -0,0 +1,5 @@ +#Cannot raise an int, even if we want to +def raise_int(): + #Will raise a TypeError + raise 4 + diff --git a/python/ql/src/Exceptions/IllegalRaise.qhelp b/python/ql/src/Exceptions/IllegalRaise.qhelp new file mode 100644 index 00000000000..da7bcb75afe --- /dev/null +++ b/python/ql/src/Exceptions/IllegalRaise.qhelp @@ -0,0 +1,38 @@ + + + +

    If the object raised is not a legal Exception class or an instance of one, then +a TypeError will be raised instead.

    + +

    Legal exception classes are:

    + +

    +However, it is recommended that you only use subclasses of the builtin class +Exception (which is itself a subclass of BaseException). +

    + +
    + +

    Change the expression in the raise statement to be a legal exception.

    + + +
    + + + + + + + + +
  • Python Language Reference: Exceptions.
  • +
  • Python Tutorial: Handling Exceptions.
  • + + +
    +
    diff --git a/python/ql/src/Exceptions/IllegalRaise.ql b/python/ql/src/Exceptions/IllegalRaise.ql new file mode 100644 index 00000000000..673289b9e97 --- /dev/null +++ b/python/ql/src/Exceptions/IllegalRaise.ql @@ -0,0 +1,21 @@ +/** + * @name Illegal raise + * @description Raising a non-exception object or type will result in a TypeError being raised instead. + * @kind problem + * @tags reliability + * correctness + * types + * @problem.severity error + * @sub-severity high + * @precision very-high + * @id py/illegal-raise + */ + +import python +import Raising +import Exceptions.NotImplemented + +from Raise r, ClassObject t +where type_or_typeof(r, t, _) and not t.isLegalExceptionType() and not t.failedInference() and not use_of_not_implemented_in_raise(r, _) +select r, "Illegal class '" + t.getName() + "' raised; will result in a TypeError being raised instead." + diff --git a/python/ql/src/Exceptions/IncorrectExceptOrder.py b/python/ql/src/Exceptions/IncorrectExceptOrder.py new file mode 100644 index 00000000000..15ad395b905 --- /dev/null +++ b/python/ql/src/Exceptions/IncorrectExceptOrder.py @@ -0,0 +1,10 @@ + + +def incorrect_except_order(val): + try: + val.attr + except Exception: + print ("Exception") + except AttributeError: + print ("AttributeError") + diff --git a/python/ql/src/Exceptions/IncorrectExceptOrder.qhelp b/python/ql/src/Exceptions/IncorrectExceptOrder.qhelp new file mode 100644 index 00000000000..d2854af6ca6 --- /dev/null +++ b/python/ql/src/Exceptions/IncorrectExceptOrder.qhelp @@ -0,0 +1,45 @@ + + + +

    When handling an exception, Python searches the except blocks in source code order +until it finds a matching except block for the exception. +An except block, except E:, specifies a class E and will match any +exception that is an instance of E. +

    +

    +If a more general except block precedes a more specific except block, +then the more general block is always executed and the more specific block is never executed. +An except block, except A:, is more general than another except block, except B:, +if A is a super class of B. +

    +

    +For example: +except Exception: is more general than except Error: as Exception +is a super class of Error. +

    + +
    + + +

    Reorganize the except blocks so that the more specific except +is defined first. Alternatively, if the more specific except block is +no longer required then it should be deleted.

    + +
    + +

    In this example the except Exception: will handle AttributeError preventing the +subsequent handler from ever executing.

    + + + +
    + + +
  • Python Language Reference: The try statement, +Exceptions.
  • + + +
    +
    diff --git a/python/ql/src/Exceptions/IncorrectExceptOrder.ql b/python/ql/src/Exceptions/IncorrectExceptOrder.ql new file mode 100644 index 00000000000..566f3c68175 --- /dev/null +++ b/python/ql/src/Exceptions/IncorrectExceptOrder.ql @@ -0,0 +1,34 @@ +/** + * @name Unreachable 'except' block + * @description Handling general exceptions before specific exceptions means that the specific + * handlers are never executed. + * @kind problem + * @tags reliability + * maintainability + * external/cwe/cwe-561 + * @problem.severity error + * @sub-severity low + * @precision very-high + * @id py/unreachable-except + */ + +import python + +predicate incorrect_except_order(ExceptStmt ex1, ClassObject cls1, ExceptStmt ex2, ClassObject cls2) { + exists(int i, int j, Try t | + ex1 = t.getHandler(i) and + ex2 = t.getHandler(j) and i < j and + cls1 = except_class(ex1) and + cls2 = except_class(ex2) and + cls1 = cls2.getASuperType() + ) +} + +ClassObject except_class(ExceptStmt ex) { + ex.getType().refersTo(result) +} + +from ExceptStmt ex1, ClassObject cls1, ExceptStmt ex2, ClassObject cls2 +where incorrect_except_order(ex1, cls1, ex2, cls2) +select ex2, "Except block for $@ is unreachable; the more general $@ for $@ will always be executed in preference.", + cls2, cls2.getName(), ex1, "except block", cls1, cls1.getName() diff --git a/python/ql/src/Exceptions/NotImplemented.py b/python/ql/src/Exceptions/NotImplemented.py new file mode 100644 index 00000000000..53b3189f099 --- /dev/null +++ b/python/ql/src/Exceptions/NotImplemented.py @@ -0,0 +1,9 @@ + +class Abstract(object): + + def wrong(self): + # Will raise a TypeError + raise NotImplemented() + + def right(self): + raise NotImplementedError() diff --git a/python/ql/src/Exceptions/NotImplemented.qll b/python/ql/src/Exceptions/NotImplemented.qll new file mode 100644 index 00000000000..016204a7cd1 --- /dev/null +++ b/python/ql/src/Exceptions/NotImplemented.qll @@ -0,0 +1,11 @@ + +import python + +/** Holds if `notimpl` refers 
to `NotImplemented` or `NotImplemented()` in the `raise` statement */ +predicate use_of_not_implemented_in_raise(Raise raise, Expr notimpl) { + notimpl.refersTo(theNotImplementedObject()) and + ( + notimpl = raise.getException() or + notimpl = raise.getException().(Call).getFunc() + ) +} diff --git a/python/ql/src/Exceptions/NotImplementedIsNotAnException.qhelp b/python/ql/src/Exceptions/NotImplementedIsNotAnException.qhelp new file mode 100644 index 00000000000..3bf09bbfab0 --- /dev/null +++ b/python/ql/src/Exceptions/NotImplementedIsNotAnException.qhelp @@ -0,0 +1,41 @@ + + + + +

    NotImplemented is not an Exception, but is often mistakenly used in place of NotImplementedError. +Executing raise NotImplemented or raise NotImplemented() will raise a TypeError. +When raise NotImplemented is used to mark code that is genuinely never called, this mistake is benign. + +However, should it be called, then a TypeError will be raised rather than the expected NotImplementedError, +which might make debugging the issue difficult. +

    + +

    The correct use of NotImplemented is to implement binary operators. +Code that is not intended to be called should raise NotImplementedError.

    + +
    + +

    Replace uses of NotImplemented with NotImplementedError.

    +
    + + +

    +In the example below, the method wrong will incorrectly raise a TypeError when called. +The method right will raise a NotImplementedError. +

    + + + + +
    + + + +
  • Python Language Reference: The NotImplementedError exception.
  • +
  • Python Language Reference: Emulating numeric types.
  • + +
    + +
    diff --git a/python/ql/src/Exceptions/NotImplementedIsNotAnException.ql b/python/ql/src/Exceptions/NotImplementedIsNotAnException.ql new file mode 100644 index 00000000000..89f1bb04568 --- /dev/null +++ b/python/ql/src/Exceptions/NotImplementedIsNotAnException.ql @@ -0,0 +1,19 @@ +/** + * @name NotImplemented is not an Exception + * @description Using 'NotImplemented' as an exception will result in a type error. + * @kind problem + * @problem.severity warning + * @sub-severity high + * @precision very-high + * @id py/raise-not-implemented + * @tags reliability + * maintainability + */ + +import python +import Exceptions.NotImplemented + +from Expr notimpl +where use_of_not_implemented_in_raise(_, notimpl) + +select notimpl, "NotImplemented is not an Exception. Did you mean NotImplementedError?" diff --git a/python/ql/src/Exceptions/Raising.qll b/python/ql/src/Exceptions/Raising.qll new file mode 100644 index 00000000000..b820bfa24e9 --- /dev/null +++ b/python/ql/src/Exceptions/Raising.qll @@ -0,0 +1,14 @@ +import python + +/** Whether the raise statement 'r' raises 'type' from origin 'orig' */ +predicate type_or_typeof(Raise r, ClassObject type, AstNode orig) { + exists(Expr exception | + exception = r.getRaised() | + exception.refersTo(type, _, orig) + or + not exists(ClassObject exc_type | exception.refersTo(exc_type)) and + not type = theTypeType() and // First value is an unknown exception type + exception.refersTo(_, type, orig) + ) + +} diff --git a/python/ql/src/Exceptions/RaisingTuple.py b/python/ql/src/Exceptions/RaisingTuple.py new file mode 100644 index 00000000000..50b7e81f82a --- /dev/null +++ b/python/ql/src/Exceptions/RaisingTuple.py @@ -0,0 +1,5 @@ + + +def raise_tuple(): + ex = Exception, "Important diagnostic information" + raise ex diff --git a/python/ql/src/Exceptions/RaisingTuple.qhelp b/python/ql/src/Exceptions/RaisingTuple.qhelp new file mode 100644 index 00000000000..4f47d3ee302 --- /dev/null +++ 
b/python/ql/src/Exceptions/RaisingTuple.qhelp @@ -0,0 +1,47 @@ + + + +

    In Python 2, if a tuple is raised then all elements but the first are ignored and only the first element is raised. +If the first element is itself a tuple, then the first element of that is used and so on. +This is unlikely to be the intended effect and will most likely indicate some sort of error.

    + +

    It is important to note that the exception in raise Exception, message is not a tuple, whereas the exception +in ex = Exception, message; raise ex is a tuple.

    + +

    +In Python 3, raising a tuple is an error. +

    + + +
    + + +

    Given that all but the first element of the tuple are ignored, +the tuple should be replaced with its first element in order to +improve the clarity of the code. If the subsequent parts of the tuple +were intended to form the message, then they should be passed as an argument +when creating the exception. +

    + + + +
    + + +

    In the following example the intended error message is mistakenly used to form a tuple.

    + +

    This can be fixed, either by using the message to create the exception or using the message in the raise +statement, as shown below.

    + + +
    + + +
  • Python Language Reference: Exceptions.
  • +
  • Python Tutorial: Handling Exceptions.
  • + + +
    +
    diff --git a/python/ql/src/Exceptions/RaisingTuple.ql b/python/ql/src/Exceptions/RaisingTuple.ql new file mode 100644 index 00000000000..8bf5c7705b5 --- /dev/null +++ b/python/ql/src/Exceptions/RaisingTuple.ql @@ -0,0 +1,18 @@ +/** + * @name Raising a tuple + * @description Raising a tuple will result in all but the first element being discarded + * @kind problem + * @tags maintainability + * @problem.severity warning + * @sub-severity high + * @precision very-high + * @id py/raises-tuple + */ + +import python + +from Raise r, AstNode origin +where r.getException().refersTo(_, theTupleType(), origin) and +major_version() = 2 /* Raising a tuple is a type error in Python 3, so is handled by the IllegalRaise query. */ + +select r, "Raising $@ will result in the first element (recursively) being raised and all other elements being discarded.", origin, "a tuple" \ No newline at end of file diff --git a/python/ql/src/Exceptions/RaisingTuple2.py b/python/ql/src/Exceptions/RaisingTuple2.py new file mode 100644 index 00000000000..0d8b85657ff --- /dev/null +++ b/python/ql/src/Exceptions/RaisingTuple2.py @@ -0,0 +1,9 @@ + + +def fixed_raise_tuple1(): + ex = Exception("Important diagnostic information") + raise ex + + +def fixed_raise_tuple2(): + raise Exception, "Important diagnostic information" diff --git a/python/ql/src/Exceptions/UnguardedNextInGenerator.qhelp b/python/ql/src/Exceptions/UnguardedNextInGenerator.qhelp new file mode 100644 index 00000000000..94474906bbc --- /dev/null +++ b/python/ql/src/Exceptions/UnguardedNextInGenerator.qhelp @@ -0,0 +1,51 @@ + + + +

    +The function next() will raise a StopIteration exception +if the underlying iterator is exhausted. +Normally this is fine, but in a generator it may cause problems. +Since the StopIteration is an exception it will be propagated out of the generator +causing termination of the generator. This is unlikely to be the expected behavior and may mask +errors. +

    + +

    +This problem is considered sufficiently serious that PEP 479 +has been accepted to modify the handling of StopIteration in generators. Consequently, code that does not handle +StopIteration properly is likely to fail in future versions of Python. +

    + +
    + +

    +Each call to next() should be wrapped in a try-except to explicitly +handle StopIteration exceptions. +

    + +
    + +

    +In the following example, an empty file part way through iteration will silently truncate the output as +the StopIteration exception propagates to the top level. +

    + + + +

    +In the following example, the StopIteration exception is explicitly handled, +allowing all the files to be processed. +

    + + + +
    + + +
  • Python PEP index: PEP 479.
  • + + +
    +
    diff --git a/python/ql/src/Exceptions/UnguardedNextInGenerator.ql b/python/ql/src/Exceptions/UnguardedNextInGenerator.ql new file mode 100755 index 00000000000..ca3683f0560 --- /dev/null +++ b/python/ql/src/Exceptions/UnguardedNextInGenerator.ql @@ -0,0 +1,61 @@ +/** + * @name Unguarded next in generator + * @description Calling next() in a generator may cause unintended early termination of an iteration. + * @kind problem + * @tags maintainability + * portability + * @problem.severity warning + * @sub-severity low + * @precision very-high + * @id py/unguarded-next-in-generator + */ + +import python + +FunctionObject iter() { + result = builtin_object("iter") +} + +FunctionObject next() { + result = builtin_object("next") +} + +predicate call_to_iter(CallNode call, EssaVariable sequence) { + sequence.getAUse() = iter().getArgumentForCall(call, 0) +} + +predicate call_to_next(CallNode call, ControlFlowNode iter) { + iter = next().getArgumentForCall(call, 0) +} + +predicate guarded_not_empty_sequence(EssaVariable sequence) { + sequence.getDefinition() instanceof EssaEdgeRefinement +} + +/** The pattern `next(iter(x))` is often used where `x` is known not be empty. Check for that. 
*/ +predicate iter_not_exhausted(EssaVariable iterator) { + exists(EssaVariable sequence | + call_to_iter(iterator.getDefinition().(AssignmentDefinition).getValue(), sequence) and + guarded_not_empty_sequence(sequence) + ) +} + +predicate stop_iteration_handled(CallNode call) { + exists(Try t | + t.containsInScope(call.getNode()) and + t.getAHandler().getType().refersTo(theStopIterationType()) + ) +} + +from CallNode call +where call_to_next(call, _) and +not exists(EssaVariable iterator | + call_to_next(call, iterator.getAUse()) and + iter_not_exhausted(iterator) +) and +call.getNode().getScope().(Function).isGenerator() and +not exists(Comp comp | comp.contains(call.getNode())) and +not stop_iteration_handled(call) + +select call, "Call to next() in a generator" + diff --git a/python/ql/src/Exceptions/UnguardedNextInGeneratorBad.py b/python/ql/src/Exceptions/UnguardedNextInGeneratorBad.py new file mode 100644 index 00000000000..550ae35e71a --- /dev/null +++ b/python/ql/src/Exceptions/UnguardedNextInGeneratorBad.py @@ -0,0 +1,19 @@ + +test_files = [ + ["header1", "text10", "text11", "text12"], + ["header2", "text20", "text21", "text22"], + [], + ["header4", "text40", "text41", "text42"], +] + +def separate_headers(files): + for file in files: + lines = iter(file) + header = next(lines) # Will raise StopIteration if lines is exhausted + body = [ l for l in lines ] + yield header, body + +def process_files(files): + for header, body in separate_headers(files): + print(format_page(header, body)) + diff --git a/python/ql/src/Exceptions/UnguardedNextInGeneratorGood.py b/python/ql/src/Exceptions/UnguardedNextInGeneratorGood.py new file mode 100644 index 00000000000..23121888558 --- /dev/null +++ b/python/ql/src/Exceptions/UnguardedNextInGeneratorGood.py @@ -0,0 +1,11 @@ + +def separate_headers(files): + for file in files: + lines = iter(file) + try: + header = next(lines) # Will raise StopIteration if lines is exhausted + except StopIteration: + #Empty file -- Just 
ignore + continue + body = [ l for l in lines ] + yield header, body diff --git a/python/ql/src/Expressions/CallArgs.qll b/python/ql/src/Expressions/CallArgs.qll new file mode 100644 index 00000000000..fc61f38a826 --- /dev/null +++ b/python/ql/src/Expressions/CallArgs.qll @@ -0,0 +1,130 @@ +import python + +import Testing.Mox + +private int varargs_length(Call call) { + not exists(call.getStarargs()) and result = 0 + or + exists(TupleObject t | + call.getStarargs().refersTo(t) | + result = t.getLength() + ) + or + result = count(call.getStarargs().(List).getAnElt()) +} + +/** Gets a keyword argument that is not a keyword-only parameter. */ +private Keyword not_keyword_only_arg(Call call, FunctionObject func) { + func.getACall().getNode() = call and + result = call.getAKeyword() and + not func.getFunction().getAKeywordOnlyArg().getId() = result.getArg() +} + +/** Gets the count of arguments that are passed as positional parameters even if they + * are named in the call. + * This is the sum of the number of positional arguments, the number of elements in any explicit tuple passed as *arg + * plus the number of keyword arguments that do not match keyword-only arguments (if the function does not take **kwargs). 
+ */ + +private int positional_arg_count_for_call(Call call, Object callable) { + call = get_a_call(callable).getNode() and + exists(int positional_keywords | + exists(FunctionObject func | func = get_function_or_initializer(callable) | + not func.getFunction().hasKwArg() and + positional_keywords = count(not_keyword_only_arg(call, func)) + or + func.getFunction().hasKwArg() and positional_keywords = 0 + ) + | + result = count(call.getAnArg()) + varargs_length(call) + positional_keywords + ) +} + +int arg_count(Call call) { + result = count(call.getAnArg()) + varargs_length(call) + count(call.getAKeyword()) +} + +/* Gets a call corresponding to the given class or function*/ +private ControlFlowNode get_a_call(Object callable) { + result = callable.(ClassObject).getACall() + or + result = callable.(FunctionObject).getACall() +} + +/* Gets the function object corresponding to the given class or function*/ +FunctionObject get_function_or_initializer(Object func_or_cls) { + result = func_or_cls.(FunctionObject) + or + result = func_or_cls.(ClassObject).declaredAttribute("__init__") +} + + +/**Whether there is an illegally named parameter called `name` in the `call` to `func` */ +predicate illegally_named_parameter(Call call, Object func, string name) { + not func.isC() and + name = call.getANamedArgumentName() and + call.getAFlowNode() = get_a_call(func) and + not get_function_or_initializer(func).isLegalArgumentName(name) +} + +/**Whether there are too few arguments in the `call` to `callable` where `limit` is the lowest number of legal arguments */ +predicate too_few_args(Call call, Object callable, int limit) { + // Exclude cases where an incorrect name is used as that is covered by 'Wrong name for an argument in a call' + not illegally_named_parameter(call, callable, _) and + not exists(call.getStarargs()) and not exists(call.getKwargs()) and + arg_count(call) < limit and + exists(FunctionObject func | func = get_function_or_initializer(callable) | + call = 
func.getAFunctionCall().getNode() and limit = func.minParameters() and + /* The combination of misuse of `mox.Mox().StubOutWithMock()` + * and a bug in mox's implementation of methods results in having to + * pass 1 too few arguments to the mocked function. + */ + not (useOfMoxInModule(call.getEnclosingModule()) and func.isNormalMethod()) + or + call = func.getAMethodCall().getNode() and limit = func.minParameters() - 1 + or + callable instanceof ClassObject and + call.getAFlowNode() = get_a_call(callable) and limit = func.minParameters() - 1 + ) +} + +/**Whether there are too many arguments in the `call` to `func` where `limit` is the highest number of legal arguments */ +predicate too_many_args(Call call, Object callable, int limit) { + // Exclude cases where an incorrect name is used as that is covered by 'Wrong name for an argument in a call' + not illegally_named_parameter(call, callable, _) and + exists(FunctionObject func | + func = get_function_or_initializer(callable) and + not func.getFunction().hasVarArg() and limit >= 0 + | + call = func.getAFunctionCall().getNode() and limit = func.maxParameters() + or + call = func.getAMethodCall().getNode() and limit = func.maxParameters() - 1 + or + callable instanceof ClassObject and + call.getAFlowNode() = get_a_call(callable) and limit = func.maxParameters() - 1 + ) and + positional_arg_count_for_call(call, callable) > limit +} + +/** Holds if `call` has too many or too few arguments for `func` */ +predicate wrong_args(Call call, FunctionObject func, int limit, string too) { + too_few_args(call, func, limit) and too = "too few" + or + too_many_args(call, func, limit) and too = "too many" +} + +/** Holds if `call` has correct number of arguments for `func`. + * Implies nothing about whether `call` could call `func`. 
+ */ + bindingset[call, func] +predicate correct_args_if_called_as_method(Call call, FunctionObject func) { + arg_count(call)+1 >= func.minParameters() + and + arg_count(call) < func.maxParameters() +} + +/** Holds if `call` is a call to `overriding`, which overrides `func`. */ +predicate overridden_call(FunctionObject func, FunctionObject overriding, Call call) { + overriding.overrides(func) and + overriding.getACall().getNode() = call +} diff --git a/python/ql/src/Expressions/CallToSuperWrongClass.py b/python/ql/src/Expressions/CallToSuperWrongClass.py new file mode 100644 index 00000000000..dee56fb7aec --- /dev/null +++ b/python/ql/src/Expressions/CallToSuperWrongClass.py @@ -0,0 +1,34 @@ + + +class Vehicle(object): + pass + +class Car(Vehicle): + + def __init__(self): + #This is OK provided that Car is not subclassed. + super(Vehicle, self).__init__() + self.car_init() + +class StatusSymbol(object): + + def __init__(self): + super(StatusSymbol, self).__init__() + self.show_off() + +class SportsCar(Car, StatusSymbol): + + def __init__(self): + #This will not call StatusSymbol.__init__() + super(SportsCar, self).__init__() + self.sports_car_init() + + +#Fix Car by passing Car to super(). +#SportsCar does not need to be changed. +class Car(Car, Vehicle): + + def __init__(self): + super(Car, self).__init__() + self.car_init() + diff --git a/python/ql/src/Expressions/CallToSuperWrongClass.qhelp b/python/ql/src/Expressions/CallToSuperWrongClass.qhelp new file mode 100644 index 00000000000..dc88b1bea88 --- /dev/null +++ b/python/ql/src/Expressions/CallToSuperWrongClass.qhelp @@ -0,0 +1,45 @@ + + + +

    +The super class should be called with the enclosing class as its first argument and self as its second argument. +

    +

    +Passing a different class may work correctly, provided the class passed is a super class of the enclosing class and the enclosing class +does not define an __init__ method. +However, this may result in incorrect object initialization if the enclosing class is later subclassed using multiple inheritance. +

    + + +
    + + +

    + Ensure that the first argument to super() is the enclosing class. +

    + + +
    + +

    +In this example the call to super(Vehicle, self) in Car.__init__ is incorrect as it +passes Vehicle rather than Car as the first argument to super. +As a result, super(SportsCar, self).__init__() in the SportsCar.__init__ method will not call +all __init__() methods because the call to super(Vehicle, self).__init__() +skips StatusSymbol.__init__(). +

    + + + + +
    + + +
  • Python Standard Library: super.
  • +
  • Artima Developer: Things to Know About Python Super.
  • + + +
    +
    diff --git a/python/ql/src/Expressions/CallToSuperWrongClass.ql b/python/ql/src/Expressions/CallToSuperWrongClass.ql new file mode 100644 index 00000000000..a42cbcefe4b --- /dev/null +++ b/python/ql/src/Expressions/CallToSuperWrongClass.ql @@ -0,0 +1,29 @@ +/** + * @name First argument to super() is not enclosing class + * @description Calling super with something other than the enclosing class may cause incorrect object initialization. + * @kind problem + * @tags reliability + * maintainability + * convention + * external/cwe/cwe-687 + * @problem.severity error + * @sub-severity low + * @precision high + * @id py/super-not-enclosing-class + */ + +import python + +from CallNode call_to_super, string name +where +exists(GlobalVariable gv, ControlFlowNode cn | + call_to_super = theSuperType().getACall() and + gv.getId() = "super" and + cn = call_to_super.getArg(0) and + name = call_to_super.getScope().getScope().(Class).getName() and + exists(ClassObject other | + cn.refersTo(other) and + not other.getPyClass().getName() = name + ) +) +select call_to_super.getNode(), "First argument to super() should be " + name + "." diff --git a/python/ql/src/Expressions/CompareConstants.py b/python/ql/src/Expressions/CompareConstants.py new file mode 100644 index 00000000000..7345433edec --- /dev/null +++ b/python/ql/src/Expressions/CompareConstants.py @@ -0,0 +1,6 @@ + +#Interoperate with very old versions of Python (pre 2.3) +try: + True +except NameError: + __builtins__.True = 1==1 diff --git a/python/ql/src/Expressions/CompareConstants.qhelp b/python/ql/src/Expressions/CompareConstants.qhelp new file mode 100644 index 00000000000..3dcbd1cc4ab --- /dev/null +++ b/python/ql/src/Expressions/CompareConstants.qhelp @@ -0,0 +1,35 @@ + + + + + +

    When two constants are compared it is typically an +indication of a mistake, since the Boolean value of the comparison +will always be the same. In very old code this may be used to initialize +True and False.

    + +
    + + +

    It is never good practice to compare one constant with another. If the constant +behavior is indeed required, use the Boolean literals True or +False, rather than encoding them obscurely as 1 == 1 +or similar. If there is a mistake, ascertain the desired behavior and correct it. +

    + +
    + + +

    In this example, old code uses 1==1 to initialize __builtins__.True. +This code has been unnecessary on all versions of Python released since 2003 and can be deleted. +

    + +
    + + +
  • Python Language Reference: Comparisons.
  • + +
    +
    diff --git a/python/ql/src/Expressions/CompareConstants.ql b/python/ql/src/Expressions/CompareConstants.ql new file mode 100644 index 00000000000..2a66a952c5e --- /dev/null +++ b/python/ql/src/Expressions/CompareConstants.ql @@ -0,0 +1,21 @@ +/** + * @name Comparison of constants + * @description Comparison of constants is always constant, but is harder to read than a simple constant. + * @kind problem + * @tags maintainability + * useless-code + * external/cwe/cwe-570 + * external/cwe/cwe-571 + * @problem.severity warning + * @sub-severity low + * @precision very-high + * @id py/comparison-of-constants + */ + +import python + +from Compare comparison, Expr left, Expr right +where + comparison.compares(left, _, right) and left.isConstant() and right.isConstant() and + not exists(Assert a | a.getTest() = comparison) +select comparison, "Comparison of constants; use 'True' or 'False' instead." diff --git a/python/ql/src/Expressions/CompareIdenticalValues.py b/python/ql/src/Expressions/CompareIdenticalValues.py new file mode 100644 index 00000000000..8d115c99c10 --- /dev/null +++ b/python/ql/src/Expressions/CompareIdenticalValues.py @@ -0,0 +1,8 @@ + +#Using 'x == x' to check that 'x' is not a float('nan'). +def is_normal(f): + return not cmath.isinf(f) and f == f + +#Improved version; intention is explicit. +def is_normal(f): + return not cmath.isinf(f) and not cmath.isnan(f) \ No newline at end of file diff --git a/python/ql/src/Expressions/CompareIdenticalValues.qhelp b/python/ql/src/Expressions/CompareIdenticalValues.qhelp new file mode 100644 index 00000000000..5c8c5371ed3 --- /dev/null +++ b/python/ql/src/Expressions/CompareIdenticalValues.qhelp @@ -0,0 +1,38 @@ + + + + + +

    When two identical expressions are compared it is typically an +indication of a mistake, since the Boolean value of the comparison +will always be the same, unless the value is the floating point value float('nan'). +

    + +
    + + +

    It is not good practice to compare a value with itself, as it makes the code hard to read +and can hide errors with classes that do not correctly implement equality. +If testing whether a floating-point value is not-a-number, then use math.isnan(). +If the value may be a complex number, then use cmath.isnan() instead. +

    + +
    + + +

    In this example f == f is used to check for float('nan'). +This makes the code difficult to understand as the reader may not be immediately familiar with this pattern. +

    + +
    + + +
  • Python Language Reference: Comparisons.
  • +
  • Python Library Reference: math.isnan().
  • +
  • Python Library Reference: cmath.isnan().
  • + + +
    +
    diff --git a/python/ql/src/Expressions/CompareIdenticalValues.ql b/python/ql/src/Expressions/CompareIdenticalValues.ql new file mode 100644 index 00000000000..c950d3ebb2e --- /dev/null +++ b/python/ql/src/Expressions/CompareIdenticalValues.ql @@ -0,0 +1,22 @@ +/** + * @name Comparison of identical values + * @description Comparison of identical values, the intent of which is unclear. + * @kind problem + * @tags reliability + * correctness + * readability + * convention + * external/cwe/cwe-570 + * external/cwe/cwe-571 + * @problem.severity warning + * @sub-severity high + * @precision very-high + * @id py/comparison-of-identical-expressions + */ + +import python +import Expressions.RedundantComparison + +from RedundantComparison comparison +where not comparison.isConstant() and not comparison.maybeMissingSelf() +select comparison, "Comparison of identical values; use cmath.isnan() if testing for not-a-number." diff --git a/python/ql/src/Expressions/CompareIdenticalValuesMissingSelf.py b/python/ql/src/Expressions/CompareIdenticalValuesMissingSelf.py new file mode 100644 index 00000000000..6b4da941e0e --- /dev/null +++ b/python/ql/src/Expressions/CompareIdenticalValuesMissingSelf.py @@ -0,0 +1,20 @@ + +class Customer: + + def __init__(self, data): + self.data = data + + def check_data(self, data): + if data != data: # Forgotten 'self' + raise Exception("Invalid data!") + +#Fixed version + +class Customer: + + def __init__(self, data): + self.data = data + + def check_data(self, data): + if self.data != data: + raise Exception("Invalid data!") diff --git a/python/ql/src/Expressions/CompareIdenticalValuesMissingSelf.qhelp b/python/ql/src/Expressions/CompareIdenticalValuesMissingSelf.qhelp new file mode 100644 index 00000000000..36ae280a3b8 --- /dev/null +++ b/python/ql/src/Expressions/CompareIdenticalValuesMissingSelf.qhelp @@ -0,0 +1,32 @@ + + + + + +

    When two identical expressions are compared it is typically an +indication of a mistake, since the Boolean value of the comparison +will always be the same. Often, it can indicate that self has +been omitted.

    + +
    + + +

    It is never good practice to compare a value with itself. +If self has been omitted, then insert it. If the constant +behavior is indeed required, use the Boolean literals True or +False, rather than encoding them obscurely as x == x +or similar.

    + +
    + + + + + + +
  • Python Language Reference: Comparisons.
  • + +
    +
    diff --git a/python/ql/src/Expressions/CompareIdenticalValuesMissingSelf.ql b/python/ql/src/Expressions/CompareIdenticalValuesMissingSelf.ql new file mode 100644 index 00000000000..9d618c2dbb1 --- /dev/null +++ b/python/ql/src/Expressions/CompareIdenticalValuesMissingSelf.ql @@ -0,0 +1,21 @@ +/** + * @name Maybe missing 'self' in comparison + * @description Comparison of identical values, the intent of which is unclear. + * @kind problem + * @tags reliability + * maintainability + * external/cwe/cwe-570 + * external/cwe/cwe-571 + * @problem.severity warning + * @sub-severity high + * @precision very-high + * @id py/comparison-missing-self + */ + +import python +import Expressions.RedundantComparison + +from RedundantComparison comparison +where + comparison.maybeMissingSelf() +select comparison, "Comparison of identical values; may be missing 'self'." diff --git a/python/ql/src/Expressions/Comparisons/UselessComparisonTest.py b/python/ql/src/Expressions/Comparisons/UselessComparisonTest.py new file mode 100644 index 00000000000..da0f09aca45 --- /dev/null +++ b/python/ql/src/Expressions/Comparisons/UselessComparisonTest.py @@ -0,0 +1,15 @@ + class KeySorter: + + def __init__(self, obj): + self.obj = obj + + def __lt__(self, other): + return self._compare(self.obj, other.obj) < 0 + + def _compare(self, obj1, obj2): + if obj1 < obj2: + return -1 + elif obj1 < obj2: + return 1 + else: + return 0 diff --git a/python/ql/src/Expressions/Comparisons/UselessComparisonTest.qhelp b/python/ql/src/Expressions/Comparisons/UselessComparisonTest.qhelp new file mode 100644 index 00000000000..7d1b1bd7279 --- /dev/null +++ b/python/ql/src/Expressions/Comparisons/UselessComparisonTest.qhelp @@ -0,0 +1,35 @@ + + + + + + +

    The result of certain comparisons can sometimes be inferred from their context and the results of other +comparisons. This can be an indication of faulty logic and may result in dead +code or infinite loops if, for example, a loop condition never changes its value. +

    + +
    + +

    Inspect the code to check whether the logic is correct, and consider +simplifying the logical expression. +

    + +
    + +

    In the following (real world) example the test obj1 < obj2 is repeated and thus the +second test will always be false, and the function _compare will only ever return 0 or -1. +

    + + + +
    + + + +
  • Python Language Reference: Comparisons.
  • + +
    +
    diff --git a/python/ql/src/Expressions/Comparisons/UselessComparisonTest.ql b/python/ql/src/Expressions/Comparisons/UselessComparisonTest.ql new file mode 100644 index 00000000000..35277300e25 --- /dev/null +++ b/python/ql/src/Expressions/Comparisons/UselessComparisonTest.ql @@ -0,0 +1,42 @@ +/** + * @name Redundant comparison + * @description The result of a comparison is implied by a previous comparison. + * @kind problem + * @tags useless-code + * external/cwe/cwe-561 + * external/cwe/cwe-570 + * external/cwe/cwe-571 + * @problem.severity warning + * @sub-severity high + * @precision high + * @id py/redundant-comparison + */ + +import python +import semmle.python.Comparisons + + +/** A test is useless if for every block that it controls there is another test that is at least as + * strict and also controls that block. + */ +private predicate useless_test(Comparison comp, ComparisonControlBlock controls, boolean isTrue) { + controls.impliesThat(comp.getBasicBlock(), comp, isTrue) and + /* Exclude complex comparisons of form `a < x < y`, as we do not (yet) have perfect flow control for those */ + not exists(controls.getTest().getNode().(Compare).getOp(1)) +} + +private predicate useless_test_ast(AstNode comp, AstNode previous, boolean isTrue) { + forex(Comparison compnode, ConditionBlock block| + compnode.getNode() = comp and + block.getLastNode().getNode() = previous + | + useless_test(compnode, block, isTrue) + ) +} + +from Expr test, Expr other, boolean isTrue +where +useless_test_ast(test, other, isTrue) and not useless_test_ast(test.getAChildNode+(), other, _) + + +select test, "Test is always " + isTrue + ", because of $@", other, "this condition" diff --git a/python/ql/src/Expressions/ContainsNonContainer.py b/python/ql/src/Expressions/ContainsNonContainer.py new file mode 100644 index 00000000000..69e556039c9 --- /dev/null +++ b/python/ql/src/Expressions/ContainsNonContainer.py @@ -0,0 +1,9 @@ +class NotAContainer(object): + + def __init__(self, 
*items): + self.items = items + +def main(): + cont = NotAContainer(1, 2, 3) + if 2 in cont: + print("2 in container") diff --git a/python/ql/src/Expressions/ContainsNonContainer.qhelp b/python/ql/src/Expressions/ContainsNonContainer.qhelp new file mode 100644 index 00000000000..6a2709390a3 --- /dev/null +++ b/python/ql/src/Expressions/ContainsNonContainer.qhelp @@ -0,0 +1,42 @@ + + + +

    A membership test, that is a binary expression with +in or not in as the operator, expects that the +expression to the right of the operator will be a container.

    +

    As well as standard containers such as list, tuple, +dict or set, +a container can be an instance of any class that has the __contains__, +__iter__ or __getitem__ method. + +

    + +

    +Ensure that the right hand side of the expression is a container, or add a guard +clause for other cases. +For example, if the right side may be a container or None then change +if x in seq: to if seq is not None and x in seq: +

    + +
    + +

    In this example the NotAContainer class has no __contains__, +__iter__ or __getitem__ method. +Consequently, when the line if 2 in cont: is executed a TypeError +will be raised. Adding a __getitem__ method to the +NotAContainer class would solve the problem. +

    + + + +
    + + +
  • Python: Membership test details.
  • +
  • Python: The __contains__ method.
  • + + +
    +
    diff --git a/python/ql/src/Expressions/ContainsNonContainer.ql b/python/ql/src/Expressions/ContainsNonContainer.ql new file mode 100644 index 00000000000..653574d62b1 --- /dev/null +++ b/python/ql/src/Expressions/ContainsNonContainer.ql @@ -0,0 +1,30 @@ +/** + * @name Membership test with a non-container + * @description A membership test, such as 'item in sequence', with a non-container on the right hand side will raise a 'TypeError'. + * @kind problem + * @tags reliability + * correctness + * @problem.severity error + * @sub-severity high + * @precision high + * @id py/member-test-non-container + */ + +import python + +predicate rhs_in_expr(ControlFlowNode rhs, Compare cmp) { + exists(Cmpop op, int i | cmp.getOp(i) = op and cmp.getComparator(i) = rhs.getNode() | + op instanceof In or op instanceof NotIn + ) +} + +from ControlFlowNode non_seq, Compare cmp, ClassObject cls, ControlFlowNode origin +where rhs_in_expr(non_seq, cmp) and +non_seq.refersTo(_, cls, origin) and +not cls.failedInference() and +not cls.hasAttribute("__contains__") and +not cls.hasAttribute("__iter__") and +not cls.hasAttribute("__getitem__") and +not cls = theNoneType() + +select cmp, "This test may raise an Exception as the $@ may be of non-container class $@.", origin, "target", cls, cls.getName() \ No newline at end of file diff --git a/python/ql/src/Expressions/DuplicateKeyInDictionaryLiteral.py b/python/ql/src/Expressions/DuplicateKeyInDictionaryLiteral.py new file mode 100644 index 00000000000..14804d31300 --- /dev/null +++ b/python/ql/src/Expressions/DuplicateKeyInDictionaryLiteral.py @@ -0,0 +1,2 @@ +dictionary = {1:"a", 2:"b", 2:"c"} +print dictionary[2] \ No newline at end of file diff --git a/python/ql/src/Expressions/DuplicateKeyInDictionaryLiteral.qhelp b/python/ql/src/Expressions/DuplicateKeyInDictionaryLiteral.qhelp new file mode 100644 index 00000000000..19c4df9a558 --- /dev/null +++ b/python/ql/src/Expressions/DuplicateKeyInDictionaryLiteral.qhelp @@ -0,0 +1,28 @@ + + + +

    Dictionary literals are constructed in the order given in the source. +This means that if a key is duplicated the second key-value pair will overwrite +the first as a dictionary can only have one value per key. +

    + +
    + +

    Check for typos to ensure that the keys are supposed to be the same. +If they are then decide which value is wanted and delete the other one.

    + +
    + +

    This example will output "c" because the mapping between 2 and "b" is overwritten by the +mapping from 2 to "c". The programmer may have meant to map 3 to "c" instead.

    + + +
    + + +
  • Python: Dictionary literals.
  • + +
    +
    diff --git a/python/ql/src/Expressions/DuplicateKeyInDictionaryLiteral.ql b/python/ql/src/Expressions/DuplicateKeyInDictionaryLiteral.ql new file mode 100644 index 00000000000..20678da8dc0 --- /dev/null +++ b/python/ql/src/Expressions/DuplicateKeyInDictionaryLiteral.ql @@ -0,0 +1,44 @@ +/** + * @name Duplicate key in dict literal + * @description Duplicate key in dict literal. All but the last will be lost. + * @kind problem + * @tags reliability + * useless-code + * external/cwe/cwe-561 + * @problem.severity warning + * @sub-severity high + * @precision very-high + * @id py/duplicate-key-dict-literal + */ + +import python +import semmle.python.strings + +predicate dict_key(Dict d, Expr k, string s) { + k = d.getAKey() and + ( + s = ((Num)k).getN() + or + // We use � to mark unrepresentable characters + // so two instances of � may represent different strings in the source code + not "�" = s.charAt(_) and + exists(StrConst c | + c = k | + s = "u\"" + c.getText() + "\"" and c.isUnicode() + or + s = "b\"" + c.getText() + "\"" and not c.isUnicode() + ) + ) +} + +from Dict d, Expr k1, Expr k2 +where exists(string s | dict_key(d, k1, s) and dict_key(d, k2, s) and k1 != k2) and +( + exists(BasicBlock b, int i1, int i2 | + k1.getAFlowNode() = b.getNode(i1) and + k2.getAFlowNode() = b.getNode(i2) and + i1 < i2 + ) or + k1.getAFlowNode().getBasicBlock().strictlyDominates(k2.getAFlowNode().getBasicBlock()) +) +select k1, "Dictionary key " + repr(k1) + " is subsequently $@.", k2, "overwritten" diff --git a/python/ql/src/Expressions/EqualsNone.py b/python/ql/src/Expressions/EqualsNone.py new file mode 100644 index 00000000000..d48507ff3ba --- /dev/null +++ b/python/ql/src/Expressions/EqualsNone.py @@ -0,0 +1,12 @@ + +def filter1(function, iterable=None) + if iterable == None: # Comparison using '__eq__' + return [item for item in iterable if item] + else: + return [item for item in iterable if function(item)] + +def filter2(function, iterable=None) + if iterable is None: # 
Comparison using identity + return [item for item in iterable if item] + else: + return [item for item in iterable if function(item)] diff --git a/python/ql/src/Expressions/EqualsNone.qhelp b/python/ql/src/Expressions/EqualsNone.qhelp new file mode 100644 index 00000000000..2ca33bc4b1b --- /dev/null +++ b/python/ql/src/Expressions/EqualsNone.qhelp @@ -0,0 +1,34 @@ + + + +

    When you compare an object to None, use is rather than ==. +None is a singleton object; comparing using == invokes the __eq__ +method on the object in question, which may be slower than identity comparison. Comparing to +None using the is operator is also easier for other programmers to read.

    + + +
    + + +

    Replace == with is.

    + +
    + +

    The filter2 function is likely to be more efficient than the filter1 +function because it uses an identity comparison.

    + + + + +
    + + + +
  • Python Language Reference: Comparisons, +object.__eq__.
  • + + +
    +
    diff --git a/python/ql/src/Expressions/EqualsNone.ql b/python/ql/src/Expressions/EqualsNone.ql new file mode 100644 index 00000000000..fa36dffb724 --- /dev/null +++ b/python/ql/src/Expressions/EqualsNone.ql @@ -0,0 +1,17 @@ +/** + * @name Testing equality to None + * @description Testing whether an object is 'None' using the == operator is inefficient and potentially incorrect. + * @kind problem + * @tags efficiency + * maintainability + * @problem.severity recommendation + * @sub-severity high + * @precision very-high + * @id py/test-equals-none + */ + +import python + +from Compare c +where c.getOp(0) instanceof Eq and c.getAComparator() instanceof None +select c, "Testing for None should use the 'is' operator." diff --git a/python/ql/src/Expressions/ExpectedMappingForFormatString.py b/python/ql/src/Expressions/ExpectedMappingForFormatString.py new file mode 100644 index 00000000000..410c044adf0 --- /dev/null +++ b/python/ql/src/Expressions/ExpectedMappingForFormatString.py @@ -0,0 +1,7 @@ + +def unsafe_format(): + if unlikely_condition(): + args = (1,2,3) + else: + args = {a:1,b:2,c:3} + return "%(a)s %(b)s %(c)s" % args \ No newline at end of file diff --git a/python/ql/src/Expressions/ExpectedMappingForFormatString.qhelp b/python/ql/src/Expressions/ExpectedMappingForFormatString.qhelp new file mode 100644 index 00000000000..8a9789b418e --- /dev/null +++ b/python/ql/src/Expressions/ExpectedMappingForFormatString.qhelp @@ -0,0 +1,30 @@ + + + +

    If a format string includes conversion specifiers of the form %(name)s then the right hand side of the operation must be a mapping. +A string is a format string if it appears on the left of a modulo (%) operator, the right hand side being the value to be formatted. +If the right hand side is not a mapping then a TypeError will be raised. +Mappings are usually dicts but can be any type that implements the mapping protocol. +

    + + +
    + +

    Change the format to match the arguments and ensure that the right hand side is always a mapping. + +

    + +

    In the following example the right hand side of the formatting operation can be a tuple, which is not a mapping. +To fix this example, ensure that args is a mapping when unlikely_condition occurs. +

    + + +
    + + +
  • Python Library Reference: String Formatting.
  • + +
    +
    diff --git a/python/ql/src/Expressions/ExpectedMappingForFormatString.ql b/python/ql/src/Expressions/ExpectedMappingForFormatString.ql new file mode 100644 index 00000000000..9c398a3a6ee --- /dev/null +++ b/python/ql/src/Expressions/ExpectedMappingForFormatString.ql @@ -0,0 +1,19 @@ +/** + * @name Formatted object is not a mapping + * @description The formatted object must be a mapping when the format includes a named specifier; otherwise a TypeError will be raised." + * @kind problem + * @tags reliability + * correctness + * @problem.severity error + * @sub-severity low + * @precision high + * @id py/percent-format/not-mapping + */ + +import python +import semmle.python.strings + +from Expr e, ClassObject t +where exists(BinaryExpr b | b.getOp() instanceof Mod and format_string(b.getLeft()) and e = b.getRight() and +mapping_format(b.getLeft()) and e.refersTo(_, t, _) and not t.isMapping()) +select e, "Right hand side of a % operator must be a mapping, not class $@.", t, t.getName() diff --git a/python/ql/src/Expressions/ExplicitCallToDel.py b/python/ql/src/Expressions/ExplicitCallToDel.py new file mode 100644 index 00000000000..59601a6f34d --- /dev/null +++ b/python/ql/src/Expressions/ExplicitCallToDel.py @@ -0,0 +1,16 @@ + + +def extract_bad(zippath, dest): + zipped = ZipFile(zippath) + try: + zipped.extractall(dest) + finally: + zipped.__del__() + +def extract_good(zippath, dest): + zipped = ZipFile(zippath) + try: + zipped.extractall(dest) + finally: + zipped.close() + diff --git a/python/ql/src/Expressions/ExplicitCallToDel.qhelp b/python/ql/src/Expressions/ExplicitCallToDel.qhelp new file mode 100644 index 00000000000..9ec18b46918 --- /dev/null +++ b/python/ql/src/Expressions/ExplicitCallToDel.qhelp @@ -0,0 +1,33 @@ + + + + +

    The __del__ special method is designed to be called by the Python virtual machine when an object is no longer reachable, +but before it is destroyed. Calling a __del__ method explicitly may cause an object to enter an unsafe state.

    + + +
    + + +

    If explicit cleanup of an object is required, call a close() method or, better still, +wrap the use of the object in a with statement. +

    + +
    + +

    In the first example, rather than close the zip file in a conventional manner the programmer has called __del__. +A safer alternative is shown in the second example. +

    + + + + +
    + + +
  • Python Standard Library: object.__del__
  • + +
    +
    diff --git a/python/ql/src/Expressions/ExplicitCallToDel.ql b/python/ql/src/Expressions/ExplicitCallToDel.ql new file mode 100644 index 00000000000..1cb2782c885 --- /dev/null +++ b/python/ql/src/Expressions/ExplicitCallToDel.ql @@ -0,0 +1,35 @@ +/** + * @name __del__ is called explicitly + * @description The __del__ special method is called by the virtual machine when an object is being finalized. It should not be called explicitly. + * @kind problem + * @tags reliability + * correctness + * @problem.severity warning + * @sub-severity low + * @precision very-high + * @id py/explicit-call-to-delete + */ + +import python + +class DelCall extends Call { + DelCall() { + ((Attribute)this.getFunc()).getName() = "__del__" + } + + predicate isSuperCall() { + exists(Function f | f = this.getScope() and f.getName() = "__del__" | + // We pass in `self` as the first argument... + f.getArg(0).asName().getVariable() = ((Name)this.getArg(0)).getVariable() or + // ... or the call is of the form `super(Type, self).__del__()`, or the equivalent + // Python 3: `super().__del__()`. + exists(Call superCall | superCall = ((Attribute)this.getFunc()).getObject() | + ((Name)superCall.getFunc()).getId() = "super" + ) + ) + } +} + +from DelCall del +where not del.isSuperCall() +select del, "The __del__ special method is called explicitly." 
\ No newline at end of file diff --git a/python/ql/src/Expressions/Formatting/AdvancedFormatting.qll b/python/ql/src/Expressions/Formatting/AdvancedFormatting.qll new file mode 100644 index 00000000000..7a451ada1bd --- /dev/null +++ b/python/ql/src/Expressions/Formatting/AdvancedFormatting.qll @@ -0,0 +1,142 @@ +import python + + +library class PossibleAdvancedFormatString extends StrConst { + + PossibleAdvancedFormatString() { + this.getText().matches("%{%}%") + } + + private predicate field(int start, int end) { + brace_pair(this, start, end) and + this.getText().substring(start, end) != "{{}}" + } + + /** Gets the number of the formatting field at [start, end) */ + int getFieldNumber(int start, int end) { + result = this.fieldId(start, end).toInt() + or + this.implicitlyNumberedField(start, end) and + result = count(int s | this.implicitlyNumberedField(s, _) and s < start) + } + + /** Gets the text of the formatting field at [start, end) */ + string getField(int start, int end) { + this.field(start, end) and + result = this.getText().substring(start, end) + } + + private string fieldId(int start, int end) { + this.field(start, end) and + ( + result = this.getText().substring(start, end).regexpCapture("\\{([^!:.\\[]+)[!:.\\[].*", 1) + or + result = this.getText().substring(start+1, end-1) and result.regexpMatch("[^!:.\\[]+") + ) + } + + /** Gets the name of the formatting field at [start, end) */ + string getFieldName(int start, int end) { + result = this.fieldId(start, end) + and not exists(this.getFieldNumber(start, end)) + } + + private predicate implicitlyNumberedField(int start, int end) { + this.field(start, end) and + exists(string c | + start+1 = this.getText().indexOf(c) | + c = "}" or c = ":" or c = "!" or c = "." 
+ ) + } + + /** Whether this format string has implicitly numbered fields */ + predicate isImplicitlyNumbered() { + this.implicitlyNumberedField(_, _) + } + + /** Whether this format string has explicitly numbered fields */ + predicate isExplicitlyNumbered() { + exists(this.fieldId(_, _).toInt()) + } + +} + +predicate brace_sequence(PossibleAdvancedFormatString fmt, int index, int len) { + exists(string text | + text = fmt.getText() | + text.charAt(index) = "{" and not text.charAt(index-1) = "{" and len = 1 + or + text.charAt(index) = "{" and text.charAt(index-1) = "{" and brace_sequence(fmt, index-1, len-1) + ) +} + +predicate escaped_brace(PossibleAdvancedFormatString fmt, int index) { + exists(int len | + brace_sequence(fmt, index, len) | + len % 2 = 0 + ) +} + +predicate escaping_brace(PossibleAdvancedFormatString fmt, int index) { + escaped_brace(fmt, index+1) +} + +private predicate inner_brace_pair(PossibleAdvancedFormatString fmt, int start, int end) { + not escaping_brace(fmt, start) and + not escaped_brace(fmt, start) and + fmt.getText().charAt(start) = "{" and + exists(string pair | pair = fmt.getText().suffix(start).regexpCapture("(?s)(\\{([^{}]|\\{\\{)*+\\}).*", 1) | + end = start + pair.length() + ) +} + +private predicate brace_pair(PossibleAdvancedFormatString fmt, int start, int end) { + inner_brace_pair(fmt, start, end) + or + not escaping_brace(fmt, start) and + not escaped_brace(fmt, start) and + exists(string prefix, string postfix, int innerstart, int innerend | + brace_pair(fmt, innerstart, innerend) and + prefix = fmt.getText().regexpFind("\\{([^{}]|\\{\\{)+\\{", _, start) and + innerstart = start+prefix.length()-1 and + postfix = fmt.getText().regexpFind("\\}([^{}]|\\}\\})*\\}", _, innerend-1) and + end = innerend + postfix.length()-1 + ) +} + +private predicate advanced_format_call(Call format_expr, PossibleAdvancedFormatString fmt, int args) { + exists(CallNode call | + call = format_expr.getAFlowNode() | + 
call.getFunction().refersTo(theFormatFunction()) and call.getArg(0).refersTo(_, fmt.getAFlowNode()) and + args = count(format_expr.getAnArg()) - 1 + or + call.getFunction().(AttrNode).getObject("format").refersTo(_, fmt.getAFlowNode()) and + args = count(format_expr.getAnArg()) + ) +} + +class AdvancedFormatString extends PossibleAdvancedFormatString { + + AdvancedFormatString() { + advanced_format_call(_, this, _) + } + +} + +class AdvancedFormattingCall extends Call { + + AdvancedFormattingCall() { + advanced_format_call(this, _, _) + } + + /** Count of the arguments actually provided */ + int providedArgCount() { + advanced_format_call(this, _, result) + } + + AdvancedFormatString getAFormat() { + advanced_format_call(this, result, _) + } + +} + diff --git a/python/ql/src/Expressions/Formatting/MixedExplicitImplicitIn3101Format.py b/python/ql/src/Expressions/Formatting/MixedExplicitImplicitIn3101Format.py new file mode 100644 index 00000000000..1d483cdf24c --- /dev/null +++ b/python/ql/src/Expressions/Formatting/MixedExplicitImplicitIn3101Format.py @@ -0,0 +1,2 @@ +def illegal_format(): + "{} {1}".format("spam", "eggs") diff --git a/python/ql/src/Expressions/Formatting/MixedExplicitImplicitIn3101Format.qhelp b/python/ql/src/Expressions/Formatting/MixedExplicitImplicitIn3101Format.qhelp new file mode 100644 index 00000000000..014ba097cdd --- /dev/null +++ b/python/ql/src/Expressions/Formatting/MixedExplicitImplicitIn3101Format.qhelp @@ -0,0 +1,31 @@ + + + +

    A formatting expression, that is an expression of the form the_format.format(args) or format(the_format, args), +can use explicitly numbered fields, like {1}, or implicitly numbered fields, such as {}, but it cannot use both. +Doing so will raise a ValueError. +

    + +
    + +

    +Use either explicitly numbered fields or implicitly numbered fields, but be consistent. +

    + +
    + +

    +In the following example the formatting uses both implicit, {}, and explicit, {1}, numbering for fields, which is illegal. +

    + + +
    + + +
  • Python Library Reference: String Formatting.
  • + + +
    +
    diff --git a/python/ql/src/Expressions/Formatting/MixedExplicitImplicitIn3101Format.ql b/python/ql/src/Expressions/Formatting/MixedExplicitImplicitIn3101Format.ql new file mode 100644 index 00000000000..3f488aa9507 --- /dev/null +++ b/python/ql/src/Expressions/Formatting/MixedExplicitImplicitIn3101Format.ql @@ -0,0 +1,18 @@ +/** + * @name Formatting string mixes implicitly and explicitly numbered fields + * @description Using implicit and explicit numbering in string formatting operations, such as '"{}: {1}".format(a,b)', will raise a ValueError. + * @kind problem + * @problem.severity error + * @tags reliability + * correctness + * @sub-severity low + * @precision high + * @id py/str-format/mixed-fields + */ + +import python +import AdvancedFormatting + +from AdvancedFormattingCall call, AdvancedFormatString fmt +where call.getAFormat() = fmt and fmt.isImplicitlyNumbered() and fmt.isExplicitlyNumbered() +select fmt, "Formatting string mixes implicitly and explicitly numbered fields." \ No newline at end of file diff --git a/python/ql/src/Expressions/Formatting/UnusedArgumentIn3101Format.py b/python/ql/src/Expressions/Formatting/UnusedArgumentIn3101Format.py new file mode 100644 index 00000000000..591f461437a --- /dev/null +++ b/python/ql/src/Expressions/Formatting/UnusedArgumentIn3101Format.py @@ -0,0 +1,3 @@ +def surplus_argument(): + the_format = "{} {}" # Used to be "{} {} {}" + return the_format.format(1, 2, 3) diff --git a/python/ql/src/Expressions/Formatting/UnusedArgumentIn3101Format.qhelp b/python/ql/src/Expressions/Formatting/UnusedArgumentIn3101Format.qhelp new file mode 100644 index 00000000000..707ddaf7181 --- /dev/null +++ b/python/ql/src/Expressions/Formatting/UnusedArgumentIn3101Format.qhelp @@ -0,0 +1,33 @@ + + + +

    A formatting expression, that is an expression of the form the_format.format(args) or format(the_format, args), +can have any number of arguments, provided that there are enough to match the format. +However, surplus arguments are redundant and clutter the code, making it harder to read. +

    + +

    +It is also possible that surplus arguments indicate a mistake in the format string. +

    + +
    + +

    +Check that the format string is correct and then remove any surplus arguments. +

    + +
    + +

    In the following example there are three arguments for the call to the str.format() method, but the format string only requires two. +The third argument should be deleted.

    + + +
    + + +
  • Python Library Reference: String Formatting.
  • + +
    +
    diff --git a/python/ql/src/Expressions/Formatting/UnusedArgumentIn3101Format.ql b/python/ql/src/Expressions/Formatting/UnusedArgumentIn3101Format.ql new file mode 100644 index 00000000000..67c95277375 --- /dev/null +++ b/python/ql/src/Expressions/Formatting/UnusedArgumentIn3101Format.ql @@ -0,0 +1,26 @@ +/** + * @name Unused argument in a formatting call + * @description Including surplus arguments in a formatting call makes code more difficult to read and may indicate an error. + * @kind problem + * @tags maintainability + * useless-code + * @problem.severity warning + * @sub-severity high + * @precision high + * @id py/str-format/surplus-argument + */ + +import python + + +import python +import AdvancedFormatting + +int field_count(AdvancedFormatString fmt) { result = max(fmt.getFieldNumber(_, _)) + 1 } + +from AdvancedFormattingCall call, AdvancedFormatString fmt, int arg_count, int max_field +where arg_count = call.providedArgCount() and max_field = field_count(fmt) and +call.getAFormat() = fmt and not exists(call.getStarargs()) and +forall(AdvancedFormatString other | other = call.getAFormat() | field_count(other) < arg_count) +select call, "Too many arguments for string format. 
Format $@ requires only " + max_field + ", but " + +arg_count.toString() + " are provided.", fmt, "\"" + fmt.getText() + "\"" diff --git a/python/ql/src/Expressions/Formatting/UnusedNamedArgumentIn3101Format.py b/python/ql/src/Expressions/Formatting/UnusedNamedArgumentIn3101Format.py new file mode 100644 index 00000000000..e2c3b0cde92 --- /dev/null +++ b/python/ql/src/Expressions/Formatting/UnusedNamedArgumentIn3101Format.py @@ -0,0 +1,3 @@ +def surplus_argument(): + the_format = "{spam} {eggs}" # Used to be "{spam} {eggs} {chips}" + return the_format.format(spam = "spam", eggs="eggs", chips="chips") diff --git a/python/ql/src/Expressions/Formatting/UnusedNamedArgumentIn3101Format.qhelp b/python/ql/src/Expressions/Formatting/UnusedNamedArgumentIn3101Format.qhelp new file mode 100644 index 00000000000..12e482ae2c8 --- /dev/null +++ b/python/ql/src/Expressions/Formatting/UnusedNamedArgumentIn3101Format.qhelp @@ -0,0 +1,35 @@ + + + +

    A formatting expression, that is an expression of the form the_format.format(args) or format(the_format, args), +can have keyword arguments of any name, as long as all the required names are provided. +However, surplus keyword arguments, those with names that are not in the format, are redundant. +These surplus arguments clutter the code, making it harder to read. +

    + +

    +It is also possible that surplus keyword arguments indicate a mistake in the format string. +

    + +
    + +

    +Check that the format string is correct and then remove any surplus keyword arguments. +

    + +
    + +

    In the following example, the comment indicates that the chips keyword argument is no longer required and should be deleted. +

    + + +
    + + +
  • Python Library Reference: String Formatting.
  • + + +
    +
    diff --git a/python/ql/src/Expressions/Formatting/UnusedNamedArgumentIn3101Format.ql b/python/ql/src/Expressions/Formatting/UnusedNamedArgumentIn3101Format.ql new file mode 100644 index 00000000000..c902b992b1c --- /dev/null +++ b/python/ql/src/Expressions/Formatting/UnusedNamedArgumentIn3101Format.ql @@ -0,0 +1,27 @@ +/** + * @name Unused named argument in formatting call + * @description Including surplus keyword arguments in a formatting call makes code more difficult to read and may indicate an error. + * @kind problem + * @tags maintainability + * useless-code + * @problem.severity warning + * @sub-severity high + * @precision very-high + * @id py/str-format/surplus-named-argument + */ + +import python +import AdvancedFormatting + +from AdvancedFormattingCall call, AdvancedFormatString fmt, string name, string fmt_repr +where call.getAFormat() = fmt and +name = call.getAKeyword().getArg() and +forall(AdvancedFormatString format | format = call.getAFormat() | not format.getFieldName(_, _) = name) +and not exists(call.getKwargs()) and +(strictcount(call.getAFormat()) = 1 and fmt_repr = "format \"" + fmt.getText() + "\"" + or + strictcount(call.getAFormat()) != 1 and fmt_repr = "any format used." +) + +select call, "Surplus named argument for string format. 
An argument named '" + name + + "' is provided, but it is not required by $@.", fmt, fmt_repr diff --git a/python/ql/src/Expressions/Formatting/WrongNameInArgumentsFor3101Format.py b/python/ql/src/Expressions/Formatting/WrongNameInArgumentsFor3101Format.py new file mode 100644 index 00000000000..21fe107eaf9 --- /dev/null +++ b/python/ql/src/Expressions/Formatting/WrongNameInArgumentsFor3101Format.py @@ -0,0 +1,6 @@ +def unsafe_named_format(): + the_format = "{spam} {eggs}" + if unlikely_condition(): + return the_format.format(spam="spam", completely_different="eggs") + else: + return the_format.format(spam="spam", eggs="eggs") diff --git a/python/ql/src/Expressions/Formatting/WrongNameInArgumentsFor3101Format.qhelp b/python/ql/src/Expressions/Formatting/WrongNameInArgumentsFor3101Format.qhelp new file mode 100644 index 00000000000..39c75a0c67f --- /dev/null +++ b/python/ql/src/Expressions/Formatting/WrongNameInArgumentsFor3101Format.qhelp @@ -0,0 +1,31 @@ + + + +

    A formatting expression, that is an expression of the form the_format.format(args) or format(the_format, args), +can use named fields. If it does, then keyword arguments must be supplied for all named fields. +If any of the keyword arguments are missing then a KeyError will be raised. +

    + +
    + +

    +Change the format to match the arguments and ensure that the arguments have the correct names. +

    + +
    + +

    In the following example, if unlikely_condition() is true, then a KeyError will be raised +as the keyword parameter eggs is missing. +Adding a keyword parameter named eggs would fix this. +

    + + +
    + + +
  • Python Library Reference: String Formatting.
  • + +
    +
    diff --git a/python/ql/src/Expressions/Formatting/WrongNameInArgumentsFor3101Format.ql b/python/ql/src/Expressions/Formatting/WrongNameInArgumentsFor3101Format.ql new file mode 100644 index 00000000000..412d8d55830 --- /dev/null +++ b/python/ql/src/Expressions/Formatting/WrongNameInArgumentsFor3101Format.ql @@ -0,0 +1,23 @@ +/** + * @name Missing named arguments in formatting call + * @description A string formatting operation, such as '"{name}".format(key=b)', + * where the names of format items in the format string differs from the names of the values to be formatted will raise a KeyError. + * @kind problem + * @problem.severity error + * @tags reliability + * correctness + * @sub-severity low + * @precision high + * @id py/str-format/missing-named-argument + */ + +import python +import AdvancedFormatting + +from AdvancedFormattingCall call, AdvancedFormatString fmt, string name +where call.getAFormat() = fmt and +not name = call.getAKeyword().getArg() and +fmt.getFieldName(_, _) = name +and not exists(call.getKwargs()) +select call, "Missing named argument for string format. 
Format $@ requires '" + name + "', but it is omitted.", +fmt, "\"" + fmt.getText() + "\"" \ No newline at end of file diff --git a/python/ql/src/Expressions/Formatting/WrongNumberArgumentsFor3101Format.py b/python/ql/src/Expressions/Formatting/WrongNumberArgumentsFor3101Format.py new file mode 100644 index 00000000000..0105d035e32 --- /dev/null +++ b/python/ql/src/Expressions/Formatting/WrongNumberArgumentsFor3101Format.py @@ -0,0 +1,7 @@ +def unsafe_format(): + the_format = "{} {} {}" + if unlikely_condition(): + return the_format.format(1, 2) + else: + return the_format.format(1, 2, 3) + diff --git a/python/ql/src/Expressions/Formatting/WrongNumberArgumentsFor3101Format.qhelp b/python/ql/src/Expressions/Formatting/WrongNumberArgumentsFor3101Format.qhelp new file mode 100644 index 00000000000..bc342bd2c05 --- /dev/null +++ b/python/ql/src/Expressions/Formatting/WrongNumberArgumentsFor3101Format.qhelp @@ -0,0 +1,29 @@ + + + +

    A formatting expression, that is an expression of the form the_format.format(args) or format(the_format, args), +must have sufficient arguments to match the format. Otherwise, an IndexError will be raised. +

    + +
    + +

    +Either change the format to match the arguments, or ensure that there are sufficient arguments. +

    + +
    + +

    In the following example, only 2 arguments may be provided for the call to the str.format method, +which is insufficient for the format string used. To fix this a third parameter should be provided on line 4. +

    + + +
    + + +
  • Python Library Reference: String Formatting.
  • + +
    +
    diff --git a/python/ql/src/Expressions/Formatting/WrongNumberArgumentsFor3101Format.ql b/python/ql/src/Expressions/Formatting/WrongNumberArgumentsFor3101Format.ql new file mode 100644 index 00000000000..fe766ae2d8b --- /dev/null +++ b/python/ql/src/Expressions/Formatting/WrongNumberArgumentsFor3101Format.ql @@ -0,0 +1,23 @@ +/** + * @name Too few arguments in formatting call + * @description A string formatting operation, such as '"{0}: {1}, {2}".format(a,b)', + * where the number of values to be formatted is too few for the format string will raise an IndexError. + * @kind problem + * @tags reliability + * correctness + * @problem.severity error + * @sub-severity low + * @precision high + * @id py/str-format/missing-argument + */ + +import python +import AdvancedFormatting + +from AdvancedFormattingCall call, AdvancedFormatString fmt, +int arg_count, int max_field, string provided +where arg_count = call.providedArgCount() and max_field = max(fmt.getFieldNumber(_, _)) and +call.getAFormat() = fmt and not exists(call.getStarargs()) and arg_count <= max_field and +(if arg_count = 1 then provided = " is provided." else provided = " are provided.") +select call, "Too few arguments for string format. Format $@ requires at least " + (max_field+1) + ", but " + +arg_count.toString() + provided, fmt, "\"" + fmt.getText() + "\"" \ No newline at end of file diff --git a/python/ql/src/Expressions/HashedButNoHash.py b/python/ql/src/Expressions/HashedButNoHash.py new file mode 100644 index 00000000000..dad0d5ea862 --- /dev/null +++ b/python/ql/src/Expressions/HashedButNoHash.py @@ -0,0 +1,5 @@ + +def lookup_with_default_key(mapping, key=None): + if key is None: + key = [] # Should be key = () + return mapping[key] diff --git a/python/ql/src/Expressions/HashedButNoHash.qhelp b/python/ql/src/Expressions/HashedButNoHash.qhelp new file mode 100644 index 00000000000..6df27d4e60f --- /dev/null +++ b/python/ql/src/Expressions/HashedButNoHash.qhelp @@ -0,0 +1,43 @@ + + + +

    If an object is used as a key in a dictionary or as a member of a set then it must be hashable, +that is it must define a __hash__ method. All built-in immutable types are hashable, but +mutable ones are not. Common hashable types include all numbers, strings (both unicode and bytes) +and tuple. Common unhashable types include list, dict and set. +

    + +

    +In order to store a key in a dict or set a hash value is needed. To determine this value the built-in +function hash() is called which in turn calls the __hash__ method on the object. +If the object's class does not have the __hash__ method, then a TypeError will be raised. +

    + + +
    + +

    Since this problem usually indicates a logical error, it is not possible to give a general recipe for fixing it. +Mutable collections can be converted into immutable equivalents where appropriate. For example sets can be hashed by converting any instances +of set into frozenset instances. +

    + +
    + +

    lists are not hashable. In this example, an attempt is made to use a list +as a key in a mapping which will fail with a TypeError. +

    + + + +
    + + +
  • Python Standard Library: hash.
  • +
  • Python Language Reference: object.__hash__.
  • +
  • Python Standard Library: Mapping Types — dict.
  • +
  • Python Standard Library: Set Types — set, frozenset.
  • + +
    +
    diff --git a/python/ql/src/Expressions/HashedButNoHash.ql b/python/ql/src/Expressions/HashedButNoHash.ql new file mode 100644 index 00000000000..310d56b273a --- /dev/null +++ b/python/ql/src/Expressions/HashedButNoHash.ql @@ -0,0 +1,59 @@ +/** + * @name Unhashable object hashed + * @description Hashing an object which is not hashable will result in a TypeError at runtime. + * @kind problem + * @tags reliability + * correctness + * @problem.severity error + * @sub-severity low + * @precision very-high + * @id py/hash-unhashable-value + */ + +import python + +/* This assumes that any indexing operation where the value is not a sequence or numpy array involves hashing. + * For sequences, the index must be an int, which are hashable, so we don't need to treat them specially. + * For numpy arrays, the index may be a list, which are not hashable and needs to be treated specially. + */ + +predicate numpy_array_type(ClassObject na) { + exists(ModuleObject np | np.getName() = "numpy" or np.getName() = "numpy.core" | + na.getAnImproperSuperType() = np.getAttribute("ndarray") + ) +} + +predicate has_custom_getitem(ClassObject cls) { + cls.lookupAttribute("__getitem__") instanceof PyFunctionObject + or + numpy_array_type(cls) +} + +predicate explicitly_hashed(ControlFlowNode f) { + exists(CallNode c, GlobalVariable hash | c.getArg(0) = f and c.getFunction().(NameNode).uses(hash) and hash.getId() = "hash") +} + +predicate unhashable_subscript(ControlFlowNode f, ClassObject c, ControlFlowNode origin) { + is_unhashable(f, c, origin) and + exists(SubscriptNode sub | sub.getIndex() = f | + exists(ClassObject custom_getitem | + sub.getObject().refersTo(_, custom_getitem, _) and + not has_custom_getitem(custom_getitem) + ) + ) +} + +predicate is_unhashable(ControlFlowNode f, ClassObject cls, ControlFlowNode origin) { + f.refersTo(_, cls, origin) and + (not cls.hasAttribute("__hash__") and not cls.unknowableAttributes() and cls.isNewStyle() + or + cls.lookupAttribute("__hash__") = 
theNoneObject() + ) +} + +from ControlFlowNode f, ClassObject c, ControlFlowNode origin +where +explicitly_hashed(f) and is_unhashable(f, c, origin) +or +unhashable_subscript(f, c, origin) +select f.getNode(), "This $@ of $@ is unhashable.", origin, "instance", c, c.getQualifiedName() diff --git a/python/ql/src/Expressions/IncorrectComparisonUsingIs.py b/python/ql/src/Expressions/IncorrectComparisonUsingIs.py new file mode 100644 index 00000000000..faf78fd8f12 --- /dev/null +++ b/python/ql/src/Expressions/IncorrectComparisonUsingIs.py @@ -0,0 +1,27 @@ + +DEFAULT = "default" + +def get_color(name, fallback): + if name in COLORS: + return COLORS[name] + elif fallback is DEFAULT: + return DEFAULT_COLOR + else: + return fallback + +#This works +print (get_color("spam", "def" + "ault")) + +#But this does not +print (get_color("spam", "default-spam"[:7])) + +#To fix the above code change to object +DEFAULT = object() + +#Or if you want better repr() output: +class Default(object): + + def __repr__(self): + return "DEFAULT" + +DEFAULT = Default() diff --git a/python/ql/src/Expressions/IncorrectComparisonUsingIs.qhelp b/python/ql/src/Expressions/IncorrectComparisonUsingIs.qhelp new file mode 100644 index 00000000000..b8c25fa04a2 --- /dev/null +++ b/python/ql/src/Expressions/IncorrectComparisonUsingIs.qhelp @@ -0,0 +1,43 @@ + + + + +

    When you compare two values using the is or is not operator, it is the +object identities of the two values that are tested rather than their equality. + If the class of either of the values in the comparison redefines equality then the + is operator may return False even though the objects compare as equal. + Equality is defined by the __eq__ or, in Python 2, __cmp__ method. + To compare two objects for equality, use the == or != operator instead.

    + +
    + + +

    When you want to compare the value of two literals, use the comparison operator == or +!= in place of is or is not.

    + +

    If the uniqueness property or performance are important then use an object that does not redefine equality.

    + +
    + + +

    In the get_color function of the following example, the programmer tests the value of fallback against +DEFAULT using the is operator. Unfortunately, this may fail when the function +is called with the string "default".

    +

    +To function correctly, change the expression fallback is DEFAULT to fallback == DEFAULT. +Alternatively, if the uniqueness property is desirable, then change the definition of DEFAULT to +either of the alternatives below. +

    + + + + +
    + + +
  • Python Standard Library: Comparisons.
  • + +
    +
    diff --git a/python/ql/src/Expressions/IncorrectComparisonUsingIs.ql b/python/ql/src/Expressions/IncorrectComparisonUsingIs.ql new file mode 100644 index 00000000000..1cb59866e5a --- /dev/null +++ b/python/ql/src/Expressions/IncorrectComparisonUsingIs.ql @@ -0,0 +1,20 @@ +/** + * @name Comparison using is when operands support __eq__ + * @description Comparison using 'is' when equivalence is not the same as identity + * @kind problem + * @tags reliability + * correctness + * @problem.severity warning + * @sub-severity low + * @precision high + * @id py/comparison-using-is + */ + +import python +import IsComparisons + +from Compare comp, Cmpop op, ClassObject c, string alt +where invalid_portable_is_comparison(comp, op, c) and +not cpython_interned_constant(comp.getASubExpression()) and +(op instanceof Is and alt = "==" or op instanceof IsNot and alt = "!=") +select comp, "Values compared using '" + op.getSymbol() + "' when equivalence is not the same as identity. Use '" + alt + "' instead." 
diff --git a/python/ql/src/Expressions/IsComparisons.qll b/python/ql/src/Expressions/IsComparisons.qll new file mode 100644 index 00000000000..270c951f3cb --- /dev/null +++ b/python/ql/src/Expressions/IsComparisons.qll @@ -0,0 +1,112 @@ +import python + + +predicate comparison_using_is(Compare comp, ControlFlowNode left, Cmpop op, ControlFlowNode right) { + exists(CompareNode fcomp | fcomp = comp.getAFlowNode() | + fcomp.operands(left, op, right) and (op instanceof Is or op instanceof IsNot) + ) +} + +predicate overrides_eq_or_cmp(ClassObject c) { + major_version() = 2 and c.hasAttribute("__eq__") + or + c.declaresAttribute("__eq__") and not c = theObjectType() + or + exists(ClassObject sup | + sup = c.getASuperType() and not sup = theObjectType() | + sup.declaresAttribute("__eq__") + ) + or + major_version() = 2 and c.hasAttribute("__cmp__") +} + +predicate invalid_to_use_is_portably(ClassObject c) { + overrides_eq_or_cmp(c) and + /* Exclude type/builtin-function/bool as it is legitimate to compare them using 'is' but they implement __eq__ */ + not c = theTypeType() and not c = theBuiltinFunctionType() and not c = theBoolType() and + /* OK to compare with 'is' if a singleton */ + not exists(c.getProbableSingletonInstance()) +} + +predicate simple_constant(ControlFlowNode f) { + exists(Object obj | f.refersTo(obj) | obj = theTrueObject() or obj = theFalseObject() or obj = theNoneObject()) +} + +private predicate cpython_interned_value(Expr e) { + exists(string text | text = e.(StrConst).getText() | + text.length() = 0 or + text.length() = 1 and text.regexpMatch("[\\u0000-\\u00ff]") /* any single character in the range U+0000 to U+00FF */ + ) + or + exists(int i | + i = e.(IntegerLiteral).getN().toInt() | + -5 <= i and i <= 256 + ) + or + exists(Tuple t | t = e and not exists(t.getAnElt())) +} + +/** The set of values that can be expected to be interned across + * the main implementations of Python. PyPy, Jython, etc tend to + * follow CPython, but it varies, so this is a best guess. 
+ */ +private predicate universally_interned_value(Expr e) { + e.(IntegerLiteral).getN().toInt() = 0 + or + exists(Tuple t | t = e and not exists(t.getAnElt())) + or + e.(StrConst).getText() = "" +} + +predicate cpython_interned_constant(Expr e) { + exists(Expr const | + e.refersTo(_, const) | + cpython_interned_value(const) + ) +} + +predicate universally_interned_constant(Expr e) { + exists(Expr const | + e.refersTo(_, const) | + universally_interned_value(const) + ) +} + +private predicate comparison_both_types(Compare comp, Cmpop op, ClassObject cls1, ClassObject cls2) { + exists(ControlFlowNode op1, ControlFlowNode op2 | + comparison_using_is(comp, op1, op, op2) or comparison_using_is(comp, op2, op, op1) | + op1.refersTo(_, cls1, _) and + op2.refersTo(_, cls2, _) + ) +} + +private predicate comparison_one_type(Compare comp, Cmpop op, ClassObject cls) { + not comparison_both_types(comp, _, _, _) and + exists(ControlFlowNode operand | + comparison_using_is(comp, operand, op, _) or comparison_using_is(comp, _, op, operand) | + operand.refersTo(_, cls, _) + ) +} + +predicate invalid_portable_is_comparison(Compare comp, Cmpop op, ClassObject cls) { + /* OK to use 'is' when defining '__eq__' */ + not exists(Function eq | eq.getName() = "__eq__" or eq.getName() = "__ne__" | eq = comp.getScope().getScope*()) + and + ( + comparison_one_type(comp, op, cls) and invalid_to_use_is_portably(cls) + or + exists(ClassObject other | comparison_both_types(comp, op, cls, other) | + invalid_to_use_is_portably(cls) and + invalid_to_use_is_portably(other) + ) + ) + and + /* OK to use 'is' when comparing items from a known set of objects */ + not exists(Expr left, Expr right, Object obj | + comp.compares(left, op, right) and + left.refersTo(obj) and right.refersTo(obj) and + exists(ImmutableLiteral il | il.getLiteralObject() = obj) + ) +} + + diff --git a/python/ql/src/Expressions/NonCallableCalled.py b/python/ql/src/Expressions/NonCallableCalled.py new file mode 100644 index 
00000000000..76b6a920861 --- /dev/null +++ b/python/ql/src/Expressions/NonCallableCalled.py @@ -0,0 +1,2 @@ +a_list = [] +a_list() diff --git a/python/ql/src/Expressions/NonCallableCalled.qhelp b/python/ql/src/Expressions/NonCallableCalled.qhelp new file mode 100644 index 00000000000..35411ca62b3 --- /dev/null +++ b/python/ql/src/Expressions/NonCallableCalled.qhelp @@ -0,0 +1,39 @@ + + + +

    If an object is called, obj(), then that object must be a callable or +a TypeError will be raised. A callable object is any object whose class defines +the __call__ special method. +Callable objects include functions, methods and classes.

    + +

    The callable(object) builtin function determines if an object is callable or not.

    + +

    +When the Python interpreter attempts to evaluate a call such as func(arg) it will +invoke the __call__ special method on func. +Thus, func(arg) is roughly equivalent to type(func).__call__(func, arg), +which means that the class must define the attribute __call__; +merely adding it to the instance is not sufficient. +

    + +
    + +

    Since this problem usually indicates a logical error, it is not possible to give a general recipe for fixing it.

    + +
    + +

    lists are not callable. In this example, an attempt is made to call a list +which will fail with a TypeError. +

    + + +
    + + +
  • Python Standard Library: callable.
  • +
  • Python Language Reference: object.__call__.
  • + +
    +
    diff --git a/python/ql/src/Expressions/NonCallableCalled.ql b/python/ql/src/Expressions/NonCallableCalled.ql new file mode 100644 index 00000000000..5d58ae04ec9 --- /dev/null +++ b/python/ql/src/Expressions/NonCallableCalled.ql @@ -0,0 +1,24 @@ +/** + * @name Non-callable called + * @description A call to an object which is not a callable will raise a TypeError at runtime. + * @kind problem + * @tags reliability + * correctness + * types + * @problem.severity error + * @sub-severity high + * @precision high + * @id py/call-to-non-callable + */ + +import python +import Exceptions.NotImplemented + +from Call c, ClassObject t, Expr f, AstNode origin +where f = c.getFunc() and f.refersTo(_, t, origin) and + not t.isCallable() and not t.unknowableAttributes() + and not t.isDescriptorType() + and not t = theNoneType() + and not use_of_not_implemented_in_raise(_, f) + +select c, "Call to a $@ of $@.", origin, "non-callable", t, t.toString() diff --git a/python/ql/src/Expressions/NonPortableComparisonUsingIs.py b/python/ql/src/Expressions/NonPortableComparisonUsingIs.py new file mode 100644 index 00000000000..12be9d143e7 --- /dev/null +++ b/python/ql/src/Expressions/NonPortableComparisonUsingIs.py @@ -0,0 +1,8 @@ + +CONSTANT = 12 + +def equals_to_twelve(x): + return x is CONSTANT + +#This works in CPython, but might not for other implementations. +print (equals_to_twelve(5 + 7)) diff --git a/python/ql/src/Expressions/NonPortableComparisonUsingIs.qhelp b/python/ql/src/Expressions/NonPortableComparisonUsingIs.qhelp new file mode 100644 index 00000000000..4b5fa7c840a --- /dev/null +++ b/python/ql/src/Expressions/NonPortableComparisonUsingIs.qhelp @@ -0,0 +1,44 @@ + + + + +

    When you compare two values using the is or is not operator, it is the +object identities of the two values that are tested rather than their equality. +If the class of either of the values in the comparison redefines equality then the +is operator may return False even though the objects compare as equal. +

    +

    +CPython interns a number of commonly used values, such as small integers, which means that using +is instead of == will work correctly. However, this might not be portable +to other implementations such as PyPy, IronPython, Jython or MicroPython. +

    + +
    + + +

    When you want to compare the value of two literals, use the comparison operator == or +!= in place of is or is not.

    + +

    If the uniqueness property or performance are important then use an object that does not redefine equality.

    + +
    + + +

    The function equals_to_twelve() relies on CPython interning small integers.

    +

    +To function correctly for all implementations, change the expression x is CONSTANT to x == CONSTANT. +

    + + + + +
    + + +
  • Python Standard Library: Comparisons.
  • +
  • Stack Overflow: Python "is" operator behaves unexpectedly with integers.
  • + +
    +
    diff --git a/python/ql/src/Expressions/NonPortableComparisonUsingIs.ql b/python/ql/src/Expressions/NonPortableComparisonUsingIs.ql new file mode 100644 index 00000000000..397aec01065 --- /dev/null +++ b/python/ql/src/Expressions/NonPortableComparisonUsingIs.ql @@ -0,0 +1,23 @@ +/** + * @name Non-portable comparison using is when operands support __eq__ + * @description Comparison using 'is' when equivalence is not the same as identity and may not be portable. + * @kind problem + * @tags portability + * maintainability + * @problem.severity recommendation + * @sub-severity low + * @precision medium + * @id py/comparison-using-is-non-portable + */ + +import python +import IsComparisons + +from Compare comp, Cmpop op, ClassObject c +where invalid_portable_is_comparison(comp, op, c) and +exists(Expr sub | + sub = comp.getASubExpression() | + cpython_interned_constant(sub) and + not universally_interned_constant(sub) +) +select comp, "The result of this comparison with '" + op.getSymbol() + "' may differ between implementations of Python." 
\ No newline at end of file diff --git a/python/ql/src/Expressions/RedundantComparison.qll b/python/ql/src/Expressions/RedundantComparison.qll new file mode 100644 index 00000000000..64f80ce31b5 --- /dev/null +++ b/python/ql/src/Expressions/RedundantComparison.qll @@ -0,0 +1,46 @@ +import python + +class RedundantComparison extends Compare { + + RedundantComparison() { + exists(Expr left, Expr right | + this.compares(left, _, right) + and + same_variable(left, right) + ) + } + + predicate maybeMissingSelf() { + exists(Name left | + this.compares(left, _, _) and + not this.isConstant() and + exists(Class cls | left.getScope().getScope() = cls | + exists(SelfAttribute sa | sa.getName() = left.getId() | + sa.getClass() = cls + ) + ) + ) + } + +} + +private predicate same_variable(Expr left, Expr right) { + same_name(left, right) + or + same_attribute(left, right) +} + +private predicate name_in_comparison(Compare comp, Name n, Variable v) { + comp.contains(n) and v = n.getVariable() +} + +private predicate same_name(Name n1, Name n2) { + n1 != n2 and + exists(Compare comp, Variable v | name_in_comparison(comp, n1, v) and name_in_comparison(comp, n2, v)) +} + +private predicate same_attribute(Attribute a1, Attribute a2) { + a1 != a2 and + exists(Compare comp | comp.contains(a1) and comp.contains(a2)) and + a1.getName() = a2.getName() and same_name(a1.getObject(), a2.getObject()) +} diff --git a/python/ql/src/Expressions/Regex/BackspaceEscape.py b/python/ql/src/Expressions/Regex/BackspaceEscape.py new file mode 100644 index 00000000000..2c1fa4bad4e --- /dev/null +++ b/python/ql/src/Expressions/Regex/BackspaceEscape.py @@ -0,0 +1,5 @@ +import re +matcher = re.compile(r"\b[\t\b]") + +def match_data(data): + return bool(matcher.match(data)) diff --git a/python/ql/src/Expressions/Regex/BackspaceEscape.qhelp b/python/ql/src/Expressions/Regex/BackspaceEscape.qhelp new file mode 100644 index 00000000000..18599e13e64 --- /dev/null +++ 
b/python/ql/src/Expressions/Regex/BackspaceEscape.qhelp @@ -0,0 +1,40 @@ + + + + +

    +The meaning of the \b escape sequence inside a regular expression depends on its +syntactic context: inside a character class, it matches the backspace character; outside of a +character class, it matches a word boundary. This context dependency makes regular expressions +hard to read, so the \b escape sequence should not be used inside character classes. +

    + +
    + + +

    +Replace \b in character classes with the semantically identical escape sequence \x08. +

    + +
    + +

    +In the following example, the regular expression contains two uses of \b: in the +first case, it matches a word boundary, in the second case it matches a backspace character. +

    + + + +

    +You can make the regular expression easier for other developers to interpret, by rewriting it as r"\b[\t\x08]". +

    + +
    + + +
  • Python Standard Library: Regular expression operations.
  • + +
    +
    diff --git a/python/ql/src/Expressions/Regex/BackspaceEscape.ql b/python/ql/src/Expressions/Regex/BackspaceEscape.ql new file mode 100644 index 00000000000..b80893b04f0 --- /dev/null +++ b/python/ql/src/Expressions/Regex/BackspaceEscape.ql @@ -0,0 +1,22 @@ +/** + * @name Backspace escape in regular expression + * @description Using '\b' to escape the backspace character in a regular expression is confusing + * since it could be mistaken for a word boundary assertion. + * @kind problem + * @tags maintainability + * @problem.severity recommendation + * @sub-severity high + * @precision very-high + * @id py/regex/backspace-escape + */ + +import python +import semmle.python.regex + +from Regex r, int offset +where r.escapingChar(offset) and r.getChar(offset+1) = "b" and +exists(int start, int end | + start < offset and end > offset | + r.charSet(start, end) +) +select r, "Backspace escape in regular expression at offset " + offset + "." \ No newline at end of file diff --git a/python/ql/src/Expressions/Regex/DuplicateCharacterInSet.py b/python/ql/src/Expressions/Regex/DuplicateCharacterInSet.py new file mode 100644 index 00000000000..cf2b0511235 --- /dev/null +++ b/python/ql/src/Expressions/Regex/DuplicateCharacterInSet.py @@ -0,0 +1,6 @@ +import re +matcher = re.compile(r"[password|pwd]") + +def find_password(data): + if matcher.match(data): + print("Found password!") diff --git a/python/ql/src/Expressions/Regex/DuplicateCharacterInSet.qhelp b/python/ql/src/Expressions/Regex/DuplicateCharacterInSet.qhelp new file mode 100644 index 00000000000..d9b1e9ed6d9 --- /dev/null +++ b/python/ql/src/Expressions/Regex/DuplicateCharacterInSet.qhelp @@ -0,0 +1,44 @@ + + + + +

    +Character classes in regular expressions represent sets of characters, so there is no need to specify +the same character twice in one character class. Duplicate characters in character classes are at best +useless, and may even indicate a latent bug. +

    + +
    + + +

    Determine whether a character is simply duplicated or whether the character class was in fact meant as a group. +If it is just a duplicate, then remove the duplicate character. +If it was supposed to be a group, then replace the square brackets with parentheses. +

    + + +
    + +

    +In the following example, the character class [password|pwd] contains two instances each +of the characters d, p, s, and w. The programmer most likely meant +to write (password|pwd) (a pattern that matches either the string "password" +or the string "pwd"), and accidentally mistyped the enclosing brackets. +

    + + + +

    +To fix this problem, the regular expression should be rewritten to r"(password|pwd)". +

    + +
    + + +
  • Python Standard Library: Regular expression operations.
  • +
  • Regular-Expressions.info: Character Classes or Character Sets.
  • + +
    +
    diff --git a/python/ql/src/Expressions/Regex/DuplicateCharacterInSet.ql b/python/ql/src/Expressions/Regex/DuplicateCharacterInSet.ql new file mode 100644 index 00000000000..88c265fb370 --- /dev/null +++ b/python/ql/src/Expressions/Regex/DuplicateCharacterInSet.ql @@ -0,0 +1,34 @@ +/** + * @name Duplication in regular expression character class + * @description Duplicate characters in a class have no effect and may indicate an error in the regular expression. + * @kind problem + * @tags reliability + * readability + * @problem.severity warning + * @sub-severity low + * @precision very-high + * @id py/regex/duplicate-in-character-class + */ + +import python +import semmle.python.regex + +predicate duplicate_char_in_class(Regex r, string char) { + exists(int i, int j, int x, int y, int start, int end | + i != x and j != y and + start < i and j < end and + start < x and y < end and + r.character(i, j) and char = r.getText().substring(i, j) and + r.character(x, y) and char = r.getText().substring(x, y) and + r.charSet(start, end) + ) and + /* Exclude � as we use it for any unencodable character */ + char != "�" and + //Ignore whitespace in verbose mode + not (r.getAMode() = "VERBOSE" and (char = " " or char = "\t" or char = "\r" or char = "\n")) +} + +from Regex r, string char +where duplicate_char_in_class(r, char) +select r, "This regular expression includes duplicate character '" + char + "' in a set of characters." + diff --git a/python/ql/src/Expressions/Regex/MissingPartSpecialGroup.py b/python/ql/src/Expressions/Regex/MissingPartSpecialGroup.py new file mode 100644 index 00000000000..29580414b5b --- /dev/null +++ b/python/ql/src/Expressions/Regex/MissingPartSpecialGroup.py @@ -0,0 +1,10 @@ +import re +matcher = re.compile(r'(P<name>[\w]+)') + +def only_letters(text): + m = matcher.match(text) + if m: + print("Letters are: " + m.group('name')) + +#Fix the pattern by adding the missing '?' 
+fixed_matcher = re.compile(r'(?P<name>[\w]+)') \ No newline at end of file diff --git a/python/ql/src/Expressions/Regex/MissingPartSpecialGroup.qhelp b/python/ql/src/Expressions/Regex/MissingPartSpecialGroup.qhelp new file mode 100644 index 00000000000..289bd4622b9 --- /dev/null +++ b/python/ql/src/Expressions/Regex/MissingPartSpecialGroup.qhelp @@ -0,0 +1,37 @@ + + + +

    +One of the problems with using regular expressions is that almost any sequence of characters is a valid pattern. +This means that it is easy to omit a necessary character and still have a valid regular expression. +Omitting a character in a named capturing group is a specific case which can dramatically change the meaning of a regular expression. +

    + +
    + + +

    +Examine the regular expression to find and correct any typos. +

    + +
    + +

    +In the following example, the regular expression for matcher, r"(P<name>[\w]+)", is missing a "?" and will +match only strings of letters that start with "P<name>", instead of matching any sequence of letters +and placing the result in a named group. +The fixed version, fixed_matcher, includes the "?" and will work as expected. +

    + + + +
    + + +
  • Python Standard Library: Regular expression operations.
  • +
  • Regular-Expressions.info: Named Capturing Groups.
  • + +
    +
    diff --git a/python/ql/src/Expressions/Regex/MissingPartSpecialGroup.ql b/python/ql/src/Expressions/Regex/MissingPartSpecialGroup.ql new file mode 100644 index 00000000000..7a1974fc514 --- /dev/null +++ b/python/ql/src/Expressions/Regex/MissingPartSpecialGroup.ql @@ -0,0 +1,20 @@ +/** + * @name Missing part of special group in regular expression + * @description Incomplete special groups are parsed as normal groups and are unlikely to match the intended strings. + * @kind problem + * @tags reliability + * correctness + * @problem.severity warning + * @sub-severity high + * @precision high + * @id py/regex/incomplete-special-group + */ + +import python +import semmle.python.regex + +from Regex r, string missing, string part +where r.getText().regexpMatch(".*\\(P<\\w+>.*") and missing = "?" and part = "named group" +select r, "Regular expression is missing '" + missing + "' in " + part + "." + + diff --git a/python/ql/src/Expressions/Regex/UnmatchableCaret.py b/python/ql/src/Expressions/Regex/UnmatchableCaret.py new file mode 100644 index 00000000000..7a51c4a8f93 --- /dev/null +++ b/python/ql/src/Expressions/Regex/UnmatchableCaret.py @@ -0,0 +1,11 @@ +import re +#Regular expression includes a caret, but not at the start. +matcher = re.compile(r"\[^.]*\.css") + +def find_css(filename): + if matcher.match(filename): + print("Found it!") + +#Regular expression for a css file name +fixed_matcher_css = re.compile(r"[^.]*\.css") + diff --git a/python/ql/src/Expressions/Regex/UnmatchableCaret.qhelp b/python/ql/src/Expressions/Regex/UnmatchableCaret.qhelp new file mode 100644 index 00000000000..32914e64a60 --- /dev/null +++ b/python/ql/src/Expressions/Regex/UnmatchableCaret.qhelp @@ -0,0 +1,40 @@ + + + +

    +The caret character ^ anchors a regular expression to the beginning of the input, or +(for multi-line regular expressions) to the beginning of a line. +If it is preceded by a pattern that must match a non-empty sequence of (non-newline) input characters, +then the entire regular expression cannot match anything. +

    + +
    + + +

    +Examine the regular expression to find and correct any typos. +

    + +
    + +

    +In the following example, the regular expression r"\[^.]*\.css" cannot match any +string, since it contains a caret assertion preceded by an escape sequence that matches an +opening bracket. +

    +

    In the second regular expression, r"[^.]*\.css", the caret is part of a character class, where it negates the class rather than anchoring the match to the start of the string.

    + + + +
    + + +
  • Python Standard Library: Regular expression operations.
  • +
  • Regular-Expressions.info: Start of String and End of String Anchors.
  • + +
    +
    diff --git a/python/ql/src/Expressions/Regex/UnmatchableCaret.ql b/python/ql/src/Expressions/Regex/UnmatchableCaret.ql new file mode 100644 index 00000000000..7fc0c6f219e --- /dev/null +++ b/python/ql/src/Expressions/Regex/UnmatchableCaret.ql @@ -0,0 +1,25 @@ +/** + * @name Unmatchable caret in regular expression + * @description Regular expressions containing a caret '^' in the middle cannot be matched, whatever the input. + * @kind problem + * @tags reliability + * correctness + * @problem.severity error + * @sub-severity low + * @precision high + * @id py/regex/unmatchable-caret + */ + +import python +import semmle.python.regex + +predicate unmatchable_caret(Regex r, int start) { + not r.getAMode() = "MULTILINE" and + not r.getAMode() = "VERBOSE" and + r.specialCharacter(start, start+1, "^") and + not r.firstItem(start, start+1) +} + +from Regex r, int offset +where unmatchable_caret(r, offset) +select r, "This regular expression includes an unmatchable caret at offset " + offset.toString() + "." diff --git a/python/ql/src/Expressions/Regex/UnmatchableDollar.py b/python/ql/src/Expressions/Regex/UnmatchableDollar.py new file mode 100644 index 00000000000..8e7a19eb4c1 --- /dev/null +++ b/python/ql/src/Expressions/Regex/UnmatchableDollar.py @@ -0,0 +1,10 @@ +import re +#Regular expression that includes a dollar, but not at the end. +matcher = re.compile(r"\.\(\w+$\)") + +def find_it(filename): + if matcher.match(filename): + print("Found it!") + +#Regular expression anchored to end of input. +fixed_matcher = re.compile(r"\.\(\w+\)$") \ No newline at end of file diff --git a/python/ql/src/Expressions/Regex/UnmatchableDollar.qhelp b/python/ql/src/Expressions/Regex/UnmatchableDollar.qhelp new file mode 100644 index 00000000000..2ff1071430f --- /dev/null +++ b/python/ql/src/Expressions/Regex/UnmatchableDollar.qhelp @@ -0,0 +1,41 @@ + + + +

    +A dollar assertion $ in a regular expression only matches at the end of the input, or +(for multi-line regular expressions) at the end of a line. If it is followed by a pattern +that must match a non-empty sequence of (non-newline) input characters, it cannot possibly match, +rendering the entire regular expression unmatchable. +

    + +
    + + +

    +Examine the regular expression to find and correct any typos. +

    + +
    + +

    +In the following example, the regular expression r"\.\(\w+$\)" cannot match any +string, since it contains a dollar assertion followed by an escape sequence that matches a +closing parenthesis. +

    + +

    +The second regular expression, r"\.\(\w+\)$", has the dollar at the end and will work as expected. +

    + + + +
    + + +
  • Python Standard Library: Regular expression operations.
  • +
  • Regular-Expressions.info: Start of String and End of String Anchors.
  • + +
    +
    diff --git a/python/ql/src/Expressions/Regex/UnmatchableDollar.ql b/python/ql/src/Expressions/Regex/UnmatchableDollar.ql new file mode 100644 index 00000000000..49cef2bded1 --- /dev/null +++ b/python/ql/src/Expressions/Regex/UnmatchableDollar.ql @@ -0,0 +1,26 @@ +/** + * @name Unmatchable dollar in regular expression + * @description Regular expressions containing a dollar '$' in the middle cannot be matched, whatever the input. + * @kind problem + * @tags reliability + * correctness + * @problem.severity error + * @sub-severity low + * @precision high + * @id py/regex/unmatchable-dollar + */ + +import python +import semmle.python.regex + +predicate unmatchable_dollar(Regex r, int start) { + not r.getAMode() = "MULTILINE" and + not r.getAMode() = "VERBOSE" and + r.specialCharacter(start, start+1, "$") + and + not r.lastItem(start, start+1) +} + +from Regex r, int offset +where unmatchable_dollar(r, offset) +select r, "This regular expression includes an unmatchable dollar at offset " + offset.toString() + "." diff --git a/python/ql/src/Expressions/TruncatedDivision.py b/python/ql/src/Expressions/TruncatedDivision.py new file mode 100644 index 00000000000..63ed31a8663 --- /dev/null +++ b/python/ql/src/Expressions/TruncatedDivision.py @@ -0,0 +1,7 @@ +# Incorrect: + +def average(l): + return sum(l) / len(l) + +print average([1.0, 2.0]) # Prints "1.5". +print average([1, 2]) # Prints "1", which is incorrect. diff --git a/python/ql/src/Expressions/TruncatedDivision.qhelp b/python/ql/src/Expressions/TruncatedDivision.qhelp new file mode 100644 index 00000000000..1daa6f7fef6 --- /dev/null +++ b/python/ql/src/Expressions/TruncatedDivision.qhelp @@ -0,0 +1,42 @@ + + + +

    + In Python 2, the result of dividing two integers is silently truncated into an integer. This may lead to unexpected behavior. +

    + +
    + + +

    + If the division should never be truncated, add + from __future__ import division + to the beginning of the file. If the division should always + be truncated, replace the division operator / with the + truncated division operator //. +

    + +
    + +

    + The first example shows a function for calculating the average of a sequence + of numbers. When the function runs under Python 2, and the sequence contains + only integers, an incorrect result may be returned because the result is + truncated. The second example corrects this error by following the + recommendation listed above. +

    + + + + + +
    + + +
  • Python Language Reference: Binary arithmetic operations.
  • +
  • PEP 238: Changing the Division Operator.
  • +
  • PEP 236: Back to the __future__.
  • +
    +
    diff --git a/python/ql/src/Expressions/TruncatedDivision.ql b/python/ql/src/Expressions/TruncatedDivision.ql new file mode 100644 index 00000000000..3d4deb9ba54 --- /dev/null +++ b/python/ql/src/Expressions/TruncatedDivision.ql @@ -0,0 +1,37 @@ + /** + * @name Result of integer division may be truncated + * @description The arguments to a division statement may be integers, which + * may cause the result to be truncated in Python 2. + * @kind problem + * @tags maintainability + * correctness + * @problem.severity warning + * @sub-severity high + * @precision very-high + * @id py/truncated-division + */ + +import python + +from BinaryExpr div, ControlFlowNode left, ControlFlowNode right +where + // Only relevant for Python 2, as all later versions implement true division + major_version() = 2 + and + exists(BinaryExprNode bin, Object lobj, Object robj | + bin = div.getAFlowNode() + and bin.getNode().getOp() instanceof Div + and bin.getLeft().refersTo(lobj, theIntType(), left) + and bin.getRight().refersTo(robj, theIntType(), right) + // Ignore instances where integer division leaves no remainder + and not lobj.(NumericObject).intValue() % robj.(NumericObject).intValue() = 0 + and not bin.getNode().getEnclosingModule().hasFromFuture("division") + // Filter out results wrapped in `int(...)` + and not exists(CallNode c, ClassObject cls | + c.getAnArg() = bin + and c.getFunction().refersTo(cls) + and cls.getName() = "int" + ) + ) +select div, "Result of division may be truncated as its $@ and $@ arguments may both be integers.", + left.getLocation(), "left", right.getLocation(), "right" diff --git a/python/ql/src/Expressions/TruncatedDivisionCorrect.py b/python/ql/src/Expressions/TruncatedDivisionCorrect.py new file mode 100644 index 00000000000..eb51454bb86 --- /dev/null +++ b/python/ql/src/Expressions/TruncatedDivisionCorrect.py @@ -0,0 +1,8 @@ +# Correct: +from __future__ import division + +def average(l): + return sum(l) / len(l) + +print average([1.0, 2.0]) # 
Prints "1.5". +print average([1, 2]) # Prints "1.5". diff --git a/python/ql/src/Expressions/UnintentionalImplicitStringConcatenation.py b/python/ql/src/Expressions/UnintentionalImplicitStringConcatenation.py new file mode 100644 index 00000000000..7bc0862ae9b --- /dev/null +++ b/python/ql/src/Expressions/UnintentionalImplicitStringConcatenation.py @@ -0,0 +1,19 @@ + +def unclear(): + # Returns [ "first part of long string and the second part", "/usr/local/usr/bin" ] + return [ + + "first part of long string" + " and the second part", + "/usr/local" + "/usr/bin" + ] + +def clarified(): + # Returns [ "first part of long string and the second part", "/usr/local", "/usr/bin" ] + return [ + "first part of long string" + + " and the second part", + "/usr/local", + "/usr/bin" + ] diff --git a/python/ql/src/Expressions/UnintentionalImplicitStringConcatenation.qhelp b/python/ql/src/Expressions/UnintentionalImplicitStringConcatenation.qhelp new file mode 100644 index 00000000000..281d684224e --- /dev/null +++ b/python/ql/src/Expressions/UnintentionalImplicitStringConcatenation.qhelp @@ -0,0 +1,39 @@ + + + + +

    When two string literals abut each other the Python interpreter implicitly concatenates them into a +single string. On occasion this can be useful, but is more commonly misleading or incorrect. +

    + +
    + + + +

    If the concatenation is deliberate, then use + to join the strings. This has no runtime overhead, +and makes the intention clear. +

    + + +
    + + +

    +In the first function below, unclear, implicit string concatenation is used twice; once deliberately and once by accident. +In the second function, clarified, the first concatenation is made explicit and the second is removed. +

    + + + + +
    + + + + +
  • Python language reference: String literal concatenation.
  • + +
    +
    diff --git a/python/ql/src/Expressions/UnintentionalImplicitStringConcatenation.ql b/python/ql/src/Expressions/UnintentionalImplicitStringConcatenation.ql new file mode 100644 index 00000000000..70128406915 --- /dev/null +++ b/python/ql/src/Expressions/UnintentionalImplicitStringConcatenation.ql @@ -0,0 +1,35 @@ +/** + * @name Implicit string concatenation in a list + * @description Omitting a comma between strings causes implicit concatenation which is confusing in a list. + * @kind problem + * @tags reliability + * maintainability + * convention + * external/cwe/cwe-665 + * @problem.severity warning + * @sub-severity high + * @precision high + * @id py/implicit-string-concatenation-in-list + */ + +import python + +predicate string_const(Expr s) { + s instanceof StrConst + or + string_const(s.(BinaryExpr).getLeft()) and string_const(s.(BinaryExpr).getRight()) +} + +from StrConst s +where +// Implicitly concatenated string is in a list and that list contains at least one other string. +exists(List l, Expr other | + not s = other and + l.getAnElt() = s and + l.getAnElt() = other and + string_const(other) +) and +exists(s.getAnImplicitlyConcatenatedPart()) and +not s.isParenthesized() + +select s, "Implicit string concatenation. Maybe missing a comma?" diff --git a/python/ql/src/Expressions/UnnecessaryLambda.py b/python/ql/src/Expressions/UnnecessaryLambda.py new file mode 100644 index 00000000000..7c296bfceab --- /dev/null +++ b/python/ql/src/Expressions/UnnecessaryLambda.py @@ -0,0 +1,7 @@ +import math + +def call_with_x_squared(x, function): + x = x*x + return function(x) + +print call_with_x_squared(2, lambda x: math.factorial(x)) \ No newline at end of file diff --git a/python/ql/src/Expressions/UnnecessaryLambda.qhelp b/python/ql/src/Expressions/UnnecessaryLambda.qhelp new file mode 100644 index 00000000000..04ca0174b07 --- /dev/null +++ b/python/ql/src/Expressions/UnnecessaryLambda.qhelp @@ -0,0 +1,29 @@ + + + +

    A lambda that calls a function without modifying any of its parameters is unnecessary. +Python functions are first class objects and can be passed around in the same way as the resulting lambda. +

    + +
    + +

    Remove the lambda and use the function directly.

    + +
    + +

    In this example a lambda is used unnecessarily in order to pass a method as an argument to +call_with_x_squared.

    + + +

    This is not necessary as methods can be passed directly. They behave as callable objects.

    + + +
    + + +
  • Python: lambdas.
  • + +
    +
    diff --git a/python/ql/src/Expressions/UnnecessaryLambda.ql b/python/ql/src/Expressions/UnnecessaryLambda.ql new file mode 100644 index 00000000000..93b78238e9a --- /dev/null +++ b/python/ql/src/Expressions/UnnecessaryLambda.ql @@ -0,0 +1,57 @@ +/** + * @name Unnecessary lambda + * @description A lambda is used that calls through to a function without modifying any parameters + * @kind problem + * @tags maintainability + * useless-code + * @problem.severity recommendation + * @sub-severity high + * @precision high + * @id py/unnecessary-lambda + */ + +import python + +/* f consists of a single return statement, whose value is a call. The arguments of the call are exactly the parameters of f */ +predicate simple_wrapper(Lambda l, Expr wrapped) { + exists(Function f, Call c | f = l.getInnerScope() and c = l.getExpression() | + wrapped = c.getFunc() and + count(f.getAnArg()) = count(c.getAnArg()) and + forall(int arg | exists(f.getArg(arg)) | + f.getArgName(arg) = ((Name)c.getArg(arg)).getId()) and + /* Either no **kwargs or they must match */ + (not exists(f.getKwarg()) and not exists(c.getKwargs()) or + ((Name)f.getKwarg()).getId() = ((Name)c.getKwargs()).getId()) and + /* Either no *args or they must match */ + (not exists(f.getVararg()) and not exists(c.getStarargs()) or + ((Name)f.getVararg()).getId() = ((Name)c.getStarargs()).getId()) and + /* No named parameters in call */ + not exists(c.getAKeyword()) + ) + and + // f is not necessarily a drop-in replacement for the lambda if there are default argument values + not exists(l.getArgs().getADefault()) +} + +/* The expression called will refer to the same object if evaluated when the lambda is created or when the lambda is executed. 
*/ +predicate unnecessary_lambda(Lambda l, Expr e) { + simple_wrapper(l, e) and + ( + /* plain class */ + exists(ClassObject c | e.refersTo(c)) + or + /* plain function */ + exists(FunctionObject f | e.refersTo(f)) + or + /* bound-method of enclosing instance */ + exists(ClassObject cls, Attribute a | + cls.getPyClass() = l.getScope().getScope() and a = e | + ((Name)a.getObject()).getId() = "self" and + cls.hasAttribute(a.getName()) + ) + ) +} + +from Lambda l, Expr e +where unnecessary_lambda(l, e) +select l, "This 'lambda' is just a simple wrapper around a callable object. Use that object directly." \ No newline at end of file diff --git a/python/ql/src/Expressions/UnnecessaryLambdaFix.py b/python/ql/src/Expressions/UnnecessaryLambdaFix.py new file mode 100644 index 00000000000..bbfcdb98aa0 --- /dev/null +++ b/python/ql/src/Expressions/UnnecessaryLambdaFix.py @@ -0,0 +1,7 @@ +import math + +def call_with_x_squared(x, function): + x = x*x + return function(x) + +print call_with_x_squared(2, math.factorial) \ No newline at end of file diff --git a/python/ql/src/Expressions/UnsupportedFormatCharacter.py b/python/ql/src/Expressions/UnsupportedFormatCharacter.py new file mode 100644 index 00000000000..60c4ebc53b9 --- /dev/null +++ b/python/ql/src/Expressions/UnsupportedFormatCharacter.py @@ -0,0 +1,6 @@ + +def format_as_tuple_incorrect(args): + return "%t" % args + +def format_as_tuple_correct(args): + return "%r" % (args,) diff --git a/python/ql/src/Expressions/UnsupportedFormatCharacter.qhelp b/python/ql/src/Expressions/UnsupportedFormatCharacter.qhelp new file mode 100644 index 00000000000..b22d59a209c --- /dev/null +++ b/python/ql/src/Expressions/UnsupportedFormatCharacter.qhelp @@ -0,0 +1,28 @@ + + + +

    A format string, that is, the string on the left hand side of an expression like fmt % arguments, must contain only legal conversion specifiers. +Otherwise, a ValueError will be raised. + +

    + +
    + +

    Choose a legal conversion specifier.

    + +
    + +

    In format_as_tuple_incorrect, "t" is not a legal conversion specifier. + +

    + + +
    + + +
  • Python Library Reference: String Formatting.
  • + +
    +
    diff --git a/python/ql/src/Expressions/UnsupportedFormatCharacter.ql b/python/ql/src/Expressions/UnsupportedFormatCharacter.ql new file mode 100644 index 00000000000..d3876725233 --- /dev/null +++ b/python/ql/src/Expressions/UnsupportedFormatCharacter.ql @@ -0,0 +1,18 @@ +/** + * @name Unsupported format character + * @description An unsupported format character in a format string + * @kind problem + * @tags reliability + * correctness + * @problem.severity error + * @sub-severity low + * @precision high + * @id py/percent-format/unsupported-character + */ + +import python +import semmle.python.strings + +from Expr e, int start +where start = illegal_conversion_specifier(e) +select e, "Invalid conversion specifier at index " + start + " of " + repr(e) + "." diff --git a/python/ql/src/Expressions/UseofApply.qhelp b/python/ql/src/Expressions/UseofApply.qhelp new file mode 100644 index 00000000000..afc4e7dea1e --- /dev/null +++ b/python/ql/src/Expressions/UseofApply.qhelp @@ -0,0 +1,28 @@ + + + + +

    The 'apply' function is deprecated and makes code harder to read as most Python programmers +will not be familiar with it (it has been deprecated since 2003). +

    + +
    + + +

    Replace apply(function, args) with function(*args). +

    +Replace apply(function, args, keywords) with function(*args, **keywords). +

    + + +
    + + +
  • Python Standard Library: apply.
  • +
  • Python PEP-290: Code Migration and Modernization.
  • + + +
    +
    diff --git a/python/ql/src/Expressions/UseofApply.ql b/python/ql/src/Expressions/UseofApply.ql new file mode 100644 index 00000000000..f9419962c29 --- /dev/null +++ b/python/ql/src/Expressions/UseofApply.ql @@ -0,0 +1,17 @@ +/** + * @name 'apply' function used + * @description The builtin function 'apply' is obsolete and should not be used. + * @kind problem + * @tags maintainability + * @problem.severity warning + * @sub-severity high + * @precision very-high + * @id py/use-of-apply + */ + +import python + +from CallNode call, ControlFlowNode func +where +major_version() = 2 and call.getFunction() = func and func.refersTo(theApplyFunction()) +select call, "Call to the obsolete builtin function 'apply'." diff --git a/python/ql/src/Expressions/UseofInput.qhelp b/python/ql/src/Expressions/UseofInput.qhelp new file mode 100644 index 00000000000..44baace8c43 --- /dev/null +++ b/python/ql/src/Expressions/UseofInput.qhelp @@ -0,0 +1,23 @@ + + + +

    In Python 2, a call to the input() function, input(prompt), is equivalent to eval(raw_input(prompt)). Evaluating user input without any checking can be a serious security flaw.

    + +
    + + +

    Get user input with raw_input(prompt) and then validate that input before evaluating. If the expected input is a number or +string, then ast.literal_eval() can always be used safely.

    + + +
    + + +
  • Python Standard Library: input, + ast.literal_eval.
  • +
  • Wikipedia: Data validation.
  • + +
    +
    diff --git a/python/ql/src/Expressions/UseofInput.ql b/python/ql/src/Expressions/UseofInput.ql new file mode 100644 index 00000000000..39289dd8a84 --- /dev/null +++ b/python/ql/src/Expressions/UseofInput.ql @@ -0,0 +1,18 @@ +/** + * @name 'input' function used + * @description The built-in function 'input' is used which can allow arbitrary code to be run. + * @kind problem + * @tags security + * correctness + * @problem.severity error + * @sub-severity high + * @precision high + * @id py/use-of-input + */ + +import python + +from CallNode call, Context context, ControlFlowNode func +where +context.getAVersion().includes(2, _) and call.getFunction() = func and func.refersTo(context, theInputFunction(), _, _) +select call, "The unsafe built-in function 'input' is used." diff --git a/python/ql/src/Expressions/WrongNameForArgumentInCall.qhelp b/python/ql/src/Expressions/WrongNameForArgumentInCall.qhelp new file mode 100644 index 00000000000..79be173c107 --- /dev/null +++ b/python/ql/src/Expressions/WrongNameForArgumentInCall.qhelp @@ -0,0 +1,30 @@ + + + +

    +Using a named argument whose name does not correspond to a parameter of the called function (or method) will result in a +TypeError at runtime. +

    + +
    + + +

    Check for typos in the name of the arguments and fix those. +If the name is clearly different, then this suggests a logical error. +The change required to correct the error will depend on whether the wrong argument has been +specified or whether the wrong function (or method) has been specified. +

    + +
    + + +
  • Python Glossary: Arguments.
  • +
  • Python Glossary: Parameters.
  • +
  • Python Programming FAQ: + What is the difference between arguments and parameters?.
  • + + +
    +
    diff --git a/python/ql/src/Expressions/WrongNameForArgumentInCall.ql b/python/ql/src/Expressions/WrongNameForArgumentInCall.ql new file mode 100644 index 00000000000..92c17f8e2ee --- /dev/null +++ b/python/ql/src/Expressions/WrongNameForArgumentInCall.ql @@ -0,0 +1,26 @@ +/** + * @name Wrong name for an argument in a call + * @description Using a named argument whose name does not correspond to a + * parameter of the called function or method, will result in a + * TypeError at runtime. + * @kind problem + * @tags reliability + * correctness + * external/cwe/cwe-628 + * @problem.severity error + * @sub-severity low + * @precision very-high + * @id py/call/wrong-named-argument + */ + +import python +import Expressions.CallArgs + + +from Call call, FunctionObject func, string name +where +illegally_named_parameter(call, func, name) and +not func.isAbstract() and +not exists(FunctionObject overridden | func.overrides(overridden) and overridden.getFunction().getAnArg().(Name).getId() = name) +select +call, "Keyword argument '" + name + "' is not a supported parameter name of $@.", func, func.descriptiveString() diff --git a/python/ql/src/Expressions/WrongNumberArgumentsForFormat.py b/python/ql/src/Expressions/WrongNumberArgumentsForFormat.py new file mode 100644 index 00000000000..20ab7f15dab --- /dev/null +++ b/python/ql/src/Expressions/WrongNumberArgumentsForFormat.py @@ -0,0 +1,6 @@ +def unsafe_format(): + if unlikely_condition(): + args = (1,2) + else: + args = (1, 2, 3) + return "%s %s %s" % args diff --git a/python/ql/src/Expressions/WrongNumberArgumentsForFormat.qhelp b/python/ql/src/Expressions/WrongNumberArgumentsForFormat.qhelp new file mode 100644 index 00000000000..00d5cebe701 --- /dev/null +++ b/python/ql/src/Expressions/WrongNumberArgumentsForFormat.qhelp @@ -0,0 +1,26 @@ + + + +

    A formatting expression, that is, an expression of the form fmt % arguments, must have the correct number of +arguments on the right hand side of the expression. Otherwise, a TypeError will be raised. + +

    + +
    + +

    Change the format to match the arguments and ensure that the right hand argument always has the correct number of elements. + +

    + +

    In the following example the right hand side of the formatting operation can be of length 2, which does not match the format string. +

    + +
    + + +
  • Python Library Reference: String Formatting.
  • + +
    +
    diff --git a/python/ql/src/Expressions/WrongNumberArgumentsForFormat.ql b/python/ql/src/Expressions/WrongNumberArgumentsForFormat.ql new file mode 100644 index 00000000000..aa163d91544 --- /dev/null +++ b/python/ql/src/Expressions/WrongNumberArgumentsForFormat.ql @@ -0,0 +1,45 @@ +/** + * @name Wrong number of arguments for format + * @description A string formatting operation, such as '"%s: %s, %s" % (a,b)', where the number of conversion specifiers in the + * format string differs from the number of values to be formatted will raise a TypeError. + * @kind problem + * @tags reliability + * correctness + * external/cwe/cwe-685 + * @problem.severity error + * @sub-severity low + * @precision very-high + * @id py/percent-format/wrong-arguments + */ + +import python +import semmle.python.strings + +predicate string_format(BinaryExpr operation, StrConst str, Object args, AstNode origin) { + exists(Object fmt, Context ctx | operation.getOp() instanceof Mod | + operation.getLeft().refersTo(ctx, fmt, _, str) and + operation.getRight().refersTo(ctx, args, _, origin) + ) +} + +int sequence_length(Object args) { + /* Guess length of sequence */ + exists(Tuple seq | + seq = args.getOrigin() | + result = strictcount(seq.getAnElt()) and + not seq.getAnElt() instanceof Starred + ) + or + exists(ImmutableLiteral i | + i.getLiteralObject() = args | + result = 1 + ) +} + + +from BinaryExpr operation, StrConst fmt, Object args, int slen, int alen, AstNode origin, string provided +where string_format(operation, fmt, args, origin) and slen = sequence_length(args) and alen = format_items(fmt) and slen != alen and +(if slen = 1 then provided = " is provided." else provided = " are provided.") +select operation, "Wrong number of $@ for string format. 
Format $@ takes " + alen.toString() + ", but " + slen.toString() + provided, + origin, "arguments", + fmt, fmt.getText() diff --git a/python/ql/src/Expressions/WrongNumberArgumentsInCall.qhelp b/python/ql/src/Expressions/WrongNumberArgumentsInCall.qhelp new file mode 100644 index 00000000000..6215a3c35e9 --- /dev/null +++ b/python/ql/src/Expressions/WrongNumberArgumentsInCall.qhelp @@ -0,0 +1,39 @@ + + + +

    + A function call must supply an argument for each parameter that does not have a default value defined, so: +

    +
      +
    • The minimum number of arguments is the number of parameters without default values.
    • +
    • The maximum number of arguments is the total number of parameters, + unless the function takes a varargs (starred) parameter in which case there + is no limit.
    • +
    +
    + +

    If there are too few arguments then check to see which arguments have been omitted and supply values for those.

    + +

    If there are too many arguments then check to see if any have been added by mistake and remove those.

    + +

    + Also check where a comma has been inserted instead of an operator or a dot. + For example, the code is obj,attr when it should be obj.attr. +

    +

    If it is not clear which are the missing or surplus arguments, then this suggests a logical error. +The fix will then depend on the nature of the error. +

    + +
    + + +
  • Python Glossary: Arguments.
  • +
  • Python Glossary: Parameters.
  • +
  • Python Programming FAQ: + What is the difference between arguments and parameters?.
  • + + +
    +
    diff --git a/python/ql/src/Expressions/WrongNumberArgumentsInCall.ql b/python/ql/src/Expressions/WrongNumberArgumentsInCall.ql new file mode 100644 index 00000000000..b31e9e70445 --- /dev/null +++ b/python/ql/src/Expressions/WrongNumberArgumentsInCall.ql @@ -0,0 +1,30 @@ +/** + * @name Wrong number of arguments in a call + * @description Using too many or too few arguments in a call to a function will result in a TypeError at runtime. + * @kind problem + * @tags reliability + * correctness + * external/cwe/cwe-685 + * @problem.severity error + * @sub-severity low + * @precision very-high + * @id py/call/wrong-arguments + */ + +import python +import CallArgs + +from Call call, FunctionObject func, string too, string should, int limit +where +( + too_many_args(call, func, limit) and too = "too many arguments" and should = "no more than " + or + too_few_args(call, func, limit) and too = "too few arguments" and should = "no fewer than " +) and +/* Calls to abstract functions are not reported. */ +not func.isAbstract() and +/* Nor are calls that satisfy `correct_args_if_called_as_method` for a method that `func` overrides. */ +not exists(FunctionObject overridden | func.overrides(overridden) and correct_args_if_called_as_method(call, overridden)) +/* The semantics of `__new__` can be a bit subtle, so we simply exclude `__new__` methods */ +and not func.getName() = "__new__" + +select call, "Call to $@ with " + too + "; should be " + should + limit.toString() + ".", func, func.descriptiveString() + diff --git a/python/ql/src/Filters/ClassifyFiles.ql b/python/ql/src/Filters/ClassifyFiles.ql new file mode 100644 index 00000000000..4448d993903 --- /dev/null +++ b/python/ql/src/Filters/ClassifyFiles.ql @@ -0,0 +1,20 @@ +/** + * @name Classify files + * @description This query produces a list of all files in a snapshot + * that are classified as generated code or test code. 
+ * @kind file-classifier + * @id py/file-classifier + */ + +import python +import semmle.python.filters.GeneratedCode +import semmle.python.filters.Tests + +/** + * Holds if `f` is a `GeneratedFile` and `tag` is "generated", or + * `f` is the file of the location of some `TestScope` and `tag` is "test". + */ +predicate classify(File f, string tag) { + f instanceof GeneratedFile and tag = "generated" or + exists (TestScope t | t.getLocation().getFile() = f) and tag = "test" +} + +from File f, string tag +where classify(f, tag) +select f, tag diff --git a/python/ql/src/Filters/ImportAdditionalLibraries.ql b/python/ql/src/Filters/ImportAdditionalLibraries.ql new file mode 100644 index 00000000000..aa55f486747 --- /dev/null +++ b/python/ql/src/Filters/ImportAdditionalLibraries.ql @@ -0,0 +1,16 @@ +/** + * @name (Import additional libraries) + * @description This query produces no results but imports some libraries we + * would like to make available in the LGTM query console even + * if they are not used by any queries. + * @kind file-classifier + * @id py/lgtm/import-additional-libraries + */ + +private import external.CodeDuplication +private import external.Thrift +private import external.VCS + +from File f, string tag +where none() +select f, tag diff --git a/python/ql/src/Filters/NotGenerated.ql b/python/ql/src/Filters/NotGenerated.ql new file mode 100644 index 00000000000..121fc3c7a45 --- /dev/null +++ b/python/ql/src/Filters/NotGenerated.ql @@ -0,0 +1,12 @@ +/** + * @name Filter: non-generated files + * @description Only keep results that aren't (or don't appear to be) generated. 
+ * @kind file-classifier + * @id py/not-generated-file-filter + */ +import external.DefectFilter +import semmle.python.filters.GeneratedCode + +/* Select only the results whose file is not a `GeneratedFile`. */ +from DefectResult res +where not exists(GeneratedFile f | res.getFile() = f) +select res, res.getMessage() diff --git a/python/ql/src/Filters/NotTest.ql b/python/ql/src/Filters/NotTest.ql new file mode 100644 index 00000000000..4d6b0ec5162 --- /dev/null +++ b/python/ql/src/Filters/NotTest.ql @@ -0,0 +1,12 @@ +/** + * @name Filter: non-test files + * @description Only keep results that aren't in tests + * @kind file-classifier + * @id py/not-test-file-filter + */ +import external.DefectFilter +import semmle.python.filters.Tests + +/* Select only the results for which no `TestScope` location satisfies `contains(..., res)`. */ +from DefectResult res +where not exists(TestScope s | contains(s.getLocation(), res)) +select res, res.getMessage() diff --git a/python/ql/src/Functions/ConsistentReturns.py b/python/ql/src/Functions/ConsistentReturns.py new file mode 100644 index 00000000000..8a15b865574 --- /dev/null +++ b/python/ql/src/Functions/ConsistentReturns.py @@ -0,0 +1,19 @@ + def check_state1(state, interactive=True): + if not state['good'] or not state['bad']: + if (good or bad or skip or reset) and interactive: + return # implicitly return None + if not state['good']: + raise util.Abort(_('cannot bisect (no known good revisions)')) + else: + raise util.Abort(_('cannot bisect (no known bad revisions)')) + return True + + def check_state2(state, interactive=True): + if not state['good'] or not state['bad']: + if (good or bad or skip or reset) and interactive: + return False # return an explicit value + if not state['good']: + raise util.Abort(_('cannot bisect (no known good revisions)')) + else: + raise util.Abort(_('cannot bisect (no known bad revisions)')) + return True diff --git a/python/ql/src/Functions/ConsistentReturns.qhelp b/python/ql/src/Functions/ConsistentReturns.qhelp new file mode 100644 index 00000000000..cd29062ada6 --- /dev/null +++ b/python/ql/src/Functions/ConsistentReturns.qhelp @@ -0,0 +1,37 @@ + + 
+ + + +

    When a function contains both explicit returns (return value) and implicit returns +(where code falls off the end of a function) this often indicates that a return +statement has been forgotten. It is best to return an explicit return value even when returning +None because this makes it easier for other developers to read your code. +

    + +
    + + +

    Add an explicit return at the end of the function.

    + + +
    + +

    In the check_state1 function, the developer probably did intend to use an implicit +return value of None as this equates to False. However, the function in +check_state2 is easier to read.

    + + + + +
    + + +
  • Python Language Reference: Function definitions. +
  • + + +
    +
    diff --git a/python/ql/src/Functions/ConsistentReturns.ql b/python/ql/src/Functions/ConsistentReturns.ql new file mode 100644 index 00000000000..f3344bd8f74 --- /dev/null +++ b/python/ql/src/Functions/ConsistentReturns.ql @@ -0,0 +1,32 @@ +/** + * @name Explicit returns mixed with implicit (fall through) returns + * @description Mixing implicit and explicit returns indicates a likely error as implicit returns always return 'None'. + * @kind problem + * @tags reliability + * maintainability + * @problem.severity recommendation + * @sub-severity high + * @precision high + * @id py/mixed-returns + */ + +import python + +/** Holds if the scope `func` contains a `return` statement whose value is an expression other than `None`. */ +predicate explicitly_returns_non_none(Function func) { + exists(Return return | return.getScope() = func and + exists(Expr val | + val= return.getValue() | + not val instanceof None + ) + ) +} + +/** + * Holds if `func` has a fall-through node for which `unlikelyReachable()` does not hold, + * or the scope `func` contains a `return` statement that has no value. + */ +predicate has_implicit_return(Function func) { + exists(ControlFlowNode fallthru | fallthru = func.getFallthroughNode() and not fallthru.unlikelyReachable()) or + exists(Return return | return.getScope() = func and not exists(return.getValue())) +} + + +from Function func +where explicitly_returns_non_none(func) and has_implicit_return(func) +select func, "Mixing implicit and explicit returns may indicate an error as implicit returns always return None." diff --git a/python/ql/src/Functions/DeprecatedSliceMethod.qhelp b/python/ql/src/Functions/DeprecatedSliceMethod.qhelp new file mode 100644 index 00000000000..9a47eeaf327 --- /dev/null +++ b/python/ql/src/Functions/DeprecatedSliceMethod.qhelp @@ -0,0 +1,37 @@ + + + +

    The __getslice__, __setslice__ and __delslice__ methods have been deprecated since Python 2.0. +In general, no class should implement these methods. +

    + +

    +The only exceptions to this rule are classes that inherit from list and override __getitem__, +__setitem__ or __delitem__. +Since list implements the slicing methods, any class inheriting from list must implement the +slicing methods to ensure correct behavior of __getitem__, __setitem__ and __delitem__. +These exceptions to the rule will not be treated as violations. +

    + +
    + +

    +Delete the slicing method. Any functionality should be moved to the equivalent __xxxitem__ method: +

    +
      +
    • __getslice__ should be replaced with __getitem__
    • +
    • __setslice__ should be replaced with __setitem__
    • +
    • __delslice__ should be replaced with __delitem__
    • +
    + +
    + + +
  • Python Language Reference: +Additional methods for emulation of sequence types. +
  • + +
    +
    diff --git a/python/ql/src/Functions/DeprecatedSliceMethod.ql b/python/ql/src/Functions/DeprecatedSliceMethod.ql new file mode 100644 index 00000000000..b81d0b750a6 --- /dev/null +++ b/python/ql/src/Functions/DeprecatedSliceMethod.ql @@ -0,0 +1,24 @@ +/** + * @name Deprecated slice method + * @description Defining special methods for slicing has been deprecated since Python 2.0. + * @kind problem + * @tags maintainability + * @problem.severity warning + * @sub-severity high + * @precision very-high + * @id py/deprecated-slice-method + */ + +import python + +/** Holds if `name` is `__getslice__`, `__setslice__` or `__delslice__`. */ +predicate slice_method_name(string name) { + name = "__getslice__" or name = "__setslice__" or name = "__delslice__" +} + +from PyFunctionObject f, string meth + +/* Only methods for which `isOverridingMethod()` does not hold are reported. */ +where f.getFunction().isMethod() and not f.isOverridingMethod() and + slice_method_name(meth) and f.getName() = meth + + +select f, meth + " method has been deprecated since Python 2.0" \ No newline at end of file diff --git a/python/ql/src/Functions/ExplicitReturnInInit.py b/python/ql/src/Functions/ExplicitReturnInInit.py new file mode 100644 index 00000000000..b0ab9760d7c --- /dev/null +++ b/python/ql/src/Functions/ExplicitReturnInInit.py @@ -0,0 +1,4 @@ +class ExplicitReturnInInit(object): + def __init__(self, i): + self.i = i + return self \ No newline at end of file diff --git a/python/ql/src/Functions/ExplicitReturnInInit.qhelp b/python/ql/src/Functions/ExplicitReturnInInit.qhelp new file mode 100644 index 00000000000..c789f24c4e5 --- /dev/null +++ b/python/ql/src/Functions/ExplicitReturnInInit.qhelp @@ -0,0 +1,28 @@ + + + +

    The __init__ method of a class is used to initialize new objects, +not create them. As such, it should not return any value. Returning None +is correct in the sense that no runtime error will occur, +but it suggests that the returned value is meaningful, which it is not.

    + +
    + +

    Convert the return expr statement to a plain return statement, +or omit it altogether if it is at the end of the method.

    + +
    + +

    In this example, the __init__ method attempts to return the newly created +object. This is an error and the return statement should be removed.

    + + +
    + + +
  • Python: The __init__ method.
  • + +
    +
    diff --git a/python/ql/src/Functions/ExplicitReturnInInit.ql b/python/ql/src/Functions/ExplicitReturnInInit.ql new file mode 100644 index 00000000000..0885e7cbdd7 --- /dev/null +++ b/python/ql/src/Functions/ExplicitReturnInInit.ql @@ -0,0 +1,23 @@ +/** + * @name __init__ method returns a value + * @description Explicitly returning a value from an __init__ method will raise a TypeError. + * @kind problem + * @tags reliability + * correctness + * @problem.severity error + * @sub-severity low + * @precision very-high + * @id py/explicit-return-in-init + */ + +import python + +from Return r +where exists(Function init | init.isInitMethod() and +r.getScope() = init and exists(r.getValue())) and +/* `return None` is not reported. */ +not r.getValue() instanceof None and +/* Returning a call to a function for which `neverReturns()` holds is not reported. */ +not exists(FunctionObject f | f.getACall() = r.getValue().getAFlowNode() | + f.neverReturns() +) and +/* Returning the result of calling an attribute named `__init__` is not reported. */ +not exists(Attribute meth | meth = ((Call)r.getValue()).getFunc() | meth.getName() = "__init__") +select r, "Explicit return in __init__ method." diff --git a/python/ql/src/Functions/IncorrectRaiseInSpecialMethod.py b/python/ql/src/Functions/IncorrectRaiseInSpecialMethod.py new file mode 100644 index 00000000000..e76c27145db --- /dev/null +++ b/python/ql/src/Functions/IncorrectRaiseInSpecialMethod.py @@ -0,0 +1,16 @@ +#Incorrect unhashable class +class MyMutableThing(object): + + def __init__(self): + pass + + def __hash__(self): + raise NotImplementedError("%r is unhashable" % self) + +#Make class unhashable in the standard way +class MyCorrectMutableThing(object): + + def __init__(self): + pass + + __hash__ = None diff --git a/python/ql/src/Functions/IncorrectRaiseInSpecialMethod.qhelp b/python/ql/src/Functions/IncorrectRaiseInSpecialMethod.qhelp new file mode 100644 index 00000000000..f4f0cd6920a --- /dev/null +++ b/python/ql/src/Functions/IncorrectRaiseInSpecialMethod.qhelp @@ -0,0 +1,71 @@ + + + +

    User-defined classes interact with the Python virtual machine via special methods (also called "magic methods"). +For example, for a class to support addition it must implement the __add__ and __radd__ special methods. +When the expression a + b is evaluated the Python virtual machine will call type(a).__add__(a, b) and if that +is not implemented it will call type(b).__radd__(b, a).

    +

    +Since the virtual machine calls these special methods for common expressions, users of the class will expect these operations to raise standard exceptions. +For example, users would expect that the expression a.b might raise an AttributeError +if the object a does not have an attribute b. +If a KeyError were raised instead, +then this would be unexpected and may break code that expected an AttributeError, but not a KeyError. +

    + +

    +Therefore, if a method is unable to perform the expected operation then its response should conform to the standard protocol, described below. +

    + +
      +
    • Attribute access, a.b: Raise AttributeError
    • +
    • Arithmetic operations, a + b: Do not raise an exception, return NotImplemented instead.
    • +
    • Indexing, a[b]: Raise KeyError.
    • +
    • Hashing, hash(a): Use __hash__ = None to indicate that an object is unhashable.
    • +
    • Equality methods, a != b: Never raise an exception, always return True or False.
    • +
    • Ordering comparison methods, a < b: Raise a TypeError if the objects cannot be ordered.
    • +
    • Most others: Ideally, do not implement the method at all, otherwise raise TypeError to indicate that the operation is unsupported.
    • +
    + +
    + +

    If the method is meant to be abstract, then declare it so using the @abstractmethod decorator. +Otherwise, either remove the method or ensure that the method raises an exception of the correct type. +

    + +
    + + +

    +This example shows two unhashable classes. The first class is unhashable in a non-standard way which may cause maintenance problems. +The second, corrected, class uses the standard idiom for unhashable classes. +

    + +

    +In this example, the first class is implicitly abstract; the __add__ method is unimplemented, +presumably with the expectation that it will be implemented by sub-classes. +The second class makes this explicit with an @abstractmethod decoration on the unimplemented __add__ method. +

    + +

    +In this last example, the first class implements a collection backed by the file store. +However, should an IOError be raised in the __getitem__ it will propagate to the caller. +The second class handles any IOError by reraising a KeyError which is the standard exception for +the __getitem__ method. +

    + + + + +
    + + +
  • Python Language Reference: Special Method Names.
  • +
  • Python Library Reference: Exceptions.
  • + + + +
    +
    diff --git a/python/ql/src/Functions/IncorrectRaiseInSpecialMethod.ql b/python/ql/src/Functions/IncorrectRaiseInSpecialMethod.ql new file mode 100644 index 00000000000..7d54c0b49ef --- /dev/null +++ b/python/ql/src/Functions/IncorrectRaiseInSpecialMethod.ql @@ -0,0 +1,112 @@ +/** + * @name Non-standard exception raised in special method + * @description Raising a non-standard exception in a special method alters the expected interface of that method. + * @kind problem + * @tags reliability + * maintainability + * convention + * @problem.severity recommendation + * @sub-severity high + * @precision very-high + * @id py/unexpected-raise-in-special-method + */ + +import python + +/** Holds if `name` is `__getattribute__`, `__getattr__` or `__setattr__`. */ +private predicate attribute_method(string name) { + name = "__getattribute__" or name = "__getattr__" or name = "__setattr__" +} + +/** Holds if `name` is `__getitem__`, `__setitem__` or `__delitem__`. */ +private predicate indexing_method(string name) { + name = "__getitem__" or name = "__setitem__" or name = "__delitem__" +} + +/** + * Holds if `name` is one of the arithmetic or bitwise special method names listed below, + * including the reflected (`__r...__`) and in-place (`__i...__`) forms that are listed. + * + * NOTE(review): `__div__`, `__rdiv__` and `__idiv__` each appear twice and the `__mod__` + * family does not appear at all -- confirm whether that is intended. + */ +private predicate arithmetic_method(string name) { + name = "__add__" or name = "__sub__" or name = "__div__" or + name = "__pos__" or name = "__abs__" or name = "__floordiv__" or + name = "__div__" or name = "__divmod__" or name = "__lshift__" or + name = "__and__" or name = "__or__"or name = "__xor__" or name = "__rshift__" or + name = "__pow__" or name = "__mul__" or name = "__neg__" or + name = "__radd__" or name = "__rsub__" or name = "__rdiv__" or + name = "__rfloordiv__" or name = "__rdiv__" or name = "__rlshift__" or + name = "__rand__" or name = "__ror__"or name = "__rxor__" or name = "__rrshift__" or + name = "__rpow__" or name = "__rmul__" or name = "__truediv__" or name = "__rtruediv__" or + name = "__iadd__" or name = "__isub__" or name = "__idiv__" or + name = "__ifloordiv__" or name = "__idiv__" or name = "__ilshift__" or + name = "__iand__" or name = "__ior__"or name = "__ixor__" or name = "__irshift__" or + name = "__ipow__" or name = "__imul__" or name = "__itruediv__" +} + +/** Holds if `name` is `__lt__`, `__le__`, `__gt__` or `__ge__`, or is `__cmp__` while `major_version()` is 2. */ +private predicate ordering_method(string 
name) { + name = "__lt__" or name = "__le__" or name = "__gt__" or name = "__ge__" or + name = "__cmp__" and major_version() = 2 +} + +/** + * Holds if `name` is `__bool__`, `__int__`, `__float__`, `__long__`, `__trunc__` or `__complex__`, + * or is `__nonzero__` while `major_version()` is 2. + */ +private predicate cast_method(string name) { + name = "__nonzero__" and major_version() = 2 or + name = "__bool__" or + name = "__int__" or name = "__float__" or + name = "__long__" or + name = "__trunc__" or + name = "__complex__" +} + +/** + * Holds if raising `ex` from a special method called `name` is accepted by this query: + * either `ex` has `TypeError` as an improper supertype and `name` is `__copy__`, `__deepcopy__`, + * `__call__`, an indexing method or an attribute method; or `ex`, or one of its supertypes, + * is the preferred exception for `name`. + */ +predicate correct_raise(string name, ClassObject ex) { + ex.getAnImproperSuperType() = theTypeErrorType() + and + ( + name = "__copy__" or + name = "__deepcopy__" or + name = "__call__" or + indexing_method(name) or + attribute_method(name) + ) + or + preferred_raise(name, ex) + or + preferred_raise(name, ex.getASuperType()) +} + +/** + * Holds if `ex` is the exception class preferred for a special method called `name`: + * `AttributeError` for attribute methods, `LookupError` for indexing methods, + * `TypeError` for ordering methods and `ArithmeticError` for arithmetic methods. + */ +predicate preferred_raise(string name, ClassObject ex) { + attribute_method(name) and ex = theAttributeErrorType() + or + indexing_method(name) and ex = builtin_object("LookupError") + or + ordering_method(name) and ex = theTypeErrorType() + or + arithmetic_method(name) and ex = builtin_object("ArithmeticError") +} + +/** + * Holds if a special method called `name` (`__hash__` or a cast method) has an alternative + * to raising, where `message` is the advice text appended to the alert. + */ +predicate no_need_to_raise(string name, string message) { + name = "__hash__" and message = "use __hash__ = None instead" + or + cast_method(name) and message = "there is no need to implement the method at all." 
+} + +/** Holds if a decorator of `func` is a `Name` whose identifier matches the pattern `%abstract%`. */ +predicate is_abstract(FunctionObject func) { + ((Name)func.getFunction().getADecorator()).getId().matches("%abstract%") +} + +/** + * Holds if `ex` is the only type returned by `f.getARaisedType()`, the function of `f` + * has no normal exit, and `ex` is not `StopIteration`. + */ +predicate always_raises(FunctionObject f, ClassObject ex) { + ex = f.getARaisedType() and + strictcount(f.getARaisedType()) = 1 and + not exists(f.getFunction().getANormalExit()) and + /* raising StopIteration is equivalent to a return in a generator */ + not ex = theStopIterationType() +} + +from FunctionObject f, ClassObject cls, string message +where f.getFunction().isSpecialMethod() and +not is_abstract(f) and +always_raises(f, cls) and +/* Neither branch below reports a raised class named `NotImplementedError`. */ +( + no_need_to_raise(f.getName(), message) and not cls.getName() = "NotImplementedError" + or + not correct_raise(f.getName(), cls) and not cls.getName() = "NotImplementedError" + and + exists(ClassObject preferred | + preferred_raise(f.getName(), preferred) | + message = "raise " + preferred.getName() + " instead" + ) +) +select f, "Function always raises $@; " + message, cls, cls.toString() diff --git a/python/ql/src/Functions/IncorrectRaiseInSpecialMethod2.py b/python/ql/src/Functions/IncorrectRaiseInSpecialMethod2.py new file mode 100644 index 00000000000..405400bfe61 --- /dev/null +++ b/python/ql/src/Functions/IncorrectRaiseInSpecialMethod2.py @@ -0,0 +1,15 @@ + +#Abstract base class, but don't declare it. +class ImplicitAbstractClass(object): + + def __add__(self, other): + raise NotImplementedError() + +#Make abstractness explicit. 
+class ExplicitAbstractClass: + __metaclass__ = ABCMeta + + @abstractmethod + def __add__(self, other): + raise NotImplementedError() + diff --git a/python/ql/src/Functions/IncorrectRaiseInSpecialMethod3.py b/python/ql/src/Functions/IncorrectRaiseInSpecialMethod3.py new file mode 100644 index 00000000000..048d5043b4d --- /dev/null +++ b/python/ql/src/Functions/IncorrectRaiseInSpecialMethod3.py @@ -0,0 +1,27 @@ + +#Incorrect file-backed table +class FileBackedTable(object): + + def __getitem__(self, key): + if key not in self.index: + raise IOError("Key '%s' not in table" % key) + else: + #May raise an IOError + return self.backing.get_row(key) + +#Correct by transforming exception +class ObjectLikeFileBackedTable(object): + + def get_from_key(self, key): + if key not in self.index: + raise IOError("Key '%s' not in table" % key) + else: + #May raise an IOError + return self.backing.get_row(key) + + def __getitem__(self, key): + try: + return self.get_from_key(key) + except IOError: + raise KeyError(key) + diff --git a/python/ql/src/Functions/IncorrectlyOverriddenMethod.qhelp b/python/ql/src/Functions/IncorrectlyOverriddenMethod.qhelp new file mode 100644 index 00000000000..89869efda71 --- /dev/null +++ b/python/ql/src/Functions/IncorrectlyOverriddenMethod.qhelp @@ -0,0 +1,41 @@ + + + + + +

    There is a call to the overridden method, and potentially the overriding method, +with arguments that are not legal for the overriding method. +This will cause an error if the overriding method is called and is a +violation of the Liskov substitution principle. +

    + +
    + + +

    Ensure that the overriding method accepts all the parameters that are legal for the +overridden method.

    + +
    + +

    In this example there is a mismatch between the legal parameters for the base +class method (self, source, filename, symbol) and the extension method +(self, source). The extension method can be used to override the base +method as long as values are not specified for the filename and (optional) +symbol parameters. If the extension method was passed the additional +parameters accepted by the base method then an error would occur.

    + + + +

    The extension method should be updated to support the filename and +symbol parameters supported by the overridden method.

    + +
    + + +
  • Wikipedia: Liskov Substitution Principle, Method overriding.
  • + + +
    +
    diff --git a/python/ql/src/Functions/IncorrectlyOverriddenMethod.ql b/python/ql/src/Functions/IncorrectlyOverriddenMethod.ql new file mode 100644 index 00000000000..e5d3947a1a7 --- /dev/null +++ b/python/ql/src/Functions/IncorrectlyOverriddenMethod.ql @@ -0,0 +1,27 @@ +/** + * @name Mismatch between signature and use of an overriding method + * @description Method has a different signature from the overridden method and, if it were called, would be likely to cause an error. + * @kind problem + * @tags maintainability + * @problem.severity error + * @sub-severity low + * @precision high + * @id py/inheritance/incorrect-overriding-signature + */ + +import python +import Expressions.CallArgs + +from Call call, FunctionObject func, FunctionObject overridden, string problem +where +func.overrides(overridden) and ( + /* Case 1: `wrong_args` holds for `call` and `func`, while `correct_args_if_called_as_method` holds for `call` and `overridden`. */ + wrong_args(call, func, _, problem) and correct_args_if_called_as_method(call, overridden) + or + /* Case 2: `illegally_named_parameter` flags `name` for `func`, yet `overridden` has a parameter with that name. */ + exists(string name | + illegally_named_parameter(call, func, name) and problem = "an argument named '" + name + "'" and + overridden.getFunction().getAnArg().(Name).getId() = name + ) +) + +select func, "Overriding method signature does not match $@, where it is passed " + problem + ". Overridden method $@ is correctly specified.", +call, "here", overridden, overridden.descriptiveString() diff --git a/python/ql/src/Functions/IncorrectlySpecifiedOverriddenMethod.qhelp b/python/ql/src/Functions/IncorrectlySpecifiedOverriddenMethod.qhelp new file mode 100644 index 00000000000..6e2ef60596f --- /dev/null +++ b/python/ql/src/Functions/IncorrectlySpecifiedOverriddenMethod.qhelp @@ -0,0 +1,39 @@ + + + + + +

    There is a call to the overriding method, and potentially the overridden method, +with arguments that are not legal for the overridden method. +This will cause an error if the overridden method is called and is a +violation of the Liskov substitution principle. +

    +
    + + +

    Ensure that the overridden method accepts all the parameters that are legal for the +overriding method(s).

    + +
    + +

    In this example there is a mismatch between the legal parameters for the base +class method (self, source, filename) and the extension method +(self, source). Since there is a call that uses the signature of the extension method, +it can be inferred that the base signature is erroneous and should be updated to +match that of the extension method. +

    + + + +

    The base method should be updated to either remove the filename parameter, or add a default value for it.

    + +
    + + +
  • Wikipedia: Liskov Substitution Principle, Method overriding.
  • + + +
    +
    diff --git a/python/ql/src/Functions/IncorrectlySpecifiedOverriddenMethod.ql b/python/ql/src/Functions/IncorrectlySpecifiedOverriddenMethod.ql new file mode 100644 index 00000000000..3af03a23602 --- /dev/null +++ b/python/ql/src/Functions/IncorrectlySpecifiedOverriddenMethod.ql @@ -0,0 +1,35 @@ +/** + * @name Mismatch between signature and use of an overridden method + * @description Method has a signature that differs from both the signature of its overriding methods and + * the arguments with which it is called, and if it were called, would be likely to cause an error. + * @kind problem + * @tags maintainability + * @problem.severity error + * @sub-severity low + * @precision high + * @id py/inheritance/incorrect-overridden-signature + */ + +import python +import Expressions.CallArgs + +from Call call, FunctionObject func, FunctionObject overriding, string problem +where +/* Functions named `__init__` are not considered. */ +not func.getName() = "__init__" and +overriding.overrides(func) and +call = overriding.getAMethodCall().getNode() and +correct_args_if_called_as_method(call, overriding) and +( + /* NOTE(review): the `+1` and the `>=` presumably account for the implicit `self` argument of a method call -- confirm against `arg_count`. */ + arg_count(call)+1 < func.minParameters() and problem = "too few arguments" + or + arg_count(call) >= func.maxParameters() and problem = "too many arguments" + or + /* The call passes a keyword that names a parameter of `overriding` but not of `func`. */ + exists(string name | call.getAKeyword().getArg() = name and + overriding.getFunction().getAnArg().(Name).getId() = name and + not func.getFunction().getAnArg().(Name).getId() = name and + problem = "an argument named '" + name + "'" + ) +) + +select func, "Overridden method signature does not match $@, where it is passed " + problem + ". 
Overriding method $@ matches the call.", +call, "call", overriding, overriding.descriptiveString() diff --git a/python/ql/src/Functions/InitIsGenerator.py b/python/ql/src/Functions/InitIsGenerator.py new file mode 100644 index 00000000000..c64cc346203 --- /dev/null +++ b/python/ql/src/Functions/InitIsGenerator.py @@ -0,0 +1,3 @@ +class InitIsGenerator(object): + def __init__(self, i): + yield i \ No newline at end of file diff --git a/python/ql/src/Functions/InitIsGenerator.qhelp b/python/ql/src/Functions/InitIsGenerator.qhelp new file mode 100644 index 00000000000..113e444d1f3 --- /dev/null +++ b/python/ql/src/Functions/InitIsGenerator.qhelp @@ -0,0 +1,28 @@ + + + +

    The __init__ method of a class is used to initialize new objects, +not create them. As such, it should not return any value. +Including a yield expression in the method turns it into a generator method. +Calling it will then return a generator, resulting in a runtime error.

    + +
    + +

    The presence of a yield expression in an __init__ method +suggests a logical error, so it is not possible to suggest a general fix.

    + +
    + +

    In this example the __init__ method contains a yield expression. This is +not logical in the context of an initializer.

    + + +
    + + +
  • Python: The __init__ method.
  • + +
    +
    diff --git a/python/ql/src/Functions/InitIsGenerator.ql b/python/ql/src/Functions/InitIsGenerator.ql new file mode 100644 index 00000000000..5ad61ae8255 --- /dev/null +++ b/python/ql/src/Functions/InitIsGenerator.ql @@ -0,0 +1,18 @@ +/** + * @name __init__ method is a generator + * @description __init__ method is a generator. + * @kind problem + * @tags reliability + * correctness + * @problem.severity error + * @sub-severity low + * @precision very-high + * @id py/init-method-is-generator + */ + +import python + +from Function f +/* `f` is reported when it is an `__init__` method and is the scope of a `yield` or `yield from` expression. */ +where f.isInitMethod() and +(exists(Yield y | y.getScope() = f) or exists(YieldFrom y| y.getScope() = f)) +select f, "__init__ method is a generator." diff --git a/python/ql/src/Functions/IterReturnsNonIterator.py b/python/ql/src/Functions/IterReturnsNonIterator.py new file mode 100644 index 00000000000..91f2ab699de --- /dev/null +++ b/python/ql/src/Functions/IterReturnsNonIterator.py @@ -0,0 +1,18 @@ +#Incorrect version: this class defines no next()/__next__() method, yet __iter__ returns self +class MyRange(object): + def __init__(self, low, high): + self.current = low + self.high = high + + def __iter__(self): + return self + +#Fixed version +class MyRange(object): + def __init__(self, low, high): + self.current = low + self.high = high + + def __iter__(self): + while self.current < self.high: + yield self.current + self.current += 1 diff --git a/python/ql/src/Functions/IterReturnsNonIterator.qhelp b/python/ql/src/Functions/IterReturnsNonIterator.qhelp new file mode 100644 index 00000000000..ebb043b5d0f --- /dev/null +++ b/python/ql/src/Functions/IterReturnsNonIterator.qhelp @@ -0,0 +1,37 @@ + + + +

    The __iter__ method of a class should return an iterator. + +Iteration in Python relies on this behavior and attempting to iterate over an +instance of a class with an incorrect __iter__ method will raise a TypeError. +

    + + +
    + +

    Make the __iter__ method return a new iterator, either as an instance of +a separate class or as a generator.

    + +
    + +

    In this example the MyRange class's __iter__ method does not +return an iterator. This will cause the program to fail when anyone attempts +to use the iterator in a for loop or in statement. +

    + +

    The fixed version implements the __iter__ method as a generator function.

    + + + +
    + + +
  • Python Language Reference: object.__iter__.
  • +
  • Python Standard Library: Iterator Types.
  • + + +
    +
    diff --git a/python/ql/src/Functions/IterReturnsNonIterator.ql b/python/ql/src/Functions/IterReturnsNonIterator.ql new file mode 100644 index 00000000000..7c727af8d4e --- /dev/null +++ b/python/ql/src/Functions/IterReturnsNonIterator.ql @@ -0,0 +1,32 @@ +/** + * @name __iter__ method returns a non-iterator + * @description The '__iter__' method returns a non-iterator which, if used in a 'for' loop, would raise a 'TypeError'. + * @kind problem + * @tags reliability + * correctness + * @problem.severity error + * @sub-severity low + * @precision high + * @id py/iter-returns-non-iterator + */ + +import python + +/** Gets the function object that `t.lookupAttribute("__iter__")` yields for class `t`. */ +FunctionObject iter_method(ClassObject t) { + result = t.lookupAttribute("__iter__") +} + +/** + * Gets a class reported by `refersTo` for the value of some `return` statement + * whose scope is the function of `f` (presumably the inferred class of the returned value). + */ +cached ClassObject return_type(FunctionObject f) { + exists(ControlFlowNode n, Return ret | + ret.getScope() = f.getFunction() and ret.getValue() = n.getNode() and + n.refersTo(_, result, _) + ) +} + +from ClassObject t, FunctionObject iter +where exists(ClassObject ret_t | iter = iter_method(t) and + ret_t = return_type(iter) and + not ret_t.isIterator() + ) + +select iter, "The '__iter__' method of iterable class $@ does not return an iterator.", t, t.getName() \ No newline at end of file diff --git a/python/ql/src/Functions/IterReturnsNonSelf.py b/python/ql/src/Functions/IterReturnsNonSelf.py new file mode 100644 index 00000000000..6251b87aba7 --- /dev/null +++ b/python/ql/src/Functions/IterReturnsNonSelf.py @@ -0,0 +1,13 @@ +class MyRange(object): + def __init__(self, low, high): + self.current = low + self.high = high + + def __iter__(self): + return self.current + + def next(self): + if self.current > self.high: + raise StopIteration + self.current += 1 + return self.current - 1 \ No newline at end of file diff --git a/python/ql/src/Functions/IterReturnsNonSelf.qhelp b/python/ql/src/Functions/IterReturnsNonSelf.qhelp new file mode 100644 index 00000000000..f614d912ff0 --- /dev/null +++ b/python/ql/src/Functions/IterReturnsNonSelf.qhelp @@ -0,0 +1,37 @@ + + + +

    The __iter__ method of an iterator should return self. +This is important so that iterators can be used as sequences in any context +that expects a sequence. To do so requires that __iter__ is +idempotent on iterators.

    + +

    +Note that sequences and mappings should return a new iterator; it is just the returned +iterator that must obey this constraint. +

    + +
    + +

    Make the __iter__ method return self unless the class should not be an iterator, +in which case rename the next (Python 2) or __next__ (Python 3) method +to something else.

    + +
    + +

    In this example the MyRange class's __iter__ method does not +return self (or even an iterator). This will cause the program to fail when anyone attempts +to use the iterator in a for loop or in statement.

    + + +
    + + +
  • Python Language Reference: object.__iter__.
  • +
  • Python Standard Library: Iterators.
  • + + +
    +
    diff --git a/python/ql/src/Functions/IterReturnsNonSelf.ql b/python/ql/src/Functions/IterReturnsNonSelf.ql new file mode 100644 index 00000000000..0899cf798a1 --- /dev/null +++ b/python/ql/src/Functions/IterReturnsNonSelf.ql @@ -0,0 +1,33 @@ +/** + * @name Iterator does not return self from __iter__ method + * @description Iterator does not return self from __iter__ method, violating the iterator protocol. + * @kind problem + * @tags reliability + * correctness + * @problem.severity error + * @sub-severity low + * @precision high + * @id py/iter-returns-non-self + */ + +import python + +/** Gets the function of the function object that `t.lookupAttribute("__iter__")` yields for class `t`. */ +Function iter_method(ClassObject t) { + result = ((FunctionObject)t.lookupAttribute("__iter__")).getFunction() +} + +/** Holds if `value` is a use of the same variable as the first parameter (`getArg(0)`) of `f`. */ +predicate is_self(Name value, Function f) { + value.getVariable() = ((Name)f.getArg(0)).getVariable() +} + +/** + * Holds if `f` has a fall-through node, or contains a `return` whose value does not + * satisfy `is_self`, or contains a `return` that has no value. + */ +predicate returns_non_self(Function f) { + exists(f.getFallthroughNode()) + or + exists(Return r | r.getScope() = f and not is_self(r.getValue(), f)) + or + exists(Return r | r.getScope() = f and not exists(r.getValue())) +} + +from ClassObject t, Function iter +where t.isIterator() and iter = iter_method(t) and returns_non_self(iter) +select t, "Class " + t.getName() + " is an iterator but its $@ method does not return 'self'.", iter, iter.getName() \ No newline at end of file diff --git a/python/ql/src/Functions/ModificationOfParameterWithDefault.py b/python/ql/src/Functions/ModificationOfParameterWithDefault.py new file mode 100644 index 00000000000..2ddee367acc --- /dev/null +++ b/python/ql/src/Functions/ModificationOfParameterWithDefault.py @@ -0,0 +1,7 @@ + + def __init__(self, name, choices=[], default=[], shortDesc=None, + longDesc=None, hints=None, allowNone=1): # 'default' parameter assigned a value + self.choices = choices + if choices and not default: + default.append(choices[0][1]) # value of 'default' parameter modified + Argument.__init__(self, name, default, shortDesc, longDesc, hints, allowNone=allowNone) \ No newline at end of file diff 
--git a/python/ql/src/Functions/ModificationOfParameterWithDefault.qhelp b/python/ql/src/Functions/ModificationOfParameterWithDefault.qhelp new file mode 100644 index 00000000000..ff225c68992 --- /dev/null +++ b/python/ql/src/Functions/ModificationOfParameterWithDefault.qhelp @@ -0,0 +1,44 @@ + + + + + +

    The default value of a parameter is computed once when the function is +created, not for every invocation. The "pre-computed" value is then used for every +subsequent call to the function. Consequently, if you modify the default +value for a parameter this "modified" default value is used for the parameter +in future calls to the function. This means that the function may not behave as +expected in future calls and also makes the function more difficult to understand. +

    + +
    + +

    If a parameter has a default value, do not modify the default value. When +you use a mutable object as a default value, you should use a placeholder value +instead of modifying the default value. This is a particular problem when you +work with lists and dictionaries but there are standard methods of avoiding +modifying the default parameter (see References).

    + +
    + +

    In the following example, the default parameter is set with a default +value of an empty list. Other commands in the function then append values to the +list. The next time the function is called, the list will contain values, which +may not have been intended.

    + + +

    The recommended workaround is to use a placeholder value. That is, define the +function with a default of default=None, check if the parameter is +None and then set the parameter to a list.

    + +
    + + +
  • Effbot: Default Parameter Values in Python.
  • +
  • Python Language Reference: Function definitions.
  • + + +
    +
    diff --git a/python/ql/src/Functions/ModificationOfParameterWithDefault.ql b/python/ql/src/Functions/ModificationOfParameterWithDefault.ql new file mode 100644 index 00000000000..03e76477dea --- /dev/null +++ b/python/ql/src/Functions/ModificationOfParameterWithDefault.ql @@ -0,0 +1,61 @@ +/** + * @name Modification of parameter with default + * @description Modifying the default value of a parameter can lead to unexpected + * results. + * @kind problem + * @tags reliability + * maintainability + * @problem.severity error + * @sub-severity low + * @precision high + * @id py/modification-of-default-value + */ + +import python + +predicate safe_method(string name) { + name = "count" or name = "index" or name = "copy" or name = "get" or name = "has_key" or + name = "items" or name = "keys" or name = "values" or name = "iteritems" or name = "iterkeys" or name = "itervalues" +} + +predicate maybe_parameter(SsaVariable var, Function f, Parameter p) { + p = var.getAnUltimateDefinition().getDefinition().getNode() and + f.getAnArg() = p +} + +Name use_of_parameter(Parameter p) { + exists(SsaVariable var | + p = var.getAnUltimateDefinition().getDefinition().getNode() and + var.getAUse().getNode() = result + ) +} + +predicate modifying_call(Call c, Parameter p) { + exists(Attribute a | + c.getFunc() = a | + a.getObject() = use_of_parameter(p) and + not safe_method(a.getName()) + ) +} + +predicate is_modification(AstNode a, Parameter p) { + modifying_call(a, p) + or + a.(AugAssign).getTarget() = use_of_parameter(p) +} + +predicate has_mutable_default(Parameter p) { + exists(SsaVariable v, FunctionExpr f | maybe_parameter(v, f.getInnerScope(), p) and + exists(int i, int def_cnt, int arg_cnt | + def_cnt = count(f.getArgs().getADefault()) and + arg_cnt = count(f.getInnerScope().getAnArg()) and + i in [1 .. 
arg_cnt] and + (f.getArgs().getDefault(def_cnt - i) instanceof Dict or f.getArgs().getDefault(def_cnt - i) instanceof List) and + f.getInnerScope().getArgName(arg_cnt - i) = v.getId() + ) + ) +} + +from AstNode a, Parameter p +where has_mutable_default(p) and is_modification(a, p) +select a, "Modification of parameter $@, which has mutable default value.", p, p.asName().getId() diff --git a/python/ql/src/Functions/NonCls.py b/python/ql/src/Functions/NonCls.py new file mode 100644 index 00000000000..f4959d89ebd --- /dev/null +++ b/python/ql/src/Functions/NonCls.py @@ -0,0 +1,4 @@ +class Entry(object): + @classmethod + def make(klass): + return Entry() diff --git a/python/ql/src/Functions/NonCls.qhelp b/python/ql/src/Functions/NonCls.qhelp new file mode 100644 index 00000000000..0e658e7a6b9 --- /dev/null +++ b/python/ql/src/Functions/NonCls.qhelp @@ -0,0 +1,35 @@ + + + + + +

    The first argument of a class method, a new method or any metaclass method +should be called cls. This makes the purpose of the argument clear to other developers. +

    + +
    + + +

    Change the name of the first argument to cls as recommended by the style guidelines +in PEP 8.

    + +
    + +

    In the example, the first parameter to make() is klass which should be changed to cls +for ease of comprehension. +

    + + + + +
    + + +
  • Python PEP 8: Function and method arguments.
  • +
  • Python Tutorial: Classes.
  • + + +
    +
    diff --git a/python/ql/src/Functions/NonCls.ql b/python/ql/src/Functions/NonCls.ql new file mode 100644 index 00000000000..015263df19f --- /dev/null +++ b/python/ql/src/Functions/NonCls.ql @@ -0,0 +1,47 @@ +/** + * @name First parameter of a class method is not named 'cls' + * @description Using an alternative name for the first argument of a class method makes code more + * difficult to read; PEP8 states that the first argument to class methods should be 'cls'. + * @kind problem + * @tags maintainability + * readability + * convention + * @problem.severity recommendation + * @sub-severity high + * @precision high + * @id py/not-named-cls + */ + +import python + +predicate first_arg_cls(Function f) { + exists(string argname | argname = f.getArgName(0) | + argname = "cls" or + /* Not PEP8, but relatively common */ + argname = "mcls" + ) +} + +predicate is_type_method(Function f) { + exists(ClassObject c | c.getPyClass() = f.getScope() and c.getASuperType() = theTypeType()) +} + +predicate classmethod_decorators_only(Function f) { + forall(Expr decorator | + decorator = f.getADecorator() | + ((Name) decorator).getId() = "classmethod") +} + +from Function f, string message +where (f.getADecorator().(Name).getId() = "classmethod" or is_type_method(f)) and +not first_arg_cls(f) and classmethod_decorators_only(f) and +not f.getName() = "__new__" and +( + if exists(f.getArgName(0)) then + message = "Class methods or methods of a type deriving from type should have 'cls', rather than '" + + f.getArgName(0) + "', as their first argument." + else + message = "Class methods or methods of a type deriving from type should have 'cls' as their first argument." 
+) + +select f, message diff --git a/python/ql/src/Functions/NonSelf.py b/python/ql/src/Functions/NonSelf.py new file mode 100644 index 00000000000..845172717e4 --- /dev/null +++ b/python/ql/src/Functions/NonSelf.py @@ -0,0 +1,9 @@ +class Point: + def __init__(val, x, y): # first argument is mis-named 'val' + val._x = x + val._y = y + +class Point2: + def __init__(self, x, y): # first argument is correctly named 'self' + self._x = x + self._y = y \ No newline at end of file diff --git a/python/ql/src/Functions/NonSelf.qhelp b/python/ql/src/Functions/NonSelf.qhelp new file mode 100644 index 00000000000..c4cef70e731 --- /dev/null +++ b/python/ql/src/Functions/NonSelf.qhelp @@ -0,0 +1,38 @@ + + + + + +

    Normal methods should have at least one parameter and the first parameter should be called self. +This makes the purpose of the parameter clear to other developers. +

    +
    + + +

    If there is at least one parameter, then change the name of the first parameter to self as recommended by the style guidelines +in PEP 8.

    +

    If there are no parameters, then it cannot be a normal method. It may need to be marked as a staticmethod +or it could be moved out of the class as a normal function. +

    +
    + + +

    The following methods can both be used to assign values to variables in a point +object. The second method makes the association clearer because the self parameter is +used.

    + + + +
    + + +
  • Python PEP 8: Function and +method arguments.
  • +
  • Python Tutorial: Classes.
  • + + + +
    +
    diff --git a/python/ql/src/Functions/NonSelf.ql b/python/ql/src/Functions/NonSelf.ql new file mode 100644 index 00000000000..37b7ee0ef06 --- /dev/null +++ b/python/ql/src/Functions/NonSelf.ql @@ -0,0 +1,54 @@ +/** + * @name First argument of a method is not named 'self' + * @description Using an alternative name for the first argument of an instance method makes + * code more difficult to read; PEP8 states that the first argument to instance + * methods should be 'self'. + * @kind problem + * @tags maintainability + * readability + * convention + * @problem.severity recommendation + * @sub-severity high + * @precision very-high + * @id py/not-named-self + */ + +import python +import semmle.python.libraries.Zope + +predicate first_arg_self(Function f) { + f.getArgName(0) = "self" +} + +predicate is_type_method(FunctionObject f) { + exists(ClassObject c | c.lookupAttribute(_) = f and c.getASuperType() = theTypeType()) +} + +predicate used_in_defining_scope(FunctionObject f) { + exists(Call c | + c.getScope() = f.getFunction().getScope() and + c.getFunc().refersTo(f) + ) +} + +from Function f, PyFunctionObject func, string message +where +exists(ClassObject cls, string name | + cls.declaredAttribute(name) = func and cls.isNewStyle() and + not name = "__new__" and + not name = "__metaclass__" and + /* declared in scope */ + f.getScope() = cls.getPyClass() +) and +not first_arg_self(f) and not is_type_method(func) and +func.getFunction() = f and not f.getName() = "lambda" and +not used_in_defining_scope(func) and +( + if exists(f.getArgName(0)) then + message = "Normal methods should have 'self', rather than '" + f.getArgName(0) + "', as their first parameter." + else + message = "Normal methods should have at least one parameter (the first of which should be 'self')." 
and not f.hasVarArg() +) and +not func instanceof ZopeInterfaceMethod + +select f, message diff --git a/python/ql/src/Functions/OverlyComplexDelMethod.py b/python/ql/src/Functions/OverlyComplexDelMethod.py new file mode 100644 index 00000000000..61e5e28dd6f --- /dev/null +++ b/python/ql/src/Functions/OverlyComplexDelMethod.py @@ -0,0 +1,24 @@ + +#Relies on __del__ being called by the garbage collector. +class CachedPreferencesFile + + ... + + def __del__(self): + for key, value in self.preferences.items(): + self.write_pair(key, value) + self.backing.close() + + +#Better version +class CachedPreferencesFile + + ... + + def close(self): + for key, value in self.preferences.items(): + self.write_pair(key, value) + self.backing.close() + + def __del__(self): + self.close() diff --git a/python/ql/src/Functions/OverlyComplexDelMethod.qhelp b/python/ql/src/Functions/OverlyComplexDelMethod.qhelp new file mode 100644 index 00000000000..71410d63a78 --- /dev/null +++ b/python/ql/src/Functions/OverlyComplexDelMethod.qhelp @@ -0,0 +1,42 @@ + + + + +

    The __del__ method exists to release any resources held by an object when that object is deleted. +The __del__ method is called only by the garbage collector, which may call it after an indefinite delay or +never. +

    + +

    +Consequently, the __del__ method should not be relied on to release resources, such as file descriptors. +Rather, these resources should be released explicitly. +

    + +

    The existence of a complex __del__ method suggests that this is the main or only way to release resources +associated with the object.

    + +
    + + +

    In order to ensure correct cleanup of the object, add an explicit close(), or similar, +method. Consider also making the object a context manager.

    + +

    The __del__ method should just call close().

    + + +
    + +

    The first example below shows a class which relies on __del__ to release resources. +The second example shows an improved version of the class where __del__ simply calls close.

    + + + +
    + + +
  • Python Standard Library: Context manager.
  • + +
    +
    diff --git a/python/ql/src/Functions/OverlyComplexDelMethod.ql b/python/ql/src/Functions/OverlyComplexDelMethod.ql new file mode 100644 index 00000000000..fff4b3fad0b --- /dev/null +++ b/python/ql/src/Functions/OverlyComplexDelMethod.ql @@ -0,0 +1,21 @@ +/** + * @name Overly complex __del__ method + * @description __del__ methods may be called at arbitrary times, perhaps never called at all, and should be simple. + * @kind problem + * @tags efficiency + * maintainability + * complexity + * statistical + * non-attributable + * @problem.severity recommendation + * @sub-severity low + * @precision high + * @id py/overly-complex-delete + */ + +import python + +from FunctionObject method +where exists(ClassObject c | c.declaredAttribute("__del__") = method and +method.getFunction().getMetrics().getCyclomaticComplexity() > 3) +select method, "Overly complex '__del__' method." diff --git a/python/ql/src/Functions/ReturnConsistentTupleSizes.py b/python/ql/src/Functions/ReturnConsistentTupleSizes.py new file mode 100644 index 00000000000..b5ced0685db --- /dev/null +++ b/python/ql/src/Functions/ReturnConsistentTupleSizes.py @@ -0,0 +1,15 @@ +def sum_length_product1(l): + if l == []: + return 0, 0 # this tuple has the wrong length + else: + val = l[0] + restsum, restlength, restproduct = sum_length_product1(l[1:]) + return restsum + val, restlength + 1, restproduct * val + +def sum_length_product2(l): + if l == []: + return 0, 0, 1 # this tuple has the correct length + else: + val = l[0] + restsum, restlength, restproduct = sum_length_product2(l[1:]) + return restsum + val, restlength + 1, restproduct * val diff --git a/python/ql/src/Functions/ReturnConsistentTupleSizes.qhelp b/python/ql/src/Functions/ReturnConsistentTupleSizes.qhelp new file mode 100644 index 00000000000..2ebcdc5721d --- /dev/null +++ b/python/ql/src/Functions/ReturnConsistentTupleSizes.qhelp @@ -0,0 +1,39 @@ + + + + + +

    + A common pattern for functions returning multiple values is to return a + single tuple containing those values. If the function has multiple return + points, care must be taken to ensure that the tuples returned have the same + length. +

    +
    + + +

    Ensure that the function returns tuples of the same length from every return point.

    + +
    + +

    + In this example, the sum_length_product1 function + simultaneously calculates the sum, length, and product of the values in the + given list. For empty lists, however, the returned tuple only contains the + sum and length of the list. In sum_length_product2 this error + has been corrected. +

    + + + +
    + + +
  • Python Language Reference: Function definitions. +
  • + + +
    +
    diff --git a/python/ql/src/Functions/ReturnConsistentTupleSizes.ql b/python/ql/src/Functions/ReturnConsistentTupleSizes.ql new file mode 100644 index 00000000000..010bbd07ccb --- /dev/null +++ b/python/ql/src/Functions/ReturnConsistentTupleSizes.ql @@ -0,0 +1,29 @@ +/** + * @name Returning tuples with varying lengths + * @description A function that potentially returns tuples of different lengths may indicate a problem. + * @kind problem + * @tags reliability + * maintainability + * @problem.severity recommendation + * @sub-severity high + * @precision high + * @id py/mixed-tuple-returns + */ + +import python + +predicate returns_tuple_of_size(Function func, int size, AstNode origin) { + exists(Return return, TupleObject val | + return.getScope() = func and + return.getValue().refersTo(val, origin) | + size = val.getLength() + ) +} + + +from Function func, int s1, int s2, AstNode t1, AstNode t2 +where + returns_tuple_of_size(func, s1, t1) and + returns_tuple_of_size(func, s2, t2) and + s1 < s2 +select func, func.getQualifiedName() + " returns $@ and $@.", t1, "tuple of size " + s1, t2, "tuple of size " + s2 diff --git a/python/ql/src/Functions/ReturnValueIgnored.py b/python/ql/src/Functions/ReturnValueIgnored.py new file mode 100644 index 00000000000..848517964c1 --- /dev/null +++ b/python/ql/src/Functions/ReturnValueIgnored.py @@ -0,0 +1,21 @@ + +from third_party import get_resource + +def ignore_error(name): + rsc = get_resource(name) + rsc.initialize() + try: + use_resource(rsc) + finally: + rsc.close() + +#Fixed +def do_not_ignore_error(name): + rsc = get_resource(name) + success = rsc.initialize() + if not success: + raise Error("Could not initialize resource") + try: + use_resource(rsc) + finally: + rsc.close() diff --git a/python/ql/src/Functions/ReturnValueIgnored.qhelp b/python/ql/src/Functions/ReturnValueIgnored.qhelp new file mode 100644 index 00000000000..7081d247112 --- /dev/null +++ b/python/ql/src/Functions/ReturnValueIgnored.qhelp @@ -0,0 +1,45 
@@ + + + + +

    When a function returns a non-trivial value, that value should not be ignored. Doing so may result in errors being ignored or +information being thrown away.

    + +

    A return value is considered to be trivial if it is None or it is a parameter (parameters, usually self, are often +returned to assist with method chaining, but can be ignored). +A return value is also assumed to be trivial if it is ignored for 75% or more of calls. +

    + +
    + + +

    Act upon all non-trivial return values, either propagating each value or recording it. +If a return value should be ignored, then ensure that it is ignored consistently. +

    + +

    +If you have access to the source code of the called function, then consider modifying it so that it does not return pointless values. +

    + + +
    + + +

    +In the ignore_error function the error condition is ignored. +Ideally the Resource.initialize() function would raise an exception if it failed, but as it does not, the caller must deal with the error. +The do_not_ignore_error function checks the error condition and raises an exception if Resource.initialize() fails. +

    + + + +
    + + +
  • Python Language Reference: Function definitions. +
  • + +
    +
    diff --git a/python/ql/src/Functions/ReturnValueIgnored.ql b/python/ql/src/Functions/ReturnValueIgnored.ql new file mode 100644 index 00000000000..19896533a7c --- /dev/null +++ b/python/ql/src/Functions/ReturnValueIgnored.ql @@ -0,0 +1,72 @@ +/** + * @name Ignored return value + * @description Ignoring return values may result in discarding errors or loss of information. + * @kind problem + * @tags reliability + * readability + * convention + * statistical + * non-attributable + * external/cwe/cwe-252 + * @problem.severity recommendation + * @sub-severity high + * @precision medium + * @id py/ignored-return-value + */ + +import python + +predicate meaningful_return_value(Expr val) { + val instanceof Name + or + val instanceof BooleanLiteral + or + exists(FunctionObject callee | val = callee.getACall().getNode() and returns_meaningful_value(callee)) + or + not exists(FunctionObject callee | val = callee.getACall().getNode()) and not val instanceof Name +} + +/* Value is used before returning, and thus its value is not lost if ignored */ +predicate used_value(Expr val) { + exists(LocalVariable var, Expr other | var.getAnAccess() = val and other = var.getAnAccess() and not other = val) +} + +predicate returns_meaningful_value(FunctionObject f) { + not exists(f.getFunction().getFallthroughNode()) + and + ( + exists(Return ret, Expr val | ret.getScope() = f.getFunction() and val = ret.getValue() | + meaningful_return_value(val) and + not used_value(val) + ) + or + /* Is f a builtin function that returns something other than None? + * Ignore __import__ as it is often called purely for side effects */ + f.isC() and f.getAnInferredReturnType() != theNoneType() and not f.getName() = "__import__" + ) +} + +/* If a call is wrapped tightly in a try-except then we assume it is being executed for the exception. 
*/ +predicate wrapped_in_try_except(ExprStmt call) { + exists(Try t | + exists(t.getAHandler()) and + strictcount(Call c | t.getBody().contains(c)) = 1 and + call = t.getAStmt() + ) +} + +from ExprStmt call, FunctionObject callee, float percentage_used, int total +where call.getValue() = callee.getACall().getNode() and returns_meaningful_value(callee) and +not wrapped_in_try_except(call) and +exists(int unused | + unused = count(ExprStmt e | e.getValue().getAFlowNode() = callee.getACall()) and + total = count(callee.getACall()) | + percentage_used = (100.0*(total-unused)/total).floor() +) and +/* Report an alert if we see at least 5 calls and the return value is used in at least 3/4 of those calls. */ +percentage_used >= 75 and +total >= 5 + +select call, "Call discards return value of function $@. The result is used in " + percentage_used.toString() + "% of calls.", +callee, callee.getName() + diff --git a/python/ql/src/Functions/SignatureIncorrectlyOverriddenMethod.py b/python/ql/src/Functions/SignatureIncorrectlyOverriddenMethod.py new file mode 100644 index 00000000000..731ef72dac0 --- /dev/null +++ b/python/ql/src/Functions/SignatureIncorrectlyOverriddenMethod.py @@ -0,0 +1,14 @@ + +class BaseClass(object): + + def run(self, source, filename, symbol="single"): + ... # Definition + + def load_and_run(self, filename): + source = self.load(filename) + self.run(source, filename) # Matches signature in this class, but not in the derived class. + +class DerivedClass(BaseClass): + + def run(self, source): + ... # Definition diff --git a/python/ql/src/Functions/SignatureOverriddenMethod.py b/python/ql/src/Functions/SignatureOverriddenMethod.py new file mode 100644 index 00000000000..7beddcb9e95 --- /dev/null +++ b/python/ql/src/Functions/SignatureOverriddenMethod.py @@ -0,0 +1,9 @@ + +# Base class method +def runsource(self, source, filename="", symbol="single"): + ... # Definition + + +# Extend base class method +def runsource(self, source): + ... 
# Definition \ No newline at end of file diff --git a/python/ql/src/Functions/SignatureOverriddenMethod.qhelp b/python/ql/src/Functions/SignatureOverriddenMethod.qhelp new file mode 100644 index 00000000000..b7da2678e3d --- /dev/null +++ b/python/ql/src/Functions/SignatureOverriddenMethod.qhelp @@ -0,0 +1,41 @@ + + + + + +

    There are one or more legal parameters for an overridden method that are +not legal for the overriding method. This will cause an error when the overriding +method is called with a number of parameters that is legal for the overridden method. +This violates the Liskov substitution principle. +

    + +
    + + +

    Ensure that the overriding method accepts all the parameters that are legal for +the overridden method.

    + +
    + +

    In this example there is a mismatch between the legal parameters for the base +class method (self, source, filename, symbol) and the extension method +(self, source). The extension method can be used to override the base +method as long as values are not specified for the filename and +symbol parameters. If the extension method was passed the additional +parameters accepted by the base method then an error would occur.

    + + + +

    The extension method should be updated to support the filename and +symbol parameters supported by the overridden method.

    + +
    + + +
  • Wikipedia: Liskov Substitution Principle, Method overriding.
  • + + +
    +
    diff --git a/python/ql/src/Functions/SignatureOverriddenMethod.ql b/python/ql/src/Functions/SignatureOverriddenMethod.ql new file mode 100644 index 00000000000..47182d8d87d --- /dev/null +++ b/python/ql/src/Functions/SignatureOverriddenMethod.ql @@ -0,0 +1,35 @@ +/** + * @name Signature mismatch in overriding method + * @description Overriding a method without ensuring that both methods accept the same + * number and type of parameters has the potential to cause an error when there is a mismatch. + * @kind problem + * @problem.severity warning + * @tags reliability + * correctness + * @problem.severity warning + * @sub-severity high + * @precision very-high + * @id py/inheritance/signature-mismatch + */ + +import python +import Expressions.CallArgs + +from FunctionObject base, PyFunctionObject derived +where + not exists(base.getACall()) and + not exists(FunctionObject a_derived | + a_derived.overrides(base) and + exists(a_derived.getACall()) + ) and + not derived.getFunction().isSpecialMethod() and + derived.getName() != "__init__" and + derived.isNormalMethod() and + not derived.getFunction().isSpecialMethod() and + // call to overrides distributed for efficiency + ( + (derived.overrides(base) and derived.minParameters() > base.maxParameters()) + or + (derived.overrides(base) and derived.maxParameters() < base.minParameters()) + ) +select derived, "Overriding method '" + derived.getName() + "' has signature mismatch with $@.", base, "overridden method" diff --git a/python/ql/src/Functions/SignatureOverridingMethod.py b/python/ql/src/Functions/SignatureOverridingMethod.py new file mode 100644 index 00000000000..f4898ef45b5 --- /dev/null +++ b/python/ql/src/Functions/SignatureOverridingMethod.py @@ -0,0 +1,14 @@ + +class BaseClass(object): + + def run(self, source, filename, symbol="single"): + ... # Definition + + def load_and_run(self, filename): + source = self.load(filename) + self.run(source) # Matches signature in derived class, but not in this class. 
+ +class DerivedClass(BaseClass): + + def run(self, source): + ... # Definition diff --git a/python/ql/src/Functions/SignatureSpecialMethods.py b/python/ql/src/Functions/SignatureSpecialMethods.py new file mode 100644 index 00000000000..343baf55f72 --- /dev/null +++ b/python/ql/src/Functions/SignatureSpecialMethods.py @@ -0,0 +1,18 @@ +#-*- coding: utf-8 -*- + +class Point(object): + + def __init__(self, x, y): + self.x + self.y + + def __add__(self, other): + if not isinstance(other, Point): + return NotImplemented + return Point(self.x + other.x, self.y + other.y) + + def __str__(self, style): #Spurious extra parameter + if style == 'polar': + u"%s @ %s\u00b0" % (abs(self), self.angle()) + else: + return "[%s, %s]" % (self.x, self.y) diff --git a/python/ql/src/Functions/SignatureSpecialMethods.qhelp b/python/ql/src/Functions/SignatureSpecialMethods.qhelp new file mode 100644 index 00000000000..ab25c13f07f --- /dev/null +++ b/python/ql/src/Functions/SignatureSpecialMethods.qhelp @@ -0,0 +1,34 @@ + + + +

    Special methods (sometimes also called magic methods) are how user defined classes interact with the Python virtual machine. +For example, for a class to support addition it must implement the __add__ and __radd__ special methods. +When the expression a + b is evaluated the Python virtual machine will call type(a).__add__(a, b) and if that +is not implemented it will call type(b).__radd__(b, a).

    +

    +Since these special methods are always called by the virtual machine with a fixed number of parameters, if the method is implemented with +a different number of parameters it will fail at runtime with a TypeError. +

    + +
    + +

    Ensure that the method has the correct number of parameters.

    + +
    + +

    In the example the __str__ method has an extra parameter. This means that if str(p) is called when p +is a Point then it will fail with a TypeError. +

    + + + +
    + + +
  • Python Language Reference: Special Method Names.
  • + + +
    +
    diff --git a/python/ql/src/Functions/SignatureSpecialMethods.ql b/python/ql/src/Functions/SignatureSpecialMethods.ql new file mode 100644 index 00000000000..1301949768a --- /dev/null +++ b/python/ql/src/Functions/SignatureSpecialMethods.ql @@ -0,0 +1,200 @@ +/** + * @name Special method has incorrect signature + * @description Special method has incorrect signature + * @kind problem + * @tags reliability + * correctness + * @problem.severity error + * @sub-severity low + * @precision high + * @id py/special-method-wrong-signature + */ + +import python + + +predicate is_unary_op(string name) { + name = "__del__" or + name = "__repr__" or + name = "__str__" or + name = "__hash__" or + name = "__bool__" or + name = "__nonzero__" or + name = "__unicode__" or + name = "__len__" or + name = "__iter__" or + name = "__reversed__" or + name = "__neg__" or + name = "__pos__" or + name = "__abs__" or + name = "__invert__" or + name = "__complex__" or + name = "__int__" or + name = "__float__" or + name = "__long__" or + name = "__oct__" or + name = "__hex__" or + name = "__index__" or + name = "__enter__" +} + +predicate is_binary_op(string name) { + name = "__lt__" or + name = "__le__" or + name = "__eq__" or + name = "__ne__" or + name = "__gt__" or + name = "__ge__" or + name = "__cmp__" or + name = "__rcmp__" or + name = "__getattr___" or + name = "__getattribute___" or + name = "__delattr__" or + name = "__delete__" or + name = "__instancecheck__" or + name = "__subclasscheck__" or + name = "__getitem__" or + name = "__delitem__" or + name = "__contains__" or + name = "__add__" or + name = "__sub__" or + name = "__mul__" or + name = "__floordiv__" or + name = "__div__" or + name = "__truediv__" or + name = "__mod__" or + name = "__divmod__" or + name = "__lshift__" or + name = "__rshift__" or + name = "__and__" or + name = "__xor__" or + name = "__or__" or + name = "__radd__" or + name = "__rsub__" or + name = "__rmul__" or + name = "__rfloordiv__" or + name = 
"__rdiv__" or + name = "__rtruediv__" or + name = "__rmod__" or + name = "__rdivmod__" or + name = "__rpow__" or + name = "__rlshift__" or + name = "__rrshift__" or + name = "__rand__" or + name = "__rxor__" or + name = "__ror__" or + name = "__iadd__" or + name = "__isub__" or + name = "__imul__" or + name = "__ifloordiv__" or + name = "__idiv__" or + name = "__itruediv__" or + name = "__imod__" or + name = "__idivmod__" or + name = "__ipow__" or + name = "__ilshift__" or + name = "__irshift__" or + name = "__iand__" or + name = "__ixor__" or + name = "__ior__" or + name = "__coerce__" +} + +predicate is_ternary_op(string name) { + name = "__setattr__" or + name = "__set__" or + name = "__setitem__" or + name = "__getslice__" or + name = "__delslice__" +} + +predicate is_quad_op(string name) { + name = "__setslice__" or name = "__exit__" +} + +int argument_count(PyFunctionObject f, string name, ClassObject cls) { + cls.declaredAttribute(name) = f and + ( + is_unary_op(name) and result = 1 + or + is_binary_op(name) and result = 2 + or + is_ternary_op(name) and result = 3 + or + is_quad_op(name) and result = 4 + ) +} + +predicate incorrect_special_method_defn(PyFunctionObject func, string message, boolean show_counts, string name, ClassObject owner) { + exists(int required | + required = argument_count(func, name, owner) | + /* actual_non_default <= actual */ + if required > func.maxParameters() then + (message = "Too few parameters" and show_counts = true) + else if required < func.minParameters() then + (message = "Too many parameters" and show_counts = true) + else if (func.minParameters() < required and not func.getFunction().hasVarArg()) then + (message = (required -func.minParameters()) + " default values(s) will never be used" and show_counts = false) + else + none() + ) +} + +predicate incorrect_pow(FunctionObject func, string message, boolean show_counts, ClassObject owner) { + owner.declaredAttribute("__pow__") = func and + ( + func.maxParameters() < 2 and 
message = "Too few parameters" and show_counts = true + or + func.minParameters() > 3 and message = "Too many parameters" and show_counts = true + or + func.minParameters() < 2 and message = (2 - func.minParameters()) + " default value(s) will never be used" and show_counts = false + or + func.minParameters() = 3 and message = "Third parameter to __pow__ should have a default value" and show_counts = false + ) +} + +predicate incorrect_get(FunctionObject func, string message, boolean show_counts, ClassObject owner) { + owner.declaredAttribute("__get__") = func and + ( + func.maxParameters() < 3 and message = "Too few parameters" and show_counts = true + or + func.minParameters() > 3 and message = "Too many parameters" and show_counts = true + or + func.minParameters() < 2 and not func.getFunction().hasVarArg() and + message = (2 - func.minParameters()) + " default value(s) will never be used" and show_counts = false + ) +} + +string should_have_parameters(PyFunctionObject f, string name, ClassObject owner) { + exists(int i | i = argument_count(f, name, owner) | + result = i.toString() + ) + or + owner.declaredAttribute(name) = f and (name = "__get__" or name = "__pow__") and result = "2 or 3" +} + +string has_parameters(PyFunctionObject f) { + exists(int i | i = f.minParameters() | + i = 0 and result = "no parameters" + or + i = 1 and result = "1 parameter" + or + i > 1 and result = i.toString() + " parameters" + ) +} + +from PyFunctionObject f, string message, string sizes, boolean show_counts, string name, ClassObject owner +where + ( + incorrect_special_method_defn(f, message, show_counts, name, owner) + or + incorrect_pow(f, message, show_counts, owner) and name = "__pow__" + or + incorrect_get(f, message, show_counts, owner) and name = "__get__" + ) + and + ( + show_counts = false and sizes = "" or + show_counts = true and sizes = ", which has " + has_parameters(f) + ", but should have " + should_have_parameters(f, name, owner) + ) +select f, message + " for 
special method " + name + sizes + ", in class $@.", owner, owner.getName() diff --git a/python/ql/src/Functions/UseImplicitNoneReturnValue.py b/python/ql/src/Functions/UseImplicitNoneReturnValue.py new file mode 100644 index 00000000000..fcaafcfde75 --- /dev/null +++ b/python/ql/src/Functions/UseImplicitNoneReturnValue.py @@ -0,0 +1,17 @@ + +import sys + +def my_print(*args): + print (args) + +def main(): + err = my_print(sys.argv) + if err: + sys.exit(err) + + +#FIXED VERSION +def main(): + my_print(sys.argv) + #The rest of the code can be removed as None as always false + diff --git a/python/ql/src/Functions/UseImplicitNoneReturnValue.qhelp b/python/ql/src/Functions/UseImplicitNoneReturnValue.qhelp new file mode 100644 index 00000000000..2637ac21ef3 --- /dev/null +++ b/python/ql/src/Functions/UseImplicitNoneReturnValue.qhelp @@ -0,0 +1,32 @@ + + + +

    All functions in Python return a value. +If a function has no return statements or none of the return statements return a value +then the function will return None. However, this value has no meaning and should be ignored.

    + +

    Using the return value of such a 'procedure' is confusing to the reader as it suggests +that the value is significant. +

    + +
    + +

    Do not use the return value of a procedure; replace x = proc() with proc() +and replace any use of the value with None.

    + +
    + +

    In this example, the my_print function is a procedure as it returns no value of any meaning. +Using the return value is misleading in subsequent code. +

    + + +
    + + +
  • Python Library Reference: None.
  • + +
    +
    diff --git a/python/ql/src/Functions/UseImplicitNoneReturnValue.ql b/python/ql/src/Functions/UseImplicitNoneReturnValue.ql new file mode 100644 index 00000000000..d2c954cb4c9 --- /dev/null +++ b/python/ql/src/Functions/UseImplicitNoneReturnValue.ql @@ -0,0 +1,34 @@ +/** + * @name Use of the return value of a procedure + * @description The return value of a procedure (a function that does not return a value) is used. This is confusing to the reader as the value (None) has no meaning. + * @kind problem + * @tags maintainability + * @problem.severity warning + * @sub-severity low + * @precision high + * @id py/procedure-return-value-used + */ + +import python +import Testing.Mox + +/** Holds if the value of call `c` is used: `c` is contained in-scope inside another call, attribute or subscript expression, or `c` is a sub-expression of a statement that is not an expression statement (the sole return statement of a scope is ignored). */ predicate is_used(Call c) { + exists(Expr outer | outer != c and outer.containsInScope(c) | outer instanceof Call or outer instanceof Attribute or outer instanceof Subscript) + or + exists(Stmt s | + c = s.getASubExpression() and + not s instanceof ExprStmt and + /* Ignore if a single return, as def f(): return g() is quite common. Covers implicit return in a lambda. 
*/ + not (s instanceof Return and strictcount(Return r | r.getScope() = s.getScope()) = 1) + ) +} + +from Call c, FunctionObject func +where +/* Call result is used, but callee is a procedure */ +is_used(c) and c.getFunc().refersTo(func) and func.getFunction().isProcedure() and +/* All callees are procedures */ +forall(FunctionObject callee | c.getFunc().refersTo(callee) | callee.getFunction().isProcedure()) and +/* Mox return objects have an `AndReturn` method */ +not useOfMoxInModule(c.getEnclosingModule()) +select c, "The result of '$@' is used even though it is always None.", func, func.getQualifiedName() diff --git a/python/ql/src/Imports/Cyclic.qll b/python/ql/src/Imports/Cyclic.qll new file mode 100644 index 00000000000..b16e3ae147c --- /dev/null +++ b/python/ql/src/Imports/Cyclic.qll @@ -0,0 +1,89 @@ +import python + +/** Holds if statement `s` is not enclosed, directly or transitively, in a function scope. */ predicate is_import_time(Stmt s) { + not s.getScope+() instanceof Function +} + +/** Gets a module imported by a statement of module `m` whose entry node's basic block reaches exit. */ PythonModuleObject module_imported_by(PythonModuleObject m) { + exists(Stmt imp | + result = stmt_imports(imp) and + imp.getEnclosingModule() = m.getModule() and + // Import must reach exit to be part of a cycle + imp.getAnEntryNode().getBasicBlock().reachesExit() + ) +} + +/** Is there a circular import of 'm1' beginning with 'm2'? */ +predicate circular_import(PythonModuleObject m1, PythonModuleObject m2) { + m1 != m2 and + m2 = module_imported_by(m1) and m1 = module_imported_by+(m2) +} + +/** Gets a module that is imported under one of the names imported by statement `s`; the name "__main__" is excluded. */ ModuleObject stmt_imports(ImportingStmt s) { + exists(string name | + result.importedAs(name) and not name = "__main__" | + name = s.getAnImportedModuleName() + ) +} + +/** Holds if `imp` is a statement of module `m1`, outside any function, that imports module `m2`. */ predicate import_time_imported_module(PythonModuleObject m1, PythonModuleObject m2, Stmt imp) { + imp.getEnclosingModule() = m1.getModule() and + is_import_time(imp) and + m2 = stmt_imports(imp) +} + +/** Is there a cyclic import of 'm1' beginning with an import 'm2' at 'imp' where all the imports are top-level? 
*/ +predicate import_time_circular_import(PythonModuleObject m1, PythonModuleObject m2, Stmt imp) { + m1 != m2 and + import_time_imported_module(m1, m2, imp) and + import_time_transitive_import(m2, _, m1) +} + +/** Holds if `last` (distinct from `base`) is reached from `base` by a chain of one or more import-time imports, where `imp` is the first import of the chain (a statement of `base`). */ predicate import_time_transitive_import(PythonModuleObject base, Stmt imp, PythonModuleObject last) { + last != base and + ( + import_time_imported_module(base, last, imp) + or + exists(PythonModuleObject mid | + import_time_transitive_import(base, imp, mid) and + import_time_imported_module(mid, last, _) + ) + ) and + // Import must reach exit to be part of a cycle + imp.getAnEntryNode().getBasicBlock().reachesExit() +} + +/** + * Holds if 'use' is an import-time use, in module 'enclosing', of attribute 'attr' of module 'm' + */ +predicate import_time_module_use(PythonModuleObject m, PythonModuleObject enclosing, Expr use, string attr) { + exists(Expr mod | + use.getEnclosingModule() = enclosing.getModule() and + not use.getScope+() instanceof Function + and mod.refersTo(m) + | + // either 'M.foo' + use.(Attribute).getObject() = mod and use.(Attribute).getName() = attr + or + // or 'from M import foo' + use.(ImportMember).getModule() = mod and use.(ImportMember).getName() = attr + ) +} + +/** Whether importing module 'first' before importing module 'other' will fail at runtime, due to an + AttributeError at 'use' (in module 'other') caused by 'first.attr' not being defined as its definition can + occur after the import 'other' in 'first'. 
+*/ +predicate failing_import_due_to_cycle(PythonModuleObject first, PythonModuleObject other, Stmt imp, + ControlFlowNode defn, Expr use, string attr) { + import_time_imported_module(other, first, _) and + import_time_transitive_import(first, imp, other) and + import_time_module_use(first, other, use, attr) and + exists(ImportTimeScope n, SsaVariable v | + defn = v.getDefinition() and + n = first.getModule() and v.getVariable().getScope() = n and v.getId() = attr | + not defn.strictlyDominates(imp.getAnEntryNode()) + ) + and not exists(If i | i.isNameEqMain() and i.contains(use)) +} + diff --git a/python/ql/src/Imports/CyclicImport.qhelp b/python/ql/src/Imports/CyclicImport.qhelp new file mode 100644 index 00000000000..0d84c64418a --- /dev/null +++ b/python/ql/src/Imports/CyclicImport.qhelp @@ -0,0 +1,36 @@ + + + +

    A cyclic import is an import which imports another module +and that module imports (possibly indirectly) the module which contains the +import statement.

    + +

    Cyclic imports indicate that two modules are circularly dependent. This means +that the modules cannot be tested independently, and it makes it harder to +understand the architecture of the system. +

    + +
    + + +

    The cycle may be broken by removing any one import. If only one function or +method requires the import, then consider moving that to the other module and +deleting the import. If the two modules are more intimately connected, then move +the inter-dependent parts into a third module and have both the original modules +import that. +

    + + +
    + + + +
  • Python Language Reference: The import statement.
  • +
  • Python: Modules.
  • +
  • Effbot: Import Confusion.
  • + + +
    +
    diff --git a/python/ql/src/Imports/CyclicImport.ql b/python/ql/src/Imports/CyclicImport.ql new file mode 100644 index 00000000000..1e1586c2f93 --- /dev/null +++ b/python/ql/src/Imports/CyclicImport.ql @@ -0,0 +1,27 @@ +/** + * @name Cyclic import + * @description Module forms part of an import cycle, thereby indirectly importing itself. + * @kind problem + * @tags reliability + * maintainability + * modularity + * @problem.severity recommendation + * @sub-severity low + * @precision high + * @id py/cyclic-import + */ + +import python +import Cyclic + +/* Reports each statement `imp` of module `m1` that imports a module `m2` which in turn (transitively) imports `m1`; uses inside an `If` for which `isNameEqMain()` holds are excluded. */ from PythonModuleObject m1, PythonModuleObject m2, Stmt imp +where + imp.getEnclosingModule() = m1.getModule() + and stmt_imports(imp) = m2 + and circular_import(m1, m2) + and m1 != m2 /* also required by circular_import itself (see Cyclic.qll) */ + // this query finds all cyclic imports that are *not* flagged by ModuleLevelCyclicImport + and not failing_import_due_to_cycle(m2, m1, _, _, _, _) + and not exists(If i | i.isNameEqMain() and i.contains(imp)) +select imp, "Import of module $@ begins an import cycle.", m2, m2.getName() + diff --git a/python/ql/src/Imports/DeprecatedModule.qhelp b/python/ql/src/Imports/DeprecatedModule.qhelp new file mode 100644 index 00000000000..53c0d1abbc6 --- /dev/null +++ b/python/ql/src/Imports/DeprecatedModule.qhelp @@ -0,0 +1,23 @@ + + + +

    A module is deprecated when it cannot or will not be maintained indefinitely in the standard library. +Deprecated modules may not receive security fixes or other important updates. +See PEP 4 for a list of all deprecated modules. +

    +
    + + +

    Do not import the deprecated module. Replace uses of it with uses of a better maintained module. +

    + +
    + + +
  • Python PEPs: PEP 4 -- Deprecation of Standard Modules .
  • + + +
    +
    diff --git a/python/ql/src/Imports/DeprecatedModule.ql b/python/ql/src/Imports/DeprecatedModule.ql new file mode 100644 index 00000000000..22f4f962e31 --- /dev/null +++ b/python/ql/src/Imports/DeprecatedModule.ql @@ -0,0 +1,73 @@ +/** + * @name Import of deprecated module + * @description Import of a deprecated module + * @kind problem + * @tags maintainability + * external/cwe/cwe-477 + * @problem.severity warning + * @sub-severity high + * @precision very-high + * @id py/import-deprecated-module + */ + +import python + + +/** Holds if the module called `name` was deprecated in Python version `major`.`minor`; `instead` names the module to use in its place, or is "no replacement" when there is none. */ predicate deprecated_module(string name, string instead, int major, int minor) { + /* PEP 4 lists fcntl (fcntl.lockf) as the replacement for posixfile */ name = "posixfile" and instead = "fcntl" and major = 1 and minor = 5 + or + name = "gopherlib" and instead = "no replacement" and major = 2 and minor = 5 + or + name = "rgbimgmodule" and instead = "no replacement" and major = 2 and minor = 5 + or + name = "pre" and instead = "re" and major = 1 and minor = 5 + or + name = "whrandom" and instead = "random" and major = 2 and minor = 1 + or + name = "rfc822" and instead = "email" and major = 2 and minor = 3 + or + name = "mimetools" and instead = "email" and major = 2 and minor = 3 + or + name = "MimeWriter" and instead = "email" and major = 2 and minor = 3 + or + name = "mimify" and instead = "email" and major = 2 and minor = 3 + or + name = "rotor" and instead = "no replacement" and major = 2 and minor = 4 + or + name = "statcache" and instead = "no replacement" and major = 2 and minor = 2 + or + name = "mpz" and instead = "a third party" and major = 2 and minor = 2 + or + name = "xreadlines" and instead = "no replacement" and major = 2 and minor = 3 + or + name = "multifile" and instead = "email" and major = 2 and minor = 5 + or + name = "sets" and instead = "builtins" and major = 2 and minor = 6 + or + name = "buildtools" and instead = "no replacement" and major = 2 and minor = 3 + or + name = "cfmfile" and instead = "no replacement" and major = 2 and minor = 4 + or + name = "macfs" and instead = "no 
replacement" and major = 2 and minor = 3 + or + name = "md5" and instead = "hashlib" and major = 2 and minor = 5 + or + name = "sha" and instead = "hashlib" and major = 2 and minor = 5 +} + +/** Gets the sentence stating in which version the deprecated module `mod` was deprecated. */ string deprecation_message(string mod) { + exists(int major, int minor | deprecated_module(mod, _, major, minor) | + result = "The " + mod + " module was deprecated in version " + major.toString() + "." + minor.toString() + ".") +} + +/** Gets the sentence recommending a replacement for the deprecated module `mod`, or the empty string when it has no replacement. */ string replacement_message(string mod) { + exists(string instead | deprecated_module(mod, instead, _, _) | + result = " Use " + instead + " module instead." and not instead = "no replacement" + or + result = "" and instead = "no replacement" + ) +} + +/* Reports every statement `s` having a sub-expression that is, or contains, an import expression of a deprecated module. */ from ImportExpr imp, Stmt s, Expr e +where s.getASubExpression() = e and (e = imp or e.contains(imp)) +select s, deprecation_message(imp.getName()) + replacement_message(imp.getName()) diff --git a/python/ql/src/Imports/EncodingError.qhelp b/python/ql/src/Imports/EncodingError.qhelp new file mode 100644 index 00000000000..7142390b679 --- /dev/null +++ b/python/ql/src/Imports/EncodingError.qhelp @@ -0,0 +1,36 @@ + + + + +

    Encoding errors prevent a module being evaluated and thus imported. +An attempt to import a module with an invalid encoding will fail; a SyntaxError will be raised. +Note that in Python 2, the default encoding is ASCII. +

    + +

    The existence of an encoding error in a module may suggest other problems as well. +Either the module is never imported in practice and could be deleted or a +try statement around the import is mistakenly discarding the SyntaxError. +

    + + +
    + +

    Fixing the encoding error is the obvious fix. +However, it is worth investigating why a module containing an encoding error +was able to persist and address that problem as well. +

    +

    + If a different encoding should be used for the file, specify it explicitly by + putting an encoding specification at the top of the file. For instance, to + specify UTF-8 encoding, add the line # coding=utf-8. +

    + +
    + +
  • Python PEPs: PEP 263 — Defining Python Source Code Encodings.
  • +
  • Python Tutorial: SyntaxErrors.
  • + +
    +
    diff --git a/python/ql/src/Imports/EncodingError.ql b/python/ql/src/Imports/EncodingError.ql new file mode 100644 index 00000000000..f26bf8dad33 --- /dev/null +++ b/python/ql/src/Imports/EncodingError.ql @@ -0,0 +1,16 @@ +/** + * @name Encoding error + * @description Encoding errors cause failures at runtime and prevent analysis of the code. + * @kind problem + * @tags reliability + * correctness + * @problem.severity error + * @sub-severity low + * @precision high + * @id py/encoding-error + */ + +import python + +/* Reports every EncodingError, using the error's own message as the alert text. */ from EncodingError error +select error, error.getMessage() \ No newline at end of file diff --git a/python/ql/src/Imports/FromImportOfMutableAttribute.qhelp b/python/ql/src/Imports/FromImportOfMutableAttribute.qhelp new file mode 100644 index 00000000000..78b03e1cb7c --- /dev/null +++ b/python/ql/src/Imports/FromImportOfMutableAttribute.qhelp @@ -0,0 +1,45 @@ + + + +

    +Explicitly importing an attribute from a module into the current namespace means that the value of that attribute will not be updated if the value in the original module changes. +

    +

    +This can mean that changes in global state are not observed locally, which may lead to inconsistencies and possible errors. +

    + + +
    + +

    Instead of using from module import attr, simply import the module using import module +and replace all uses of attr with module.attr. +

    +
    + + +

    In the first of the two modules shown below, from sys import stdout is used to import the stdout attribute, +rather than using import sys to import the module. Then stdout is used in the main() function. +

    + +

    In the second module, below, a function, redirect_to_file, is defined to collect the output from sys.stdout and save it to a file. +However, redirect_to_file will not work correctly when passed the main() function. +This is because the main() function will not see the change to sys.stdout, +as it uses its own version of stdout that was defined when the module was loaded. +

    + +

    +The problem can be fixed by rewriting the first module to import the sys module and write to sys.stdout, as shown below. +

    + +
    + + + +
  • Python Language Reference: The import statement.
  • +
  • Python Tutorial: Modules.
  • + + +
    +
    diff --git a/python/ql/src/Imports/FromImportOfMutableAttribute.ql b/python/ql/src/Imports/FromImportOfMutableAttribute.ql new file mode 100644 index 00000000000..e5e7c96985e --- /dev/null +++ b/python/ql/src/Imports/FromImportOfMutableAttribute.ql @@ -0,0 +1,31 @@ +/** + * @name Importing value of mutable attribute + * @description Importing the value of a mutable attribute directly means that changes in global state will not be observed locally. + * @kind problem + * @tags reliability + * maintainability + * modularity + * @problem.severity warning + * @sub-severity high + * @precision medium + * @id py/import-of-mutable-attribute + */ +import python +import semmle.python.filters.Tests + +/* Reports each `from m import name` (`im`) made outside any function when, in a different module, some function stores to attribute `name` of module `m` (`store_attr`). */ from ImportMember im, ModuleObject m, AttrNode store_attr, string name +where im.getModule().(ImportExpr).getImportedModuleName() = m.getName() and +im.getName() = name and +/* Modification must be in a function, so it can occur during lifetime of the import value */ +store_attr.getScope() instanceof Function and +/* variable resulting from import must have a long lifetime */ +not im.getScope() instanceof Function and +store_attr.isStore() and +store_attr.getObject(name).refersTo(m) and +/* Import not in same module as modification. */ +not im.getEnclosingModule() = store_attr.getScope().getEnclosingModule() and +/* Modification is not in a test */ +not store_attr.getScope().getScope*() instanceof TestScope + +select im, "Importing the value of '" + name + "' from $@ means that any change made to $@ will not be observed locally.", +m, "module " + m.getName(), store_attr, m.getName() + "." 
+ store_attr.getName() diff --git a/python/ql/src/Imports/ImportOnTwoLines.py b/python/ql/src/Imports/ImportOnTwoLines.py new file mode 100644 index 00000000000..226b41ccd14 --- /dev/null +++ b/python/ql/src/Imports/ImportOnTwoLines.py @@ -0,0 +1,2 @@ +import xxx +import yyy diff --git a/python/ql/src/Imports/ImportShadowedByLoopVar.qhelp b/python/ql/src/Imports/ImportShadowedByLoopVar.qhelp new file mode 100644 index 00000000000..566d51f0710 --- /dev/null +++ b/python/ql/src/Imports/ImportShadowedByLoopVar.qhelp @@ -0,0 +1,10 @@ + + + + +

    This is defined as an error in PyFlakes.

    + +
    +
    diff --git a/python/ql/src/Imports/ImportShadowedByLoopVar.ql b/python/ql/src/Imports/ImportShadowedByLoopVar.ql new file mode 100644 index 00000000000..29f6536cce7 --- /dev/null +++ b/python/ql/src/Imports/ImportShadowedByLoopVar.ql @@ -0,0 +1,22 @@ +/** + * @name Import shadowed by loop variable + * @description A loop variable shadows an import. + * @kind problem + * @tags maintainability + * @problem.severity recommendation + * @sub-severity low + * @deprecated + * @precision very-high + * @id py/import-shadowed-loop-variable + */ + +import python + +/** Holds if variable `l` has the same identifier as a name bound by an import statement located in the scope of `l` or in one of its enclosing scopes. */ predicate shadowsImport(Variable l) { + exists(Import i, Name shadow | shadow = i.getAName().getAsname() and shadow.getId() = l.getId() and i.getScope() = l.getScope().getScope*()) +} + + +/* Reports each definition `defn` of such a variable that is the target of a `for` loop. */ from Variable l, Name defn +where shadowsImport(l) and defn.defines(l) and exists(For for | defn = for.getTarget()) +select defn, "Loop variable '" + l.getId() + "' shadows an import" diff --git a/python/ql/src/Imports/ImportStarUsed.qhelp b/python/ql/src/Imports/ImportStarUsed.qhelp new file mode 100644 index 00000000000..65f92a5f5e0 --- /dev/null +++ b/python/ql/src/Imports/ImportStarUsed.qhelp @@ -0,0 +1,27 @@ + + + + +

    Using from xxx import * makes it difficult to determine what has +been defined by the import statement. This may hide errors and introduce +unexpected dependencies.

    + +
    + + +

    +Use explicit imports. For example from xxx import a, b, c +

    + +
    + + + +
  • Python Language Reference: The import statement.
  • +
  • Python PEP-8: Imports.
  • + + +
    +
    diff --git a/python/ql/src/Imports/ImportStarUsed.ql b/python/ql/src/Imports/ImportStarUsed.ql new file mode 100644 index 00000000000..bc125c05a3b --- /dev/null +++ b/python/ql/src/Imports/ImportStarUsed.ql @@ -0,0 +1,17 @@ +/** + * @name 'import *' used + * @description Using import * prevents some analysis + * @kind problem + * @tags maintainability + * @problem.severity recommendation + * @sub-severity low + * @deprecated + * @precision medium + * @id py/import-star-used + */ + +import python + +/* Reports every ImportStar node, unconditionally. */ from ImportStar i +select i, "Using 'from ... import *' pollutes the namespace" + diff --git a/python/ql/src/Imports/ImportTwiceOnALine.py b/python/ql/src/Imports/ImportTwiceOnALine.py new file mode 100644 index 00000000000..4a22939b333 --- /dev/null +++ b/python/ql/src/Imports/ImportTwiceOnALine.py @@ -0,0 +1 @@ +import xxx, yyy diff --git a/python/ql/src/Imports/ImportandImportFrom.py b/python/ql/src/Imports/ImportandImportFrom.py new file mode 100644 index 00000000000..9c373d6e58b --- /dev/null +++ b/python/ql/src/Imports/ImportandImportFrom.py @@ -0,0 +1,2 @@ +import os +from os import walk diff --git a/python/ql/src/Imports/ImportandImportFrom.qhelp b/python/ql/src/Imports/ImportandImportFrom.qhelp new file mode 100644 index 00000000000..58dd1ada083 --- /dev/null +++ b/python/ql/src/Imports/ImportandImportFrom.qhelp @@ -0,0 +1,27 @@ + + + + + +

    Importing a module twice, using both the import xxx and +from xxx import yyy forms, is confusing. +

    + +
    + + +

    Remove the from xxx import yyy statement. +Add yyy = xxx.yyy if required.

    + +
    + + + + + + +
  • Python Language Reference: The import statement.
  • +
    +
    diff --git a/python/ql/src/Imports/ImportandImportFrom.ql b/python/ql/src/Imports/ImportandImportFrom.ql new file mode 100644 index 00000000000..6a12e6b938d --- /dev/null +++ b/python/ql/src/Imports/ImportandImportFrom.ql @@ -0,0 +1,24 @@ +/** + * @name Module is imported with 'import' and 'import from' + * @description A module is imported with the "import" and "import from" statements + * @kind problem + * @tags maintainability + * @problem.severity recommendation + * @sub-severity low + * @precision very-high + * @id py/import-and-import-from + */ + +import python + +/** Holds if `i1` and `i2` are in the same module, `i1` binds an import expression whose name equals the name of `m`, and `i2` binds a member imported from an import expression with that same name. */ predicate import_and_import_from(Import i1, Import i2, Module m) { + i1.getEnclosingModule() = i2.getEnclosingModule() and + exists (ImportExpr e1, ImportExpr e2, ImportMember im | + e1 = i1.getAName().getValue() and im = i2.getAName().getValue() and e2 = im.getModule() | + e1.getName() = m.getName() and e2.getName() = m.getName() + ) +} + +/* Only `i1` (the plain import side of the pair) is reported. */ from Stmt i1, Stmt i2, Module m +where import_and_import_from(i1, i2, m) +select i1, "Module '" + m.getName() + "' is imported with both 'import' and 'import from'" diff --git a/python/ql/src/Imports/Imports.qhelp b/python/ql/src/Imports/Imports.qhelp new file mode 100644 index 00000000000..18df8145f26 --- /dev/null +++ b/python/ql/src/Imports/Imports.qhelp @@ -0,0 +1,29 @@ + + + + + +

    Code is easier to read when each import statement is defined on a separate line. +

    + +
    + +

    Update the code so that each import is defined on a separate line. PEP8 notes that it is +acceptable to import multiple names from a single module in one statement, for example from subprocess import Popen, PIPE.

    + +
    + +

    The import statement:

    + +

    should be changed to:

    + +
    + + +
  • Python Language Reference: The import statement.
  • +
  • Python PEP 8: Imports.
  • + +
    +
    diff --git a/python/ql/src/Imports/Imports.ql b/python/ql/src/Imports/Imports.ql new file mode 100644 index 00000000000..7adba83cfe4 --- /dev/null +++ b/python/ql/src/Imports/Imports.ql @@ -0,0 +1,27 @@ +/** + * @name Multiple imports on one line + * @description Defining multiple imports on one line makes code more difficult to read; + * PEP8 states that imports should usually be on separate lines. + * @kind problem + * @tags maintainability + * @problem.severity recommendation + * @sub-severity low + * @deprecated + * @precision medium + * @id py/multiple-imports-on-line + */ + +/* Look for imports of the form: +import modA, modB +(Imports should be one per line according to PEP 8) +*/ + +import python + +/** Holds if `imp` binds more than one name and is not a `from ... import ...` statement. */ predicate multiple_import(Import imp) { + count(imp.getAName()) > 1 and not imp.isFromImport() +} + +from Import i +where multiple_import(i) +select i, "Multiple imports on one line." diff --git a/python/ql/src/Imports/ModuleImportsItself.py b/python/ql/src/Imports/ModuleImportsItself.py new file mode 100644 index 00000000000..0757e2c1c7f --- /dev/null +++ b/python/ql/src/Imports/ModuleImportsItself.py @@ -0,0 +1,6 @@ +import ModuleImportsItself + +def factorial(n): + if n <= 0: + return 1 + return n * ModuleImportsItself.factorial(n - 1) \ No newline at end of file diff --git a/python/ql/src/Imports/ModuleImportsItself.qhelp b/python/ql/src/Imports/ModuleImportsItself.qhelp new file mode 100644 index 00000000000..1fbad45c149 --- /dev/null +++ b/python/ql/src/Imports/ModuleImportsItself.qhelp @@ -0,0 +1,33 @@ + + + + + +

    There is no need for a module to import itself. A module importing itself may lead to errors as +the module may be in an incomplete state when imported by itself. +

    + +
    + +

    Remove the import statement. +Convert all expressions of the form mod.name where "mod" is the name +of the current module to name.

    + +
    + +

    In this example the module, ModuleImportsItself, imports itself and has an expression +referencing the module it is in as well.

    + + +

    The import can be removed and the reference can be corrected.

    + + +
    + + +
  • Python: Modules.
  • + +
    +
    diff --git a/python/ql/src/Imports/ModuleImportsItself.ql b/python/ql/src/Imports/ModuleImportsItself.ql new file mode 100644 index 00000000000..d07d79ed9a3 --- /dev/null +++ b/python/ql/src/Imports/ModuleImportsItself.ql @@ -0,0 +1,22 @@ +/** + * @name Module imports itself + * @description A module imports itself + * @kind problem + * @tags maintainability + * useless-code + * @problem.severity recommendation + * @sub-severity high + * @precision very-high + * @id py/import-own-module + */ + +import python + +/** Holds if import statement `i` is located in the module of `m` and one of the module names it imports is a name under which `m` itself is imported. */ predicate modules_imports_itself(Import i, ModuleObject m) { + i.getEnclosingModule() = m.getModule() and + m.importedAs(i.getAnImportedModuleName()) +} + +from Import i, ModuleObject m +where modules_imports_itself(i, m) +select i, "The module '" + m.getName() + "' imports itself." diff --git a/python/ql/src/Imports/ModuleImportsItselfFix.py b/python/ql/src/Imports/ModuleImportsItselfFix.py new file mode 100644 index 00000000000..73f826d202f --- /dev/null +++ b/python/ql/src/Imports/ModuleImportsItselfFix.py @@ -0,0 +1,5 @@ + +def factorial(n): + if n <= 0: + return 1 + return n * factorial(n - 1) \ No newline at end of file diff --git a/python/ql/src/Imports/ModuleLevelCyclicImport.qhelp b/python/ql/src/Imports/ModuleLevelCyclicImport.qhelp new file mode 100644 index 00000000000..30af68d364e --- /dev/null +++ b/python/ql/src/Imports/ModuleLevelCyclicImport.qhelp @@ -0,0 +1,40 @@ + + + +

    A cyclic import is an import which imports another module +and that module imports (possibly indirectly) the module which contains the +import statement. +If all imports in a cyclic import occur at module level, then a module will be +imported when it is part way through its initialization. This may result in +surprising errors, as parts of the module being imported may not yet exist. +

    + +

    In addition to the possible errors, cyclic imports indicate that two modules +are circularly dependent. This means that the modules cannot be tested +independently, and it makes it harder to understand the architecture of the system. +

    + +
    + + +

    The cycle may be broken by removing any one import. If only one function or +method requires the import, then consider moving that to the other module and +deleting the import. If the two modules are more intimately connected, then move +the inter-dependent parts into a third module and have both the original modules +import that. +

    + + +
    + + + +
  • Python Language Reference: The import statement.
  • +
  • Python: Modules.
  • +
  • Effbot: Import Confusion.
  • + + +
    +
    diff --git a/python/ql/src/Imports/ModuleLevelCyclicImport.ql b/python/ql/src/Imports/ModuleLevelCyclicImport.ql new file mode 100644 index 00000000000..c7dc84e1094 --- /dev/null +++ b/python/ql/src/Imports/ModuleLevelCyclicImport.ql @@ -0,0 +1,31 @@ +/** + * @name Module-level cyclic import + * @description Module uses member of cyclically imported module, which can lead to failure at import time. + * @kind problem + * @tags reliability + * correctness + * types + * @problem.severity error + * @sub-severity low + * @precision high + * @comprehension 0.5 + * @id py/unsafe-cyclic-import + */ + +import python +import Cyclic + +// This is a potentially crashing bug if +// 1. the imports in the whole cycle are lexically outside a def (and so executed at import time) +// 2. there is a use ('M.foo' or 'from M import foo') of the imported module that is lexically outside a def +// 3. 'foo' is defined in M after the import in M which completes the cycle. +// then if we import the 'used' module, we will reach the cyclic import, start importing the 'using' +// module, hit the 'use', and then crash due to the imported symbol not having been defined yet + +/* m1 is the 'used' module (the one defining attr); m2 is the 'using' module (the one containing use); imp is the import in m1 that starts the chain leading to m2. */ from PythonModuleObject m1, Stmt imp, PythonModuleObject m2, string attr, Expr use, ControlFlowNode defn +where failing_import_due_to_cycle(m1, m2, imp, defn, use, attr) +select use, "'" + attr + "' may not be defined if module $@ is imported before module $@, " + +"as the $@ of " + attr + " occurs after the cyclic $@ of " + m2.getName() + ".", +m1, m1.getName(), m2, m2.getName(), defn, "definition", imp, "import" + + \ No newline at end of file diff --git a/python/ql/src/Imports/MultipleImports.py b/python/ql/src/Imports/MultipleImports.py new file mode 100644 index 00000000000..5897abc4d9a --- /dev/null +++ b/python/ql/src/Imports/MultipleImports.py @@ -0,0 +1,3 @@ +import module1 +import module2 +import module1 # Duplicate import diff --git a/python/ql/src/Imports/MultipleImports.qhelp 
b/python/ql/src/Imports/MultipleImports.qhelp new file mode 100644 index 00000000000..40bbfe4654d --- /dev/null +++ b/python/ql/src/Imports/MultipleImports.qhelp @@ -0,0 +1,24 @@ + + + +

    Importing the same module more than once has no effect as each module is only loaded once. It also +confuses readers of the code.

    + +
    + +

    Remove the second import.

    + +
    + + + + + + +
  • Python: import statement.
  • + + +
    +
    diff --git a/python/ql/src/Imports/MultipleImports.ql b/python/ql/src/Imports/MultipleImports.ql new file mode 100644 index 00000000000..4e5f16779c0 --- /dev/null +++ b/python/ql/src/Imports/MultipleImports.ql @@ -0,0 +1,44 @@ +/** + * @name Module is imported more than once + * @description Importing a module a second time has no effect and impairs readability + * @kind problem + * @tags maintainability + * useless-code + * @problem.severity recommendation + * @sub-severity high + * @precision very-high + * @id py/repeated-import + */ + +import python + +/** Holds if import statement `imp` contains no attribute expression. */ predicate is_simple_import(Import imp) { + not exists(Attribute a | imp.contains(a)) +} + +/** Holds if `original` and `duplicate` are distinct simple imports that both import a module named like `m` and bind it to the same name, where `original` is at the top level of the module enclosing `duplicate` and either `duplicate` is in a nested scope or `original` dominates `duplicate`. */ predicate double_import(Import original, Import duplicate, Module m) { + original != duplicate and + is_simple_import(original) and is_simple_import(duplicate) and + /* Imports import the same thing */ + exists (ImportExpr e1, ImportExpr e2 | e1.getName() = m.getName() and e2.getName() = m.getName() and + e1 = original.getAName().getValue() and e2 = duplicate.getAName().getValue() + ) and + original.getAName().getAsname().(Name).getId() = duplicate.getAName().getAsname().(Name).getId() + and + exists(Module enclosing | + original.getScope() = enclosing and + duplicate.getEnclosingModule() = enclosing and + ( + /* Duplicate is not at top level scope */ + duplicate.getScope() != enclosing + or + /* Original dominates duplicate */ + original.getAnEntryNode().dominates(duplicate.getAnEntryNode()) + ) + ) +} + +from Import original, Import duplicate, Module m +where double_import(original, duplicate, m) +select duplicate, "This import of module " + m.getName() + " is redundant, as it was previously imported $@.", + original, "on line " + original.getLocation().getStartLine().toString() diff --git a/python/ql/src/Imports/SyntaxError.qhelp b/python/ql/src/Imports/SyntaxError.qhelp new file mode 100644 index 00000000000..00b4870e86a --- /dev/null +++ b/python/ql/src/Imports/SyntaxError.qhelp @@ -0,0 +1,39 @@ + + + + +

    Syntax errors prevent a module being evaluated and thus imported. +An attempt to import a module with invalid syntax will fail; a SyntaxError will be raised.

    + +

    A common cause of syntax errors is the difference in syntax between Python 2 +and Python 3. In particular, a syntax error may be reported if a Python 3 file is +assumed to be compatible with Python 2 (or vice versa). Explicitly specifying +the expected Python version can help prevent this. +

    + +

    The existence of a syntax error in a module may suggest other problems as well. +Either the module is never imported in practice and could be deleted, or a +try statement around the import is mistakenly discarding the SyntaxError. +

    + + +
    + +

    Fixing the syntax error is the obvious fix. +However, it is worth investigating why a module containing a syntax error +was able to persist and address that problem as well. +

    +

    If you suspect that the syntax error is caused by the analysis using the +wrong version of Python, consider specifying the version explicitly. For +LGTM.com, you can customize extraction using an lgtm.yml file as +described here. +

    +
    + + +
  • Python Tutorial: SyntaxErrors.
  • + +
    +
    diff --git a/python/ql/src/Imports/SyntaxError.ql b/python/ql/src/Imports/SyntaxError.ql new file mode 100644 index 00000000000..677793a932a --- /dev/null +++ b/python/ql/src/Imports/SyntaxError.ql @@ -0,0 +1,17 @@ +/** + * @name Syntax error + * @description Syntax errors cause failures at runtime and prevent analysis of the code. + * @kind problem + * @tags reliability + * correctness + * @problem.severity error + * @sub-severity high + * @precision high + * @id py/syntax-error + */ + +import python + +from SyntaxError error +where not error instanceof EncodingError +select error, error.getMessage() + " (in Python " + major_version() + "." + minor_version() + ")." \ No newline at end of file diff --git a/python/ql/src/Imports/UnintentionalImport.py b/python/ql/src/Imports/UnintentionalImport.py new file mode 100644 index 00000000000..272fc9fd046 --- /dev/null +++ b/python/ql/src/Imports/UnintentionalImport.py @@ -0,0 +1,14 @@ +# Example module - finance.py + +__all__ = ['tax1', 'tax2'] #defines the names to import when '*' is used + +tax1 = 5 +tax2 = 10 +def cost(): return 'cost' + +# Imported into code using + +from finance import * + +print tax1 +print tax2 \ No newline at end of file diff --git a/python/ql/src/Imports/UnintentionalImport.qhelp b/python/ql/src/Imports/UnintentionalImport.qhelp new file mode 100644 index 00000000000..8f873931e47 --- /dev/null +++ b/python/ql/src/Imports/UnintentionalImport.qhelp @@ -0,0 +1,45 @@ + + + + + +

    When you import a module using from xxx import *, all public names defined in the +module are imported and bound in the local namespace of the import statement. The +public names are determined by checking the __all__ variable for the module. If +__all__ is not defined, then all names within the module that do not start with an underscore +character are imported. This pollutes the current namespace with names that are not part of the +public API for the module. +

    + +
    + +

    There are two ways to address this problem:

    +
    • where possible, modify the module being imported from and define __all__ + to restrict the names to be imported
    • +
    • otherwise, explicitly import the values that you need.
    • +
    + +
    + +

    The following simple example shows how __all__ controls the public names for the +module finance.

    + + +

    If the finance module did not include a definition of __all__, then you +could replace from finance import * with from finance import tax1, tax2. +

    + +
    + + +
  • Python Language Reference: The import statement. +
  • +
  • Python Tutorial: Modules.
  • + + + + +
    +
    diff --git a/python/ql/src/Imports/UnintentionalImport.ql b/python/ql/src/Imports/UnintentionalImport.ql new file mode 100644 index 00000000000..3815b04f64a --- /dev/null +++ b/python/ql/src/Imports/UnintentionalImport.ql @@ -0,0 +1,32 @@ +/** + * @name 'import *' may pollute namespace + * @description Importing a module using 'import *' may unintentionally pollute the global + * namespace if the module does not define '__all__' + * @kind problem + * @tags maintainability + * modularity + * @problem.severity recommendation + * @sub-severity high + * @precision very-high + * @id py/polluting-import + */ + +import python + +predicate import_star(ImportStar imp, ModuleObject exporter) { + exporter.importedAs(imp.getImportedModuleName()) +} + +predicate all_defined(ModuleObject exporter) { + exporter.isC() + or + exporter.getModule().(ImportTimeScope).definesName("__all__") + or + exporter.getModule().getInitModule().(ImportTimeScope).definesName("__all__") +} + + +from ImportStar imp, ModuleObject exporter +where import_star(imp, exporter) and not all_defined(exporter) +select imp, "Import pollutes the enclosing namespace, as the imported module $@ does not define '__all__'.", + exporter, exporter.getName() diff --git a/python/ql/src/Imports/UnusedImport.qhelp b/python/ql/src/Imports/UnusedImport.qhelp new file mode 100644 index 00000000000..8edd5dcc7ec --- /dev/null +++ b/python/ql/src/Imports/UnusedImport.qhelp @@ -0,0 +1,23 @@ + + + + + +

    A module is imported (using the import statement) but that module +is never used. This creates a dependency that does not need to exist and makes the code +more difficult to read. +

    + +
    + +

    Delete the import statement.

    + +
    + + +
  • Python: import statement.
  • + +
    +
    diff --git a/python/ql/src/Imports/UnusedImport.ql b/python/ql/src/Imports/UnusedImport.ql new file mode 100644 index 00000000000..7d6bd1b6805 --- /dev/null +++ b/python/ql/src/Imports/UnusedImport.ql @@ -0,0 +1,74 @@ +/** + * @name Unused import + * @description Import is not required as it is not used + * @kind problem + * @tags maintainability + * useless-code + * @problem.severity recommendation + * @sub-severity high + * @precision very-high + * @id py/unused-import + */ + +import python +import Variables.Definition + +predicate global_name_used(Module m, Variable name) { + exists (Name u, GlobalVariable v | + u.uses(v) and + v.getId() = name.getId() and + u.getEnclosingModule() = m + ) + or + /* A use of an undefined class local variable, will use the global variable */ + exists(Name u, LocalVariable v | + u.uses(v) and + v.getId() = name.getId() and + u.getEnclosingModule() = m and + not v.getScope().getEnclosingScope*() instanceof Function + ) +} + +/** Holds if a module has `__all__` but we don't understand it */ +predicate all_not_understood(Module m) { + exists(GlobalVariable a | + a.getId() = "__all__" and a.getScope() = m | + /* __all__ is not defined as a simple list */ + not m.declaredInAll(_) + or + /* __all__ is modified */ + exists(Call c | c.getFunc().(Attribute).getObject() = a.getALoad()) + ) +} + +predicate unused_import(Import imp, Variable name) { + ((Name)imp.getAName().getAsname()).getVariable() = name + and + not imp.getAnImportedModuleName() = "__future__" + and + not imp.getEnclosingModule().declaredInAll(name.getId()) + and + imp.getScope() = imp.getEnclosingModule() + and + not global_name_used(imp.getScope(), name) + and + /* Imports in __init__.py are used to force module loading */ + not imp.getEnclosingModule().isPackageInit() + and + /* Name may be imported for use in epytext documentation */ + not exists(Comment cmt | + cmt.getText().matches("%L{" + name.getId() + "}%") | + cmt.getLocation().getFile() = 
imp.getLocation().getFile() + ) + and + not name_acceptable_for_unused_variable(name) + and + /* Assume that opaque `__all__` includes imported module */ + not all_not_understood(imp.getEnclosingModule()) +} + + +from Stmt s, Variable name +where unused_import(s, name) +select s, "Import of '" + name.getId() + "' is not used." + diff --git a/python/ql/src/Imports/from_import.py b/python/ql/src/Imports/from_import.py new file mode 100644 index 00000000000..48151af5fce --- /dev/null +++ b/python/ql/src/Imports/from_import.py @@ -0,0 +1,5 @@ +from sys import stdout + +def main(): + stdout.write("Hello World!") + diff --git a/python/ql/src/Imports/from_import_fixed.py b/python/ql/src/Imports/from_import_fixed.py new file mode 100644 index 00000000000..a93c93f8701 --- /dev/null +++ b/python/ql/src/Imports/from_import_fixed.py @@ -0,0 +1,4 @@ +import sys + +def main(): + sys.stdout.write("Hello World!") diff --git a/python/ql/src/Imports/redirect.py b/python/ql/src/Imports/redirect.py new file mode 100644 index 00000000000..00a4275bfaa --- /dev/null +++ b/python/ql/src/Imports/redirect.py @@ -0,0 +1,11 @@ +import sys + +def redirect_to_file(function, args, kwargs, filename): + with open(filename) as out: + orig_stdout = sys.stdout + sys.stdout = out + try: + function(*args, **kwargs) + finally: + sys.stdout = orig_stdout + diff --git a/python/ql/src/Lexical/CommentedOutCode.py b/python/ql/src/Lexical/CommentedOutCode.py new file mode 100644 index 00000000000..6e7baa6366b --- /dev/null +++ b/python/ql/src/Lexical/CommentedOutCode.py @@ -0,0 +1,4 @@ +def area(r): + #if DEBUG: + # print("Computing area of %r" % r) + return r.length * r.width diff --git a/python/ql/src/Lexical/CommentedOutCode.qhelp b/python/ql/src/Lexical/CommentedOutCode.qhelp new file mode 100644 index 00000000000..0d153665cdc --- /dev/null +++ b/python/ql/src/Lexical/CommentedOutCode.qhelp @@ -0,0 +1,7 @@ + + + + + diff --git a/python/ql/src/Lexical/CommentedOutCode.ql 
b/python/ql/src/Lexical/CommentedOutCode.ql new file mode 100644 index 00000000000..52633f8e300 --- /dev/null +++ b/python/ql/src/Lexical/CommentedOutCode.ql @@ -0,0 +1,20 @@ +/** + * @name Commented out code + * @description Commented out code causes visual clutter as it is neither code nor comment. + * @kind problem + * @tags maintainability + * readability + * documentation + * @problem.severity recommendation + * @sub-severity high + * @precision high + * @id py/commented-out-code + */ + +import python + +import Lexical.CommentedOutCode + +from CommentedOutCodeBlock c +where not c.maybeExampleCode() +select c, "These comments appear to contain commented-out code." diff --git a/python/ql/src/Lexical/CommentedOutCode.qll b/python/ql/src/Lexical/CommentedOutCode.qll new file mode 100644 index 00000000000..dbb12867466 --- /dev/null +++ b/python/ql/src/Lexical/CommentedOutCode.qll @@ -0,0 +1,337 @@ +import python + + +private predicate def_statement(Comment c) { + c.getText().regexpMatch("#(\\S*\\s+)?def\\s.*\\(.*\\).*:\\s*(#.*)?") +} + +private predicate if_statement(Comment c) { + c.getText().regexpMatch("#(\\S*\\s+)?(el)?if\\s.*:\\s*(#.*)?") + or + c.getText().regexpMatch("#(\\S*\\s+)?else:\\s*(#.*)?") +} + +private predicate for_statement(Comment c) { + c.getText().regexpMatch("#(\\S*\\s+)?for\\s.*\\sin\\s.*:\\s*(#.*)?") +} + +private predicate with_statement(Comment c) { + c.getText().regexpMatch("#(\\S*\\s+)?with\\s+.*:\\s*(#.*)?") +} + +private predicate try_statement(Comment c) { + c.getText().regexpMatch("#(\\S*\\s+)?try:\\s*(#.*)?") + or + c.getText().regexpMatch("#(\\S*\\s+)?except\\s*(\\w+\\s*(\\sas\\s+\\w+\\s*)?)?:\\s*(#.*)?") + or + c.getText().regexpMatch("#(\\S*\\s+)?finally:\\s*(#.*)?") +} + +private int indentation(Comment c) { + exists(int offset | + maybe_code(c) and + exists(c.getText().regexpFind("[^\\s#]", 1, offset)) and + result = offset + c.getLocation().getStartColumn() + ) +} + +private predicate class_statement(Comment c) { + 
c.getText().regexpMatch("#(\\S*\\s+)?class\\s+\\w+.*:\\s*(#.*)?") +} + +private predicate triple_quote(Comment c) { + c.getText().regexpMatch("#.*(\"\"\"|''').*") +} + +private predicate triple_quoted_string_part(Comment start, Comment end) { + triple_quote(start) and end = start + or + exists(Comment mid | + triple_quoted_string_part(start, mid) and + end = non_empty_following(mid) and + not triple_quote(end) + ) +} + +private predicate maybe_code(Comment c) { + not non_code(c) and not filler(c) and not endline_comment(c) and not file_or_url(c) + or + commented_out_comment(c) +} + +private predicate commented_out_comment(Comment c) { + c.getText().regexpMatch("#+\\s+#.*") +} + +private int scope_start(Comment start) { + ( + def_statement(start) or + class_statement(start) + ) + and + result = indentation(start) + and + not non_code(start) +} + +private int block_start(Comment start) { + ( + if_statement(start) or + for_statement(start) or + try_statement(start) or + with_statement(start) + ) + and + result = indentation(start) + and + not non_code(start) +} + +private int scope_doc_string_part(Comment start, Comment end) { + result = scope_start(start) and + triple_quote(end) and end = non_empty_following(start) + or + exists(Comment mid | + result = scope_doc_string_part(start, mid) and + end = non_empty_following(mid) | + not triple_quote(end) + ) +} + +private int scope_part(Comment start, Comment end) { + result = scope_start(start) and end = start + or + exists(Comment mid | + result = scope_doc_string_part(start, mid) and + end = non_empty_following(mid) and + triple_quote(end) + ) + or + exists(Comment mid | + result = scope_part(start, mid) and + end = non_empty_following(mid) | + indentation(end) > result + ) +} + +private int block_part(Comment start, Comment end) { + result = block_start(start) and + end = non_empty_following(start) and + indentation(end) > result + or + exists(Comment mid | + result = block_part(start, mid) and + end = 
non_empty_following(mid) | + indentation(end) > result + or + result = block_start(end) + ) +} + +private predicate commented_out_scope_part(Comment start, Comment end) { + exists(scope_doc_string_part(start, end)) + or + exists(scope_part(start, end)) +} + +private predicate commented_out_code(Comment c) { + commented_out_scope_part(c, _) + or + commented_out_scope_part(_, c) + or + exists(block_part(c, _)) + or + exists(block_part(_, c)) +} + +private predicate commented_out_code_part(Comment start, Comment end) { + commented_out_code(start) and end = start and + not exists(Comment prev | + non_empty_following(prev) = start | + commented_out_code(prev) + ) + or + exists(Comment mid | + commented_out_code_part(start, mid) and + non_empty_following(mid) = end and + commented_out_code(end) + ) +} + +private predicate commented_out_code_block(Comment start, Comment end) { + /* A block must be at least 2 comments long. */ + start != end and + commented_out_code_part(start, end) and + not commented_out_code(non_empty_following(end)) +} + +/* A single line comment that appears to be commented out code */ +class CommentedOutCodeLine extends Comment { + + CommentedOutCodeLine () { + exists(CommentedOutCodeBlock b | + b.contains(this) + ) + } + + /* Whether this commented-out code line is likely to be example code embedded in a larger comment. 
*/ + predicate maybeExampleCode() { + exists(CommentedOutCodeBlock block | + block.contains(this) and + block.maybeExampleCode() + ) + } + +} + +/** A block of comments that appears to be commented out code */ +class CommentedOutCodeBlock extends @py_comment { + + CommentedOutCodeBlock() { + commented_out_code_block(this, _) + } + + string toString() { + result = "Commented out code" + } + + /** Whether this commented-out code block contains the comment c */ + predicate contains(Comment c) { + this = c + or + exists(Comment prev | + non_empty_following(prev) = c and + not commented_out_code_block(this, prev) and + this.contains(prev) + ) + } + + /** The length of this comment block (in comments) */ + int length() { + result = count(Comment c | this.contains(c)) + } + + predicate hasLocationInfo(string filepath, int bl, int bc, int el, int ec) { + ((Comment)this).getLocation().hasLocationInfo(filepath, bl, bc, _, _) + and + exists(Comment end | + commented_out_code_block(this, end) | + end.getLocation().hasLocationInfo(_, _, _, el, ec) + ) + } + + /** Whether this commented-out code block is likely to be example code embedded in a larger comment. 
*/ + predicate maybeExampleCode() { + exists(CommentBlock block | + block.contains((Comment)this) | + exists(int all_code | + all_code = sum (CommentedOutCodeBlock code | block.contains((Comment)code) | code.length()) + and + /* This ratio may need fine tuning */ + block.length() > all_code*2 + ) + ) + } +} + +/** Does c contain the pair of words "s1 s2" with only whitespace between them */ +private predicate word_pair(Comment c, string s1, string s2) { + exists(int i1, int i2, int o1, int o2 | + s1 = c.getText().regexpFind("\\w+", i1, o1) and + s2 = c.getText().regexpFind("\\w+", i2, o2) and + i2 = i1 + 1 and + c.getText().prefix(o1).regexpMatch("[^'\"]*") and + c.getText().substring(o1 + s1.length(), o2).regexpMatch("\\s+") + ) +} + +/** The comment c cannot be code if it contains a word pair "word1 word2" and + * either: + * 1. word1 is not a keyword and word2 is not an operator: + * "x is" could be code, "return y" could be code, but "isnt code" cannot be code. + * or + * 2. word1 is a keyword requiring a colon and there is no colon: + * "with spam" can only be code if the comment contains a colon. 
+ */ +private predicate non_code(Comment c) { + exists(string word1, string word2 | + word_pair(c, word1, word2) and + not word2 = operator_keyword() + | + not word1 = a_keyword() + or + word1 = keyword_requiring_colon() and not c.getText().matches("%:%") + ) and + /* Except comments of the form: # (maybe code) # some comment */ + not c.getText().regexpMatch("#\\S+\\s.*#.*") + or + /* Don't count doctests as code */ + c.getText().matches("%>>>%") or c.getText().matches("%...%") +} + +private predicate filler(Comment c) { /* Holds if c is one or more '#' followed only by whitespace, '*', '#', '_', '=', '+' or '-'; the '-' is placed last in the character class so that it is a literal and does not form the range '#'-'_' */ + c.getText().regexpMatch("#+[\\s*#_=+-]*") +} + +/** Gets the first non empty comment following c */ +private Comment non_empty_following(Comment c) { + not empty(result) and + ( + result = empty_following(c).getFollowing() + or + not empty(c) and result = c.getFollowing() + ) +} + +/* Helper for non_empty_following() */ +private Comment empty_following(Comment c) { + not empty(c) and + empty(result) + and + exists(Comment prev | + result = prev.getFollowing() | + prev = c + or + prev = empty_following(c) + ) +} + +private predicate empty(Comment c) { + c.getText().regexpMatch("#+\\s*") +} + +/* A comment following code on the same line */ +private predicate endline_comment(Comment c) { + exists(Expr e, string f, int line | + e.getLocation().hasLocationInfo(f, line, _, _, _) and + c.getLocation().hasLocationInfo(f, line, _, _, _) + ) +} + +private predicate file_or_url(Comment c) { /* Holds if c contains a URL, a '/'-separated path with an extension, or a backslash-separated path with an extension */ + c.getText().regexpMatch("#[^'\"]+(https?|file)://.*") or + c.getText().regexpMatch("#[^'\"]+(/[a-zA-Z]\\w*)+\\.[a-zA-Z]+.*") or + c.getText().regexpMatch("#[^'\"]+(\\\\[a-zA-Z]\\w*)+\\.[a-zA-Z]+.*") /* four backslashes in the QL string yield one literal backslash in the regex */ +} + +private string operator_keyword() { + result = "import" or result = "and" or result = "is" or result = "or" or result = "in" or result = "not" or result = "as" +} + +private string keyword_requiring_colon() { + result = "try" or result = "while" or result = "elif" or result = "else" or result = "if" or + result = "except" or result = "def" or result = "class" +} + 
+private string other_keyword() { + result = "del" or result = "lambda" or result = "from" or + result = "global" or result = "with" or result = "assert" or + result = "yield" or result = "finally" or + result = "print" or + result = "exec" or result = "raise" or + result = "return" or result = "for" +} + +private string a_keyword() { + result = keyword_requiring_colon() or result = other_keyword() or result = operator_keyword() +} diff --git a/python/ql/src/Lexical/CommentedOutCodeCommon.qhelp b/python/ql/src/Lexical/CommentedOutCodeCommon.qhelp new file mode 100644 index 00000000000..aeae8991726 --- /dev/null +++ b/python/ql/src/Lexical/CommentedOutCodeCommon.qhelp @@ -0,0 +1,28 @@ + + + + +

    +Remove the commented-out code, or reinstate it if necessary. If you want to include a snippet +of example code in a comment, consider adding an @example tag or enclosing the code +in a code or pre element. +

    + +
    + +

    +In the following example, a print statement, originally used +for debugging, is left in the code, but commented out. It should be removed altogether. +

    + + + +
    + + +
  • Los Techies: Commented Code == Technical Debt.
  • + +
    +
    diff --git a/python/ql/src/Lexical/FCommentedOutCode.qhelp b/python/ql/src/Lexical/FCommentedOutCode.qhelp new file mode 100644 index 00000000000..2caeaf5cdf3 --- /dev/null +++ b/python/ql/src/Lexical/FCommentedOutCode.qhelp @@ -0,0 +1,7 @@ + + + + + diff --git a/python/ql/src/Lexical/FCommentedOutCode.ql b/python/ql/src/Lexical/FCommentedOutCode.ql new file mode 100644 index 00000000000..2f6ee0741c6 --- /dev/null +++ b/python/ql/src/Lexical/FCommentedOutCode.ql @@ -0,0 +1,20 @@ +/** + * @name Lines of commented-out code in files + * @description The number of lines of commented out code per file + * @kind treemap + * @treemap.warnOn highValues + * @metricType file + * @precision high + * @tags maintainability + * @id py/lines-of-commented-out-code-in-files + */ + +import python +import Lexical.CommentedOutCode + +import python + +from File f, int n +where n = count(CommentedOutCodeLine c | not c.maybeExampleCode() and c.getLocation().getFile() = f) +select f, n +order by n desc diff --git a/python/ql/src/Lexical/OldOctalLiteral.py b/python/ql/src/Lexical/OldOctalLiteral.py new file mode 100644 index 00000000000..ad15ab8889b --- /dev/null +++ b/python/ql/src/Lexical/OldOctalLiteral.py @@ -0,0 +1,12 @@ + +#Easily misread as x = 15 +x = 015 + +#The extra 'o' alerts the reader that this is an octal literal +y = 0o15 + +#If this is a byte sized value then a hexadecimal might be clearer +y = 0x0d + +#Or if it is a bit pattern then a binary value might be clearer +y = 0b1101 diff --git a/python/ql/src/Lexical/OldOctalLiteral.qhelp b/python/ql/src/Lexical/OldOctalLiteral.qhelp new file mode 100644 index 00000000000..f44bbddbc07 --- /dev/null +++ b/python/ql/src/Lexical/OldOctalLiteral.qhelp @@ -0,0 +1,35 @@ + + + +

    +Octal literals starting with 0 are easily misread as a decimal, +particularly by those programmers who do not have a C or Java background. +

    + +

    +The new literal syntax for non-decimal numbers is more distinct and is thus less likely to be misunderstood. +

    + +
    + + +

    +Use the 0oXXX form instead of the 0XXX form. Alternatively use binary or hexadecimal format if that would be clearer. +

    + +
    + + + + + + + +
  • Python Language Reference: Integer Literals.
  • +
  • Python PEP 3127: Integer Literal Support and Syntax.
  • + + +
    +
    diff --git a/python/ql/src/Lexical/OldOctalLiteral.ql b/python/ql/src/Lexical/OldOctalLiteral.ql new file mode 100644 index 00000000000..af0ee723c10 --- /dev/null +++ b/python/ql/src/Lexical/OldOctalLiteral.ql @@ -0,0 +1,31 @@ +/** + * @name Confusing octal literal + * @description Octal literal with a leading 0 is easily misread as a decimal value + * @kind problem + * @tags readability + * @problem.severity recommendation + * @sub-severity low + * @precision high + * @id py/old-style-octal-literal + */ + +import python + +predicate is_old_octal(IntegerLiteral i) { + exists(string text | + text = i.getText() | + text.charAt(0) = "0" and + not text = "00" and + exists(text.charAt(1).toInt()) and + /* Do not flag file permission masks */ + exists(int len | len = text.length() | + len != 4 and + len != 5 and + len != 7 + ) + ) +} + +from IntegerLiteral i +where is_old_octal(i) +select i, "Confusing octal literal, use 0o" + i.getText().suffix(1) + " instead." diff --git a/python/ql/src/Lexical/ToDoComment.py b/python/ql/src/Lexical/ToDoComment.py new file mode 100644 index 00000000000..3cc021e6d15 --- /dev/null +++ b/python/ql/src/Lexical/ToDoComment.py @@ -0,0 +1,8 @@ +def realpath(path): + ''' + Returns the true, canonical file system path equivalent to the given + path. + ''' + # TODO: There may be a more clever way to do this that also handles other, + # less common file systems. + return os.path.normpath(normcase(os.path.realpath(path))) \ No newline at end of file diff --git a/python/ql/src/Lexical/ToDoComment.qhelp b/python/ql/src/Lexical/ToDoComment.qhelp new file mode 100644 index 00000000000..2b5154d5924 --- /dev/null +++ b/python/ql/src/Lexical/ToDoComment.qhelp @@ -0,0 +1,53 @@ + + + + +

    A comment that includes the word TODO often marks a part of +the code that is incomplete or broken, or highlights ambiguities in the +software's specification.

    + +

    For example, this list of comments is typical of those found in real +programs:

    + +
      +
    • TODO: move this code somewhere else
    • +
    • TODO: find a better solution to this workaround
    • +
    • TODO: test this
    • +
    + +
    + + +

    It is very important that TODO comments are +not just removed from the code. Each of them must be addressed in some way.

    + +

    Simpler comments can usually be immediately addressed by fixing the code, +adding a test, doing some refactoring, or clarifying the intended behavior of +a feature.

    + +

    In contrast, larger issues may require discussion, and a significant amount +of work to address. In these cases it is a good idea to move the comment to an +issue-tracking system, so that the issue can be tracked +and prioritized relative to other defects and feature requests.

    + +
    + +

    The following example shows a function where a TODO comment indicates a known limitation in the +existing implementation. The function should be reviewed, the limitation addressed and then the +comment deleted.

    + + + +
    + + +
  • + Wikipedia: + Comment tags. +
  • + + +
    +
    diff --git a/python/ql/src/Lexical/ToDoComment.ql b/python/ql/src/Lexical/ToDoComment.ql new file mode 100644 index 00000000000..b8bcf98ada8 --- /dev/null +++ b/python/ql/src/Lexical/ToDoComment.ql @@ -0,0 +1,21 @@ +/** + * @name 'To Do' comment + * @description Writing comments that include 'TODO' tends to lead to a build up of partially + * implemented features. + * @kind problem + * @tags maintainability + * readability + * documentation + * external/cwe/cwe-546 + * @problem.severity recommendation + * @sub-severity low + * @deprecated + * @precision medium + * @id py/todo-comment + */ + +import python + +from Comment c +where c.getText().matches("%TODO%") or c.getText().matches("%TO DO%") +select c, c.getText() diff --git a/python/ql/src/Metrics/CLinesOfCode.qhelp b/python/ql/src/Metrics/CLinesOfCode.qhelp new file mode 100644 index 00000000000..666bd473cb4 --- /dev/null +++ b/python/ql/src/Metrics/CLinesOfCode.qhelp @@ -0,0 +1,23 @@ + + + +

    This metric measures the number of lines of code in a function. This excludes comments and blank lines.

    + +

    Having too many lines of code in a function is an indication that it can be split into several functions of more manageable size.

    + +
    + + +

    Long functions should be examined to see if they can be split into smaller, more cohesive functions.

    + +
    + + +
  • M. Fowler, Refactoring. Addison-Wesley, 1999.
  • +
  • Wikipedia: Code refactoring.
  • + + +
    +
    diff --git a/python/ql/src/Metrics/CLinesOfCode.ql b/python/ql/src/Metrics/CLinesOfCode.ql new file mode 100644 index 00000000000..5c5453fb76a --- /dev/null +++ b/python/ql/src/Metrics/CLinesOfCode.ql @@ -0,0 +1,15 @@ +/** + * @name Lines of code in functions + * @description The number of lines of code in a function. + * @kind treemap + * @id py/lines-of-code-per-function + * @treemap.warnOn highValues + * @metricType callable + * @metricAggregate avg sum max + * @tags maintainability + */ +import python + +from Function f +select f, f.getMetrics().getNumberOfLinesOfCode() as n +order by n desc \ No newline at end of file diff --git a/python/ql/src/Metrics/ClassAfferentCoupling.qhelp b/python/ql/src/Metrics/ClassAfferentCoupling.qhelp new file mode 100644 index 00000000000..a1e2792c046 --- /dev/null +++ b/python/ql/src/Metrics/ClassAfferentCoupling.qhelp @@ -0,0 +1,80 @@ + + + +

    +This metric measures the number of incoming dependencies for each +class, that is the number of other classes that depend on it. +

    + +

    +Classes that are depended upon by many other classes typically require a lot of +effort to change, because changing them will force their dependents to change +as well. This is not necessarily a bad thing -- indeed, most systems will have +some such classes (one example might be a string class). However, classes with a high number +of incoming dependencies +and a high number of outgoing dependencies are hard to maintain. A class with both high afferent +coupling and high efferent coupling is referred to as a hub class. +Such classes can be problematic, because on the one hand they are hard to +change (high afferent coupling), yet on the other they have many reasons to +change (high efferent coupling). This contradiction yields code that is very +hard to maintain or test. +

    + +

    +Conversely, some classes may only be depended on by very few other classes. Again, +this is not necessarily a problem -- we would expect, for example, that the +top-level classes of a system would meet this criterion. When lower-level +classes have very few incoming dependencies, however, it can be an indication +that a class is not pulling its weight. In extreme cases, classes may even +have an afferent coupling of 0, indicating that they are dead +code. +

    + +
    + + +

    +It is unwise to refactor a class based purely on its high or low number of +incoming dependencies -- a class's afferent coupling value only makes sense +in the context of its role in the system as a whole. However, when combined +with other metrics such as efferent coupling, it is possible to make some +general recommendations: +

    + +
      +
    • +Classes with high numbers of incoming and outgoing dependencies +are hub classes that are prime candidates for refactoring (although this +will not always be easy). The general strategy is to split the class into +smaller classes that each have fewer responsibilities, and refactor the code +that previously used the hub class accordingly. +
    • + +
    • +Classes that have very few incoming dependencies and are not at the top level +of a system may not be pulling their weight and should be refactored, e.g. +using the 'Collapse Hierarchy' or 'Inline Class' techniques in [Fowler] +(see the section entitled 'Lazy Class' on p.68). +
    • + +
    • +Classes that have an afferent coupling of 0 may be dead code -- +in this situation, they can often be deleted. +
    • +
    + + + +
    + + + +
  • +M. Fowler. Refactoring. Addison-Wesley, 1999. +
  • + + +
    +
    diff --git a/python/ql/src/Metrics/ClassAfferentCoupling.ql b/python/ql/src/Metrics/ClassAfferentCoupling.ql new file mode 100644 index 00000000000..5fd2ec4c16f --- /dev/null +++ b/python/ql/src/Metrics/ClassAfferentCoupling.ql @@ -0,0 +1,18 @@ +/** + * @name Incoming class dependencies + * @description The number of classes that depend on a class. + * @kind treemap + * @id py/afferent-coupling-per-class + * @treemap.warnOn highValues + * @metricType reftype + * @metricAggregate avg max + * @tags changeability + * modularity + */ + +import python + +from ClassMetrics cls +select cls, cls.getAfferentCoupling() as n +order by n desc + diff --git a/python/ql/src/Metrics/ClassEfferentCoupling.py b/python/ql/src/Metrics/ClassEfferentCoupling.py new file mode 100644 index 00000000000..34638826234 --- /dev/null +++ b/python/ql/src/Metrics/ClassEfferentCoupling.py @@ -0,0 +1,10 @@ +class X: + + def iUseY(y): + y.doStuff() + + def soDoY(): + return Y() + + def iUseZ(z1, z2): + return z1.combine(z2) diff --git a/python/ql/src/Metrics/ClassEfferentCoupling.qhelp b/python/ql/src/Metrics/ClassEfferentCoupling.qhelp new file mode 100644 index 00000000000..b28d6e5fb2e --- /dev/null +++ b/python/ql/src/Metrics/ClassEfferentCoupling.qhelp @@ -0,0 +1,60 @@ + + + +

    +Efferent coupling is the number of outgoing dependencies for each class. In other words, it is the +number of other classes on which each class depends. +

    + +

    +A class that depends on many other classes is quite brittle, because if any of +its dependencies change, the class itself may have to change as well. Furthermore, the +reason for the high number of dependencies is often that different parts of +the class depend on different groups of other classes, so it is common to +find that classes with high efferent coupling also lack cohesion. +

    + +
    + + +

    +You can reduce efferent coupling by splitting up a class so that each part depends on fewer classes. +

    + +
    + + +

    In the following example, class X depends on both Y and +Z. +

    + + + +

    However, the methods that use Y do not use Z, and the methods +that use Z do not use Y. Therefore, the class can be split into +two classes, one of which depends only on Y and the other only on Z.

    + + + +

    +Although this is a slightly artificial example, this sort of situation +does tend to occur in more complicated classes, +so the general technique is quite widely applicable. +

    + +
    + + + +
  • +IBM developerWorks: Evolutionary architecture and emergent design: Emergent design through metrics. +
  • +
  • +R. Martin, Agile Software Development: Principles, Patterns and Practices. Pearson, 2011. +
  • + + +
    +
    diff --git a/python/ql/src/Metrics/ClassEfferentCoupling.ql b/python/ql/src/Metrics/ClassEfferentCoupling.ql new file mode 100644 index 00000000000..d8d9dabd5dd --- /dev/null +++ b/python/ql/src/Metrics/ClassEfferentCoupling.ql @@ -0,0 +1,18 @@ +/** + * @name Outgoing class dependencies + * @description The number of classes that this class depends upon. + * @kind treemap + * @id py/efferent-coupling-per-class + * @treemap.warnOn highValues + * @metricType reftype + * @metricAggregate avg max + * @tags testability + * modularity + */ + +import python + +from ClassMetrics cls +select cls, cls.getEfferentCoupling() as n +order by n desc + diff --git a/python/ql/src/Metrics/ClassEfferentCouplingGood.py b/python/ql/src/Metrics/ClassEfferentCouplingGood.py new file mode 100644 index 00000000000..1131ee6b5dd --- /dev/null +++ b/python/ql/src/Metrics/ClassEfferentCouplingGood.py @@ -0,0 +1,12 @@ +class YX: + + def iUseY(y): + y.doStuff() + + def soDoY(): + return Y() + +class ZX: + + def iUseZ(z1, z2): + return z1.combine(z2) diff --git a/python/ql/src/Metrics/CommentRatio.qhelp b/python/ql/src/Metrics/CommentRatio.qhelp new file mode 100644 index 00000000000..32271ccbb85 --- /dev/null +++ b/python/ql/src/Metrics/CommentRatio.qhelp @@ -0,0 +1,34 @@ + + + +

    This metric measures the percentage of lines in a file that contain a comment or are part of a +multi-line comment. Note that this metric ignores docstrings.

    + +

    The percentage of comment lines should always be considered together with the value for the related metric +"Percentage of docstrings". For public modules, functions, classes and methods, docstrings are the +preferred method of documentation because the information can be inspected by the program at runtime, +for example, as an interactive help system or as metadata for a function.

    + +

    Having a low percentage of comments and docstrings is an indication that a file does not have +sufficient documentation. Undocumented code is difficult to understand, modify, and reuse.

    + +
    + +

    Add documentation to files with a low comment and docstring ratio. Use docstrings to document +public modules, functions, classes and methods.

    + +
    + + +
  • Wikipedia: +Need for comments.
  • +
  • Python PEP 8: Comments.
  • +
  • Python for Beginners: +Python Docstrings.
  • +
  • Python PEP 257: Docstring Conventions.
  • + + +
    +
    diff --git a/python/ql/src/Metrics/CommentRatio.ql b/python/ql/src/Metrics/CommentRatio.ql new file mode 100644 index 00000000000..3f04da28283 --- /dev/null +++ b/python/ql/src/Metrics/CommentRatio.ql @@ -0,0 +1,18 @@ +/** + * @name Percentage of comments + * @description The percentage of lines in a file that contain comments. Note that docstrings are + * reported by a separate metric. + * @kind treemap + * @id py/comment-ratio-per-file + * @treemap.warnOn lowValues + * @metricType file + * @metricAggregate avg max + * @tags maintainability + * documentation + */ +import python + +from Module m, ModuleMetrics mm +where mm = m.getMetrics() and mm.getNumberOfLines() > 0 +select m, 100.0 * ((float)mm.getNumberOfLinesOfComments() / (float)mm.getNumberOfLines()) as ratio +order by ratio desc diff --git a/python/ql/src/Metrics/CyclomaticComplexity.qhelp b/python/ql/src/Metrics/CyclomaticComplexity.qhelp new file mode 100644 index 00000000000..0335580c4b8 --- /dev/null +++ b/python/ql/src/Metrics/CyclomaticComplexity.qhelp @@ -0,0 +1,38 @@ + + + + + +

    This metric measures the total cyclomatic complexity for the functions in a file. +

    + +

    +Cyclomatic complexity approximates the number of paths that can be taken during the execution of a +function (and hence, the minimum number of test cases necessary to test it thoroughly). Straight-line +code has zero cyclomatic complexity, while branches and loops increase cyclomatic complexity.

    + +

    Files that contain too many complex functions can be difficult to test, understand, and maintain.

    + +
    + +

    Try to simplify overly-complex code. For example:

    + +
    • Highly nested conditionals can be simplified by rethinking the requirements that the function fulfills.
    • +
    • Repeated tests can be refactored into helper functions, which also decreases the risk of +introducing defects by copying and pasting code.
    • +
    • Large complex functions can often be split into smaller more focused functions.
    • +
    + + +
    + + +
  • M. Fowler. Refactoring. Addison-Wesley, 1999.
  • +
  • T. J. McCabe. A Complexity Measure. IEEE Transactions on Software Engineering, SE-2(4), +December 1976.
  • +
  • Wikipedia: Cyclomatic complexity.
  • + +
    +
    diff --git a/python/ql/src/Metrics/CyclomaticComplexity.ql b/python/ql/src/Metrics/CyclomaticComplexity.ql new file mode 100644 index 00000000000..c5ab9858202 --- /dev/null +++ b/python/ql/src/Metrics/CyclomaticComplexity.ql @@ -0,0 +1,19 @@ +/** + * @name Cyclomatic complexity of functions + * @description The cyclomatic complexity per function (an indication of how many tests are necessary, + * based on the number of branching statements). + * @kind treemap + * @id py/cyclomatic-complexity-per-function + * @treemap.warnOn highValues + * @metricType callable + * @metricAggregate avg max sum + * @tags testability + * complexity + * maintainability + */ +import python + +from Function func, int complexity +where complexity = func.getMetrics().getCyclomaticComplexity() +select func, complexity +order by complexity desc \ No newline at end of file diff --git a/python/ql/src/Metrics/Dependencies/ExternalDependencies.ql b/python/ql/src/Metrics/Dependencies/ExternalDependencies.ql new file mode 100644 index 00000000000..49506b0a0f9 --- /dev/null +++ b/python/ql/src/Metrics/Dependencies/ExternalDependencies.ql @@ -0,0 +1,44 @@ +/** + * @name External dependencies + * @description Count the number of dependencies that a Python source file has on external packages. + * @kind treemap + * @treemap.warnOn highValues + * @metricType externalDependency + * @precision medium + * @id py/external-dependencies + */ + +import python +import semmle.python.dependencies.TechInventory + +/* + * These two columns encode four logical columns: + * + * 1. Python source file where the dependency originates + * 2. Package Object, ideally referring to a PyPI or similar externally provided package + * 3. Version of that package Object, if known + * 4. Number of dependencies from the source file to the package + * + * Ideally this query would therefore return three columns, + * but this would require changing the dashboard database schema + * and dashboard extractor. 
+ * + * The first column (the Python source file) is prepended with a '/' + * so that the file path matches the path used for the file in the + * dashboard database, which is implicitly relative to the source + * archive location. + */ + +predicate src_package_count(File sourceFile, ExternalPackage package, int total) { + total = strictcount(AstNode src | + dependency(src, package) and + src.getLocation().getFile() = sourceFile + ) +} + +from File sourceFile, int total, string entity, ExternalPackage package +where +src_package_count(sourceFile, package, total) and +entity = munge(sourceFile, package) +select entity, total +order by total desc diff --git a/python/ql/src/Metrics/Dependencies/ExternalDependenciesSourceLinks.ql b/python/ql/src/Metrics/Dependencies/ExternalDependenciesSourceLinks.ql new file mode 100644 index 00000000000..3129edd6328 --- /dev/null +++ b/python/ql/src/Metrics/Dependencies/ExternalDependenciesSourceLinks.ql @@ -0,0 +1,26 @@ +/** + * @name External dependency source links + * @kind source-link + * @metricType externalDependency + * @id py/dependency-source-links + */ + +import python +import semmle.python.dependencies.TechInventory + +/* + * This query creates the source links for the ExternalDependencies.ql query. + * Although the entities in question are of the form '/file/path<|>dependency', the + * /file/path is a bare string relative to the root of the source archive, and not + * tied to a particular revision. We need the File entity (the second column here) to + * recover that information once we are in the dashboard database, using the + * ExternalEntity.getASourceLink() method. 
+ */ +from File sourceFile, string entity +where + exists(PackageObject package, AstNode src | + dependency(src, package) and + src.getLocation().getFile() = sourceFile and + entity = munge(sourceFile, package) + ) +select entity, sourceFile diff --git a/python/ql/src/Metrics/DirectImports.qhelp b/python/ql/src/Metrics/DirectImports.qhelp new file mode 100644 index 00000000000..db74ac2076f --- /dev/null +++ b/python/ql/src/Metrics/DirectImports.qhelp @@ -0,0 +1,26 @@ + + + +

    This metric measures the number of modules that are directly imported by each module (file). +Modules that import many other modules often have too many responsibilities and are not well-focused. +This makes it difficult to understand and maintain the module. +

    + +
    + +

    Split and/or refactor files with too many responsibilities to create modules with a single, +well-defined role.

    + + +
    + + +
  • Python Language Reference: The import statement. +
  • M. Fowler, Refactoring. Addison-Wesley, 1999.
  • +
  • Wikipedia: Code refactoring.
  • + + +
    +
    diff --git a/python/ql/src/Metrics/DirectImports.ql b/python/ql/src/Metrics/DirectImports.ql new file mode 100644 index 00000000000..1eeb7694879 --- /dev/null +++ b/python/ql/src/Metrics/DirectImports.ql @@ -0,0 +1,16 @@ +/** + * @name Direct imports per file + * @description The number of modules directly imported by this file. + * @kind treemap + * @id py/direct-imports-per-file + * @treemap.warnOn highValues + * @metricType file + * @metricAggregate avg max + * @tags modularity + * maintainability + */ +import python + +from ModuleObject m, int n +where n = count(ModuleObject imp | imp = m.getAnImportedModule()) +select m.getModule(), n \ No newline at end of file diff --git a/python/ql/src/Metrics/DocStringRatio.qhelp b/python/ql/src/Metrics/DocStringRatio.qhelp new file mode 100644 index 00000000000..4e124223d15 --- /dev/null +++ b/python/ql/src/Metrics/DocStringRatio.qhelp @@ -0,0 +1,35 @@ + + + +

    This metric measures the percentage of lines in a file that contain a docstring. Note that this +metric ignores comments. + +

    Docstrings are a good way to associate documentation with a specific object in Python. For public +modules, functions, classes and methods, docstrings are the preferred method of documentation because +the information can be inspected by the program at runtime, for example, as an interactive help system +or as metadata for a function.

    + +

    Having a low percentage of docstrings is often an indication that a file has insufficient +documentation. However, the value for the related metric "Percentage of comments" should also be +considered because packages and non-public methods may be documented using comments. Undocumented +code is difficult to understand, modify, and reuse.

    + +
    + +

    Add documentation to files with a low docstring ratio. It is most useful to start documenting +the public functions first.

    + +
    + + +
  • Python for Beginners: +Python Docstrings.
  • +
  • Python PEP 8: Documentation +Strings.
  • +
  • Python PEP 257: Docstring Conventions.
  • + + +
    +
    diff --git a/python/ql/src/Metrics/DocStringRatio.ql b/python/ql/src/Metrics/DocStringRatio.ql new file mode 100644 index 00000000000..43d8d7af248 --- /dev/null +++ b/python/ql/src/Metrics/DocStringRatio.ql @@ -0,0 +1,17 @@ +/** + * @name Percentage of docstrings + * @description The percentage of lines in a file that contain docstrings. + * @kind treemap + * @id py/doc-string-ratio-per-file + * @treemap.warnOn lowValues + * @metricType file + * @metricAggregate avg max + * @tags maintainability + * documentation + */ +import python + +from Module m, ModuleMetrics mm +where mm = m.getMetrics() and mm.getNumberOfLines() > 0 +select m, 100.0 * ((float)mm.getNumberOfLinesOfDocStrings() / (float)mm.getNumberOfLines()) as ratio +order by ratio desc diff --git a/python/ql/src/Metrics/DuplicationProblems.qhelp b/python/ql/src/Metrics/DuplicationProblems.qhelp new file mode 100644 index 00000000000..e55f8f8e455 --- /dev/null +++ b/python/ql/src/Metrics/DuplicationProblems.qhelp @@ -0,0 +1,17 @@ + + + +

    +Duplicated code increases overall code size, making the code base +harder to maintain and harder to understand. It also becomes harder to fix bugs, +since a programmer applying a fix to one copy has to always remember to update +other copies accordingly. Finally, code duplication is generally an indication of +a poorly designed or hastily written code base, which typically suffers from other +problems as well. +

    + + +
    +
    diff --git a/python/ql/src/Metrics/External/CommitDisplayStrings.ql b/python/ql/src/Metrics/External/CommitDisplayStrings.ql new file mode 100644 index 00000000000..dd5104996d0 --- /dev/null +++ b/python/ql/src/Metrics/External/CommitDisplayStrings.ql @@ -0,0 +1,10 @@ +/** + * @name Display strings of commits + * @kind display-string + * @id py/commit-display-strings + * @metricType commit + */ +import python +import external.VCS +from Commit c +select c.getRevisionName(), c.getMessage() + "(" + c.getDate().toString() + ")" diff --git a/python/ql/src/Metrics/External/CommitSourceLinks.ql b/python/ql/src/Metrics/External/CommitSourceLinks.ql new file mode 100644 index 00000000000..a31b73e2a7c --- /dev/null +++ b/python/ql/src/Metrics/External/CommitSourceLinks.ql @@ -0,0 +1,11 @@ +/** + * @name Source links of commits + * @kind source-link + * @id py/commit-source-links + * @metricType commit + */ +import python +import external.VCS +from Commit c, File f +where f.fromSource() and f = c.getAnAffectedFile() +select c.getRevisionName(), f diff --git a/python/ql/src/Metrics/FClasses.qhelp b/python/ql/src/Metrics/FClasses.qhelp new file mode 100644 index 00000000000..2584ef06b5d --- /dev/null +++ b/python/ql/src/Metrics/FClasses.qhelp @@ -0,0 +1,41 @@ + + + +

    This metric measures the number of classes in each file.

    + +

    There are advantages and disadvantages associated with defining multiple classes in the same file. +However, if you define unrelated classes in one file then the resulting module API is difficult for +other developers to understand and use.

    + +

    The disadvantages of putting multiple classes in the same file include:

    +
    • unless the classes are closely related, it can be difficult to understand and maintain the code, +even with good support from development tools
    • +
    • it increases the risk that multiple developers will work on the same file at once, and increases the +incidence of merge conflicts
    • +
    • it may be a symptom of badly designed modules, where many different features are handled by a +single file.
    • +
    + +

    Sometimes there are advantages of putting multiple classes in the same file, for example:

    +
    • it reduces the proliferation of files containing very few lines of code
    • +
    • it can be used to group logically-related classes together.
    + +
    + +

    Each module should have a single, well-defined role. Consequently, only logically-related classes +should be grouped together in the same file. If your code defines unrelated classes in the same file +then you should refactor the code and create new files, each containing logically related classes.

    + +
    + + +
  • Python: Class +Definitions.
  • +
  • M. Fowler, Refactoring. Addison-Wesley, 1999.
  • +
  • Wikipedia: Code refactoring.
  • + + +
    +
    diff --git a/python/ql/src/Metrics/FClasses.ql b/python/ql/src/Metrics/FClasses.ql new file mode 100644 index 00000000000..da667bd1df5 --- /dev/null +++ b/python/ql/src/Metrics/FClasses.ql @@ -0,0 +1,17 @@ +/** + * @name Classes per file + * @description Measures the number of classes in a file + * @kind treemap + * @id py/classes-per-file + * @treemap.warnOn highValues + * @metricType file + * @metricAggregate avg sum max + * @tags maintainability + */ + +import python + +from Module m, int n +where n = count(Class c | c.getEnclosingModule() = m) +select m, n +order by n desc diff --git a/python/ql/src/Metrics/FFunctionsAndMethods.qhelp b/python/ql/src/Metrics/FFunctionsAndMethods.qhelp new file mode 100644 index 00000000000..67bd594f3b8 --- /dev/null +++ b/python/ql/src/Metrics/FFunctionsAndMethods.qhelp @@ -0,0 +1,27 @@ + + + +

    This metric measures the number of functions and methods in each file.

    + +

    Tracking this metric over time will indicate which parts of the system are under active development. +Cross-referencing with the other metrics "Cyclomatic Complexity" and "Lines of Code" is recommended, +because files with high values for all three metrics are very likely to be too big and unwieldy; such +files should be split up.

    + +
    + +

    If a file is too big, identify the different tasks that are carried out by its functions and split +the file according to these tasks.

    + +
    + + +
  • Python: Function Definitions.
  • +
  • M. Fowler, Refactoring. Addison-Wesley, 1999.
  • +
  • Wikipedia: Code refactoring.
  • + + +
    +
    diff --git a/python/ql/src/Metrics/FFunctionsAndMethods.ql b/python/ql/src/Metrics/FFunctionsAndMethods.ql new file mode 100644 index 00000000000..b8d3a43b1dd --- /dev/null +++ b/python/ql/src/Metrics/FFunctionsAndMethods.ql @@ -0,0 +1,17 @@ +/** + * @name Functions and methods per file + * @description Measures the number of functions and methods in a file. + * @kind treemap + * @id py/functions-and-methods-per-file + * @treemap.warnOn highValues + * @metricType file + * @metricAggregate avg sum max + * @tags maintainability + */ + +import python + +from Module m, int n +where n = count(Function f | f.getEnclosingModule() = m and f.getName() != "lambda") +select m, n +order by n desc diff --git a/python/ql/src/Metrics/FLines.ql b/python/ql/src/Metrics/FLines.ql new file mode 100644 index 00000000000..04d9abad7e4 --- /dev/null +++ b/python/ql/src/Metrics/FLines.ql @@ -0,0 +1,15 @@ +/** + * @name Number of lines + * @description The number of lines in each file. + * @kind treemap + * @id py/lines-per-file + * @treemap.warnOn highValues + * @metricType file + * @metricAggregate avg sum max + */ +import python + +from Module m, int n +where n = m.getMetrics().getNumberOfLines() +select m, n +order by n desc diff --git a/python/ql/src/Metrics/FLinesOfCode.qhelp b/python/ql/src/Metrics/FLinesOfCode.qhelp new file mode 100644 index 00000000000..79aaea2cfb1 --- /dev/null +++ b/python/ql/src/Metrics/FLinesOfCode.qhelp @@ -0,0 +1,37 @@ + + + +

    This metric measures the number of lines of code in each file. The value excludes docstrings, comments and +blank lines.

    + +

    Organizing source into very large files is not recommended because:

    +
    • it can be difficult to understand and maintain the code, even with good support from +development tools
    • +
    • it increases the risk that multiple developers will work on the same file at once, and increases the +incidence of merge conflicts
    • +
    • it may be a symptom of weak code organization, where many different features are handled by functions in +a single file.
    • +
    + +
    + + +

    The solution depends on the underlying cause:

    +
    • if individual classes or functions are too large then they should be refactored into smaller +modules
    • +
    • if the file contains many classes or functions, they should be +moved to their own modules (sometimes in a subsidiary module, where appropriate)
    • +
    • if the file has been automatically generated by a tool, then it should be left alone.
    • +
    + +
    + + +
  • M. Fowler, Refactoring. Addison-Wesley, 1999.
  • +
  • Wikipedia: Code refactoring.
  • + + +
    +
    diff --git a/python/ql/src/Metrics/FLinesOfCode.ql b/python/ql/src/Metrics/FLinesOfCode.ql new file mode 100644 index 00000000000..778897c6ae0 --- /dev/null +++ b/python/ql/src/Metrics/FLinesOfCode.ql @@ -0,0 +1,18 @@ +/** + * @name Lines of code in files + * @kind treemap + * @description Measures the number of lines of code in each file (ignoring lines that + * contain only docstrings, comments or are blank). + * @treemap.warnOn highValues + * @metricType file + * @metricAggregate avg sum max + * @precision very-high + * @tags maintainability + * @id py/lines-of-code-in-files + */ +import python + +from Module m, int n +where n = m.getMetrics().getNumberOfLinesOfCode() +select m, n +order by n desc diff --git a/python/ql/src/Metrics/FLinesOfComments.qhelp b/python/ql/src/Metrics/FLinesOfComments.qhelp new file mode 100644 index 00000000000..fe91fa3d460 --- /dev/null +++ b/python/ql/src/Metrics/FLinesOfComments.qhelp @@ -0,0 +1,19 @@ + + + +

    This metric measures the number of comment lines per file. A low number of comments may indicate files that are difficult to understand due to poor documentation.

    + +
    + +

    Consider if the file needs more documentation. Most files should have at least a comment explaining their purpose.

    + +
    + + +
  • Jeff Atwood. Avoiding Undocumentation. 2005.
  • +
  • Steve McConnell. Code Complete. 2nd Edition. Microsoft Press. 2004.
  • + +
    +
    diff --git a/python/ql/src/Metrics/FLinesOfComments.ql b/python/ql/src/Metrics/FLinesOfComments.ql new file mode 100644 index 00000000000..38b19c2dc46 --- /dev/null +++ b/python/ql/src/Metrics/FLinesOfComments.ql @@ -0,0 +1,17 @@ +/** + * @name Lines of comments in files + * @kind treemap + * @description Measures the number of lines of comments in each file (including docstrings, + * and ignoring lines that contain only code or are blank). + * @treemap.warnOn lowValues + * @metricType file + * @metricAggregate avg sum max + * @precision very-high + * @id py/lines-of-comments-in-files + */ +import python + +from Module m, int n +where n = m.getMetrics().getNumberOfLinesOfComments() + m.getMetrics().getNumberOfLinesOfDocStrings() +select m, n +order by n desc diff --git a/python/ql/src/Metrics/FLinesOfDuplicatedCode.qhelp b/python/ql/src/Metrics/FLinesOfDuplicatedCode.qhelp new file mode 100644 index 00000000000..30a98df0cee --- /dev/null +++ b/python/ql/src/Metrics/FLinesOfDuplicatedCode.qhelp @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/python/ql/src/Metrics/FLinesOfDuplicatedCode.ql b/python/ql/src/Metrics/FLinesOfDuplicatedCode.ql new file mode 100644 index 00000000000..ac8e0a3a25c --- /dev/null +++ b/python/ql/src/Metrics/FLinesOfDuplicatedCode.ql @@ -0,0 +1,26 @@ +/** + * @name Duplicated lines in files + * @description The number of lines in a file, including code, comment and whitespace lines, + * which are duplicated in at least one other place. 
+ * @kind treemap + * @treemap.warnOn highValues + * @metricType file + * @metricAggregate avg sum max + * @precision high + * @tags testability + * @id py/duplicated-lines-in-files + */ +import python +import external.CodeDuplication + +from File f, int n + +where n = count(int line | + exists(DuplicateBlock d | d.sourceFile() = f | + line in [d.sourceStartLine()..d.sourceEndLine()] and + not whitelistedLineForDuplication(f, line) + ) +) + +select f, n +order by n desc diff --git a/python/ql/src/Metrics/FLinesOfSimilarCode.qhelp b/python/ql/src/Metrics/FLinesOfSimilarCode.qhelp new file mode 100644 index 00000000000..fd3aeb3bc0b --- /dev/null +++ b/python/ql/src/Metrics/FLinesOfSimilarCode.qhelp @@ -0,0 +1,31 @@ + + + +

    +A file that contains many lines that are similar to other code within the code base is +problematic for the same reasons as a file that contains a lot of (exactly) +duplicated code. +

    + +
    + + + + +

    +Refactor similar code snippets by extracting common functionality into functions +that can be reused across modules. +

    + +
    + + + +
  • Wikipedia: Duplicate code.
  • +
  • M. Fowler, Refactoring. Addison-Wesley, 1999.
  • + + +
    +
    diff --git a/python/ql/src/Metrics/FLinesOfSimilarCode.ql b/python/ql/src/Metrics/FLinesOfSimilarCode.ql new file mode 100644 index 00000000000..e78fe52959b --- /dev/null +++ b/python/ql/src/Metrics/FLinesOfSimilarCode.ql @@ -0,0 +1,26 @@ +/** + * @name Similar lines in files + * @description The number of lines in a file, including code, comment and whitespace lines, + * which are similar in at least one other place. + * @kind treemap + * @treemap.warnOn highValues + * @metricType file + * @metricAggregate avg sum max + * @precision high + * @tags testability + * @id py/similar-lines-in-files + */ +import python +import external.CodeDuplication + +from File f, int n + +where n = count(int line | + exists(SimilarBlock d | d.sourceFile() = f | + line in [d.sourceStartLine()..d.sourceEndLine()] and + not whitelistedLineForDuplication(f, line) + ) +) + +select f, n +order by n desc diff --git a/python/ql/src/Metrics/FNumberOfTests.qhelp b/python/ql/src/Metrics/FNumberOfTests.qhelp new file mode 100644 index 00000000000..be632542b74 --- /dev/null +++ b/python/ql/src/Metrics/FNumberOfTests.qhelp @@ -0,0 +1,52 @@ + + + +

    + This metric measures the number of tests below this location in the tree. + At a file level, this would just be the number of tests in the file. +

    + +

    + A function or method is considered to be a "test" if one of the major + testing frameworks would invoke it as part of a test run. + Recognized frameworks include unittest, pytest, doctest and nose. +

    + +

    + In general, having many test cases is a good thing rather than a bad + thing. However, at the file level, tests should typically be grouped + by the functionality they relate to, which makes a file with an + exceptionally high number of tests a strong candidate for splitting + up. At a higher level, this metric makes it possible to compare the + number of tests in different components, potentially flagging + functionality that is comparatively under-tested. +

    +
    + +

    + Since it is typically not a problem to have too many tests, this + metric is usually included for the purposes of collecting + information, rather than finding problematic areas in the code. That + said, it is usually a good idea to avoid an excessive number + of tests in a single file, and to maintain a broadly comparable + level of testing across components. +

    + +

    + When assessing the thoroughness of a code base's test suite, the number + of tests provides only part of the story. Test coverage + statistics allow a more detailed examination of which parts of the + code deserve improvements in this area. +

    +
    + + +
  • Python Standard Library: unittest.
  • +
  • Python Standard Library: doctest.
  • +
  • http://pytest.org.
  • +
  • Read the docs: http://nose.readthedocs.org/en/latest.
  • + +
    +
    diff --git a/python/ql/src/Metrics/FNumberOfTests.ql b/python/ql/src/Metrics/FNumberOfTests.ql new file mode 100644 index 00000000000..1cc914a0d55 --- /dev/null +++ b/python/ql/src/Metrics/FNumberOfTests.ql @@ -0,0 +1,18 @@ +/** + * @name Number of tests + * @description The number of test methods defined in a module + * @kind treemap + * @treemap.warnOn lowValues + * @metricType file + * @metricAggregate avg sum max + * @precision medium + * @precision very-high + * @id py/tests-in-files + */ +import python +import semmle.python.filters.Tests + +from Module m, int n +where n = strictcount(Test test | test.getEnclosingModule() = m) +select m.getFile(), n +order by n desc diff --git a/python/ql/src/Metrics/FunctionNumberOfCalls.qhelp b/python/ql/src/Metrics/FunctionNumberOfCalls.qhelp new file mode 100644 index 00000000000..a348ddf92ba --- /dev/null +++ b/python/ql/src/Metrics/FunctionNumberOfCalls.qhelp @@ -0,0 +1,71 @@ + + + +

    If the number of calls made by a function (or method) to other functions is high, +the function can be difficult to +understand, because you have to read through all the functions that it calls +to fully understand what it does. There are various reasons why +a function may make a high number of calls, including: +

    + +
      +
    • +The function is simply too large in general. +
    • + +
    • +The function has too many responsibilities (see [Martin]). +
    • + +
    • +The function spends all of its time delegating rather than doing any work itself. +
    • +
    + +
    + + +

    +The appropriate action depends on the reason why the function +makes a high number of calls: +

    + +
      +
    • +If the function is too large, you should refactor it into multiple smaller +functions, using the 'Extract Method' refactoring from [Fowler], for example. +
    • + +
    • +If the function is taking on too many responsibilities, a new layer of functions +can be introduced below the top-level function, each of which can do some of the +original work. The top-level function then only needs to delegate to a much +smaller number of functions, which themselves delegate to the functions lower down. +
    • + +
    • +If the function spends all of its time delegating, some of the work that is done by the +subsidiary functions can be moved into the top-level function, and the subsidiary +functions can be removed. This is the refactoring called 'Inline +Method' in [Fowler]. +
    • +
    + + + +
    + + + +
  • +M. Fowler, Refactoring. Addison-Wesley, 1999. +
  • +
  • +Wikipedia: The Single Responsibility Principle. +
  • + + +
    +
    diff --git a/python/ql/src/Metrics/FunctionNumberOfCalls.ql b/python/ql/src/Metrics/FunctionNumberOfCalls.ql new file mode 100644 index 00000000000..0dd5050214a --- /dev/null +++ b/python/ql/src/Metrics/FunctionNumberOfCalls.ql @@ -0,0 +1,16 @@ +/** + * @name Number of calls + * @description The total number of calls in a function. + * @kind treemap + * @id py/number-of-calls-per-function + * @treemap.warnOn highValues + * @metricType callable + * @metricAggregate avg max + */ + +import python + + +from FunctionMetrics func +select func, func.getNumberOfCalls() as n +order by n desc diff --git a/python/ql/src/Metrics/FunctionStatementNestingDepth.py b/python/ql/src/Metrics/FunctionStatementNestingDepth.py new file mode 100644 index 00000000000..a66e8621297 --- /dev/null +++ b/python/ql/src/Metrics/FunctionStatementNestingDepth.py @@ -0,0 +1,6 @@ +def print_character_codes_bad(strings): + if strings is not None: + for s in strings: + if s is not None: + for c in s: + print(c + '=' + ord(c)) \ No newline at end of file diff --git a/python/ql/src/Metrics/FunctionStatementNestingDepth.qhelp b/python/ql/src/Metrics/FunctionStatementNestingDepth.qhelp new file mode 100644 index 00000000000..3017a6fa018 --- /dev/null +++ b/python/ql/src/Metrics/FunctionStatementNestingDepth.qhelp @@ -0,0 +1,78 @@ + + + +

    +A method that contains a high level of nesting can be very difficult to understand. As noted in +[McConnell], the human brain cannot easily handle more than three levels of nested if +statements.

    + +
    + + +

    +Extract the control flow into a separate generator and use that to control iteration.

    + +

    +Use early exits to move nested statements out of conditions. For example: + +def func(x): + if x: + long_complex_block() + +can be replaced by + +def func(x): + if not x: + return + long_complex_block() + +

    + +

    +Extract nested statements into new functions, for example by using the 'Extract Method' refactoring +from [Fowler].

    + +

    +For more ways to reduce the level of nesting in a method, see [McConnell]. +

    + +

    +Furthermore, a method that has a high level of nesting often indicates that its design can be +improved in other ways, as well as dealing with the nesting problem itself. +

    + +
    + + +

    +In the following example, the code has four levels of nesting and is unnecessarily difficult to read. +

    + + + +

    +In the following modified example, three different approaches to reducing the nesting depth are shown. +The first, print_character_codes_early_exit, uses early exits, either return +or continue. +The second, print_character_codes_use_gen, extracts the control flow into a generator. +The third, print_character_codes_extracted, uses a separate function for the inner loop. +

    + + + +
    + + + +
  • +M. Fowler, Refactoring, pp. 89-95. Addison-Wesley, 1999. +
  • +
  • +S. McConnell, Code Complete, 2nd Edition, §19.4. Microsoft Press, 2004. +
  • + + +
    +
    diff --git a/python/ql/src/Metrics/FunctionStatementNestingDepth.ql b/python/ql/src/Metrics/FunctionStatementNestingDepth.ql new file mode 100644 index 00000000000..64a72fbd34d --- /dev/null +++ b/python/ql/src/Metrics/FunctionStatementNestingDepth.ql @@ -0,0 +1,18 @@ +/** + * @name Statement nesting depth + * @description The maximum nesting depth of statements in a function. + * @kind treemap + * @id py/statement-nesting-depth-per-function + * @treemap.warnOn highValues + * @metricType callable + * @metricAggregate avg max + * @tags maintainability + * complexity + */ + +import python + + +from FunctionMetrics func +select func, func.getStatementNestingDepth() as n +order by n desc diff --git a/python/ql/src/Metrics/FunctionStatementNestingDepthGood.py b/python/ql/src/Metrics/FunctionStatementNestingDepthGood.py new file mode 100644 index 00000000000..1f6a651343c --- /dev/null +++ b/python/ql/src/Metrics/FunctionStatementNestingDepthGood.py @@ -0,0 +1,36 @@ + +# Flatten nesting by using early exits +def print_character_codes_early_exit(strings): + if strings is None: + return + for s in strings: + if s is None: + continue + for c in s: + print(c + '=' + ord(c)) + + +#Move flow control into its own generator function +def print_character_codes_use_gen(strings): + for c in gen_chars_in_strings(strings): + print(c + '=' + ord(c)) + +def gen_chars_in_strings(strings): + if strings is None: + return + for s in strings: + if s is None: + continue + for c in s: + yield c + +#Move inner loop into its own function +def print_character_codes_in_string(string): + if string is not None: + for c in string: + print(c + '=' + ord(c)) + +def print_character_codes_extracted(strings): + if strings is not None: + for s in strings: + print_character_codes_in_string(s) \ No newline at end of file diff --git a/python/ql/src/Metrics/History/HChurn.qhelp b/python/ql/src/Metrics/History/HChurn.qhelp new file mode 100644 index 00000000000..2d248a0ac18 --- /dev/null +++ 
b/python/ql/src/Metrics/History/HChurn.qhelp @@ -0,0 +1,42 @@ + + + +

    +This metric measures the number of lines of text that have been added, deleted +or modified in files below this location in the tree. +

    + +

    +Code churn is known to be a good (if not the best) predictor of defects in a +code component (see e.g. [Nagappan] or [Khoshgoftaar]). The intuition is that +files, packages or projects that have experienced a disproportionately high +amount of churn for the amount of code involved may have been harder to write, +and are thus likely to contain more bugs. +

    + +
    + + +

    +It is a fact of life that some code is going to be changed more than the rest, +and little can be done to change this. However, bearing in mind code churn's +effectiveness as a defect predictor, code that has been repeatedly changed +should be subjected to vigorous testing and code review. +

    + +
    + + + +
  • +N. Nagappan et al. Change Bursts as Defect Predictors. In Proceedings of the 21st IEEE International Symposium on Software Reliability Engineering, 2010. +
  • +
  • +T. M. Khoshgoftaar and R. M. Szabo. Improving code churn predictions during the system test and maintenance phases. In ICSM '94, 1994, pp. 58-67. +
  • + + +
    +
    diff --git a/python/ql/src/Metrics/History/HChurn.ql b/python/ql/src/Metrics/History/HChurn.ql new file mode 100644 index 00000000000..437fae7460c --- /dev/null +++ b/python/ql/src/Metrics/History/HChurn.ql @@ -0,0 +1,17 @@ +/** + * @name Churned lines per file + * @description Number of churned lines per file, across the revision history in the database. + * @kind treemap + * @id py/historical-churn + * @treemap.warnOn highValues + * @metricType file + * @metricAggregate avg sum max + */ +import python +import external.VCS + +from Module m, int n +where n = sum(Commit entry, int churn | churn = entry.getRecentChurnForFile(m.getFile()) and not artificialChange(entry) | churn) + and exists(m.getMetrics().getNumberOfLinesOfCode()) +select m, n +order by n desc diff --git a/python/ql/src/Metrics/History/HLinesAdded.qhelp b/python/ql/src/Metrics/History/HLinesAdded.qhelp new file mode 100644 index 00000000000..fc812fc8357 --- /dev/null +++ b/python/ql/src/Metrics/History/HLinesAdded.qhelp @@ -0,0 +1,6 @@ + + + + diff --git a/python/ql/src/Metrics/History/HLinesAdded.ql b/python/ql/src/Metrics/History/HLinesAdded.ql new file mode 100644 index 00000000000..9eea8687118 --- /dev/null +++ b/python/ql/src/Metrics/History/HLinesAdded.ql @@ -0,0 +1,17 @@ +/** + * @name Added lines per file + * @description Number of added lines per file, across the revision history in the database. 
+ * @kind treemap + * @id py/historical-lines-added + * @treemap.warnOn highValues + * @metricType file + * @metricAggregate avg sum max + */ +import python +import external.VCS + +from Module m, int n +where n = sum(Commit entry, int churn | churn = entry.getRecentAdditionsForFile(m.getFile()) and not artificialChange(entry) | churn) + and exists(m.getMetrics().getNumberOfLinesOfCode()) +select m, n +order by n desc diff --git a/python/ql/src/Metrics/History/HLinesDeleted.qhelp b/python/ql/src/Metrics/History/HLinesDeleted.qhelp new file mode 100644 index 00000000000..fc812fc8357 --- /dev/null +++ b/python/ql/src/Metrics/History/HLinesDeleted.qhelp @@ -0,0 +1,6 @@ + + + + diff --git a/python/ql/src/Metrics/History/HLinesDeleted.ql b/python/ql/src/Metrics/History/HLinesDeleted.ql new file mode 100644 index 00000000000..905d15b524c --- /dev/null +++ b/python/ql/src/Metrics/History/HLinesDeleted.ql @@ -0,0 +1,17 @@ +/** + * @name Deleted lines per file + * @description Number of deleted lines per file, across the revision history in the database. + * @kind treemap + * @id py/historical-lines-deleted + * @treemap.warnOn highValues + * @metricType file + * @metricAggregate avg sum max + */ +import python +import external.VCS + +from Module m, int n +where n = sum(Commit entry, int churn | churn = entry.getRecentDeletionsForFile(m.getFile()) and not artificialChange(entry) | churn) + and exists(m.getMetrics().getNumberOfLinesOfCode()) +select m, n +order by n desc diff --git a/python/ql/src/Metrics/History/HNumberOfAuthors.qhelp b/python/ql/src/Metrics/History/HNumberOfAuthors.qhelp new file mode 100644 index 00000000000..00ec48b744f --- /dev/null +++ b/python/ql/src/Metrics/History/HNumberOfAuthors.qhelp @@ -0,0 +1,48 @@ + + + +

    +This metric measures the number of different authors (by examining the +version control history) +for files below this location in the tree. (This is a better version +of the metric that counts the number of different authors using Javadoc +tags.) +

    + +

    +Files that have been changed by a large number of different authors are +by definition the product of many minds. New authors working on a file +may be less familiar with the design and implementation of the code than +the original authors, which can be a potential source of bugs. Furthermore, +code that has been worked on by many people, if not carefully maintained, +often ends up lacking conceptual integrity. For both of these reasons, any +code that has been worked on by an unusually high number of different people +merits careful inspection in code reviews. +

    + +
    + + +

    +There is clearly no way to reduce the number of authors that have worked +on a file - it is impossible to rewrite history. However, files highlighted +by this metric should be given special attention in a code review, and may +ultimately be good candidates for refactoring/rewriting by an individual, +experienced developer. +

    + + + +
    + + + +
  • +F. P. Brooks Jr. The Mythical Man-Month, Chapter 4. Addison-Wesley, 1974. +
  • + + +
    +
    diff --git a/python/ql/src/Metrics/History/HNumberOfAuthors.ql b/python/ql/src/Metrics/History/HNumberOfAuthors.ql new file mode 100644 index 00000000000..fef769fc705 --- /dev/null +++ b/python/ql/src/Metrics/History/HNumberOfAuthors.ql @@ -0,0 +1,16 @@ +/** + * @name Number of authors + * @description Number of distinct authors for each file + * @kind treemap + * @id py/historical-number-of-authors + * @treemap.warnOn highValues + * @metricType file + * @metricAggregate avg min max + */ +import python +import external.VCS + +from Module m +where exists(m.getMetrics().getNumberOfLinesOfCode()) +select m, count(Author author | author.getAnEditedFile() = m.getFile()) + diff --git a/python/ql/src/Metrics/History/HNumberOfCoCommits.qhelp b/python/ql/src/Metrics/History/HNumberOfCoCommits.qhelp new file mode 100644 index 00000000000..6a767d57658 --- /dev/null +++ b/python/ql/src/Metrics/History/HNumberOfCoCommits.qhelp @@ -0,0 +1,51 @@ + + + +

    +This metric measures the average number of co-committed files for the files +below this location in the tree. +

    + +

    +A co-committed file is one that is committed at the same time as a given file. +For instance, if you commit files A, B and C together, then B and C would be +the co-committed files of A for that commit. The value of the metric for an +individual file is the average number of such co-committed files over all +commits. The value of the metric for a directory is the aggregation of these +averages - for instance, if we are using max as our aggregation +function, the value would be the maximum of the average number of co-commits +over all files in the directory. +

    + +

    +An unusually high value for this metric may indicate that the file in question +is too tightly-coupled to other files, and it is difficult to change it in +isolation. Alternatively, it may just be an indication that you commit lots of +unrelated changes at the same time. +

    + +
    + + +

    +Examine the file in question to see what the problem is. +

    + +
      +
    • +If the file is too tightly coupled, it will have high values for its afferent +and/or efferent coupling metrics, and you should apply the advice given there. +
    • + +
    • +If the file is not tightly coupled, but you find that you are committing lots +of unrelated changes at the same time, then you may want to revisit your commit +practices. +
    • +
    + + +
    +
    diff --git a/python/ql/src/Metrics/History/HNumberOfCoCommits.ql b/python/ql/src/Metrics/History/HNumberOfCoCommits.ql new file mode 100644 index 00000000000..81dbe8ba2da --- /dev/null +++ b/python/ql/src/Metrics/History/HNumberOfCoCommits.ql @@ -0,0 +1,20 @@ +/** + * @name Number of co-committed files + * @description The average number of other files that are touched whenever a file is affected by a commit + * @kind treemap + * @id py/historical-number-of-co-commits + * @treemap.warnOn highValues + * @metricType file + * @metricAggregate avg min max + */ +import python +import external.VCS + +int committedFiles(Commit commit) { + result = count(commit.getAnAffectedFile()) +} + +from Module m +where exists(m.getMetrics().getNumberOfLinesOfCode()) +select m, avg(Commit commit, int toAvg | (commit.getAnAffectedFile() = m.getFile()) and (toAvg = committedFiles(commit)-1) | toAvg) + diff --git a/python/ql/src/Metrics/History/HNumberOfCommits.qhelp b/python/ql/src/Metrics/History/HNumberOfCommits.qhelp new file mode 100644 index 00000000000..e54825e484d --- /dev/null +++ b/python/ql/src/Metrics/History/HNumberOfCommits.qhelp @@ -0,0 +1,15 @@ + + + +

    +This metric measures the total number of commits made to files +below this location in the tree. For an individual file, it measures the +number of commits that have affected that file. For a directory of files, it +measures the total number of commits affecting files below that +directory. +

    + +
    +
    diff --git a/python/ql/src/Metrics/History/HNumberOfCommits.ql b/python/ql/src/Metrics/History/HNumberOfCommits.ql new file mode 100644 index 00000000000..deca31e1444 --- /dev/null +++ b/python/ql/src/Metrics/History/HNumberOfCommits.ql @@ -0,0 +1,15 @@ +/** + * @name Number of commits + * @description Number of commits + * @kind treemap + * @id py/historical-number-of-commits + * @treemap.warnOn highValues + * @metricType commit + * @metricAggregate sum + */ +import python +import external.VCS + +from Commit c +where not artificialChange(c) +select c.getRevisionName(), 1 diff --git a/python/ql/src/Metrics/History/HNumberOfReCommits.qhelp b/python/ql/src/Metrics/History/HNumberOfReCommits.qhelp new file mode 100644 index 00000000000..37edc9aecf1 --- /dev/null +++ b/python/ql/src/Metrics/History/HNumberOfReCommits.qhelp @@ -0,0 +1,53 @@ + + + +

    +This metric measures the number of file re-commits that have occurred below +this location in the tree. A re-commit is taken to mean a commit to a file +that was touched less than five days ago. +

    + +

    +In a system that is being developed using a controlled change process (where +changes are not committed until they are in some sense 'complete'), re-commits +can be (but are not always) an indication that an initial change was not +successful and had to be revisited within a short time period. The intuition +is that the original change may have been difficult to get right, and hence +the code in the file may be more than usually defect-prone. The concept is +somewhat similar to that of 'change bursts', as described in [Nagappan]. +

    + +
    + + +

    +High numbers of re-commits can be addressed on two levels: preventative and +corrective. +

    + +
      +
    • +On the preventative side, a high number of re-commits may be an indication +that your code review process needs an overhaul. +
    • + +
    • +On the corrective side, code that has experienced a high number of re-commits +should be vigorously code reviewed and tested. +
    • +
    + + +
    + + + +
  • +N. Nagappan et al. Change Bursts as Defect Predictors. In Proceedings of the 21st IEEE International Symposium on Software Reliability Engineering, 2010. +
  • + + +
    +
    diff --git a/python/ql/src/Metrics/History/HNumberOfReCommits.ql b/python/ql/src/Metrics/History/HNumberOfReCommits.ql new file mode 100644 index 00000000000..f5831944aed --- /dev/null +++ b/python/ql/src/Metrics/History/HNumberOfReCommits.ql @@ -0,0 +1,29 @@ +/** + * @name Number of re-commits for each file + * @description A re-commit is taken to mean a commit to a file that was touched less than five days ago. + * @kind treemap + * @id py/historical-number-of-re-commits + * @treemap.warnOn highValues + * @metricType file + * @metricAggregate avg min max + */ +import python +import external.VCS + +predicate inRange(Commit first, Commit second) { + first.getAnAffectedFile() = second.getAnAffectedFile() and + first != second and + exists(int n | n = first.getDate().daysTo(second.getDate()) and + n >= 0 and n < 5) +} + +int recommitsForFile(File f) { + result = count(Commit recommit | + f = recommit.getAnAffectedFile() and + exists(Commit prev | inRange(prev, recommit))) +} + +from Module m +where exists(m.getMetrics().getNumberOfLinesOfCode()) +select m, recommitsForFile(m.getFile()) + diff --git a/python/ql/src/Metrics/History/HNumberOfRecentAuthors.ql b/python/ql/src/Metrics/History/HNumberOfRecentAuthors.ql new file mode 100644 index 00000000000..6ea84550f76 --- /dev/null +++ b/python/ql/src/Metrics/History/HNumberOfRecentAuthors.ql @@ -0,0 +1,16 @@ +/** + * @name Number of recent authors + * @description Number of distinct authors that have recently made changes + * @kind treemap + * @id py/historical-number-of-recent-authors + * @treemap.warnOn highValues + * @metricType file + * @metricAggregate avg min max + */ +import python +import external.VCS + +from Module m +where exists(m.getMetrics().getNumberOfLinesOfCode()) +select m, count(Author author | exists(Commit e | e = author.getACommit() and m.getFile() = e.getAnAffectedFile() and e.daysToNow() <= 180 and not artificialChange(e))) + diff --git 
a/python/ql/src/Metrics/History/HNumberOfRecentChangedFiles.ql b/python/ql/src/Metrics/History/HNumberOfRecentChangedFiles.ql new file mode 100644 index 00000000000..3f35a9cba77 --- /dev/null +++ b/python/ql/src/Metrics/History/HNumberOfRecentChangedFiles.ql @@ -0,0 +1,17 @@ +/** + * @name Recently changed files + * @description Number of files recently edited + * @kind treemap + * @id py/historical-number-of-recent-changed-files + * @treemap.warnOn highValues + * @metricType file + * @metricAggregate avg min max + */ +import python +import external.VCS + +from Module m +where exists(Commit e | e.getAnAffectedFile() = m.getFile() and e.daysToNow() <= 180 and not artificialChange(e)) + and exists(m.getMetrics().getNumberOfLinesOfCode()) +select m, 1 + diff --git a/python/ql/src/Metrics/History/HNumberOfRecentCommits.qhelp b/python/ql/src/Metrics/History/HNumberOfRecentCommits.qhelp new file mode 100644 index 00000000000..4860add6342 --- /dev/null +++ b/python/ql/src/Metrics/History/HNumberOfRecentCommits.qhelp @@ -0,0 +1,63 @@ + + + +

    +This metric measures the number of recent commits to files that have occurred +below this location in the tree. A recent commit is taken to mean a commit +that has occurred in the last 180 days. +

    + +

    +All code that has changed a great deal may be more than usually prone to +defects, but this is particularly true of code that has been changing +dramatically in the recent past, because it has not yet had a chance to be +properly field-tested in order to iron out the bugs. +

    + +
    + + +

    +There is more than one reason why a file may have been changing a lot +recently: +

    + +
      +
    • +The file may be part of a new subsystem that is being written. New code is +always going to change a lot in a short period of time, but it is important +to ensure that it is properly code reviewed and unit tested before integrating +it into a working product. +
    • + +
    • +The file may be being heavily refactored. Large refactorings are sometimes +essential, but they are also quite risky. You should write proper regression +tests before starting on a major refactoring, and check that they still pass +once you're done. +
    • + +
    • +The same bit of code may be being changed repeatedly because it is difficult +to get right. Aside from vigorous code reviewing and testing, it may be a good +idea to rethink the system design - if something is that hard +to get right (and it's not an inherently difficult concept), you might be making life unnecessarily hard for yourself and +risking introducing insidious defects. +
    • +
    + + + +
    + + + +
  • +N. Nagappan et al. Change Bursts as Defect Predictors. In Proceedings of the 21st IEEE International Symposium on Software Reliability Engineering, 2010. +
  • + + +
    +
    diff --git a/python/ql/src/Metrics/History/HNumberOfRecentCommits.ql b/python/ql/src/Metrics/History/HNumberOfRecentCommits.ql new file mode 100644 index 00000000000..e9e3b14538f --- /dev/null +++ b/python/ql/src/Metrics/History/HNumberOfRecentCommits.ql @@ -0,0 +1,16 @@ +/** + * @name Recent changes + * @description Number of recent commits + * @kind treemap + * @id py/historical-number-of-recent-commits + * @treemap.warnOn highValues + * @metricType commit + * @metricAggregate sum + */ +import python +import external.VCS + +from Commit c +where c.daysToNow() <= 180 and not artificialChange(c) +select c.getRevisionName(), 1 + diff --git a/python/ql/src/Metrics/Internal/CallableDisplayStrings.ql b/python/ql/src/Metrics/Internal/CallableDisplayStrings.ql new file mode 100644 index 00000000000..47a6f20db3e --- /dev/null +++ b/python/ql/src/Metrics/Internal/CallableDisplayStrings.ql @@ -0,0 +1,10 @@ +/** + * @name Display strings of callables + * @kind display-string + * @id py/function-display-strings + * @metricType callable + */ +import python + +from Function f +select f, "Function " + f.getName() diff --git a/python/ql/src/Metrics/Internal/CallableExtents.ql b/python/ql/src/Metrics/Internal/CallableExtents.ql new file mode 100644 index 00000000000..7e2d0baedfa --- /dev/null +++ b/python/ql/src/Metrics/Internal/CallableExtents.ql @@ -0,0 +1,11 @@ +/** + * @name Extents of callables + * @kind extent + * @id py/function-extents + * @metricType callable + */ +import python +import Extents + +from RangeFunction f +select f.getLocation(), f diff --git a/python/ql/src/Metrics/Internal/CallableSourceLinks.ql b/python/ql/src/Metrics/Internal/CallableSourceLinks.ql new file mode 100644 index 00000000000..41278a18684 --- /dev/null +++ b/python/ql/src/Metrics/Internal/CallableSourceLinks.ql @@ -0,0 +1,10 @@ +/** + * @name Source links of callables + * @kind source-link + * @id py/function-source-links + * @metricType callable + */ +import python + +from Function f 
+select f, f.getLocation().getFile() diff --git a/python/ql/src/Metrics/Internal/ClassDisplayStrings.ql b/python/ql/src/Metrics/Internal/ClassDisplayStrings.ql new file mode 100644 index 00000000000..612abfebec7 --- /dev/null +++ b/python/ql/src/Metrics/Internal/ClassDisplayStrings.ql @@ -0,0 +1,10 @@ +/** + * @name Display strings of classes + * @kind display-string + * @id py/lgtm/class-display-strings + * @metricType reftype + */ +import python + +from Class c +select c, c.getName() diff --git a/python/ql/src/Metrics/Internal/ClassExtents.ql b/python/ql/src/Metrics/Internal/ClassExtents.ql new file mode 100644 index 00000000000..cc5fd7e9390 --- /dev/null +++ b/python/ql/src/Metrics/Internal/ClassExtents.ql @@ -0,0 +1,11 @@ +/** + * @name Extents of classes + * @kind extent + * @id py/class-extents + * @metricType reftype + */ +import python +import Extents + +from RangeClass c +select c.getLocation(), c diff --git a/python/ql/src/Metrics/Internal/ClassSourceLinks.ql b/python/ql/src/Metrics/Internal/ClassSourceLinks.ql new file mode 100644 index 00000000000..089596a0d40 --- /dev/null +++ b/python/ql/src/Metrics/Internal/ClassSourceLinks.ql @@ -0,0 +1,10 @@ +/** + * @name Source links of classes + * @kind source-link + * @id py/class-source-links + * @metricType reftype + */ +import python + +from Class c +select c, c.getLocation().getFile() diff --git a/python/ql/src/Metrics/Internal/Extents.qll b/python/ql/src/Metrics/Internal/Extents.qll new file mode 100644 index 00000000000..283f1fb7c30 --- /dev/null +++ b/python/ql/src/Metrics/Internal/Extents.qll @@ -0,0 +1,33 @@ +import python + +/* + * When this library is imported, the 'hasLocationInfo' predicate of + * Functions and is overridden to specify their entire range + * instead of just the range of their name. The latter can still be + * obtained by invoking the getLocation() predicate. 
+ * + * The full ranges are required for the purpose of associating an alert + * with an individual Function as opposed to a whole File. + */ + +/** + * A Function whose 'hasLocationInfo' is overridden to specify its entire range + * including the body (if any), as opposed to the location of its name only. + */ +class RangeFunction extends Function { + predicate hasLocationInfo(string path, int sl, int sc, int el, int ec) { + super.getLocation().hasLocationInfo(path, sl, sc, _, _) + and this.getBody().getLastItem().getLocation().hasLocationInfo(path, _, _, el, ec) + } +} + +/** + * A Class whose 'hasLocationInfo' is overridden to specify its entire range + * including the body (if any), as opposed to the location of its name only. + */ +class RangeClass extends Class { + predicate hasLocationInfo(string path, int sl, int sc, int el, int ec) { + super.getLocation().hasLocationInfo(path, sl, sc, _, _) + and this.getBody().getLastItem().getLocation().hasLocationInfo(path, _, _, el, ec) + } +} \ No newline at end of file diff --git a/python/ql/src/Metrics/LackofCohesionInMethodsCK.qhelp b/python/ql/src/Metrics/LackofCohesionInMethodsCK.qhelp new file mode 100644 index 00000000000..83fc3a05dd4 --- /dev/null +++ b/python/ql/src/Metrics/LackofCohesionInMethodsCK.qhelp @@ -0,0 +1,68 @@ + + + +

    +A cohesive class is one in which most methods access the same fields. A class that +lacks cohesion is usually one that has multiple responsibilities. +

    + +

    +Various measures of lack of cohesion have been proposed. The Chidamber and Kemerer +version of lack of cohesion inspects pairs of methods. If there are many pairs that +access the same data, the class is cohesive. If there are many pairs that do not access +any common data, the class is not cohesive. More precisely, if:

    + +
      +
    • n1 is the number of pairs of distinct methods in a class that + do not have at least one commonly-accessed field, and
    • +
    • n2 is the number of pairs of distinct methods in a class that + do have at least one commonly-accessed field,
    • +
    + +

    the lack of cohesion measure (LCOM) can be defined as: +

    + +

    +LCOM = max((n1 - n2) / 2, 0) +

    + +

    +High values of LCOM indicate a significant lack of cohesion. As a rough +indication, an LCOM of 500 or more may give you cause for concern. +

    + +
    + + +

    +Classes generally lack cohesion because they have more responsibilities +than they should (see [Martin]). In general, the solution is to identify each +of the different responsibilities that the class has, and split them +into multiple classes, using the 'Extract Class' refactoring from [Fowler], for +example. +

    + + + +
    + + + +
  • +S. R. Chidamber and C. F. Kemerer, A metrics suite for object-oriented design. IEEE Transactions on Software Engineering, 20(6):476-493, 1994. +
  • +
  • +M. Fowler, Refactoring, pp. 65, 122-5. Addison-Wesley, 1999. +
  • +
  • +Wikipedia: The Single Responsibility Principle. +
  • +
  • +O. de Moor et al, Keynote Address: .QL for Source Code Analysis. Proceedings of the 7th IEEE International Working Conference on Source Code Analysis and Manipulation, 2007. +
  • + + +
    +
    diff --git a/python/ql/src/Metrics/LackofCohesionInMethodsCK.ql b/python/ql/src/Metrics/LackofCohesionInMethodsCK.ql new file mode 100644 index 00000000000..c950cd4bac4 --- /dev/null +++ b/python/ql/src/Metrics/LackofCohesionInMethodsCK.ql @@ -0,0 +1,16 @@ +/** + * @name Lack of Cohesion in Methods (CK) + * @description Lack of cohesion in the methods of a class, as defined by Chidamber and Kemerer. + * @kind treemap + * @id py/lack-of-cohesion-chidamber-kemerer + * @treemap.warnOn highValues + * @metricType reftype + * @metricAggregate avg max + */ + +import python + + +from ClassMetrics cls +select cls, cls.getLackOfCohesionCK() as n +order by n desc diff --git a/python/ql/src/Metrics/LackofCohesionInMethodsHM.qhelp b/python/ql/src/Metrics/LackofCohesionInMethodsHM.qhelp new file mode 100644 index 00000000000..e2c492e6747 --- /dev/null +++ b/python/ql/src/Metrics/LackofCohesionInMethodsHM.qhelp @@ -0,0 +1,56 @@ + + + +

    +A cohesive class is one in which most methods access the same fields. A class that +lacks cohesion is usually one that has multiple responsibilities. +

    + +

    +Various measures of lack of cohesion have been proposed. A measure proposed by Hitz and Montazeri +counts the number of strongly connected components, that is disjoint subgraphs, +in the graph of method and attribute dependencies. +This can be thought of as the number of possible classes that a single class could be split into. +

    + +

    +Values of LCOM above 1 indicate a lack of cohesion in that there are several +disjoint subgraphs in a graph of intra-class dependencies. +

    + +
    + + +

    +Classes generally lack cohesion because they have more responsibilities +than they should (see [Martin]). In general, the solution is to identify each +of the different responsibilities that the class has, and split them +into multiple classes, using the 'Extract Class' refactoring from [Fowler], for +example. +

    + + + +
    + + + +
  • + + Measuring coupling and cohesion in object-oriented systems by Martin Hitz, Behzad Montazeri (1995). + Proceedings of International Symposium on Applied Corporate Computing
  • +
  • +M. Fowler, Refactoring, pp. 65, 122-5. Addison-Wesley, 1999. +
  • +
  • +Wikipedia: The Single Responsibility Principle. +
  • +
  • +Wikipedia: Strongly connected component. +
  • + + +
    +
    diff --git a/python/ql/src/Metrics/LackofCohesionInMethodsHM.ql b/python/ql/src/Metrics/LackofCohesionInMethodsHM.ql new file mode 100644 index 00000000000..0a315c44ea7 --- /dev/null +++ b/python/ql/src/Metrics/LackofCohesionInMethodsHM.ql @@ -0,0 +1,16 @@ +/** + * @name Lack of Cohesion in a Class (HM) + * @description Lack of cohesion of a class, as defined by Hitz and Montazeri. + * @kind treemap + * @id py/lack-of-cohesion-hitz-montazeri + * @treemap.warnOn highValues + * @metricType reftype + * @metricAggregate avg max + */ + +import python + + +from ClassMetrics cls +select cls, cls.getLackOfCohesionHM() as n +order by n desc diff --git a/python/ql/src/Metrics/ModuleAfferentCoupling.qhelp b/python/ql/src/Metrics/ModuleAfferentCoupling.qhelp new file mode 100644 index 00000000000..2466791bd7e --- /dev/null +++ b/python/ql/src/Metrics/ModuleAfferentCoupling.qhelp @@ -0,0 +1,72 @@ + + + +

    +This metric measures the number of incoming dependencies for each +module, that is, the number of other modules that depend on it. +

    + +

    +Modules that are depended upon by many other modules typically require a lot of +effort to change, because changing them will force their dependents to change +as well. This is not necessarily a bad thing -- indeed, most systems will have +some such modules (one example might be an I/O module). However, modules with a high number +of incoming dependencies and a high number of outgoing dependencies are hard to maintain. +A module with both high afferent coupling and high efferent coupling can be problematic +because, on the one hand, it is hard to change (high afferent coupling), yet on the other it +has many reasons to change (high efferent coupling). This contradiction yields code that is very +hard to maintain or test. +

    + +

    +Conversely, some modules may only be depended on by very few other modules. Again, +this is not necessarily a problem -- we would expect, for example, that the +top-level modules of a system would meet this criterion. When lower-level +modules have very few incoming dependencies, however, it can be an indication +that a module is not pulling its weight. In extreme cases, modules may even +have an afferent coupling of 0, indicating that they are dead +code. +

    + +
    + + +

    +It is unwise to refactor a module based purely on its high or low number of +incoming dependencies -- a module's afferent coupling value only makes sense +in the context of its role in the system as a whole. However, when combined +with other metrics such as efferent coupling, it is possible to make some +general recommendations: +

    + +
      +
    • +Modules with high numbers of incoming and outgoing dependencies +are prime candidates for refactoring (although this +will not always be easy). The general strategy is to split the module into +smaller modules that each have fewer responsibilities, and refactor the code +that previously used that module accordingly. +
    • + + +
    • +Modules that have an afferent coupling of 0 may be dead code -- +in this situation, they can often be deleted. +
    • +
    + + + +
    + + + +
  • +M. Fowler. Refactoring. Addison-Wesley, 1999. +
  • + + +
    +
    diff --git a/python/ql/src/Metrics/ModuleAfferentCoupling.ql b/python/ql/src/Metrics/ModuleAfferentCoupling.ql new file mode 100644 index 00000000000..f8f5e0c4208 --- /dev/null +++ b/python/ql/src/Metrics/ModuleAfferentCoupling.ql @@ -0,0 +1,18 @@ +/** + * @name Incoming module dependencies + * @description The number of modules that depend on a module. + * @kind treemap + * @id py/afferent-coupling-per-file + * @treemap.warnOn highValues + * @metricType file + * @metricAggregate avg max + * @tags maintainability + * modularity + */ + +import python + +from ModuleMetrics m +select m, m.getAfferentCoupling() as n +order by n desc + diff --git a/python/ql/src/Metrics/ModuleEfferentCoupling.qhelp b/python/ql/src/Metrics/ModuleEfferentCoupling.qhelp new file mode 100644 index 00000000000..15cf254efac --- /dev/null +++ b/python/ql/src/Metrics/ModuleEfferentCoupling.qhelp @@ -0,0 +1,40 @@ + + + +

    +Efferent coupling is the number of outgoing dependencies for each module. In other words, it is the +number of other modules on which each module depends. +

    + +

    +A module that depends on many other modules is quite brittle, because if any of +its dependencies change, the module itself may have to change as well. Furthermore, the +reason for the high number of dependencies is often that different parts of +the module depend on different groups of other modules, so it is common to +find that modules with high efferent coupling also lack cohesion. +

    + +
    + + +

    +You can reduce efferent coupling by splitting up a module so that each part depends on fewer modules. +

    + + +
    + + + +
  • +IBM developerWorks: Evolutionary architecture and emergent design: Emergent design through metrics. +
  • +
  • +R. Martin, Agile Software Development: Principles, Patterns and Practices. Pearson, 2011. +
  • + + +
    +
    diff --git a/python/ql/src/Metrics/ModuleEfferentCoupling.ql b/python/ql/src/Metrics/ModuleEfferentCoupling.ql new file mode 100644 index 00000000000..be32b8bc561 --- /dev/null +++ b/python/ql/src/Metrics/ModuleEfferentCoupling.ql @@ -0,0 +1,18 @@ +/** + * @name Outgoing module dependencies + * @description The number of modules that this module depends upon. + * @kind treemap + * @id py/efferent-coupling-per-file + * @treemap.warnOn highValues + * @metricType file + * @metricAggregate avg max + * @tags testability + * modularity + */ + +import python + +from ModuleMetrics m +select m, m.getEfferentCoupling() as n +order by n desc + diff --git a/python/ql/src/Metrics/NumberOfParameters1.py b/python/ql/src/Metrics/NumberOfParameters1.py new file mode 100644 index 00000000000..b0343da8853 --- /dev/null +++ b/python/ql/src/Metrics/NumberOfParameters1.py @@ -0,0 +1,5 @@ +def print_annotation(message, line, offset, length): + print("Message: " + message) + print("Line: " + line) + print("Offset: " + offset) + print("Length: " + length) \ No newline at end of file diff --git a/python/ql/src/Metrics/NumberOfParameters1Good.py b/python/ql/src/Metrics/NumberOfParameters1Good.py new file mode 100644 index 00000000000..6788b12e43d --- /dev/null +++ b/python/ql/src/Metrics/NumberOfParameters1Good.py @@ -0,0 +1,9 @@ +class Annotation: + #... 
+ pass + +def print_annotation(annotation): + print("Message: " + annotation.message) + print("Line: " + annotation.line) + print("Offset: " + annotation.offset) + print("Length: " + annotation.length) diff --git a/python/ql/src/Metrics/NumberOfParameters2.py b/python/ql/src/Metrics/NumberOfParameters2.py new file mode 100644 index 00000000000..8429b98dd15 --- /dev/null +++ b/python/ql/src/Metrics/NumberOfParameters2.py @@ -0,0 +1,13 @@ +def print_membership(fellows, members, associates, students): + for f in fellows: + print(f) + for m in members: + print(m) + for a in associates: + print(a) + for s in students: + print(s) + +def print_records(): + #... + print_membership(fellows, members, associates, students) \ No newline at end of file diff --git a/python/ql/src/Metrics/NumberOfParameters2Good.py b/python/ql/src/Metrics/NumberOfParameters2Good.py new file mode 100644 index 00000000000..82300c467ea --- /dev/null +++ b/python/ql/src/Metrics/NumberOfParameters2Good.py @@ -0,0 +1,13 @@ + +def print_fellows(fellows): + for f in fellows: + print(f) + +#... + +def print_records(): + #... + print_fellows(fellows) + print_members(members) + print_associates(associates) + print_students(students) \ No newline at end of file diff --git a/python/ql/src/Metrics/NumberOfParametersWithoutDefault.qhelp b/python/ql/src/Metrics/NumberOfParametersWithoutDefault.qhelp new file mode 100644 index 00000000000..cfeb5fc41be --- /dev/null +++ b/python/ql/src/Metrics/NumberOfParametersWithoutDefault.qhelp @@ -0,0 +1,92 @@ + + + + +

    +A function (or method) that uses a high number of parameters makes maintenance more difficult: +

    + +
      +
    • It is difficult to write a call to the function, because the programmer must know how to +supply an appropriate value for each parameter.
    • + +
    • It is externally difficult to understand, because calls +to the function are longer than a single line of code.
    • + +
    • It can be internally difficult to understand, because it +has so many dependencies.
    • +
    + +
    + + +

    +Restrict the number of parameters for a function, according to the reason for the high number: +

    + +
      +
    • Several of the parameters are logically related, but are +passed into the function separately. The parameters that are logically related should be grouped together +(see the 'Introduce Parameter Object' refactoring on pp. 238-242 of [Fowler]).
    • + +
    • The function has too many responsibilities. It should be broken into multiple functions (see the +'Extract Method' refactoring on pp. 89-95 of [Fowler]), and each new function should be passed +a subset of the original parameters.
    • + +
    • The function has redundant parameters that are not used. The two main reasons for this are: +(1) parameters were added for future extensibility but are never used; (2) the body of the function was changed +so that it no longer uses certain parameters, but the function signature was not +correspondingly updated. In both cases, the theoretically correct solution is to delete the unused +parameters (see the 'Remove Parameter' refactoring on pp. 223-225 of [Fowler]), although you must do +this cautiously if the function is part of a published interface.
    • +
    + +

    When a function is part of a published interface, one possible solution is to add a new, wrapper +function to the interface that has a tidier signature. Alternatively, you can publish a new version of +the interface that has a better design. Clearly, however, neither of these solutions is ideal, +so you should take care to design interfaces the right way from the start.

    + +

    The practice of adding parameters for future extensibility is especially +bad. It is confusing to other programmers, who are uncertain what values they should pass +in for these unnecessary parameters, and it adds unused code that is potentially difficult to remove +later.

    + +
    +
    + +

    In the following example, although the parameters are logically related, they are passed into the +print_annotation function separately.

    + + + +

    In the following modified example, the print_annotation function is simplified by logically grouping +the related parameters into a single class. +An instance of the class can then be passed into the function instead, as shown below. +

    + + + +

    In the following example, the print_membership function has too many responsibilities, +and so needs to be passed four arguments.

    + + + +

    In the following modified example, print_membership has been broken into four functions. +(For brevity, only one function is shown.) As a result, each new function needs to be passed only one +of the original four arguments.

    + + + +
    + + + +
  • +M. Fowler, Refactoring. Addison-Wesley, 1999. +
  • + + +
    +
    diff --git a/python/ql/src/Metrics/NumberOfParametersWithoutDefault.ql b/python/ql/src/Metrics/NumberOfParametersWithoutDefault.ql new file mode 100644 index 00000000000..4ddd2ba1f0e --- /dev/null +++ b/python/ql/src/Metrics/NumberOfParametersWithoutDefault.ql @@ -0,0 +1,18 @@ +/** + * @name Number of parameters without defaults + * @description The number of parameters of a function that do not have default values defined. + * @kind treemap + * @id py/number-of-parameters-without-default-per-function + * @treemap.warnOn highValues + * @metricType callable + * @metricAggregate avg max + * @tags testability + * complexity + */ + +import python + + +from FunctionMetrics func +select func, func.getNumberOfParametersWithoutDefault() as n +order by n desc diff --git a/python/ql/src/Metrics/NumberOfStatements.qhelp b/python/ql/src/Metrics/NumberOfStatements.qhelp new file mode 100644 index 00000000000..e33313c91f6 --- /dev/null +++ b/python/ql/src/Metrics/NumberOfStatements.qhelp @@ -0,0 +1,65 @@ + + + +

    +This metric measures the number of statements that occur in a module. +

    + +

    +If there are too many statements in a module, it is generally +for one of two reasons: +

    + +
      +
    • +One or more individual classes or functions of the module contain too many statements, +making them hard to understand, difficult to check and a common source of defects +(particularly towards the end of the class or function, since few people ever read that +far). These entities typically lack cohesion because they are trying to do too many things. +
    • + +
    • +The module contains too many functions or classes, which generally indicates that it is +trying to do too much, either at the interface or implementation level or +both. It can be difficult for readers to understand because there is a +confusing list of operations. +
    • +
    + +
    + + +

    +As described above, modules reported as violations by this rule contain one +or more classes or functions with too many statements, or the module itself contains +too many classes or functions.

    + +
      +
    • +Individual classes or functions of the module that contain too many statements +should be refactored into multiple, smaller parts. As a rough +guide, functions should be able to fit on a single screen or side of A4. Anything +longer than that increases the risk of introducing new defects during routine code changes. +
    • + +
    • +Modules that contain too many functions or classes often lack cohesion and are +prime candidates for refactoring. +
    • +
    + + +
    + + + +
  • +M. Fowler. Refactoring. Addison-Wesley, 1999. +
  • + + + +
    +
    diff --git a/python/ql/src/Metrics/NumberOfStatements.ql b/python/ql/src/Metrics/NumberOfStatements.ql new file mode 100644 index 00000000000..66263f68a84 --- /dev/null +++ b/python/ql/src/Metrics/NumberOfStatements.ql @@ -0,0 +1,15 @@ +/** + * @name Number of statements + * @description The number of statements in this module + * @kind treemap + * @id py/number-of-statements-per-file + * @treemap.warnOn highValues + * @metricType file + * @metricAggregate avg sum max + */ +import python + +from Module m, int n +where n = count(Stmt s | s.getEnclosingModule() = m) +select m, n +order by n desc diff --git a/python/ql/src/Metrics/TransitiveImports.qhelp b/python/ql/src/Metrics/TransitiveImports.qhelp new file mode 100644 index 00000000000..7946423547f --- /dev/null +++ b/python/ql/src/Metrics/TransitiveImports.qhelp @@ -0,0 +1,25 @@ + + + +

    This metric measures the number of modules that are imported by each module (file) - either directly +by an import statement or indirectly (that is, imported by a module that is imported). Modules that +import many other modules often have too many responsibilities and are not well-focused. +This makes it difficult to understand and maintain the module. +

    + +
    + +

    Split and/or refactor files with too many responsibilities to create modules with a single, +well-defined role.

    + +
    + + +
  • Python Language Reference: The import statement. +
  • M. Fowler, Refactoring. Addison-Wesley, 1999.
  • +
  • Wikipedia: Code refactoring.
  • + +
    +
    diff --git a/python/ql/src/Metrics/TransitiveImports.ql b/python/ql/src/Metrics/TransitiveImports.ql new file mode 100644 index 00000000000..11fe7ee8f7e --- /dev/null +++ b/python/ql/src/Metrics/TransitiveImports.ql @@ -0,0 +1,16 @@ +/** + * @name Indirect imports per file + * @description The number of modules imported by this file - either directly by an import statement, + * or indirectly (by being imported by an imported module). + * @kind treemap + * @id py/transitive-imports-per-file + * @treemap.warnOn highValues + * @metricType file + * @metricAggregate avg max + * @tags modularity + */ +import python + +from ModuleObject m, int n +where n = count(ModuleObject imp | imp = m.getAnImportedModule+() and imp != m) +select m.getModule(), n \ No newline at end of file diff --git a/python/ql/src/Resources/FileNotAlwaysClosed.py b/python/ql/src/Resources/FileNotAlwaysClosed.py new file mode 100644 index 00000000000..5f5f10345c7 --- /dev/null +++ b/python/ql/src/Resources/FileNotAlwaysClosed.py @@ -0,0 +1,15 @@ +f = open("filename") + ... # Actions to perform on file +f.close() +# File only closed if actions are completed successfully + +with open("filename") as f: + ...# Actions to perform on file +# File always closed + +f = open("filename") +try: + ... # Actions to perform on file +finally: + f.close() +# File always closed diff --git a/python/ql/src/Resources/FileNotAlwaysClosed.qhelp b/python/ql/src/Resources/FileNotAlwaysClosed.qhelp new file mode 100644 index 00000000000..71073caa47b --- /dev/null +++ b/python/ql/src/Resources/FileNotAlwaysClosed.qhelp @@ -0,0 +1,40 @@ + + + + +

    If a file is opened then it should always be closed again, even if an +exception is raised. +Failing to ensure that all files are closed may result in failure due to too +many open files.

    + +
    + + +

    Ensure that if you open a file it is always closed on exiting the method. +Wrap the code between the open() and close() +functions in a with statement or use a try...finally +statement. Using a with statement is preferred as it is shorter +and more readable.

    + +
    + +

    The following code shows examples of different ways of closing a file. In the first example, the +file is closed only if the method is exited successfully. In the other examples, the file is always +closed on exiting the method.

    + + + +
    + + + +
  • Python Language Reference: The with statement, + The try statement.
  • +
  • Python PEP 343: The "with" Statement.
  • + + + +
    +
    diff --git a/python/ql/src/Resources/FileNotAlwaysClosed.ql b/python/ql/src/Resources/FileNotAlwaysClosed.ql new file mode 100755 index 00000000000..870c041402e --- /dev/null +++ b/python/ql/src/Resources/FileNotAlwaysClosed.ql @@ -0,0 +1,72 @@ +/** + * @name File is not always closed + * @description Opening a file without ensuring that it is always closed may cause resource leaks. + * @kind problem + * @tags efficiency + * correctness + * resources + * external/cwe/cwe-772 + * @problem.severity warning + * @sub-severity high + * @precision medium + * @id py/file-not-closed + */ + +import python +import FileOpen + +/** Whether resource is opened and closed in a matched pair of methods, + * either __enter__ and __exit__ or __init__ and __del__ */ +predicate opened_in_enter_closed_in_exit(ControlFlowNode open) { + file_not_closed_at_scope_exit(open) and + exists(FunctionObject entry, FunctionObject exit | + open.getScope() = entry.getFunction() and + exists(ClassObject cls | + cls.declaredAttribute("__enter__") = entry and cls.declaredAttribute("__exit__") = exit + or + cls.declaredAttribute("__init__") = entry and cls.declaredAttribute("__del__") = exit + ) + and + exists(AttrNode attr_open, AttrNode attrclose | + attr_open.getScope() = entry.getFunction() and + attrclose.getScope() = exit.getFunction() and + expr_is_open(attr_open.(DefinitionNode).getValue(), open) and + attr_open.getName() = attrclose.getName() and + close_method_call(_, attrclose) + ) + ) +} + +predicate file_not_closed_at_scope_exit(ControlFlowNode open) { + exists(EssaVariable v | + BaseFlow::reaches_exit(v) and + var_is_open(v, open) and + not file_is_returned(v, open) + ) + or + call_to_open(open) and not exists(AssignmentDefinition def | def.getValue() = open) + and not exists(Return r | r.getValue() = open.getNode()) +} + +predicate file_not_closed_at_exception_exit(ControlFlowNode open, ControlFlowNode exit) { + exists(EssaVariable v | + exit.(RaisingNode).viableExceptionalExit(_, 
_) and + not closes_arg(exit, v.getSourceVariable()) and + not close_method_call(exit, v.getAUse().(NameNode)) and + var_is_open(v, open) and + v.getAUse() = exit.getAChild*() + ) +} + +/* Check to see if a file is opened but not closed or returned */ + +from ControlFlowNode defn, string message +where +not opened_in_enter_closed_in_exit(defn) and +( + file_not_closed_at_scope_exit(defn) and message = "File is opened but is not closed." + or + not file_not_closed_at_scope_exit(defn) and file_not_closed_at_exception_exit(defn, _) and message = "File may not be closed if an exception is raised." +) + +select defn.getNode(), message diff --git a/python/ql/src/Resources/FileOpen.qll b/python/ql/src/Resources/FileOpen.qll new file mode 100644 index 00000000000..ec07749587f --- /dev/null +++ b/python/ql/src/Resources/FileOpen.qll @@ -0,0 +1,156 @@ +import python +import semmle.python.GuardedControlFlow +import semmle.python.dataflow.SsaDefinitions +import semmle.python.pointsto.Filters + +/** Holds if `open` is a call that returns a newly opened file */ +predicate call_to_open(ControlFlowNode open) { + exists(FunctionObject f | + function_opens_file(f) and + f.getACall() = open + ) and + /* If in `with` statement, then it will be automatically closed. 
So just treat as not opened */ + not exists(With w | w.getContextExpr() = open.getNode()) +} + +/** Holds if `n` refers to a file opened at `open` */ +predicate expr_is_open(ControlFlowNode n, ControlFlowNode open) { + call_to_open(open) and open = n + or + exists(EssaVariable v | + n instanceof NameNode and + var_is_open(v, open) | + n = v.getAUse() + or + wraps_file(n, v) + ) +} + +/** Holds if `call` wraps the object referred to by `v` and returns it */ +private predicate wraps_file(CallNode call, EssaVariable v) { + exists(ClassObject cls | + call = cls.getACall() and + call.getAnArg() = v.getAUse() + ) +} + +/** Holds if `v` refers to a file opened at `open` */ +predicate var_is_open(EssaVariable v, ControlFlowNode open) { + def_is_open(v.getDefinition(), open) and + /* If use in context expression in `with` statement, then it will be automatically closed. */ + not exists(With w | w.getContextExpr() = v.getAUse().getNode()) +} + +/** Holds if `test` will pass through an open file in variable `v` for the `sense` successor */ +predicate passes_open_files(Variable v, ControlFlowNode test, boolean sense) { + // `if fd.closed:` + exists(AttrNode closed | + closed = test and + closed.getObject("closed") = v.getAUse() + ) and sense = false + or + // `if fd ==/is ...:` most commonly `if fd is None:` + equality_test(test, v.getAUse(), sense.booleanNot(), _) + or + // `if fd:` + test = v.getAUse() and sense = true + or + exists(UnaryExprNode n | + n = test and + n.getNode().getOp() instanceof Not | + passes_open_files(v, n.getOperand(), sense.booleanNot()) + ) +} + +/* Helper for `def_is_open` to give better join order */ +private predicate passes_open_files(PyEdgeRefinement refinement) { + passes_open_files(refinement.getSourceVariable(), refinement.getPredecessor().getLastNode(), refinement.getSense()) +} + +/** Holds if `def` refers to a file opened at `open` */ +predicate def_is_open(EssaDefinition def, ControlFlowNode open) { + 
expr_is_open(def.(AssignmentDefinition).getValue(), open) + or + exists(PyEdgeRefinement refinement | + refinement = def | + var_is_open(refinement.getInput(), open) and + passes_open_files(refinement) + ) + or + exists(PyNodeRefinement refinement | + refinement = def | + not closes_file(def) and not wraps_file(refinement.getDefiningNode(), refinement.getInput()) and + var_is_open(refinement.getInput(), open) + ) + or + var_is_open(def.(PhiFunction).getAnInput(), open) +} + +/** Holds if `call` closes a file */ +predicate closes_file(EssaNodeRefinement call) { + closes_arg(call.(ArgumentRefinement).getDefiningNode(), call.getSourceVariable()) or + close_method_call(call.(MethodCallsiteRefinement).getCall(), call.getSourceVariable().(Variable).getAUse()) +} + +/** Holds if `call` closes its argument, which is an open file referred to by `v` */ +predicate closes_arg(CallNode call, Variable v) { + call.getAnArg() = v.getAUse() and + ( + exists(FunctionObject close | + call = close.getACall() and function_closes_file(close) + ) + or + call.getFunction().(NameNode).getId() = "close" + ) +} + +/** Holds if `call` closes its 'self' argument, which is an open file referred to by `self` */ +predicate close_method_call(CallNode call, ControlFlowNode self) { + call.getFunction().(AttrNode).getObject() = self and + exists(FunctionObject close | + call = close.getACall() and function_closes_file(close) + ) + or + call.getFunction().(AttrNode).getObject("close") = self +} + +predicate function_closes_file(FunctionObject close) { + close.hasLongName("os.close") + or + function_should_close_parameter(close.getFunction()) +} + +predicate function_should_close_parameter(Function func) { + exists(EssaDefinition def | + closes_file(def) and + def.getSourceVariable().(Variable).getScope() = func + ) +} + +predicate function_opens_file(FunctionObject f) { + f = theOpenFunction() + or + exists(EssaVariable v, Return ret | + ret.getScope() = f.getFunction() | + ret.getValue().getAFlowNode() 
= v.getAUse() and + var_is_open(v, _) + ) + or + exists(Return ret, FunctionObject callee | + ret.getScope() = f.getFunction() | + ret.getValue().getAFlowNode() = callee.getACall() and + function_opens_file(callee) + ) +} + +predicate file_is_returned(EssaVariable v, ControlFlowNode open) { + exists(NameNode n, Return ret | + var_is_open(v, open) and + v.getAUse() = n | + ret.getValue() = n.getNode() + or + ret.getValue().(Tuple).getAnElt() = n.getNode() + or + ret.getValue().(List).getAnElt() = n.getNode() + ) +} diff --git a/python/ql/src/Security/CWE-022/PathInjection.qhelp b/python/ql/src/Security/CWE-022/PathInjection.qhelp new file mode 100644 index 00000000000..4a4fb3f4bd7 --- /dev/null +++ b/python/ql/src/Security/CWE-022/PathInjection.qhelp @@ -0,0 +1,61 @@ + + + + +

    +Accessing files using paths constructed from user-controlled data can allow an attacker to access +unexpected resources. This can result in sensitive information being revealed or deleted, or an +attacker being able to influence behavior by modifying unexpected files. +

    +
    + + +

    +Validate user input before using it to construct a file path, either using an off-the-shelf library function +like werkzeug.utils.secure_filename, or by performing custom validation. +

    + +

    +Ideally, follow these rules: +

    + +
      +
    • Do not allow more than a single "." character.
    • +
    • Do not allow directory separators such as "/" or "\" (depending on the file system).
    • +
    • Do not rely on simply replacing problematic sequences such as "../". For example, after +applying this filter to ".../...//", the resulting string would still be "../".
    • +
    • Use a whitelist of known good patterns.
    • +
    +
    + + +

    +In the first example, a file name is read from an HTTP request and then used to access a file. +However, a malicious user could enter a file name that is an absolute path, such as +"/etc/passwd". +

    + +

    +In the second example, it appears that the user is restricted to opening a file within the +"/server/static/images" directory. However, a malicious user could enter a file name containing +special characters. For example, the string "../../../etc/passwd" will result in the code +reading the file located at "/server/static/images/../../../etc/passwd", which is the system's +password file. This file would then be sent back to the user, giving them access to all the +system's passwords. +

    + +

    +In the third example, the path used to access the file system is normalized before being checked against a +known prefix. This ensures that regardless of the user input, the resulting path is safe. +

    + + +
    + + +
  • OWASP: Path Traversal.
  • +
  • Werkzeug: werkzeug.utils.secure_filename.
  • +
    +
    diff --git a/python/ql/src/Security/CWE-022/PathInjection.ql b/python/ql/src/Security/CWE-022/PathInjection.ql new file mode 100644 index 00000000000..49c9ee74296 --- /dev/null +++ b/python/ql/src/Security/CWE-022/PathInjection.ql @@ -0,0 +1,31 @@ +/** + * @name Uncontrolled data used in path expression + * @description Accessing paths influenced by users can allow an attacker to access unexpected resources. + * @kind problem + * @problem.severity error + * @sub-severity high + * @precision medium + * @id py/path-injection + * @tags correctness + * security + * external/owasp/owasp-a1 + * external/cwe/cwe-022 + * external/cwe/cwe-023 + * external/cwe/cwe-036 + * external/cwe/cwe-073 + * external/cwe/cwe-099 + */ + +import python + +/* Sources */ +import semmle.python.web.HttpRequest + +/* Sinks */ +import semmle.python.security.injection.Path + + +from TaintSource src, TaintSink sink +where src.flowsToSink(sink) + +select sink, "This path depends on $@.", src, "a user-provided value" diff --git a/python/ql/src/Security/CWE-022/examples/tainted_path.py b/python/ql/src/Security/CWE-022/examples/tainted_path.py new file mode 100644 index 00000000000..b7366b9b6cf --- /dev/null +++ b/python/ql/src/Security/CWE-022/examples/tainted_path.py @@ -0,0 +1,37 @@ +import os.path + + +urlpatterns = [ + # Route to user_picture + url(r'^user-pic1$', user_picture1, name='user-picture1'), + url(r'^user-pic2$', user_picture2, name='user-picture2'), + url(r'^user-pic3$', user_picture3, name='user-picture3') +] + + +def user_picture1(request): + """A view that is vulnerable to malicious file access.""" + base_path = '/server/static/images' + filename = request.GET.get('p') + # BAD: This could read any file on the file system + data = open(filename, 'rb').read() + return HttpResponse(data) + +def user_picture2(request): + """A view that is vulnerable to malicious file access.""" + base_path = '/server/static/images' + filename = request.GET.get('p') + # BAD: This could still read 
any file on the file system + data = open(os.path.join(base_path, filename), 'rb').read() + return HttpResponse(data) + +def user_picture3(request): + """A view that is not vulnerable to malicious file access.""" + base_path = '/server/static/images' + filename = request.GET.get('p') + #GOOD -- Normalise the path, then verify it is still inside base_path (note the trailing separator) + fullpath = os.path.normpath(os.path.join(base_path, filename)) + if not fullpath.startswith(base_path + os.sep): + raise SecurityException() + data = open(fullpath, 'rb').read() + return HttpResponse(data) diff --git a/python/ql/src/Security/CWE-078/CommandInjection.qhelp b/python/ql/src/Security/CWE-078/CommandInjection.qhelp new file mode 100644 index 00000000000..0423269c919 --- /dev/null +++ b/python/ql/src/Security/CWE-078/CommandInjection.qhelp @@ -0,0 +1,41 @@ + + + +

    Code that passes user input directly to +exec, eval, or some other library +routine that executes a command, allows the user to execute malicious +code.

    + +
    + + +

    If possible, use hard-coded string literals to specify the command to run +or the library to load. Instead of passing the user input directly to the +process or library function, examine the user input and then choose +among hard-coded string literals.

    + +

    If the applicable libraries or commands cannot be determined at +compile time, then add code to verify that the user input string is +safe before using it.

    + +
    + + +

    The following example shows two functions. The first is unsafe as it takes a shell script that can be changed +by a user, and passes it straight to subprocess.call() without examining it first. +The second is safe as it selects the command from a predefined white-list.

    + + + +
    + + +
  • +OWASP: +Command Injection. +
  • + +
    +
    diff --git a/python/ql/src/Security/CWE-078/CommandInjection.ql b/python/ql/src/Security/CWE-078/CommandInjection.ql new file mode 100755 index 00000000000..22e3860dd38 --- /dev/null +++ b/python/ql/src/Security/CWE-078/CommandInjection.ql @@ -0,0 +1,28 @@ +/** + * @name Uncontrolled command line + * @description Using externally controlled strings in a command line may allow a malicious + * user to change the meaning of the command. + * @kind problem + * @problem.severity error + * @sub-severity high + * @precision medium + * @id py/command-line-injection + * @tags correctness + * security + * external/owasp/owasp-a1 + * external/cwe/cwe-078 + * external/cwe/cwe-088 + */ + +import python + +/* Sources */ +import semmle.python.web.HttpRequest + +/* Sinks */ +import semmle.python.security.injection.Command + +from TaintSource src, TaintSink sink +where src.flowsToSink(sink) + +select sink, "This command depends on $@.", src, "a user-provided value" diff --git a/python/ql/src/Security/CWE-078/examples/command_injection.py b/python/ql/src/Security/CWE-078/examples/command_injection.py new file mode 100644 index 00000000000..54cfb275165 --- /dev/null +++ b/python/ql/src/Security/CWE-078/examples/command_injection.py @@ -0,0 +1,24 @@ + +urlpatterns = [ + # Route to command_execution + url(r'^command-ex1$', command_execution_unsafe, name='command-execution-unsafe'), + url(r'^command-ex2$', command_execution_safe, name='command-execution-safe') +] + +COMMANDS = { + "list" :"ls", + "stat" : "stat" +} + +def command_execution_unsafe(request): + if request.method == 'POST': + action = request.POST.get('action', '') + #BAD -- No sanitizing of input + subprocess.call(["application", action]) + +def command_execution_safe(request): + if request.method == 'POST': + action = request.POST.get('action', '') + #GOOD -- Use a whitelist: only commands listed in COMMANDS can be selected + subprocess.call(["application", COMMANDS[action]]) + diff --git a/python/ql/src/Security/CWE-079/ReflectedXss.qhelp 
b/python/ql/src/Security/CWE-079/ReflectedXss.qhelp new file mode 100644 index 00000000000..8cdeb4d3e79 --- /dev/null +++ b/python/ql/src/Security/CWE-079/ReflectedXss.qhelp @@ -0,0 +1,45 @@ + + + + +

    +Directly writing user input (for example, an HTTP request parameter) to a webpage +without properly sanitizing the input first, allows for a cross-site scripting vulnerability. +

    +
    + + +

    +To guard against cross-site scripting, consider escaping the input before writing user input to the page. +The standard library provides escaping functions: html.escape() for Python 3.2 upwards +or cgi.escape() for older versions of Python. +Most frameworks also provide their own escaping functions, for example flask.escape(). +

    +
    + + +

    +The following example is a minimal flask app which shows a safe and unsafe way to render the given name back to the page. +The first view is unsafe as first_name is not escaped, leaving the page vulnerable to cross-site scripting attacks. +The second view is safe as first_name is escaped, so it is not vulnerable to cross-site scripting attacks. +

    + +
    + + +
  • +OWASP: +XSS +(Cross Site Scripting) Prevention Cheat Sheet. +
  • +
  • +Wikipedia: Cross-site scripting. +
  • +
  • +Python Library Reference: +html.escape(). +
  • +
    +
    diff --git a/python/ql/src/Security/CWE-079/ReflectedXss.ql b/python/ql/src/Security/CWE-079/ReflectedXss.ql new file mode 100644 index 00000000000..aa6c5552630 --- /dev/null +++ b/python/ql/src/Security/CWE-079/ReflectedXss.ql @@ -0,0 +1,32 @@ +/** + * @name Reflected server-side cross-site scripting + * @description Writing user input directly to a web page + * allows for a cross-site scripting vulnerability. + * @kind problem + * @problem.severity error + * @sub-severity high + * @precision medium + * @id py/reflective-xss + * @tags security + * external/cwe/cwe-079 + * external/cwe/cwe-116 + */ + +import python + +/* Sources */ +import semmle.python.web.HttpRequest + +/* Sinks */ + +import semmle.python.web.HttpResponse + +/* Flow */ +import semmle.python.security.strings.Untrusted + +from TaintSource src, TaintSink sink +where src.flowsToSink(sink) + +select sink, "Cross-site scripting vulnerability due to $@.", + src, "user-provided value" + diff --git a/python/ql/src/Security/CWE-079/examples/xss.py b/python/ql/src/Security/CWE-079/examples/xss.py new file mode 100644 index 00000000000..fbe5795047b --- /dev/null +++ b/python/ql/src/Security/CWE-079/examples/xss.py @@ -0,0 +1,13 @@ +from flask import Flask, request, make_response, escape + +app = Flask(__name__) + +@app.route('/unsafe') +def unsafe(): + first_name = request.args.get('name', '') + return make_response("Your name is " + first_name) + +@app.route('/safe') +def safe(): + first_name = request.args.get('name', '') + return make_response("Your name is " + escape(first_name)) diff --git a/python/ql/src/Security/CWE-089/SqlInjection.qhelp b/python/ql/src/Security/CWE-089/SqlInjection.qhelp new file mode 100644 index 00000000000..e976401a6b5 --- /dev/null +++ b/python/ql/src/Security/CWE-089/SqlInjection.qhelp @@ -0,0 +1,47 @@ + + + + +

    +If a database query (such as a SQL or NoSQL query) is built from +user-provided data without sufficient sanitization, a user +may be able to run malicious database queries. +

    +
    + + +

    +Most database connector libraries offer a way of safely +embedding untrusted data into a query by means of query parameters +or prepared statements. +

    +
    + + +

    +In the following snippet, from an example django app, +a name is stored in the database using two different queries. +

    + +

    +In the first case, the query string is built by +directly using string formatting from a user-supplied request attribute. +The parameter may include quote characters, so this +code is vulnerable to a SQL injection attack. +

    + +

    +In the second case, the user-supplied request attribute is passed +to the database using query parameters. +

    + + +
    + + +
  • Wikipedia: SQL injection.
  • +
  • OWASP: SQL Injection Prevention Cheat Sheet.
  • +
    +
    diff --git a/python/ql/src/Security/CWE-089/SqlInjection.ql b/python/ql/src/Security/CWE-089/SqlInjection.ql new file mode 100755 index 00000000000..62235dfe9ce --- /dev/null +++ b/python/ql/src/Security/CWE-089/SqlInjection.ql @@ -0,0 +1,28 @@ +/** + * @name SQL query built from user-controlled sources + * @description Building a SQL query from user-controlled sources is vulnerable to insertion of + * malicious SQL code by the user. + * @kind problem + * @problem.severity error + * @precision medium + * @id py/sql-injection + * @tags security + * external/cwe/cwe-089 + * external/owasp/owasp-a1 + */ + +import python + +/* Sources */ +import semmle.python.web.HttpRequest + +/* Sinks */ +import semmle.python.security.injection.Sql +import semmle.python.web.django.Db +import semmle.python.web.django.Model + + +from TaintSource src, TaintSink sink +where src.flowsToSink(sink) + +select sink, "This SQL query depends on $@.", src, "a user-provided value" diff --git a/python/ql/src/Security/CWE-089/examples/sql_injection.py b/python/ql/src/Security/CWE-089/examples/sql_injection.py new file mode 100644 index 00000000000..541c580f712 --- /dev/null +++ b/python/ql/src/Security/CWE-089/examples/sql_injection.py @@ -0,0 +1,21 @@ + +from django.conf.urls import patterns, url +from django.db import connection + + +def save_name(request): + + if request.method == 'POST': + name = request.POST.get('name') + curs = connection.cursor() + #BAD -- Using string formatting + curs.execute( + "insert into names_file ('name') values ('%s')" % name) + #GOOD -- Using parameters + curs.execute( + "insert into names_file ('name') values (%s)", [name]) + + +urlpatterns = patterns(url(r'^save_name/$', + save_name, name='save_name')) + diff --git a/python/ql/src/Security/CWE-094/CodeInjection.qhelp b/python/ql/src/Security/CWE-094/CodeInjection.qhelp new file mode 100644 index 00000000000..8d7aab476a6 --- /dev/null +++ b/python/ql/src/Security/CWE-094/CodeInjection.qhelp @@ -0,0 +1,46 @@ + + + 
+ +

    +Directly evaluating user input (for example, an HTTP request parameter) as code without properly +sanitizing the input first allows an attacker arbitrary code execution. This can occur when user +input is passed to code that interprets it as an expression to be +evaluated, such as eval or exec. +

    +
    + + +

    +Avoid including user input in any expression that may be dynamically evaluated. If user input must +be included, use context-specific escaping before including it. +It is important that the correct escaping is used for the type of evaluation that will occur. +

    +
    + + +

    +The following example shows two functions setting a name from a request. +The first function uses exec to execute the setname function. +This is dangerous as it can allow a malicious user to execute arbitrary code on the server. +For example, the user could supply the value "' + subprocess.call('rm -rf') + '" +to destroy the server's file system. +The second function calls the setname function directly and is thus safe. + +

    + + +
    + + +
  • +OWASP: +Code Injection. +
  • +
  • +Wikipedia: Code Injection. +
  • +
    +
    diff --git a/python/ql/src/Security/CWE-094/CodeInjection.ql b/python/ql/src/Security/CWE-094/CodeInjection.ql new file mode 100644 index 00000000000..ef548c83ee2 --- /dev/null +++ b/python/ql/src/Security/CWE-094/CodeInjection.ql @@ -0,0 +1,29 @@ +/** + * @name Code injection + * @description Interpreting unsanitized user input as code allows a malicious user arbitrary + * code execution. + * @kind problem + * @problem.severity error + * @sub-severity high + * @precision medium + * @id py/code-injection + * @tags security + * external/owasp/owasp-a1 + * external/cwe/cwe-094 + * external/cwe/cwe-079 + * external/cwe/cwe-116 + */ + +import python + +/* Sources */ +import semmle.python.web.HttpRequest + +/* Sinks */ +import semmle.python.security.injection.Exec + + +from TaintSource src, TaintSink sink +where src.flowsToSink(sink) + +select sink, "$@ flows to here and is interpreted as code.", src, "User-provided value" diff --git a/python/ql/src/Security/CWE-094/examples/code_injection.py b/python/ql/src/Security/CWE-094/examples/code_injection.py new file mode 100644 index 00000000000..d70f9118ad5 --- /dev/null +++ b/python/ql/src/Security/CWE-094/examples/code_injection.py @@ -0,0 +1,18 @@ + +urlpatterns = [ + # Route to code_execution + url(r'^code-ex1$', code_execution_bad, name='code-execution-bad'), + url(r'^code-ex2$', code_execution_good, name='code-execution-good') +] + +def code_execution_bad(request): + if request.method == 'POST': + first_name = base64.decodestring(request.POST.get('first_name', '')) + #BAD -- Allow user to define code to be run. 
+ exec("setname('%s')" % first_name) + +def code_execution_good(request): + if request.method == 'POST': + first_name = base64.decodestring(request.POST.get('first_name', '')) + #GOOD --Call code directly + setname(first_name) diff --git a/python/ql/src/Security/CWE-209/StackTraceExposure.py b/python/ql/src/Security/CWE-209/StackTraceExposure.py new file mode 100644 index 00000000000..169dc091ddd --- /dev/null +++ b/python/ql/src/Security/CWE-209/StackTraceExposure.py @@ -0,0 +1,25 @@ +from flask import Flask +app = Flask(__name__) + + +import traceback + +def do_computation(): + raise Exception("Secret info") + +# BAD +@app.route('/bad') +def server_bad(): + try: + do_computation() + except Exception as e: + return traceback.format_exc() + +# GOOD +@app.route('/good') +def server_good(): + try: + do_computation() + except Exception as e: + log(traceback.format_exc()) + return "An internal error has occurred!" diff --git a/python/ql/src/Security/CWE-209/StackTraceExposure.qhelp b/python/ql/src/Security/CWE-209/StackTraceExposure.qhelp new file mode 100644 index 00000000000..86ecdbdc0d8 --- /dev/null +++ b/python/ql/src/Security/CWE-209/StackTraceExposure.qhelp @@ -0,0 +1,52 @@ + + + + +

    +Software developers often add stack traces to error messages, as a +debugging aid. Whenever that error message occurs for an end user, the +developer can use the stack trace to help identify how to fix the +problem. In particular, stack traces can tell the developer more about +the sequence of events that led to a failure, as opposed to merely the +final state of the software when the error occurred. +

    + +

    +Unfortunately, the same information can be useful to an attacker. +The sequence of class names in a stack trace can reveal the structure +of the application as well as any internal components it relies on. +Furthermore, the error message at the top of a stack trace can include +information such as server-side file names and SQL code that the +application relies on, allowing an attacker to fine-tune a subsequent +injection attack. +

    +
    + + +

    +Send the user a more generic error message that reveals less information. +Either suppress the stack trace entirely, or log it only on the server. +

    +
    + + +

    +In the following example, an exception is handled in two different +ways. In the first version, labeled BAD, the exception is sent back to +the remote user by returning it from the function. As such, +the user is able to see a detailed stack trace, which may contain +sensitive information. In the second version, the error message is +logged only on the server, and a generic error message is displayed to +the user. That way, the developers can still access and use the error +log, but remote users will not see the information. +

    + + +
    + + +
  • OWASP: Information Leak.
  • +
    +
    diff --git a/python/ql/src/Security/CWE-209/StackTraceExposure.ql b/python/ql/src/Security/CWE-209/StackTraceExposure.ql new file mode 100644 index 00000000000..f389cb6cc30 --- /dev/null +++ b/python/ql/src/Security/CWE-209/StackTraceExposure.ql @@ -0,0 +1,22 @@ +/** + * @name Information exposure through an exception + * @description Leaking information about an exception, such as messages and stack traces, to an + * external user can expose implementation details that are useful to an attacker for + * developing a subsequent exploit. + * @kind problem + * @problem.severity error + * @precision high + * @id py/stack-trace-exposure + * @tags security + * external/cwe/cwe-209 + * external/cwe/cwe-497 + */ + +import python + +import semmle.python.security.Exceptions +import semmle.python.web.HttpResponse + +from TaintSource src, TaintSink sink +where src.flowsToSink(sink) +select sink, "$@ may be exposed to an external user", src, "Error information" diff --git a/python/ql/src/Security/CWE-327/BrokenCryptoAlgorithm.qhelp b/python/ql/src/Security/CWE-327/BrokenCryptoAlgorithm.qhelp new file mode 100644 index 00000000000..6cc787e52e4 --- /dev/null +++ b/python/ql/src/Security/CWE-327/BrokenCryptoAlgorithm.qhelp @@ -0,0 +1,57 @@ + + + +

    + Using broken or weak cryptographic algorithms can leave data + vulnerable to being decrypted or forged by an attacker. +

    + +

    + Many cryptographic algorithms provided by cryptography + libraries are known to be weak, or flawed. Using such an + algorithm means that encrypted or hashed data is less + secure than it appears to be. +

    + +
    + + +

    + Ensure that you use a strong, modern cryptographic + algorithm. Use at least AES-128 or RSA-2048 for + encryption, and SHA-2 or SHA-3 for secure hashing. +

    + +
    + + +

    + The following code uses the pycrypto + library to encrypt some secret data. When you create a cipher using + pycrypto you must specify the encryption + algorithm to use. The first example uses DES, which is an + older algorithm that is now considered weak. The second + example uses Blowfish, which is a stronger, more modern algorithm. +

    + + + +

    + WARNING: Although the second example above is more robust, + pycrypto is no longer actively maintained so we recommend using cryptography instead. +

    + +
    + + +
  • NIST, FIPS 140 Annex A: Approved Security Functions.
  • +
  • NIST, SP 800-131A: Transitions: Recommendation for Transitioning the Use of Cryptographic Algorithms and Key Lengths.
  • +
  • OWASP: Rule + - Use strong approved cryptographic algorithms. +
  • +
    + +
    diff --git a/python/ql/src/Security/CWE-327/BrokenCryptoAlgorithm.ql b/python/ql/src/Security/CWE-327/BrokenCryptoAlgorithm.ql new file mode 100644 index 00000000000..188e43ebb8e --- /dev/null +++ b/python/ql/src/Security/CWE-327/BrokenCryptoAlgorithm.ql @@ -0,0 +1,18 @@ +/** + * @name Use of a broken or weak cryptographic algorithm + * @description Using broken or weak cryptographic algorithms can compromise security. + * @kind problem + * @problem.severity warning + * @precision medium + * @id py/weak-cryptographic-algorithm + * @tags security + * external/cwe/cwe-327 + */ +import python +import semmle.python.security.SensitiveData +import semmle.python.security.Crypto + +from SensitiveDataSource src, WeakCryptoSink sink +where src.flowsToSink(sink) + +select sink, "Sensitive data from $@ is used in a broken or weak cryptographic algorithm.", src , src.toString() diff --git a/python/ql/src/Security/CWE-327/examples/broken_crypto.py b/python/ql/src/Security/CWE-327/examples/broken_crypto.py new file mode 100644 index 00000000000..ef9fc75e889 --- /dev/null +++ b/python/ql/src/Security/CWE-327/examples/broken_crypto.py @@ -0,0 +1,13 @@ +from Crypto.Cipher import DES, Blowfish + +cipher = DES.new(SECRET_KEY) + +def send_encrypted(channel, message): + channel.send(cipher.encrypt(message)) # BAD: weak encryption + + +cipher = Blowfish.new(SECRET_KEY) + +def send_encrypted(channel, message): + channel.send(cipher.encrypt(message)) # GOOD: strong encryption + diff --git a/python/ql/src/Security/CWE-502/JsonGood.py b/python/ql/src/Security/CWE-502/JsonGood.py new file mode 100644 index 00000000000..89947cb0e5c --- /dev/null +++ b/python/ql/src/Security/CWE-502/JsonGood.py @@ -0,0 +1,10 @@ + +from django.conf.urls import url +import json + +def safe(pickled): + return json.loads(pickled) + +urlpatterns = [ + url(r'^(?P.*)$', safe) +] diff --git a/python/ql/src/Security/CWE-502/UnpicklingBad.py b/python/ql/src/Security/CWE-502/UnpicklingBad.py new file mode 100644 index 
00000000000..0f8112a28ae --- /dev/null +++ b/python/ql/src/Security/CWE-502/UnpicklingBad.py @@ -0,0 +1,10 @@ + +from django.conf.urls import url +import pickle + +def unsafe(pickled): + return pickle.loads(pickled) + +urlpatterns = [ + url(r'^(?P.*)$', unsafe) +] \ No newline at end of file diff --git a/python/ql/src/Security/CWE-502/UnsafeDeserialization.qhelp b/python/ql/src/Security/CWE-502/UnsafeDeserialization.qhelp new file mode 100644 index 00000000000..f298e62695f --- /dev/null +++ b/python/ql/src/Security/CWE-502/UnsafeDeserialization.qhelp @@ -0,0 +1,61 @@ + + + + +

    +Deserializing untrusted data using any deserialization framework that +allows the construction of arbitrary serializable objects is easily exploitable +and in many cases allows an attacker to execute arbitrary code. Even before a +deserialized object is returned to the caller of a deserialization method a lot +of code may have been executed, including static initializers, constructors, +and finalizers. Automatic deserialization of fields means that an attacker may +craft a nested combination of objects on which the executed initialization code +may have unforeseen effects, such as the execution of arbitrary code. +

    +

    +There are many different serialization frameworks. This query currently +supports Pickle, Marshal and Yaml. +

    +
    + + +

    +Avoid deserialization of untrusted data if at all possible. If the +architecture permits it then use other formats instead of serialized objects, +for example JSON. +

    +
    + + +

    +The following example calls pickle.loads directly on a +value provided by an incoming HTTP request. Pickle then creates a new value from untrusted data, and is +therefore inherently unsafe. +

    + + +

    +Changing the code to use json.loads instead of pickle.loads removes the vulnerability. +

    + + +
    + + + +
  • +OWASP vulnerability description: +Deserialization of untrusted data. +
  • +
  • +OWASP guidance on deserializing objects: +Deserialization Cheat Sheet. +
  • +
  • +Talks by Chris Frohoff & Gabriel Lawrence: + +AppSecCali 2015: Marshalling Pickles - how deserializing objects will ruin your day +
  • +
    + +
    diff --git a/python/ql/src/Security/CWE-502/UnsafeDeserialization.ql b/python/ql/src/Security/CWE-502/UnsafeDeserialization.ql new file mode 100644 index 00000000000..48d4ed23983 --- /dev/null +++ b/python/ql/src/Security/CWE-502/UnsafeDeserialization.ql @@ -0,0 +1,30 @@ +/** + * @name Deserializing untrusted input + * @description Deserializing user-controlled data may allow attackers to execute arbitrary code. + * @kind problem + * @id py/unsafe-deserialization + * @problem.severity error + * @sub-severity high + * @precision medium + * @tags external/cwe/cwe-502 + * security + * serialization + */ +import python + +// Sources -- Any untrusted input +import semmle.python.web.HttpRequest + +// Flow -- untrusted string +import semmle.python.security.strings.Untrusted + +// Sink -- Unpickling and other deserialization formats. +import semmle.python.security.injection.Pickle +import semmle.python.security.injection.Marshal +import semmle.python.security.injection.Yaml + +from TaintSource src, TaintSink sink + +where src.flowsToSink(sink) + +select sink, "Deserializing of $@.", src, "untrusted input" diff --git a/python/ql/src/Security/CWE-601/UrlRedirect.qhelp b/python/ql/src/Security/CWE-601/UrlRedirect.qhelp new file mode 100644 index 00000000000..c2e053f030b --- /dev/null +++ b/python/ql/src/Security/CWE-601/UrlRedirect.qhelp @@ -0,0 +1,42 @@ + + + + +

    +Directly incorporating user input into a URL redirect request without validating the input +can facilitate phishing attacks. In these attacks, unsuspecting users can be redirected to a +malicious site that looks very similar to the real site they intend to visit, but which is +controlled by the attacker. +

    +
    + + +

    +To guard against untrusted URL redirection, it is advisable to avoid putting user input +directly into a redirect URL. Instead, maintain a list of authorized +redirects on the server; then choose from that list based on the user input provided. +

    +
    + + +

    +The following example shows an HTTP request parameter being used directly in a URL redirect +without validating the input, which facilitates phishing attacks: +

    + + + +

    +One way to remedy the problem is to validate the user input against a known fixed string +before doing the redirection: +

    + + +
    + + +
  • OWASP: + Unvalidated Redirects and Forwards Cheat Sheet.
  • +
    + +
    diff --git a/python/ql/src/Security/CWE-601/UrlRedirect.ql b/python/ql/src/Security/CWE-601/UrlRedirect.ql new file mode 100644 index 00000000000..6a412813bc0 --- /dev/null +++ b/python/ql/src/Security/CWE-601/UrlRedirect.ql @@ -0,0 +1,35 @@ +/** + * @name URL redirection from remote source + * @description URL redirection based on unvalidated user input + * may cause redirection to malicious web sites. + * @kind problem + * @problem.severity error + * @sub-severity low + * @id py/url-redirection + * @tags security + * external/cwe/cwe-601 + * @precision high + */ + +import python + + +import semmle.python.web.HttpRedirect +import semmle.python.web.HttpRequest +import semmle.python.security.strings.Untrusted + +/** Url redirection is a problem only if the user controls the prefix of the URL */ +class UntrustedPrefixStringKind extends UntrustedStringKind { + + override TaintKind getTaintForFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode) { + result = UntrustedStringKind.super.getTaintForFlowStep(fromnode, tonode) and + not tonode.(BinaryExprNode).getRight() = fromnode + } + +} + +from TaintSource src, TaintSink sink +where src.flowsToSink(sink) + +select sink, "Untrusted URL redirection due to $@.", src, "a user-provided value" + diff --git a/python/ql/src/Security/CWE-601/examples/redirect_bad.py b/python/ql/src/Security/CWE-601/examples/redirect_bad.py new file mode 100644 index 00000000000..161edd70ec3 --- /dev/null +++ b/python/ql/src/Security/CWE-601/examples/redirect_bad.py @@ -0,0 +1,8 @@ +from flask import Flask, request, redirect + +app = Flask(__name__) + +@app.route('/') +def hello(): + target = files = request.args.get('target', '') + return redirect(target, code=302) diff --git a/python/ql/src/Security/CWE-601/examples/redirect_good.py b/python/ql/src/Security/CWE-601/examples/redirect_good.py new file mode 100644 index 00000000000..c93b0f98a00 --- /dev/null +++ b/python/ql/src/Security/CWE-601/examples/redirect_good.py @@ -0,0 +1,13 @@ 
+from flask import Flask, request, redirect + +VALID_REDIRECT = "http://cwe.mitre.org/data/definitions/601.html" + +app = Flask(__name__) + +@app.route('/') +def hello(): + target = files = request.args.get('target', '') + if target == VALID_REDIRECT: + return redirect(target, code=302) + else: + ... # Error diff --git a/python/ql/src/Security/CWE-798/HardcodedCredentials.py b/python/ql/src/Security/CWE-798/HardcodedCredentials.py new file mode 100644 index 00000000000..6eb54c567f8 --- /dev/null +++ b/python/ql/src/Security/CWE-798/HardcodedCredentials.py @@ -0,0 +1,19 @@ +import hashlib +import binascii + +def process_request(request): + password = request.GET["password"] + + # BAD: Inbound authentication made by comparison to string literal + if password == "myPa55word": + redirect("login") + + hashed_password = load_from_config('hashed_password', CONFIG_FILE) + salt = load_from_config('salt', CONFIG_FILE) + + #GOOD: Inbound authentication made by comparing to a hash password from a config file. + dk = hashlib.pbkdf2_hmac('sha256', password, salt, 100000) + hashed_input = binascii.hexlify(dk) + if hashed_input == hashed_password: + redirect("login") + diff --git a/python/ql/src/Security/CWE-798/HardcodedCredentials.qhelp b/python/ql/src/Security/CWE-798/HardcodedCredentials.qhelp new file mode 100644 index 00000000000..df7d81792b6 --- /dev/null +++ b/python/ql/src/Security/CWE-798/HardcodedCredentials.qhelp @@ -0,0 +1,82 @@ + + + + +

    +Including unencrypted hard-coded inbound or outbound authentication credentials within source code +or configuration files is dangerous because the credentials may be easily discovered. +

    +

    +Source or configuration files containing hard-coded credentials may be visible to an attacker. For +example, the source code may be open source, or it may be leaked or accidentally revealed. +

    +

    +For inbound authentication, hard-coded credentials may allow unauthorized access to the system. This +is particularly problematic if the credential is hard-coded in the source code, because it cannot be +disabled easily. For outbound authentication, the hard-coded credentials may provide an attacker with +privileged information or unauthorized access to some other system. +

    + +
    + + +

    +Remove hard-coded credentials, such as user names, passwords and certificates, from source code, +placing them in configuration files or other data stores if necessary. If possible, store +configuration files including credential data separately from the source code, in a secure location +with restricted access. +

    + +

    +For outbound authentication details, consider encrypting the credentials or the enclosing data +stores or configuration files, and using permissions to restrict access. +

    + +

    +For inbound authentication details, consider hashing passwords using standard library functions +where possible. For example, hashlib.pbkdf2_hmac. +

    + +
    + + +

    +The following examples show different types of inbound and outbound authentication. +

    + +

    +In the first case, we accept a password from a remote user, and compare it against a plaintext +string literal. If an attacker acquires the source code they can observe +the password, and can log in to the system. Furthermore, if such an intrusion was discovered, the +application would need to be rewritten and redeployed in order to change the password. +

    + +

    +In the second case, the password is compared to a hashed and salted password stored in a +configuration file, using hashlib.pbkdf2_hmac. +In this case, access to the source code or the assembly would not reveal the password to an +attacker. Even access to the configuration file containing the password hash and salt would be of +little value to an attacker, as it is usually extremely difficult to reverse engineer the password +from the hash and salt. +

    + +

    +In the final case, a password is changed to a new, hard-coded value. If an attacker has access to +the source code, they will be able to observe the new password. +

    + + + +
    + + +
  • +OWASP: +Use of hard-coded password. +
  • + +
    +
    diff --git a/python/ql/src/Security/CWE-798/HardcodedCredentials.ql b/python/ql/src/Security/CWE-798/HardcodedCredentials.ql new file mode 100644 index 00000000000..72f45f204ef --- /dev/null +++ b/python/ql/src/Security/CWE-798/HardcodedCredentials.ql @@ -0,0 +1,143 @@ +/** + * @name Hard-coded credentials + * @description Credentials are hard coded in the source code of the application. + * @kind problem + * @problem.severity error + * @precision medium + * @id py/hardcoded-credentials + * @tags security + * external/cwe/cwe-259 + * external/cwe/cwe-321 + * external/cwe/cwe-798 + */ + +import semmle.python.security.TaintTracking +import semmle.python.filters.Tests + +class HardcodedValue extends TaintKind { + + HardcodedValue() { + this = "hard coded value" + } + +} + +bindingset[char, fraction] +predicate fewer_characters_than(StrConst str, string char, float fraction) { + exists(string text, int chars | + text = str.getText() and + chars = count(int i | text.charAt(i) = char) | + /* Allow one character */ + chars = 1 or + chars < text.length() * fraction + ) +} + +predicate possible_reflective_name(string name) { + exists(any(ModuleObject m).getAttribute(name)) + or + exists(any(ClassObject c).lookupAttribute(name)) + or + any(ClassObject c).getName() = name + or + any(ModuleObject m).getName() = name + or + exists(builtin_object(name)) +} + +int char_count(StrConst str) { + result = count(string c | c = str.getText().charAt(_)) +} + +predicate capitalized_word(StrConst str) { + str.getText().regexpMatch("[A-Z][a-z]+") +} + +predicate maybeCredential(ControlFlowNode f) { + /* A string that is not too short and unlikely to be text or an identifier. 
*/ + exists(StrConst str | + str = f.getNode() | + /* At least 10 characters */ + str.getText().length() > 9 and + /* Not too much whitespace */ + fewer_characters_than(str, " ", 0.05) and + /* or underscores */ + fewer_characters_than(str, "_", 0.2) and + /* Not too repetitive */ + exists(int chars | + chars = char_count(str) | + chars > 20 or + chars > str.getText().length()/2 + ) and + not possible_reflective_name(str.getText()) and + not capitalized_word(str) + ) + or + /* Or, an integer with at least 8 digits */ + exists(IntegerLiteral lit | + f.getNode() = lit + | + not exists(lit.getValue()) + or + lit.getValue() > 10000000 + ) +} + +class HardcodedValueSource extends TaintSource { + + HardcodedValueSource() { + maybeCredential(this) + } + + override predicate isSourceOf(TaintKind kind) { + kind instanceof HardcodedValue + } + +} + +class CredentialSink extends TaintSink { + + CredentialSink() { + exists(string name | + name.regexpMatch(getACredentialRegex()) and + not name.suffix(name.length()-4) = "file" + | + any(FunctionObject func).getNamedArgumentForCall(_, name) = this + or + exists(Keyword k | + k.getArg() = name and k.getValue().getAFlowNode() = this + ) + or + exists(CompareNode cmp, NameNode n | + n.getId() = name + | + cmp.operands(this, any(Eq eq), n) + or + cmp.operands(n, any(Eq eq), this) + ) + ) + } + + + override predicate sinks(TaintKind kind) { + kind instanceof HardcodedValue + } + +} + +/** + * Gets a regular expression for matching names of locations (variables, parameters, keys) that + * indicate the value being held is a credential. 
+ */ +private string getACredentialRegex() { + result = "(?i).*pass(wd|word|code|phrase)(?!.*question).*" or + result = "(?i).*(puid|username|userid).*" or + result = "(?i).*(cert)(?!.*(format|name)).*" +} + +from TaintSource src, TaintSink sink + +where src.flowsToSink(sink) and +not any(TestScope test).contains(src.(ControlFlowNode).getNode()) + +select sink, "Use of hardcoded credentials from $@.", src, src.toString() diff --git a/python/ql/src/Statements/AssertLiteralConstant.py b/python/ql/src/Statements/AssertLiteralConstant.py new file mode 100644 index 00000000000..13271433e44 --- /dev/null +++ b/python/ql/src/Statements/AssertLiteralConstant.py @@ -0,0 +1,9 @@ +def buy_bananas(n): + if n > 500: + assert False, "Too many bananas." + send_order("bananas", n) + +def buy_bananas_correct(n): + if n > 500: + raise AssertionError("Too many bananas") + send_order("bananas", n) diff --git a/python/ql/src/Statements/AssertLiteralConstant.qhelp b/python/ql/src/Statements/AssertLiteralConstant.qhelp new file mode 100644 index 00000000000..056feabb422 --- /dev/null +++ b/python/ql/src/Statements/AssertLiteralConstant.qhelp @@ -0,0 +1,43 @@ + + + +

    + In Python, assertions are not executed when optimizations are enabled. + This may lead to unexpected behavior when assertions are used to check the + validity of a piece of input. +

    + +
    + + +

    + If the value being tested is false, replace the assert + statement with a raise statement that raises an appropriate + exception. If the value being tested is true, delete the assert + statement or replace it with a pass statement. +

    + +
    + +

    + This example shows a function buy_bananas that takes a + number n as input. The function checks that this number is not too big + before sending off an order for that number of bananas. Because this is done + using an assert statement, the check disappears when + optimizations are enabled. The second function corrects this error by + explicitly raising an AssertionError, and checks the value even + when optimizations are enabled. +

    + + + +
    + + +
  • Python Language Reference: The assert statement.
  • +
  • The Python Tutorial: “Compiled” Python files.
  • + +
    +
    diff --git a/python/ql/src/Statements/AssertLiteralConstant.ql b/python/ql/src/Statements/AssertLiteralConstant.ql new file mode 100644 index 00000000000..bf575dd0e25 --- /dev/null +++ b/python/ql/src/Statements/AssertLiteralConstant.ql @@ -0,0 +1,32 @@ +/** + * @name Assert statement tests the truth value of a literal constant + * @description An assert statement testing a literal constant value may exhibit + * different behavior when optimizations are enabled. + * @kind problem + * @tags reliability + * correctness + * @problem.severity recommendation + * @sub-severity low + * @precision medium + * @id py/assert-literal-constant + */ + +import python +import semmle.python.filters.Tests + +from Assert a, string value +where + /* Exclude asserts inside test cases */ + not a.getScope() instanceof Test and + exists(Expr test | test = a.getTest() | + value = test.(IntegerLiteral).getN() + or + value = "\"" + test.(StrConst).getS() + "\"" + or + value = test.(NameConstant).toString() + ) and + /* Exclude asserts appearing at the end of a chain of `elif`s */ + not exists(If i | + i.getElif().getAnOrelse() = a + ) +select a, "Assert of literal constant " + value + "." diff --git a/python/ql/src/Statements/AssertOnTuple.py b/python/ql/src/Statements/AssertOnTuple.py new file mode 100644 index 00000000000..b42d74faf1f --- /dev/null +++ b/python/ql/src/Statements/AssertOnTuple.py @@ -0,0 +1,7 @@ +assert xxx and yyy # Alternative 1a. Check both expressions are true + +assert xxx, yyy # Alternative 1b. Check 'xxx' is true, 'yyy' is the failure message. + +tuple = (xxx, yyy) # Alternative 2. Check both elements of the tuple match expectations. +assert tuple[0]==xxx +assert tuple[1]==yyy diff --git a/python/ql/src/Statements/AssertOnTuple.qhelp b/python/ql/src/Statements/AssertOnTuple.qhelp new file mode 100644 index 00000000000..35dca722899 --- /dev/null +++ b/python/ql/src/Statements/AssertOnTuple.qhelp @@ -0,0 +1,46 @@ + + + +

    When you define an assert statement to test a tuple, the test +will either always succeed (if the tuple is non-empty) or always +fail (if the tuple is empty).

    + +

    This error usually occurs when the programmer writes +assert (condition, message) + +instead of the correct form +assert condition, message + +

    + +
    + + +

    Review the code and determine the purpose of the assert statement:

    +
      +
    • +If the "tuple" has been created in error, then remove the parentheses and correct the statement.
    • +
    • If validation of a tuple is intended, then you should define an assert statement +for each element of the tuple.
    • +
    + +
    + +

    The statement assert (xxx, yyy) attempts to test a "tuple" (xxx, yyy). +The original intention may be any of the alternatives listed below:

    + + +

    If you want to define a validity check on the values of a tuple then these must be tested +individually.

    + +
    + + +
  • Python Language Reference: The assert statement.
  • +
  • Tutorials Point: Assertions in Python.
  • + + +
    +
    diff --git a/python/ql/src/Statements/AssertOnTuple.ql b/python/ql/src/Statements/AssertOnTuple.ql new file mode 100644 index 00000000000..8ca00f2391e --- /dev/null +++ b/python/ql/src/Statements/AssertOnTuple.ql @@ -0,0 +1,24 @@ +/** + * @name Asserting a tuple + * @description Using an assert statement to test a tuple provides no validity checking. + * @kind problem + * @tags reliability + * maintainability + * external/cwe/cwe-670 + * @problem.severity error + * @sub-severity low + * @precision very-high + * @id py/asserts-tuple + */ + +import python + +from Assert a, string b, string non +where a.getTest() instanceof Tuple and + (if exists(((Tuple)a.getTest()).getAnElt()) then + (b = "True" and non = "non-") + else + (b = "False" and non = "") + ) +select a, "Assertion of " + non + "empty tuple is always " + b + "." + diff --git a/python/ql/src/Statements/BreakOrReturnInFinally.qhelp b/python/ql/src/Statements/BreakOrReturnInFinally.qhelp new file mode 100644 index 00000000000..e759b6e8cb4 --- /dev/null +++ b/python/ql/src/Statements/BreakOrReturnInFinally.qhelp @@ -0,0 +1,32 @@ + + + +

    When a break or return statement is used in a +finally block this causes the try-finally block +to exit immediately discarding the exception. This is unlikely to be the +intention of the developer and makes the code more difficult to read.

    + +
    + + +

    Either move the break or return statement to +immediately after the finally block or use an explicit +except block to handle the exception.

    + +

    These modifications are behavior changing so you must take care to ensure +that the resulting behavior is correct.

    + +
    + + +
  • +Python Language Reference: +The try statement, +The break statement, +The return statement.
  • + + +
    +
    diff --git a/python/ql/src/Statements/BreakOrReturnInFinally.ql b/python/ql/src/Statements/BreakOrReturnInFinally.ql new file mode 100644 index 00000000000..1d9bc7296c9 --- /dev/null +++ b/python/ql/src/Statements/BreakOrReturnInFinally.ql @@ -0,0 +1,27 @@ +/** + * @name 'break' or 'return' statement in finally + * @description Using a Break or Return statement in a finally block causes the + * Try-finally block to exit, discarding the exception. + * @kind problem + * @tags reliability + * maintainability + * external/cwe/cwe-584 + * @problem.severity warning + * @sub-severity low + * @precision medium + * @id py/exit-from-finally + */ + +import python + +from Stmt s, string kind +where +s instanceof Return and kind = "return" and exists(Try t | t.getFinalbody().contains(s)) +or +s instanceof Break and kind = "break" and +exists(Try t | t.getFinalbody().contains(s) | + not exists(For loop | loop.contains(s) and t.getFinalbody().contains(loop)) + and + not exists(While loop | loop.contains(s) and t.getFinalbody().contains(loop)) +) +select s, "'" + kind + "' in a finally block will swallow any exceptions raised." diff --git a/python/ql/src/Statements/C_StyleParentheses.py b/python/ql/src/Statements/C_StyleParentheses.py new file mode 100644 index 00000000000..b3b28316ce2 --- /dev/null +++ b/python/ql/src/Statements/C_StyleParentheses.py @@ -0,0 +1,23 @@ + +#Written in Java or C style +def gcd(a, b): + while(a != 0 and b != 0): + if(a > b): + a = a % b + else: + b = b % a + if(a == 0): + return (b) + return (a) + +#Written in a more Pythonic style +def gcd(a, b): + while a != 0 and b != 0: + if a > b: + a = a % b + else: + b = b % a + if a == 0: + return b + return a + diff --git a/python/ql/src/Statements/C_StyleParentheses.qhelp b/python/ql/src/Statements/C_StyleParentheses.qhelp new file mode 100644 index 00000000000..772e7822895 --- /dev/null +++ b/python/ql/src/Statements/C_StyleParentheses.qhelp @@ -0,0 +1,43 @@ + + + +

    Python is designed to be more readable, at least for Western readers, than languages in the C family. +This is achieved, in part, by using English language keywords and more familiar punctuation. +Top level expressions are thus bracketed by the keyword and either a colon or new line, which can be more +easily picked out by eye than parentheses. +

    + +

    Using superfluous parentheses can impair this readability by making the code harder to scan and parse by eye. +Parentheses often serve as a visual clue for more complex expressions, and adding them unnecessarily can be distracting. +

    + +

    One notable exception to this rule is when an expression has to span multiple lines. In that case, using parentheses is +preferred to using a backslash for line continuation. +

    + +
    + + +

    +Remove the unnecessary parentheses. +

    + +
    + +

    In the first of the two examples, most of the expressions are wrapped in parentheses. +This is harder to read than the second example, especially to a programmer more familiar with Python than with C or Java. + +

    + +
    + + +
  • Python Language Reference: Full grammar specification.
  • +
  • Google Python Style Guide: Use parentheses sparingly.
  • +
  • Python PEP Index: PEP 8.
  • + + +
    +
    diff --git a/python/ql/src/Statements/C_StyleParentheses.ql b/python/ql/src/Statements/C_StyleParentheses.ql new file mode 100644 index 00000000000..c670876e15a --- /dev/null +++ b/python/ql/src/Statements/C_StyleParentheses.ql @@ -0,0 +1,32 @@ +/** + * @name C-style condition + * @description Putting parentheses around a condition in an 'if' or 'while' statement is + * unnecessary and harder to read. + * @kind problem + * @tags maintainability + * @problem.severity recommendation + * @sub-severity low + * @deprecated + * @precision very-high + * @id py/c-style-parentheses + */ + +import python + +from Expr e, Location l, string kind, string what +where e.isParenthesized() and +not e instanceof Tuple and +( + exists(If i | i.getTest() = e) and kind = "if" and what = "condition" + or + exists(While w | w.getTest() = e) and kind = "while" and what = "condition" + or + exists(Return r | r.getValue() = e) and kind = "return" and what = "value" + or + exists(Assert a | a.getTest() = e and not exists(a.getMsg())) and kind = "assert" and what = "test" +) +and +// These require parentheses +(not e instanceof Yield and not e instanceof YieldFrom and not e instanceof GeneratorExp) and +l = e.getLocation() and l.getStartLine() = l.getEndLine() +select e, "Parenthesized " + what + " in '" + kind + "' statement." diff --git a/python/ql/src/Statements/ConstantInConditional.py b/python/ql/src/Statements/ConstantInConditional.py new file mode 100644 index 00000000000..3abdf99836d --- /dev/null +++ b/python/ql/src/Statements/ConstantInConditional.py @@ -0,0 +1,9 @@ +if True: + print "True is true!" + +def limit(l): + if l < -100: + l = -100 + if 1 > 100: + l = 100 + return l diff --git a/python/ql/src/Statements/ConstantInConditional.qhelp b/python/ql/src/Statements/ConstantInConditional.qhelp new file mode 100644 index 00000000000..d388a0b85a9 --- /dev/null +++ b/python/ql/src/Statements/ConstantInConditional.qhelp @@ -0,0 +1,34 @@ + + + +

    Using a constant value as a test in a conditional statement renders the statement pointless as only +one branch will be run regardless of any other factors.

    + +
    + +

    If the conditional statement is required for debugging or similar then use a variable instead. +Otherwise, remove the conditional statement and any associated dead code.

    + +
    + +

    In the first example the if statement will always be executed and therefore can be removed. The +contents of the statement should be kept though.

    + +

    In the second example the statement l = 100 is never executed because 1 > 100 is always false. +However, it is likely that the intention was l > 100 (the number '1' being misread as the letter 'l') +and that the test should be corrected, rather than deleted. + +

    + +
    + + +
  • Python: The If Statement.
  • +
  • Python: The While Statement.
  • +
  • Python: Literals (constant values).
  • + + +
    +
    diff --git a/python/ql/src/Statements/ConstantInConditional.ql b/python/ql/src/Statements/ConstantInConditional.ql new file mode 100644 index 00000000000..06a63cf037a --- /dev/null +++ b/python/ql/src/Statements/ConstantInConditional.ql @@ -0,0 +1,42 @@ +/** + * @name Constant in conditional expression or statement + * @description The conditional is always true or always false + * @kind problem + * @tags maintainability + * useless-code + * external/cwe/cwe-561 + * external/cwe/cwe-570 + * external/cwe/cwe-571 + * @problem.severity warning + * @sub-severity low + * @precision very-high + * @id py/constant-conditional-expression + */ + +import python + + +predicate is_condition(Expr cond) { + exists(If i | i.getTest() = cond) or + exists(IfExp ie | ie.getTest() = cond) +} + +/* Treat certain unmodified builtins as constants as well. */ +predicate effective_constant(Name cond) { + exists(GlobalVariable var | var = cond.getVariable() and not exists(NameNode f | f.defines(var)) | + var.getId() = "True" or var.getId() = "False" or var.getId() = "NotImplemented" + ) +} + +predicate test_makes_code_unreachable(Expr cond) { + exists(If i | i.getTest() = cond | i.getStmt(0).isUnreachable() or i.getOrelse(0).isUnreachable()) + or + exists(While w | w.getTest() = cond and w.getStmt(0).isUnreachable()) +} + + +from Expr cond +where is_condition(cond) and (cond.isConstant() or effective_constant(cond)) and +/* Ignore cases where test makes code unreachable, as that is handled in different query */ +not test_makes_code_unreachable(cond) +select cond, "Testing a constant will always give the same result." 
diff --git a/python/ql/src/Statements/DocStrings.py b/python/ql/src/Statements/DocStrings.py new file mode 100644 index 00000000000..370d4fb6bbd --- /dev/null +++ b/python/ql/src/Statements/DocStrings.py @@ -0,0 +1,2 @@ +def add(x, y): + return x + y \ No newline at end of file diff --git a/python/ql/src/Statements/DocStrings.qhelp b/python/ql/src/Statements/DocStrings.qhelp new file mode 100644 index 00000000000..1b9938a1fba --- /dev/null +++ b/python/ql/src/Statements/DocStrings.qhelp @@ -0,0 +1,39 @@ + + + + + +

    PEP8 mandates that all public modules, classes, functions and methods should have a documentation +string. Ensuring that every public module, class, function and method is documented makes it easier +for other developers to maintain the code. +

    + +
    + + +

    If a module, class, function or method needs to be public then add a documentation string that +describes the +purpose or use of the object (see PEP 257 for guidelines). If the object does not need to be public +then make it "private" by changing its name from xxx to _xxx.

    + +
    + +

    The following simple, public function should be updated to include a documentation string +immediately after the def line.

    + + +

    You might insert the documentation string: """Return the sum of x and y.""" on line 2. + +

    + + +
  • Python PEP 8: Documentation strings.
  • +
  • Python PEP 257: Documentation string conventions +.
  • + + + +
    +
    diff --git a/python/ql/src/Statements/DocStrings.ql b/python/ql/src/Statements/DocStrings.ql new file mode 100644 index 00000000000..4bf458bd22b --- /dev/null +++ b/python/ql/src/Statements/DocStrings.ql @@ -0,0 +1,50 @@ +/** + * @name Missing docstring + * @description Omitting documentation strings from public classes, functions or methods + * makes it more difficult for other developers to maintain the code. + * @kind problem + * @tags maintainability + * @problem.severity recommendation + * @sub-severity low + * @precision medium + * @id py/missing-docstring + */ +/* NOTE: precision of 'medium' reflects the lack of precision in the underlying rule. + * Do we care whether a function has a docstring? That often depends on the reader of that docstring. + */ + +import python + +predicate needs_docstring(Scope s) { + s.isPublic() and + ( + not s instanceof Function + or + function_needs_docstring(s) + ) +} + +predicate function_needs_docstring(Function f) { + not exists(FunctionObject fo, FunctionObject base | fo.overrides(base) and fo.getFunction() = f | + not function_needs_docstring(base.getFunction())) and + f.getName() != "lambda" and + (f.getMetrics().getNumberOfLinesOfCode() - count(f.getADecorator())) > 2 + and not exists(PythonPropertyObject p | + p.getGetter().getFunction() = f or + p.getSetter().getFunction() = f + ) +} + +string scope_type(Scope s) { + result = "Module" and s instanceof Module and not ((Module)s).isPackage() + or + result = "Class" and s instanceof Class + or + result = "Function" and s instanceof Function +} + +from Scope s +where needs_docstring(s) and not exists(s.getDocString()) +select s, scope_type(s) + " " + s.getName() + " does not have a docstring" + + diff --git a/python/ql/src/Statements/ExecUsed.py b/python/ql/src/Statements/ExecUsed.py new file mode 100644 index 00000000000..6ea92f1d0b3 --- /dev/null +++ b/python/ql/src/Statements/ExecUsed.py @@ -0,0 +1,3 @@ + +to_execute = get_untrusted_code() +exec to_execute diff 
--git a/python/ql/src/Statements/ExecUsed.qhelp b/python/ql/src/Statements/ExecUsed.qhelp new file mode 100644 index 00000000000..f7b34581578 --- /dev/null +++ b/python/ql/src/Statements/ExecUsed.qhelp @@ -0,0 +1,30 @@ + + + + +

    Using exec may introduce a security vulnerability into your program unless +you ensure that the data passed to the statement is neutralized. +

    + +
    + +

    Review all uses of the exec statement (Python 2) or function (Python 3). +Where possible, replace the exec statement or function with normal Python code. +Alternatively, ensure that all data passed to the statement is neutralized.

    + +
    + +

    In the example, the exec statement is used and may result in executing code from an attacker.

    + + + +
    + + +
  • Python 2.7 Language Reference: The exec statement.
  • +
  • Python 3 Standard Library: exec.
  • + +
    +
    diff --git a/python/ql/src/Statements/ExecUsed.ql b/python/ql/src/Statements/ExecUsed.ql new file mode 100644 index 00000000000..7e6363ae3c8 --- /dev/null +++ b/python/ql/src/Statements/ExecUsed.ql @@ -0,0 +1,27 @@ +/** + * @name 'exec' used + * @description The 'exec' statement or function is used which could cause arbitrary code to be executed. + * @kind problem + * @tags security + * correctness + * @problem.severity error + * @sub-severity high + * @precision low + * @id py/use-of-exec + */ + +import python + +string message() { + result = "The 'exec' statement is used." and major_version() = 2 + or + result = "The 'exec' function is used." and major_version() = 3 +} + +predicate exec_function_call(Call c) { + major_version() = 3 and exists(GlobalVariable exec | exec = ((Name)c.getFunc()).getVariable() and exec.getId() = "exec") +} + +from AstNode exec +where exec_function_call(exec) or exec instanceof Exec +select exec, message() \ No newline at end of file diff --git a/python/ql/src/Statements/ExitUsed.py b/python/ql/src/Statements/ExitUsed.py new file mode 100644 index 00000000000..ae03479497e --- /dev/null +++ b/python/ql/src/Statements/ExitUsed.py @@ -0,0 +1,7 @@ + +def main(): + try: + process() + except Exception as ex: + print(ex) + exit(1) diff --git a/python/ql/src/Statements/IterableStringOrSequence.py b/python/ql/src/Statements/IterableStringOrSequence.py new file mode 100644 index 00000000000..7f5eb2959a0 --- /dev/null +++ b/python/ql/src/Statements/IterableStringOrSequence.py @@ -0,0 +1,18 @@ + +#Mistakenly mixed list and string +def greeting(): + if is_global(): + greet = [ "Hello", "World" ] + else: + greet = "Hello" + for word in greet: + print(word) + +#Only use list +def fixed_greeting(): + if is_global(): + greet = [ "Hello", "World" ] + else: + greet = [ "Hello" ] + for word in greet: + print(word) diff --git a/python/ql/src/Statements/IterableStringOrSequence.qhelp b/python/ql/src/Statements/IterableStringOrSequence.qhelp new file mode 
100644 index 00000000000..044474de79b --- /dev/null +++ b/python/ql/src/Statements/IterableStringOrSequence.qhelp @@ -0,0 +1,47 @@ + + + +

    The for statement is designed to allow you to iterate over the elements of a +sequence or other iterable object. Strings in Python are iterable, and often used as such. +However, they are also often considered, not as sequences of characters, but as atomic entities. +

    + +

    +One source of defects in Python is mistakenly iterating over a non-iterable object such as an integer. +This sort of defect is easily detected as a TypeError will be raised. However, if a string +is mistakenly used as the iterable in a for statement, which also receives other sequences +(such as lists) then the code will iterate over the string one character at a time. +This is probably not what the programmer intended and results in errors that are hard to find. +

    + +
    + + +

    Since this defect usually indicates a logical error, it is not possible to give a general method +for addressing the defect. However, adding a guard that checks that the iterable is not a string +could be worthwhile. +

    + +
    + +

    +In this example, the loop may iterate over "Hello", producing one character per line, +as well as over [ "Hello", "World" ]. +It is likely that the programmer forgot to wrap the "Hello" in brackets. +

    + + +
    + + +
  • Python Language Reference: The for statement, + object.__iter__.
  • +
  • Python Standard Library: Iterator types.
  • +
  • Scipy lecture notes: Iterators, +generator expressions and generators.
  • + + +
    +
    diff --git a/python/ql/src/Statements/IterableStringOrSequence.ql b/python/ql/src/Statements/IterableStringOrSequence.ql new file mode 100644 index 00000000000..a44c3ae7286 --- /dev/null +++ b/python/ql/src/Statements/IterableStringOrSequence.ql @@ -0,0 +1,30 @@ +/** + * @name Iterable can be either a string or a sequence + * @description Iteration over either a string or a sequence in the same loop can cause errors that are hard to find. + * @kind problem + * @tags reliability + * maintainability + * non-local + * @problem.severity error + * @sub-severity low + * @precision high + * @id py/iteration-string-and-sequence + */ + +import python + +predicate is_a_string_type(ClassObject seqtype) { + seqtype = theBytesType() and major_version() = 2 + or + seqtype = theUnicodeType() +} + +from For loop, ControlFlowNode iter, Object str, Object seq, ControlFlowNode seq_origin, ClassObject strtype, ClassObject seqtype, ControlFlowNode str_origin +where loop.getIter().getAFlowNode() = iter and +iter.refersTo(str, strtype, str_origin) and +iter.refersTo(seq, seqtype, seq_origin) and +is_a_string_type(strtype) and +seqtype.isIterable() and +not is_a_string_type(seqtype) + +select loop, "Iteration over $@, of class " + seqtype.getName() + ", may also iterate over $@.", seq_origin, "sequence", str_origin, "string" \ No newline at end of file diff --git a/python/ql/src/Statements/MismatchInMultipleAssignment.py b/python/ql/src/Statements/MismatchInMultipleAssignment.py new file mode 100644 index 00000000000..1671149c52c --- /dev/null +++ b/python/ql/src/Statements/MismatchInMultipleAssignment.py @@ -0,0 +1,14 @@ +# Fibonacci series 1: +# the sum of two elements defines the next + +a, b = 0, 1, 1 # Assignment fails: accidentally put three values on right +while b < 10: + print b + a, b = b, a+b + +# Fibonacci series 2: +# the sum of two elements defines the next +a, b = 0, 1 # Assignment succeeds: two variables on left and two values on right +while b < 10: + print b + a, b = 
b, a+b diff --git a/python/ql/src/Statements/MismatchInMultipleAssignment.qhelp b/python/ql/src/Statements/MismatchInMultipleAssignment.qhelp new file mode 100644 index 00000000000..dc577baa97e --- /dev/null +++ b/python/ql/src/Statements/MismatchInMultipleAssignment.qhelp @@ -0,0 +1,35 @@ + + + + + +

    An assignment statement evaluates a sequence expression and assigns each item of the sequence to +one of the variables on the left. If there is a mismatch between the number of variables on +the left and the values in the sequence on the right of the statement, then an exception is raised +at runtime. +

    + +
    + +

    Ensure that the number of variables on either side of the assignment match.

    + +
    + +

    The following examples show a simple definition of the Fibonacci series. In the first example, +one of the values in the assignment has been duplicated, causing an exception at runtime.

    + + + +
    + + +
  • Python Language Reference: +Assignment statements.
  • +
  • Python Tutorial: +First steps towards programming.
  • + + +
    +
    diff --git a/python/ql/src/Statements/MismatchInMultipleAssignment.ql b/python/ql/src/Statements/MismatchInMultipleAssignment.ql new file mode 100644 index 00000000000..8dee6d5eb5f --- /dev/null +++ b/python/ql/src/Statements/MismatchInMultipleAssignment.ql @@ -0,0 +1,58 @@ +/** + * @name Mismatch in multiple assignment + * @description Assigning multiple variables without ensuring that you define a + * value for each variable causes an exception at runtime. + * @kind problem + * @tags reliability + * correctness + * types + * @problem.severity error + * @sub-severity low + * @precision very-high + * @id py/mismatched-multiple-assignment + */ + +import python + +private int len(ExprList el) { + result = count(el.getAnItem()) +} + +predicate mismatched(Assign a, int lcount, int rcount, Location loc, string sequenceType) { + exists(ExprList l, ExprList r | + (a.getATarget().(Tuple).getElts() = l or + a.getATarget().(List).getElts() = l) + and + ((a.getValue().(Tuple).getElts() = r and sequenceType = "tuple") or + (a.getValue().(List).getElts() = r and sequenceType = "list")) + and + loc = a.getValue().getLocation() and + lcount = len(l) and + rcount = len(r) and + lcount != rcount and + not exists(Starred s | l.getAnItem() = s or r.getAnItem() = s) + ) +} + +predicate mismatched_tuple_rhs(Assign a, int lcount, int rcount, Location loc) { + exists(ExprList l, TupleObject r, AstNode origin | + (a.getATarget().(Tuple).getElts() = l or + a.getATarget().(List).getElts() = l) + and + a.getValue().refersTo(r, origin) and + loc = origin.getLocation() and + lcount = len(l) and + rcount = r.getLength() and + lcount != rcount and + not exists(Starred s | l.getAnItem() = s) + ) +} + + +from Assign a, int lcount, int rcount, Location loc, string sequenceType +where + mismatched(a, lcount, rcount, loc, sequenceType) + or + mismatched_tuple_rhs(a, lcount, rcount, loc) and + sequenceType = "tuple" +select a, "Left hand side of assignment contains " + lcount + " variables, but 
right hand side is a $@ of length " + rcount + "." , loc, sequenceType diff --git a/python/ql/src/Statements/ModificationOfLocals.py b/python/ql/src/Statements/ModificationOfLocals.py new file mode 100644 index 00000000000..3274d3cbb59 --- /dev/null +++ b/python/ql/src/Statements/ModificationOfLocals.py @@ -0,0 +1,10 @@ + +def modifies_locals_sum(x, y): + locals()['z'] = x + y + #z will not be defined as modifications to locals() do not alter the local variables. + return z + +def fixed_sum(x, y): + z = x + y + return z + diff --git a/python/ql/src/Statements/ModificationOfLocals.qhelp b/python/ql/src/Statements/ModificationOfLocals.qhelp new file mode 100644 index 00000000000..a94508447d9 --- /dev/null +++ b/python/ql/src/Statements/ModificationOfLocals.qhelp @@ -0,0 +1,33 @@ + + + +

    + The dictionary returned by locals() is not a view of the function's locals, but a copy. + Therefore, modification of the dictionary returned from locals() will not modify the local + variables of the function. +

    + + +
    + + +

    If the intention is to modify a local variable, then do so directly. +

    + +
    + +

    In this example, rather than assigning to the variable z directly, the dictionary returned by locals() +is modified. + +

    + +
    + + +
  • Python Standard Library: locals().
  • +
  • Python Tutorial: for statements.
  • + +
    +
    diff --git a/python/ql/src/Statements/ModificationOfLocals.ql b/python/ql/src/Statements/ModificationOfLocals.ql new file mode 100644 index 00000000000..c65be7b3366 --- /dev/null +++ b/python/ql/src/Statements/ModificationOfLocals.ql @@ -0,0 +1,43 @@ +/** + * @name Modification of dictionary returned by locals() + * @description Modifications of the dictionary returned by locals() are not propagated to the local variables of a function. + * @kind problem + * @tags reliability + * correctness + * @problem.severity warning + * @sub-severity low + * @precision very-high + * @id py/modification-of-locals + */ + +import python + +Object aFunctionLocalsObject() { + exists(Call c, Name n, GlobalVariable v | + c = result.getOrigin() and + n = c.getFunc() and + n.getVariable() = v and + v.getId() = "locals" and + c.getScope() instanceof FastLocalsFunction + ) +} + + + +predicate modification_of_locals(ControlFlowNode f) { + f.(SubscriptNode).getValue().refersTo(aFunctionLocalsObject()) and (f.isStore() or f.isDelete()) + or + exists(string mname, AttrNode attr | + attr = f.(CallNode).getFunction() and + attr.getObject(mname).refersTo(aFunctionLocalsObject(), _) | + mname = "pop" or + mname = "popitem" or + mname = "update" or + mname = "clear" + ) +} + +from AstNode a, ControlFlowNode f +where modification_of_locals(f) and a = f.getNode() + +select a, "Modification of the locals() dictionary will have no effect on the local variables." 
diff --git a/python/ql/src/Statements/NestedLoopsSameVariable.py b/python/ql/src/Statements/NestedLoopsSameVariable.py new file mode 100644 index 00000000000..4e1fd333e08 --- /dev/null +++ b/python/ql/src/Statements/NestedLoopsSameVariable.py @@ -0,0 +1,6 @@ + +for var in range(3): + for var in range(3): + pass + print (var) # Prints 2 2 2 not 0 1 2 as might be expected + diff --git a/python/ql/src/Statements/NestedLoopsSameVariable.qhelp b/python/ql/src/Statements/NestedLoopsSameVariable.qhelp new file mode 100644 index 00000000000..e2e1806d2c8 --- /dev/null +++ b/python/ql/src/Statements/NestedLoopsSameVariable.qhelp @@ -0,0 +1,32 @@ + + + +

    + In Python variables have function-wide scope which means that if two variables have the same name in the + same scope, they are in fact one variable. Consequently, nested loops in which the target variables have + the same name in fact share a single variable. Such loops are difficult to understand as the inner loop will + modify the target variable of the outer loop; this may be a typographical error. +

    + + +
    + + +

    Carefully examine the code and check for possible errors, +particularly considering what would happen if the inner or outer variable were renamed. +

    + +
    + + + + + + +
  • Python Language Reference: The for statement.
  • +
  • Python Tutorial: for statements.
  • + +
    +
    diff --git a/python/ql/src/Statements/NestedLoopsSameVariable.ql b/python/ql/src/Statements/NestedLoopsSameVariable.ql new file mode 100644 index 00000000000..6c1ed0f68ff --- /dev/null +++ b/python/ql/src/Statements/NestedLoopsSameVariable.ql @@ -0,0 +1,29 @@ +/** + * @name Nested loops with same variable + * @description Nested loops in which the target variable is the same for each loop make + * the behavior of the loops difficult to understand. + * @kind problem + * @tags maintainability + * correctness + * @problem.severity recommendation + * @sub-severity high + * @precision very-high + * @id py/nested-loops-with-same-variable + */ +import python + +predicate loop_variable(For f, Variable v) { + f.getTarget().defines(v) +} + +predicate variableUsedInNestedLoops(For inner, For outer, Variable v) { + /* Only treat loops in body as inner loops. Loops in the else clause are ignored. */ + outer.getBody().contains(inner) and loop_variable(inner, v) and loop_variable(outer, v) + /* Ignore cases where there is no use of the variable or the only use is in the inner loop */ + and exists(Name n | n.uses(v) and outer.contains(n) and not inner.contains(n)) +} + +from For inner, For outer, Variable v +where variableUsedInNestedLoops(inner, outer, v) +select inner, "Nested for statement uses loop variable '" + v.getId() + "' of enclosing $@.", + outer, "for statement" diff --git a/python/ql/src/Statements/NestedLoopsSameVariableWithReuse.py b/python/ql/src/Statements/NestedLoopsSameVariableWithReuse.py new file mode 100644 index 00000000000..b40c929374d --- /dev/null +++ b/python/ql/src/Statements/NestedLoopsSameVariableWithReuse.py @@ -0,0 +1,16 @@ +def largest_elements(l): + for x in l: + maxnum = 0 + for x in x: + maxnum = max(x, maxnum) + # The outer loop variable x has now been overwritten by the inner loop. 
+ print "The largest element in the list", x, "is", maxnum + + +def largest_elements_correct(l): + for x in l: + maxnum = 0 + for y in x: + maxnum = max(y, maxnum) + print "The largest element in the list", x, "is", maxnum + diff --git a/python/ql/src/Statements/NestedLoopsSameVariableWithReuse.qhelp b/python/ql/src/Statements/NestedLoopsSameVariableWithReuse.qhelp new file mode 100644 index 00000000000..25b047ef6df --- /dev/null +++ b/python/ql/src/Statements/NestedLoopsSameVariableWithReuse.qhelp @@ -0,0 +1,42 @@ + + + +

    + In Python variables have function-wide scope which means that if two + variables have the same name in the same scope, they are in fact one + variable. Consequently, nested loops in which the target variables have the + same name in fact share a single variable. Such loops are difficult to + understand as the inner loop will modify the target variable of the outer + loop. This may lead to unexpected behavior if the loop variable is used + after the inner loop has terminated. +

    + +
    + + +

    + Rename the inner loop variable. +

    + +
    + +

    + This example shows a function that processes a sequence of lists of numbers. It + prints out the largest element from each of the lists. In the first version, the + variable x gets overwritten by the inner loop, resulting in the + wrong output. In the second function, the error has been fixed by renaming the + inner loop variable to stop it overwriting the outer loop variable. +

    + + + +
    + + +
  • Python Language Reference: The for statement.
  • +
  • Python Tutorial: for statements.
  • + +
    +
    diff --git a/python/ql/src/Statements/NestedLoopsSameVariableWithReuse.ql b/python/ql/src/Statements/NestedLoopsSameVariableWithReuse.ql new file mode 100644 index 00000000000..0082f8c3c1a --- /dev/null +++ b/python/ql/src/Statements/NestedLoopsSameVariableWithReuse.ql @@ -0,0 +1,36 @@ +/** + * @name Nested loops with same variable reused after inner loop body + * @description Redefining a variable in an inner loop and then using + * the variable in an outer loop causes unexpected behavior. + * @kind problem + * @tags maintainability + * correctness + * @problem.severity error + * @sub-severity low + * @precision very-high + * @id py/nested-loops-with-same-variable-reused + */ + +import python + +predicate loop_variable_ssa(For f, Variable v, SsaVariable s) { + f.getTarget().getAFlowNode() = s.getDefinition() and v = s.getVariable() +} + +predicate variableUsedInNestedLoops(For inner, For outer, Variable v, Name n) { + /* Ignore cases where there is no use of the variable or the only use is in the inner loop. */ + outer.contains(n) + and not inner.contains(n) + /* Only treat loops in body as inner loops. Loops in the else clause are ignored. 
*/ + and outer.getBody().contains(inner) + and exists(SsaVariable s | + loop_variable_ssa(inner, v, s.getAnUltimateDefinition()) + and loop_variable_ssa(outer, v, _) + and s.getAUse().getNode() = n + ) +} + +from For inner, For outer, Variable v, Name n +where variableUsedInNestedLoops(inner, outer, v, n) +select inner, "Nested for statement $@ loop variable '" + v.getId() + "' of enclosing $@.", n, "uses", + outer, "for statement" \ No newline at end of file diff --git a/python/ql/src/Statements/NonIteratorInForLoop.py b/python/ql/src/Statements/NonIteratorInForLoop.py new file mode 100644 index 00000000000..40de180da3c --- /dev/null +++ b/python/ql/src/Statements/NonIteratorInForLoop.py @@ -0,0 +1,6 @@ + + +def illegal_for_loop(seq = None): + for x in seq: + print (x) + diff --git a/python/ql/src/Statements/NonIteratorInForLoop.qhelp b/python/ql/src/Statements/NonIteratorInForLoop.qhelp new file mode 100644 index 00000000000..0165db9fea9 --- /dev/null +++ b/python/ql/src/Statements/NonIteratorInForLoop.qhelp @@ -0,0 +1,36 @@ + + + +

    The for statement is designed to allow you to iterate over the elements of a +sequence or other iterable object. If a non-iterable object is used in a for statement +(for var in object:) then a TypeError will be raised. +

    + +
    + + +

    Since this defect usually indicates a logical error, it is not possible to give a general method +for addressing the defect.

    + +
    + +

    +In this example, the loop may attempt to iterate over None, which is not iterable. +It is likely that the programmer forgot to test for None before the loop. +

    + + +
    + + +
  • Python Language Reference: The for statement, + object.__iter__.
  • +
  • Python Standard Library: Iterator types.
  • +
  • Scipy lecture notes: Iterators, +generator expressions and generators.
  • + + +
    +
    diff --git a/python/ql/src/Statements/NonIteratorInForLoop.ql b/python/ql/src/Statements/NonIteratorInForLoop.ql new file mode 100644 index 00000000000..27d8d47d31f --- /dev/null +++ b/python/ql/src/Statements/NonIteratorInForLoop.ql @@ -0,0 +1,23 @@ +/** + * @name Non-iterable used in for loop + * @description Using a non-iterable as the object in a 'for' loop causes a TypeError. + * @kind problem + * @tags reliability + * correctness + * types + * @problem.severity error + * @sub-severity low + * @precision high + * @id py/non-iterable-in-for-loop + */ + +import python + +from For loop, ControlFlowNode iter, ClassObject t, ControlFlowNode origin +where loop.getIter().getAFlowNode() = iter and +iter.refersTo(_, t, origin) and +not t.isIterable() and not t.failedInference() and +not t = theNoneType() and +not t.isDescriptorType() + +select loop, "$@ of class '$@' may be used in for-loop.", origin, "Non-iterator", t, t.getName() diff --git a/python/ql/src/Statements/RedundantAssignment.py b/python/ql/src/Statements/RedundantAssignment.py new file mode 100644 index 00000000000..74ebd85a622 --- /dev/null +++ b/python/ql/src/Statements/RedundantAssignment.py @@ -0,0 +1,5 @@ +class Spam: + + def __init__(self, eggs): + eggs = eggs + diff --git a/python/ql/src/Statements/RedundantAssignment.qhelp b/python/ql/src/Statements/RedundantAssignment.qhelp new file mode 100644 index 00000000000..67cb7eb17d5 --- /dev/null +++ b/python/ql/src/Statements/RedundantAssignment.qhelp @@ -0,0 +1,29 @@ + + + +

    Assigning a variable to itself is redundant and often an indication of a mistake in the code.

    + +
    + +

    Check the assignment carefully for mistakes. If the assignment is truly redundant and not simply +incorrect then remove it.

    + +
    + +

    In this example the programmer clearly intends to assign to self.eggs but made a +mistake.

    + + + +
    + + + +
  • Python Language Reference: +The assignment statement.
  • + + +
    +
    diff --git a/python/ql/src/Statements/RedundantAssignment.ql b/python/ql/src/Statements/RedundantAssignment.ql new file mode 100644 index 00000000000..231f33e88dc --- /dev/null +++ b/python/ql/src/Statements/RedundantAssignment.ql @@ -0,0 +1,92 @@ +/** + * @name Redundant assignment + * @description Assigning a variable to itself is useless and very likely indicates an error in the code. + * @kind problem + * @tags reliability + * useless-code + * external/cwe/cwe-563 + * @problem.severity error + * @sub-severity low + * @precision very-high + * @id py/redundant-assignment + */ + +import python +predicate assignment(AssignStmt a, Expr left, Expr right) +{ + a.getATarget() = left and a.getValue() = right +} + +predicate corresponding(Expr left, Expr right) { + assignment(_, left, right) + or + exists(Attribute la, Attribute ra | + corresponding(la, ra) and + left = la.getObject() and + right = ra.getObject()) +} + +predicate same_value(Expr left, Expr right) { + same_name(left, right) + or + same_attribute(left, right) +} + +predicate maybe_defined_in_outer_scope(Name n) { + exists(SsaVariable v | v.getAUse().getNode() = n | + v.maybeUndefined() + ) +} + +Variable relevant_var(Name n) { + n.getVariable() = result and + (corresponding(n, _) or corresponding(_, n)) +} + +predicate same_name(Name n1, Name n2) { + corresponding(n1, n2) and + relevant_var(n1) = relevant_var(n2) and + not exists(builtin_object(n1.getId())) and + not maybe_defined_in_outer_scope(n2) +} + +ClassObject value_type(Attribute a) { + a.getObject().refersTo(_, result, _) +} + +predicate is_property_access(Attribute a) { + value_type(a).lookupAttribute(a.getName()) instanceof PropertyObject +} + +predicate same_attribute(Attribute a1, Attribute a2) { + corresponding(a1, a2) and a1.getName() = a2.getName() and same_value(a1.getObject(), a2.getObject()) and + exists(value_type(a1)) and not is_property_access(a1) +} + +int pyflakes_commented_line(File file) { + exists(Comment c | 
c.getText().toLowerCase().matches("%pyflakes%") | + c.getLocation().hasLocationInfo(file.getName(), result, _, _, _) + ) +} + +predicate pyflakes_commented(AssignStmt assignment) { + exists(Location loc | + assignment.getLocation() = loc and + loc.getStartLine() = pyflakes_commented_line(loc.getFile())) +} + +predicate side_effecting_lhs(Attribute lhs) { + exists(ClassObject cls, ClassObject decl | + lhs.getObject().refersTo(_, cls, _) and + decl = cls.getAnImproperSuperType() and + not decl.isBuiltin() | + decl.declaresAttribute("__setattr__") + ) +} + +from AssignStmt a, Expr left, Expr right +where assignment(a, left, right) + and same_value(left, right) + and not pyflakes_commented(a) and + not side_effecting_lhs(left) +select a, "This assignment assigns a variable to itself." diff --git a/python/ql/src/Statements/ShouldUseWithStatement.py b/python/ql/src/Statements/ShouldUseWithStatement.py new file mode 100644 index 00000000000..958905d0b82 --- /dev/null +++ b/python/ql/src/Statements/ShouldUseWithStatement.py @@ -0,0 +1,10 @@ + +f = open("filename") +try: # Method of ensuring file closure + f.write(...) +finally: + f.close() + + +with open("filename") as f: # Simpler method of ensuring file closure + f.write(...) \ No newline at end of file diff --git a/python/ql/src/Statements/ShouldUseWithStatement.qhelp b/python/ql/src/Statements/ShouldUseWithStatement.qhelp new file mode 100644 index 00000000000..e024ecef406 --- /dev/null +++ b/python/ql/src/Statements/ShouldUseWithStatement.qhelp @@ -0,0 +1,37 @@ + + + + +

    The with statement was introduced by PEP 343 to allow standard uses of +try-finally statements to be factored out. Using this simplification makes code easier +to read.

    + +
    + +

    Review the code and determine whether or not the try-finally is used only to ensure +that a resource is closed. If the only purpose is to ensure that a resource is closed, then replace +the try-finally statement with a with statement.

    + +
    + +

    The following code shows examples of different ways of ensuring that a file is always closed, even +when an error is generated. In the second example, the try-finally block is replaced by +a simpler with statement.

    + + + + +
    + + +
  • Python Language Reference: The with +statement.
  • +
  • Python Standard Library: Context manager +.
  • +
  • Python PEP 343: The "with" Statement.
  • + + +
    +
    diff --git a/python/ql/src/Statements/ShouldUseWithStatement.ql b/python/ql/src/Statements/ShouldUseWithStatement.ql new file mode 100644 index 00000000000..06b3b762db0 --- /dev/null +++ b/python/ql/src/Statements/ShouldUseWithStatement.ql @@ -0,0 +1,37 @@ +/** + * @name Should use a 'with' statement + * @description Using a 'try-finally' block to ensure only that a resource is closed makes code more + * difficult to read. + * @kind problem + * @tags maintainability + * readability + * convention + * @problem.severity recommendation + * @sub-severity high + * @precision very-high + * @id py/should-use-with + */ + +import python + + +predicate calls_close(Call c) { + exists (Attribute a | c.getFunc() = a and a.getName() = "close") +} + +predicate +only_stmt_in_finally(Try t, Call c) { + exists(ExprStmt s | t.getAFinalstmt() = s and s.getValue() = c and strictcount(t.getAFinalstmt()) = 1) +} + +predicate points_to_context_manager(ControlFlowNode f, ClassObject cls) { + cls.isContextManager() and + forex(Object obj | f.refersTo(obj) | f.refersTo(obj, cls, _)) +} + +from Call close, Try t, ClassObject cls +where only_stmt_in_finally(t, close) and calls_close(close) and +exists(ControlFlowNode f | f = close.getFunc().getAFlowNode().(AttrNode).getObject() | + points_to_context_manager(f, cls)) +select close, "Instance of context-manager class $@ is closed in a finally block. 
Consider using 'with' statement.", cls, cls.getName() + diff --git a/python/ql/src/Statements/SideEffectInAssert.py b/python/ql/src/Statements/SideEffectInAssert.py new file mode 100644 index 00000000000..4aba0adc8de --- /dev/null +++ b/python/ql/src/Statements/SideEffectInAssert.py @@ -0,0 +1 @@ +assert(subprocess.call(['run-backup']) == 0) diff --git a/python/ql/src/Statements/SideEffectInAssert.qhelp b/python/ql/src/Statements/SideEffectInAssert.qhelp new file mode 100644 index 00000000000..0f7a3bfa4b3 --- /dev/null +++ b/python/ql/src/Statements/SideEffectInAssert.qhelp @@ -0,0 +1,37 @@ + + + + + +

    All code defined in assert statements is ignored when optimization is +requested, that is, the program is run with the -O flag. +If an assert statement has any side-effects then the behavior of +the program changes when optimization is requested.

    + +
    + + +

    Move all expressions with side-effects out of assert statements.

    + +
    + +

    +In the example, the exit code from subprocess.call() is checked against 0, but the entire +expression is called from within an assert statement. If the code is ever run with optimization enabled, then +not only the assertion itself, but also the external call, will be discarded. It is better to save the result +of subprocess.call() to a temporary variable, and to assert that variable to be 0. +

    + + + +
    + + +
  • Python Language Reference: The assert statement.
  • +
  • TutorialsPoint, Python Programming: Assertions in Python.
  • + + +
    +
    diff --git a/python/ql/src/Statements/SideEffectInAssert.ql b/python/ql/src/Statements/SideEffectInAssert.ql new file mode 100644 index 00000000000..e62685b7c33 --- /dev/null +++ b/python/ql/src/Statements/SideEffectInAssert.ql @@ -0,0 +1,37 @@ +/** + * @name An assert statement has a side-effect + * @description Side-effects in assert statements result in differences between normal + * and optimized behavior. + * @kind problem + * @tags reliability + * maintainability + * @problem.severity error + * @sub-severity low + * @precision high + * @id py/side-effect-in-assert + */ + +import python + +predicate func_with_side_effects(Expr e) { + exists(string name | + name = ((Attribute)e).getName() or name = ((Name)e).getId() | + name = "print" or name = "write" or name = "append" or + name = "pop" or name = "remove" or name = "discard" or + name = "delete" or name = "close" or name = "open" or + name = "exit" + ) +} + +predicate probable_side_effect(Expr e) { + // Only consider explicit yields, not artificial ones in comprehensions + e instanceof Yield and not exists(Comp c | c.contains(e)) + or + e instanceof YieldFrom + or + e instanceof Call and func_with_side_effects(((Call)e).getFunc()) +} + +from Assert a, Expr e +where probable_side_effect(e) and a.contains(e) +select a, "This 'assert' statement contains $@ which may have side effects.", e, "an expression" diff --git a/python/ql/src/Statements/StatementNoEffect.py b/python/ql/src/Statements/StatementNoEffect.py new file mode 100644 index 00000000000..c22c921a1b9 --- /dev/null +++ b/python/ql/src/Statements/StatementNoEffect.py @@ -0,0 +1,4 @@ + +def increment_and_show(x): + ++x + x.show diff --git a/python/ql/src/Statements/StatementNoEffect.qhelp b/python/ql/src/Statements/StatementNoEffect.qhelp new file mode 100644 index 00000000000..0fd1207fa12 --- /dev/null +++ b/python/ql/src/Statements/StatementNoEffect.qhelp @@ -0,0 +1,35 @@ + + + + + +

    An expression statement without side effects is just clutter. It confuses the reader and may have a slight impact on performance. +

    + +
    + +

    First determine what the intention of the code was. If no side effect was intended, then just delete the statement. +However, it is probable that there is a mistake in the code and some effect was intended. +

    +

    +This query will not flag a statement consisting solely of a string as having no side effect, as these are often used as comments. +If you want to use strings as comments, the most common convention is to use triple-quoted strings rather than single-quoted ones. +However, consistency is more important than conforming to any particular style. +

    + +
    + + +

    In this example neither line of the increment_and_show() function has any effect. +

    +The first line, ++x, has no effect as it applies the unary plus operator twice. The programmer probably intended x += 1. +

    +

    +The second line, x.show, has no observable effect, but it is likely that x.show() was intended. +

    + + +
    +
    diff --git a/python/ql/src/Statements/StatementNoEffect.ql b/python/ql/src/Statements/StatementNoEffect.ql new file mode 100644 index 00000000000..b0fcd05b3f1 --- /dev/null +++ b/python/ql/src/Statements/StatementNoEffect.ql @@ -0,0 +1,120 @@ +/** + * @name Statement has no effect + * @description A statement has no effect + * @kind problem + * @tags maintainability + * useless-code + * external/cwe/cwe-561 + * @problem.severity recommendation + * @sub-severity high + * @precision high + * @id py/ineffectual-statement + */ + +import python + +predicate understood_attribute(Attribute attr, ClassObject cls, ClassObject attr_cls) { + exists(string name | + attr.getName() = name | + attr.getObject().refersTo(_, cls, _) and + cls.attributeRefersTo(name, _, attr_cls, _) + ) +} + +/* Conservative estimate of whether attribute lookup has a side effect */ +predicate side_effecting_attribute(Attribute attr) { + exists(ClassObject cls, ClassObject attr_cls | + understood_attribute(attr, cls, attr_cls) and + side_effecting_descriptor_type(attr_cls) + ) +} + +predicate maybe_side_effecting_attribute(Attribute attr) { + not understood_attribute(attr, _, _) and not attr.refersTo(_) + or + side_effecting_attribute(attr) +} + +predicate side_effecting_descriptor_type(ClassObject descriptor) { + descriptor.isDescriptorType() and + /* Technically all descriptor gets have side effects, + * but some are indicative of a missing call and + * we want to treat them as having no effect. */ + not descriptor = thePyFunctionType() and + not descriptor = theStaticMethodType() and + not descriptor = theClassMethodType() +} + +/** Side effecting binary operators are rare, so we assume they are not + * side-effecting unless we know otherwise. 
+ */ +predicate side_effecting_binary(Expr b) { + exists(Expr sub, string method_name | + sub = b.(BinaryExpr).getLeft() and + method_name = b.(BinaryExpr).getOp().getSpecialMethodName() + or + exists(Cmpop op | + b.(Compare).compares(sub, op, _) and + method_name = op.getSpecialMethodName() + ) + | + exists(ClassObject cls | + sub.refersTo(_, cls, _) and + cls.hasAttribute(method_name) + and + not exists(ClassObject declaring | + declaring.declaresAttribute(method_name) + and declaring = cls.getAnImproperSuperType() and + declaring.isBuiltin() and not declaring = theObjectType() + ) + ) + ) +} + +predicate is_notebook(File f) { + exists(Comment c | + c.getLocation().getFile() = f | + c.getText().regexpMatch("#\\s*.+\\s*") + ) +} + +/** Expression (statement) in a jupyter/ipython notebook */ +predicate in_notebook(Expr e) { + is_notebook(e.getScope().(Module).getFile()) +} + +FunctionObject assertRaises() { + exists(ModuleObject unittest, ClassObject testcase | + unittest.getName() = "unittest" and + testcase = unittest.getAttribute("TestCase") and + result = testcase.lookupAttribute("assertRaises") + ) +} + +/** Holds if expression `e` is in a `with` block that tests for exceptions being raised. */ +predicate in_raises_test(Expr e) { + exists(With w | + w.contains(e) and + w.getContextExpr() = assertRaises().getACall().getNode() + ) +} + +predicate no_effect(Expr e) { + not e instanceof StrConst and + not ((StrConst)e).isDocString() and + not e.hasSideEffects() and + forall(Expr sub | + sub = e.getASubExpression*() + | + not side_effecting_binary(sub) + and + not maybe_side_effecting_attribute(sub) + ) and + not in_notebook(e) and + not in_raises_test(e) +} + +from ExprStmt stmt +where no_effect(stmt.getValue()) +select stmt, "This statement has no effect." 
+ diff --git a/python/ql/src/Statements/StringConcatenationInLoop.qhelp b/python/ql/src/Statements/StringConcatenationInLoop.qhelp new file mode 100644 index 00000000000..8d8f494ddd0 --- /dev/null +++ b/python/ql/src/Statements/StringConcatenationInLoop.qhelp @@ -0,0 +1,27 @@ + + + +

    If you concatenate strings in a loop then the time taken by the loop is quadratic in the number +of iterations.

    + +
    + + +

    Initialize an empty list before the start of the loop. +During the loop, append the substrings to the list. +At the end of the loop, convert the list to a string by using ''.join(list).

    + + +
    + + + +
  • Python Standard Library: The str.join method.
  • +
  • Python Frequently Asked Questions: +What is the most efficient way to concatenate many strings together?.
  • + + +
    +
    diff --git a/python/ql/src/Statements/StringConcatenationInLoop.ql b/python/ql/src/Statements/StringConcatenationInLoop.ql new file mode 100644 index 00000000000..5ca79a345a5 --- /dev/null +++ b/python/ql/src/Statements/StringConcatenationInLoop.ql @@ -0,0 +1,29 @@ +/** + * @name String concatenation in loop + * @description Concatenating strings in loops has quadratic performance. + * @kind problem + * @tags efficiency + * maintainability + * @problem.severity recommendation + * @sub-severity low + * @precision low + * @id py/string-concatenation-in-loop + */ + +import python + +predicate string_concat_in_loop(BinaryExpr b) { + b.getOp() instanceof Add + and + exists(SsaVariable d, SsaVariable u, BinaryExprNode add, ClassObject str_type | + add.getNode() = b and d = u.getAnUltimateDefinition() | + d.getDefinition().(DefinitionNode).getValue() = add and u.getAUse() = add.getAnOperand() and + add.getAnOperand().refersTo(_, str_type, _) and + (str_type = theBytesType() or str_type = theUnicodeType()) + ) +} + + +from BinaryExpr b, Stmt s +where string_concat_in_loop(b) and s.getASubExpression() = b +select s, "String concatenation in a loop is quadratic in the number of iterations." 
diff --git a/python/ql/src/Statements/SysExitUsed.py b/python/ql/src/Statements/SysExitUsed.py new file mode 100644 index 00000000000..679f37624cd --- /dev/null +++ b/python/ql/src/Statements/SysExitUsed.py @@ -0,0 +1,8 @@ +import sys + +def main(): + try: + process() + except Exception as ex: + print(ex) + sys.exit(1) diff --git a/python/ql/src/Statements/TopLevelPrint.py b/python/ql/src/Statements/TopLevelPrint.py new file mode 100644 index 00000000000..29d6682f291 --- /dev/null +++ b/python/ql/src/Statements/TopLevelPrint.py @@ -0,0 +1,15 @@ + +try: + import fast_system as system +except ImportError: + print ("Cannot import fast system, falling back on slow system") + import slow_system as system + +#Fixed version +import logging + +try: + import fast_system as system +except ImportError: + logging.info("Cannot import fast system, falling back on slow system") + import slow_system as system diff --git a/python/ql/src/Statements/TopLevelPrint.qhelp b/python/ql/src/Statements/TopLevelPrint.qhelp new file mode 100644 index 00000000000..c615f08df8b --- /dev/null +++ b/python/ql/src/Statements/TopLevelPrint.qhelp @@ -0,0 +1,33 @@ + + + + + +

    Using print statements at module-level scope may result in surprising output at import time. +This in turn means that other code cannot safely import the module in question if the program may only write +real output to standard output. +

    + +
    + + +

    Replace the print statements with calls to some form of logging function or use the warnings module.

    + +
    + +

    In the example, importing the module may cause a message to be printed, which may interfere with the operation of the program.

    + + + +
    + + +
  • Python Language Reference: The print statement.
  • +
  • Python Standard Library: The print function.
  • +
  • Python tutorial: Modules.
  • + + +
    +
    diff --git a/python/ql/src/Statements/TopLevelPrint.ql b/python/ql/src/Statements/TopLevelPrint.ql new file mode 100644 index 00000000000..cc56902cd62 --- /dev/null +++ b/python/ql/src/Statements/TopLevelPrint.ql @@ -0,0 +1,35 @@ +/** + * @name Use of a print statement at module level + * @description Using a print statement at module scope (except when guarded by if __name__ == '__main__') will cause surprising output when the module is imported. + * @kind problem + * @tags reliability + * maintainability + * convention + * @problem.severity recommendation + * @sub-severity high + * @precision high + * @id py/print-during-import + */ + +import python + + +predicate main_eq_name(If i) { + exists(Name n, StrConst m, Compare c | + i.getTest() = c and c.getLeft() = n and + c.getAComparator() = m and + n.getId() = "__name__" and + m.getText() = "__main__" + ) +} + +predicate is_print_stmt(Stmt s) { + s instanceof Print or + exists(ExprStmt e, Call c, Name n | e = s and c = e.getValue() and n = c.getFunc() and n.getId() = "print") +} + +from Stmt p +where is_print_stmt(p) and +exists(ModuleObject m | m.getModule() = p.getScope() and m.getKind() = "module") and +not exists(If i | main_eq_name(i) and i.getASubStatement().getASubStatement*() = p) +select p, "Print statement may execute during import." diff --git a/python/ql/src/Statements/UnnecessaryDelete.py b/python/ql/src/Statements/UnnecessaryDelete.py new file mode 100644 index 00000000000..6ddf2d68210 --- /dev/null +++ b/python/ql/src/Statements/UnnecessaryDelete.py @@ -0,0 +1,4 @@ +def unnecessary_delete(): + x = get_some_object() + do_calculation(x) + del x # This del statement is unnecessary diff --git a/python/ql/src/Statements/UnnecessaryDelete.qhelp b/python/ql/src/Statements/UnnecessaryDelete.qhelp new file mode 100644 index 00000000000..38a6ffe5d64 --- /dev/null +++ b/python/ql/src/Statements/UnnecessaryDelete.qhelp @@ -0,0 +1,34 @@ + + + +

    Passing a local variable to a del statement results in that +variable being removed from the local namespace. When exiting a function all +local variables are deleted, so it is unnecessary to explicitly delete variables +in such cases.

    + +
    + +

    Remove the del statement.

    + +
    + +

    + In the function below, the variable x is assigned a value that + is used for a calculation, and is then explicitly deleted before the + function exits. In this case, the delete statement can be removed without + changing the behavior of the function. +

    + + + +
    + + + + +
  • Python: The 'del' statement.
  • +
  • Python/C API Reference Manual: Reference counts.
  • +
    +
    diff --git a/python/ql/src/Statements/UnnecessaryDelete.ql b/python/ql/src/Statements/UnnecessaryDelete.ql new file mode 100644 index 00000000000..fbe196e9fc1 --- /dev/null +++ b/python/ql/src/Statements/UnnecessaryDelete.ql @@ -0,0 +1,33 @@ +/** + * @name Unnecessary delete statement in function + * @description Using a 'delete' statement to delete a local variable is + * unnecessary, because the variable is deleted automatically when + * the function exits. + * @kind problem + * @tags maintainability + * useless-code + * @problem.severity warning + * @sub-severity low + * @precision high + * @id py/unnecessary-delete + */ + + +import python + +from Delete del, Expr e, Function f +where + f.getLastStatement() = del and + e = del.getATarget() and + f.containsInScope(e) and + not e instanceof Subscript and + not e instanceof Attribute and + not exists(Stmt s | s.(While).contains(del) or s.(For).contains(del)) and + /* False positive: calling `sys.exc_info` within a function results in a + reference cycle,and an explicit call to `del` helps break this cycle. 
*/ + not exists(FunctionObject ex | + ex.hasLongName("sys.exc_info") and + ex.getACall().getScope() = f + ) +select del, "Unnecessary deletion of local variable $@ in function $@.", + e.getLocation(), e.toString(), f.getLocation(), f.getName() \ No newline at end of file diff --git a/python/ql/src/Statements/UnnecessaryElseClause.py b/python/ql/src/Statements/UnnecessaryElseClause.py new file mode 100644 index 00000000000..a5a374d07ca --- /dev/null +++ b/python/ql/src/Statements/UnnecessaryElseClause.py @@ -0,0 +1,21 @@ +def pointless_else(container): + for item in container: + if of_interest(item): + return item + else: + raise NotFoundException() + +def no_else(container): + for item in container: + if of_interest(item): + return item + raise NotFoundException() + +def with_break(container): + for item in container: + if of_interest(item): + found = item + break + else: + raise NotFoundException() + return found diff --git a/python/ql/src/Statements/UnnecessaryElseClause.qhelp b/python/ql/src/Statements/UnnecessaryElseClause.qhelp new file mode 100644 index 00000000000..454fb2a3b13 --- /dev/null +++ b/python/ql/src/Statements/UnnecessaryElseClause.qhelp @@ -0,0 +1,32 @@ + + + +

    The else clause of a loop (either a for or a while statement) executes immediately after the loop terminates normally. +If there is a break statement in the loop body, then the else clause is skipped. +If there is no break statement, then the else clause will always be executed after the loop, unless it exits with a return or raise. +Therefore, if there is no break statement in the loop body then the else clause can be replaced with unindented code.

    + +

    Generally the use of else clauses should be avoided where possible, as they are likely to be misunderstood.

    + +
    + +

    Replace the else clause with unindented code.

    + + +
    + +

    In this example, the pointless_else function contains a redundant else clause. +The else clause can be simplified, as shown in the no_else function, which has the same semantics, but has no else clause. +The third example function, with_break, shows a version where the else clause is necessary, as the break statement skips the else clause. +

    + +
    + + +
  • Python Language Reference: The while statement.
  • +
  • Python Tutorial: Break and continue statements, and else clauses on loops.
  • + +
    +
    diff --git a/python/ql/src/Statements/UnnecessaryElseClause.ql b/python/ql/src/Statements/UnnecessaryElseClause.ql new file mode 100644 index 00000000000..cfb93a7c0b7 --- /dev/null +++ b/python/ql/src/Statements/UnnecessaryElseClause.ql @@ -0,0 +1,22 @@ +/** + * @name Unnecessary 'else' clause in loop + * @description An 'else' clause in a 'for' or 'while' statement that does not contain a 'break' is redundant. + * @kind problem + * @tags maintainability + * useless-code + * @problem.severity warning + * @sub-severity low + * @precision very-high + * @id py/redundant-else + */ + +import python + +from Stmt loop, StmtList body, StmtList clause, string kind +where +(exists(For f | f = loop | clause = f.getOrelse() and body = f.getBody() and kind = "for") + or + exists(While w | w = loop | clause = w.getOrelse() and body = w.getBody() and kind = "while") +) +and not exists(Break b | body.contains(b)) +select loop, "This '" + kind + "' statement has a redundant 'else' as no 'break' is present in the body." diff --git a/python/ql/src/Statements/UnnecessaryPass.qhelp b/python/ql/src/Statements/UnnecessaryPass.qhelp new file mode 100644 index 00000000000..1623ac8714d --- /dev/null +++ b/python/ql/src/Statements/UnnecessaryPass.qhelp @@ -0,0 +1,21 @@ + + + +

    A pass statement is only necessary when it is the only statement in a block (the +list of statements forming part of a compound statement). This is because the purpose of the +pass statement is to allow empty blocks where they would otherwise be syntactically invalid. +If the block already contains other statements then the pass statement is unnecessary.

    + +
    + +

    Remove the pass statement.

    + +
    + + +
  • Python: pass.
  • + +
    +
    diff --git a/python/ql/src/Statements/UnnecessaryPass.ql b/python/ql/src/Statements/UnnecessaryPass.ql new file mode 100644 index 00000000000..d98aa947236 --- /dev/null +++ b/python/ql/src/Statements/UnnecessaryPass.ql @@ -0,0 +1,33 @@ +/** + * @name Unnecessary pass + * @description Unnecessary 'pass' statement + * @kind problem + * @tags maintainability + * useless-code + * @problem.severity warning + * @sub-severity low + * @precision very-high + * @id py/unnecessary-pass + */ + +import python + +predicate is_doc_string(ExprStmt s) { + s.getValue() instanceof Unicode or s.getValue() instanceof Bytes +} + +predicate has_doc_string(StmtList stmts) { + stmts.getParent() instanceof Scope + and + is_doc_string(stmts.getItem(0)) +} + +from Pass p, StmtList list +where list.getAnItem() = p and +( + strictcount(list.getAnItem()) = 2 and not has_doc_string(list) + or + strictcount(list.getAnItem()) > 2 +) +select p, "Unnecessary 'pass' statement." + diff --git a/python/ql/src/Statements/UnreachableCode.py b/python/ql/src/Statements/UnreachableCode.py new file mode 100644 index 00000000000..e3a4fbd47c2 --- /dev/null +++ b/python/ql/src/Statements/UnreachableCode.py @@ -0,0 +1,5 @@ +import math + +def my_div(x, y): + return math.floor(x / y) + remainder = x - math.floor(x / y) * y diff --git a/python/ql/src/Statements/UnreachableCode.qhelp b/python/ql/src/Statements/UnreachableCode.qhelp new file mode 100644 index 00000000000..38fc0de5821 --- /dev/null +++ b/python/ql/src/Statements/UnreachableCode.qhelp @@ -0,0 +1,27 @@ + + + +

    Unreachable code makes the code more difficult to understand and may slow down loading of modules.

    + +
    + +

    Deleting the unreachable code will make the code clearer and preserve the meaning of the code. +However, it is possible that the original intention was that the code should execute, and the fact that it is +unreachable signifies some other error.

    + +
    + +

    In this example the assignment to remainder is never reached because there is a +return statement on the previous line.

    + + + +
    + + +
  • Wikipedia: Unreachable Code.
  • + +
    +
    diff --git a/python/ql/src/Statements/UnreachableCode.ql b/python/ql/src/Statements/UnreachableCode.ql new file mode 100644 index 00000000000..8fa8cb7f9e0 --- /dev/null +++ b/python/ql/src/Statements/UnreachableCode.ql @@ -0,0 +1,49 @@ +/** + * @name Unreachable code + * @description Code is unreachable + * @kind problem + * @tags maintainability + * useless-code + * external/cwe/cwe-561 + * @problem.severity warning + * @sub-severity low + * @precision very-high + * @id py/unreachable-statement + */ + +import python + +predicate typing_import(ImportingStmt is) { + exists(Module m | + is.getScope() = m and + exists(TypeHintComment tc | + tc.getLocation().getFile() = m.getFile() + ) + ) +} + +/** Holds if `s` contains the only `yield` in scope */ +predicate unique_yield(Stmt s) { + exists(Yield y | s.contains(y)) and + exists(Function f | + f = s.getScope() and + strictcount(Yield y | f.containsInScope(y)) = 1 + ) +} + +predicate reportable_unreachable(Stmt s) { + s.isUnreachable() and + not typing_import(s) and + not exists(Stmt other | other.isUnreachable() | + other.contains(s) + or + exists(StmtList l, int i, int j | + l.getItem(i) = other and l.getItem(j) = s and i < j + ) + ) and + not unique_yield(s) +} + +from Stmt s +where reportable_unreachable(s) +select s, "Unreachable statement." 
diff --git a/python/ql/src/Statements/UnusedExceptionObject.py b/python/ql/src/Statements/UnusedExceptionObject.py new file mode 100644 index 00000000000..3fc8a456bd3 --- /dev/null +++ b/python/ql/src/Statements/UnusedExceptionObject.py @@ -0,0 +1,16 @@ + +def do_action_forgotten_raise(action): + if action == "go": + start() + elif action == "stop": + stop() + else: + ValueError(action) + +def do_action(action): + if action == "go": + start() + elif action == "stop": + stop() + else: + raise ValueError(action) diff --git a/python/ql/src/Statements/UnusedExceptionObject.qhelp b/python/ql/src/Statements/UnusedExceptionObject.qhelp new file mode 100644 index 00000000000..63fca0cf8a4 --- /dev/null +++ b/python/ql/src/Statements/UnusedExceptionObject.qhelp @@ -0,0 +1,25 @@ + + + + + +

    Creating a new exception object is no different from creating any other object. The exception needs to be raised to have an effect. +

    + +
    + +

    Insert a raise before the exception. +

    + +
    + + +

    In this example, the first function do_action_forgotten_raise() silently ignores any erroneous input. +In contrast, the second function do_action() correctly raises an exception if the 'action' is not understood. +

    + + +
    +
    diff --git a/python/ql/src/Statements/UnusedExceptionObject.ql b/python/ql/src/Statements/UnusedExceptionObject.ql new file mode 100644 index 00000000000..be848ad69c3 --- /dev/null +++ b/python/ql/src/Statements/UnusedExceptionObject.ql @@ -0,0 +1,19 @@ +/** + * @name Unused exception object + * @description An exception object is created, but is not used. + * @kind problem + * @tags reliability + * maintainability + * @problem.severity error + * @sub-severity low + * @precision very-high + * @id py/unused-exception-object + */ + +import python + +from Call call, ClassObject ex +where call.getFunc().refersTo(ex) and ex.getAnImproperSuperType() = theExceptionType() +and exists(ExprStmt s | s.getValue() = call) + +select call, "Instantiating an exception, but not raising it, has no effect" diff --git a/python/ql/src/Statements/UseOfExit.qhelp b/python/ql/src/Statements/UseOfExit.qhelp new file mode 100644 index 00000000000..004461513e7 --- /dev/null +++ b/python/ql/src/Statements/UseOfExit.qhelp @@ -0,0 +1,35 @@ + + + + +

    The exit and quit "functions" are actually site.Quitter objects and +are loaded, at interpreter start up, from site.py. +However, if the interpreter is started with the -S flag, or a custom site.py +is used then exit and quit may not be present. +

    + +
    + +

    Replace uses of exit() and quit() with sys.exit() which is +built into the interpreter and is guaranteed to be present.

    + +
    + +

    In this example, exit() is used and will fail if the interpreter is passed the -S option.

    + + + +

    In this example, sys.exit() is used and will behave the same regardless of the interpreter options.

    + + + +
    + + +
  • Python Documentation: Command line and environment.
  • +
  • Python Documentation: Site-specific configuration hook.
  • + +
    +
    diff --git a/python/ql/src/Statements/UseOfExit.ql b/python/ql/src/Statements/UseOfExit.ql new file mode 100644 index 00000000000..3c21be6b1de --- /dev/null +++ b/python/ql/src/Statements/UseOfExit.ql @@ -0,0 +1,16 @@ +/** + * @name Use of exit() or quit() + * @description exit() or quit() may fail if the interpreter is run with the -S option. + * @kind problem + * @tags maintainability + * @problem.severity warning + * @sub-severity low + * @precision very-high + * @id py/use-of-exit-or-quit + */ + +import python + +from CallNode call, string name +where call.getFunction().refersTo(quitterObject(name)) +select call, "The '" + name + "' site.Quitter object may not exist if the 'site' module is not loaded or is modified." diff --git a/python/ql/src/Testing/ImpreciseAssert.py b/python/ql/src/Testing/ImpreciseAssert.py new file mode 100644 index 00000000000..08e083ff0bb --- /dev/null +++ b/python/ql/src/Testing/ImpreciseAssert.py @@ -0,0 +1,9 @@ +from unittest import TestCase + +class MyTest(TestCase): + + + def testInts(self): + self.assertTrue(1 == 1) + self.assertFalse(1 > 2) + self.assertTrue(1 in []) #This will fail diff --git a/python/ql/src/Testing/ImpreciseAssert.qhelp b/python/ql/src/Testing/ImpreciseAssert.qhelp new file mode 100644 index 00000000000..2cf9aa3291c --- /dev/null +++ b/python/ql/src/Testing/ImpreciseAssert.qhelp @@ -0,0 +1,37 @@ + + + + + +

    The class unittest.TestCase provides a range of assertion methods. As well as the general forms assertTrue() and assertFalse(), +more specific forms such as assertGreaterEqual() and assertNotIn() are provided. +By using the more specific forms it is possible to get more precise and informative failure messages in the event of a test failing. This can speed up the debugging process. +

    +
    + + +

    Replace all calls to assertTrue() and assertFalse() that do not provide a custom failure message with a more specific variant. +Alternatively, provide a tailored failure message using the assertTrue(condition, message) form. +

    +
    + + +

    In this example, assertTrue() and assertFalse() are used.

    + +

    +This will make it more difficult to determine what has gone wrong when self.assertTrue(1 in []) fails. +The failure message "AssertionError: False is not true" is not very helpful. +

    + +

    A more useful error message can be generated by changing the asserts to the more specific forms as in the following example.

    + +

    In this case, the failure message "AssertionError: 1 not found in []" is much more informative.

    +
    + + +
  • Python library reference: TestCase.assertEqual.
  • +
    + +
    diff --git a/python/ql/src/Testing/ImpreciseAssert.ql b/python/ql/src/Testing/ImpreciseAssert.ql new file mode 100644 index 00000000000..589d1c045d5 --- /dev/null +++ b/python/ql/src/Testing/ImpreciseAssert.ql @@ -0,0 +1,101 @@ +/** + * @name Imprecise assert + * @description Using 'assertTrue' or 'assertFalse' rather than a more specific assertion can give uninformative failure messages. + * @kind problem + * @tags maintainability + * testability + * @problem.severity recommendation + * @sub-severity high + * @precision very-high + * @id py/imprecise-assert + */ + +import python + +/* Helper predicate for CallToAssertOnComparison class */ +predicate callToAssertOnComparison(Call call, string assertName, Cmpop op) { + call.getFunc().(Attribute).getName() = assertName + and + (assertName = "assertTrue" or assertName = "assertFalse") + and + exists(Compare cmp | + cmp = call.getArg(0) and + /* Exclude complex comparisons like: a < b < c */ + not exists(cmp.getOp(1)) and + op = cmp.getOp(0) + ) +} + +class CallToAssertOnComparison extends Call { + + CallToAssertOnComparison() { + callToAssertOnComparison(this, _, _) + } + + Cmpop getOperator() { + callToAssertOnComparison(this, _, result) + } + + string getMethodName() { + callToAssertOnComparison(this, result, _) + } + + string getBetterName() { + exists(Cmpop op | + callToAssertOnComparison(this, "assertTrue", op) and + ( + op instanceof Eq and result = "assertEqual" + or + op instanceof NotEq and result = "assertNotEqual" + or + op instanceof Lt and result = "assertLess" + or + op instanceof LtE and result = "assertLessEqual" + or + op instanceof Gt and result = "assertGreater" + or + op instanceof GtE and result = "assertGreaterEqual" + or + op instanceof In and result = "assertIn" + or + op instanceof NotIn and result = "assertNotIn" + or + op instanceof Is and result = "assertIs" + or + op instanceof IsNot and result = "assertIsNot" + ) + or + callToAssertOnComparison(this, "assertFalse", op) and + ( + op 
instanceof NotEq and result = "assertEqual" + or + op instanceof Eq and result = "assertNotEqual" + or + op instanceof GtE and result = "assertLess" + or + op instanceof Gt and result = "assertLessEqual" + or + op instanceof LtE and result = "assertGreater" + or + op instanceof Lt and result = "assertGreaterEqual" + or + op instanceof NotIn and result = "assertIn" + or + op instanceof In and result = "assertNotIn" + or + op instanceof IsNot and result = "assertIs" + or + op instanceof Is and result = "assertIsNot" + ) + ) + } + +} + + +from CallToAssertOnComparison call +where + /* Exclude cases where an explicit message is provided*/ + not exists(call.getArg(1)) +select call, call.getMethodName() + "(a " + call.getOperator().getSymbol() + " b) " + + "cannot provide an informative message. Using " + call.getBetterName() + "(a, b) instead will give more informative messages." diff --git a/python/ql/src/Testing/ImpreciseAssert2.py b/python/ql/src/Testing/ImpreciseAssert2.py new file mode 100644 index 00000000000..a2b250b18d3 --- /dev/null +++ b/python/ql/src/Testing/ImpreciseAssert2.py @@ -0,0 +1,9 @@ +from unittest import TestCase + +class MyTest(TestCase): + + + def testInts(self): + self.assertEqual(1, 1) + self.assertLessEqual(1, 2) + self.assertIn(1, []) #This will fail diff --git a/python/ql/src/Testing/Mox.qll b/python/ql/src/Testing/Mox.qll new file mode 100644 index 00000000000..273193298a2 --- /dev/null +++ b/python/ql/src/Testing/Mox.qll @@ -0,0 +1,18 @@ +import python + +/** Whether `mox` or `.StubOutWithMock()` is used in thin module `m`. 
+ */ +predicate useOfMoxInModule(Module m) { + exists(ModuleObject mox | + mox.getName() = "mox" or mox.getName() = "mox3.mox" | + exists(ControlFlowNode use | + use.refersTo(mox) and + use.getScope().getEnclosingModule() = m + ) + ) + or + exists(Call call| + call.getFunc().(Attribute).getName() = "StubOutWithMock" and + call.getEnclosingModule() = m + ) +} diff --git a/python/ql/src/Variables/Definition.qll b/python/ql/src/Variables/Definition.qll new file mode 100644 index 00000000000..0f0fc7f730b --- /dev/null +++ b/python/ql/src/Variables/Definition.qll @@ -0,0 +1,160 @@ +import python + + +/** + * A control-flow node that defines a variable + */ +class Definition extends NameNode, DefinitionNode { + + /** + * The variable defined by this control-flow node. + */ + Variable getVariable() { + this.defines(result) + } + + /** + * The SSA variable corresponding to the current definition. Since SSA variables + * are only generated for definitions with at least one use, not all definitions + * will have an SSA variable. + */ + SsaVariable getSsaVariable() { + result.getDefinition() = this + } + + /** + * The index of this definition in its basic block. + */ + private int indexInBB(BasicBlock bb, Variable v) { + v = this.getVariable() and + this = bb.getNode(result) + } + + /** + * The rank of this definition among other definitions of the same variable + * in its basic block. The first definition will have rank 1, and subsequent + * definitions will have sequentially increasing ranks. + */ + private int rankInBB(BasicBlock bb, Variable v) { + exists(int defIdx | defIdx = this.indexInBB(bb, v) | + defIdx = rank[result](int idx, Definition def | idx = def.indexInBB(bb, v) | idx) + ) + } + + /** Is this definition the first in its basic block for its variable? */ + predicate isFirst() { + this.rankInBB(_, _) = 1 + } + + /** Is this definition the last in its basic block for its variable? 
*/ + predicate isLast() { + exists(BasicBlock b, Variable v | + this.rankInBB(b, v) = max(Definition other | any() | other.rankInBB(b, v)) + ) + } + + /** + * Is this definition unused? A definition is unused if the value it provides + * is not read anywhere. + */ + predicate isUnused() { + // SSA variables only exist for definitions that have at least one use. + not exists(this.getSsaVariable()) and + // If a variable is used in a foreign scope, all bets are off. + not this.getVariable().escapes() and + // Global variables don't have SSA variables unless the scope is global. + this.getVariable().getScope() = this.getScope() and + // A call to locals() or vars() in the variable scope counts as a use + not exists(Function f, Call c, string locals_or_vars | + c.getScope() = f and this.getScope() = f and + c.getFunc().(Name).getId() = locals_or_vars | + locals_or_vars = "locals" or locals_or_vars = "vars" + ) + } + + /** + * An immediate re-definition of this definition's variable. + */ + Definition getARedef() { + result != this and + exists(Variable var | var = this.getVariable() and var = result.getVariable() | + // Definitions in different basic blocks. + this.isLast() and + reaches_without_redef(var, this.getBasicBlock(), result.getBasicBlock()) and + result.isFirst() + ) + or + // Definitions in the same basic block. + exists(BasicBlock common, Variable var | + this.rankInBB(common, var) + 1 = result.rankInBB(common, var) + ) + } + + /** + * We only consider assignments as potential alert targets, not parameters + * and imports and other name-defining constructs. 
+ * We also ignore anything named "_", "empty", "unused" or "dummy" + */ + predicate isRelevant() { + exists(AstNode p | + p = this.getNode().getParentNode() | + p instanceof Assign or p instanceof AugAssign or p instanceof Tuple + ) + and + not name_acceptable_for_unused_variable(this.getVariable()) + and + /* Decorated classes and functions are used */ + not exists(this.getNode().getParentNode().(FunctionDef).getDefinedFunction().getADecorator()) + and + not exists(this.getNode().getParentNode().(ClassDef).getDefinedClass().getADecorator()) + } + +} + +/** + * Check whether basic block `a` reaches basic block `b` without an intervening + * definition of variable `v`. The relation is not transitive by default, so any + * observed transitivity will be caused by loops in the control-flow graph. + */ +private +predicate reaches_without_redef(Variable v, BasicBlock a, BasicBlock b) { + exists(Definition def | a.getASuccessor() = b | + def.getBasicBlock() = a and def.getVariable() = v and maybe_redefined(v) + ) or + exists(BasicBlock mid | reaches_without_redef(v, a, mid) | + not exists(NameNode cfn | cfn.defines(v) | + cfn.getBasicBlock() = mid + ) and + mid.getASuccessor() = b + ) +} + +private predicate maybe_redefined(Variable v) { + strictcount(Definition d | d.defines(v)) > 1 +} + +predicate name_acceptable_for_unused_variable(Variable var) { + exists(string name | + var.getId() = name | + name.regexpMatch("_+") or name = "empty" or + name.matches("%unused%") or name = "dummy" or + name.regexpMatch("__.*") + ) +} + + +class ListComprehensionDeclaration extends ListComp { + + Name getALeakedVariableUse() { + major_version() = 2 and + this.getIterationVariable(_).getId() = result.getId() and + result.getScope() = this.getScope() and + this.getAFlowNode().strictlyReaches(result.getAFlowNode()) and + result.isUse() + } + + Name getDefinition() { + result = this.getIterationVariable(0).getAStore() + } + +} diff --git a/python/ql/src/Variables/Global.qhelp 
b/python/ql/src/Variables/Global.qhelp new file mode 100644 index 00000000000..31df579a6de --- /dev/null +++ b/python/ql/src/Variables/Global.qhelp @@ -0,0 +1,20 @@ + + + +

    The use of the global keyword enables functions to modify variables outside of their scope. +These functions may then include side effects that may not be apparent to users +of that function, making the code harder to understand.

    +
    + + +

    Remove the global statement, if possible.

    +
    + + + +
  • Python Language Reference: The global statement.
  • + +
    +
    diff --git a/python/ql/src/Variables/Global.ql b/python/ql/src/Variables/Global.ql new file mode 100644 index 00000000000..8adbd06bcf5 --- /dev/null +++ b/python/ql/src/Variables/Global.ql @@ -0,0 +1,18 @@ +/** + * @name Use of the 'global' statement. + * @description Use of the 'global' statement may indicate poor modularity. + * @kind problem + * @problem.severity recommendation + * @sub-severity low + * @deprecated + * @precision very-high + * @id py/use-of-global + */ + +import python + +from Global g +where not g.getScope() instanceof Module +select g, "Updating global variables except at module initialization is discouraged" + + diff --git a/python/ql/src/Variables/GlobalAtModuleLevel.qhelp b/python/ql/src/Variables/GlobalAtModuleLevel.qhelp new file mode 100644 index 00000000000..a0c1c7e673b --- /dev/null +++ b/python/ql/src/Variables/GlobalAtModuleLevel.qhelp @@ -0,0 +1,20 @@ + + + +

    The global statement is used to specify that assignments to that name are assignments to the +variable in the global (module) scope, rather than in the local scope. +At the module level, this statement is redundant because the local scope and global scope are the same.

    + +
    + +

    Remove the global statement.

    + +
    + + +
  • Python Language Reference: The global statement.
  • + +
    +
    diff --git a/python/ql/src/Variables/GlobalAtModuleLevel.ql b/python/ql/src/Variables/GlobalAtModuleLevel.ql new file mode 100644 index 00000000000..f3dc9e21440 --- /dev/null +++ b/python/ql/src/Variables/GlobalAtModuleLevel.ql @@ -0,0 +1,17 @@ +/** + * @name Use of 'global' at module level + * @description Use of the 'global' statement at module level + * @kind problem + * @tags maintainability + * useless-code + * @problem.severity warning + * @sub-severity low + * @precision very-high + * @id py/redundant-global-declaration + */ + +import python + +from Global g +where g.getScope() instanceof Module +select g, "Declaring '" + g.getAName() + "' as global at module-level is redundant." \ No newline at end of file diff --git a/python/ql/src/Variables/LeakingListComprehension.py b/python/ql/src/Variables/LeakingListComprehension.py new file mode 100644 index 00000000000..11b876016bd --- /dev/null +++ b/python/ql/src/Variables/LeakingListComprehension.py @@ -0,0 +1,7 @@ + +def two_or_three(): + x = 3 + [0 for x in range(3)] + return x # Will return 2 in Python 2 and 3 in Python 3. + +print(two_or_three()) \ No newline at end of file diff --git a/python/ql/src/Variables/LeakingListComprehension.qhelp b/python/ql/src/Variables/LeakingListComprehension.qhelp new file mode 100644 index 00000000000..17b5a097625 --- /dev/null +++ b/python/ql/src/Variables/LeakingListComprehension.qhelp @@ -0,0 +1,41 @@ + + + + + +

    In Python 2 list comprehensions are evaluated in the enclosing scope, which means that the iteration variable of a list comprehension is visible +outside of the list comprehension. In Python 3 the iteration variable is no longer visible in the enclosing scope. +

    + +

    +Code that uses the value of a list comprehension iteration variable after the list comprehension has finished will +behave differently under Python 2 and Python 3. +

    + +
    + +

    Explicitly set the variable in the outer scope to the value that it would have held when run under Python 2. +Then rename the list comprehension variable for additional clarity. +

    + +
    + +

    In this example, x is initially assigned the value of 3. +In Python 3, x will be unchanged as the list comprehension is evaluated in its own scope. +In Python 2, evaluation of the list comprehension occurs in the scope of two_or_three, setting x to 2.

    + + +

    The following example is the same code as above, but the list comprehension variable is renamed to ensure it does not overwrite x.

    + + +
    + + +
  • Python Tutorial: List Comprehensions.
  • +
  • The History of Python: From List Comprehensions to Generator Expressions.
  • +
  • Python Language Reference: List displays.
  • + +
    +
    diff --git a/python/ql/src/Variables/LeakingListComprehension.ql b/python/ql/src/Variables/LeakingListComprehension.ql new file mode 100644 index 00000000000..efec82af4ad --- /dev/null +++ b/python/ql/src/Variables/LeakingListComprehension.ql @@ -0,0 +1,30 @@ +/** + * @name List comprehension variable used in enclosing scope + * @description Using the iteration variable of a list comprehension in the enclosing scope will result in different behavior between Python 2 and 3 and is confusing. + * @kind problem + * @tags portability + * correctness + * @problem.severity warning + * @sub-severity high + * @precision very-high + * @id py/leaking-list-comprehension + */ + +import python +import Definition + +from ListComprehensionDeclaration l, Name use, Name defn +where + use = l.getALeakedVariableUse() and + defn = l.getDefinition() and + l.getAFlowNode().strictlyReaches(use.getAFlowNode()) and + /* Make sure we aren't in a loop, as the variable may be redefined */ + not use.getAFlowNode().strictlyReaches(l.getAFlowNode()) and + not l.contains(use) and + not use.deletes(_) and + not exists(SsaVariable v | + v.getAUse() = use.getAFlowNode() and + not v.getDefinition().strictlyDominates(l.getAFlowNode()) + ) + +select use, use.getId() + " may have a different value in Python 3, as the $@ will not be in scope.", defn, "list comprehension variable" diff --git a/python/ql/src/Variables/LeakingListComprehensionFixed.py b/python/ql/src/Variables/LeakingListComprehensionFixed.py new file mode 100644 index 00000000000..e9cd52363be --- /dev/null +++ b/python/ql/src/Variables/LeakingListComprehensionFixed.py @@ -0,0 +1,7 @@ + +def just_three(): + x = 3 + [0 for y in range(3)] + return x # Will return always return 3. 
+ +print(just_three()) \ No newline at end of file diff --git a/python/ql/src/Variables/Loop.qll b/python/ql/src/Variables/Loop.qll new file mode 100644 index 00000000000..f3b105463ac --- /dev/null +++ b/python/ql/src/Variables/Loop.qll @@ -0,0 +1,38 @@ +import python + + +private predicate empty_sequence(Expr e) { + exists(SsaVariable var | var.getAUse().getNode() = e | empty_sequence(var.getDefinition().getNode())) or + e instanceof List and not exists(e.(List).getAnElt()) or + e instanceof Tuple and not exists(e.(Tuple).getAnElt()) or + e.(StrConst).getText().length() = 0 +} + +/* This has the potential for refinement, but we err on the side of fewer false positives for now. */ +private predicate probably_non_empty_sequence(Expr e) { + not empty_sequence(e) +} + +/** A loop which probably defines v */ +private Stmt loop_probably_defines(Variable v) { + exists(Name defn | defn.defines(v) and result.contains(defn) | + probably_non_empty_sequence(result.(For).getIter()) + or + probably_non_empty_sequence(result.(While).getTest()) + ) +} + +/** Holds if the variable used by `use` is probably defined in a loop */ +predicate probably_defined_in_loop(Name use) { + exists(Stmt loop | + loop = loop_probably_defines(use.getVariable()) | + loop.getAFlowNode().strictlyReaches(use.getAFlowNode()) + ) +} + +/** Holds if `s` is a loop that probably executes at least once */ +predicate loop_probably_executes_at_least_once(Stmt s) { + probably_non_empty_sequence(s.(For).getIter()) + or + probably_non_empty_sequence(s.(While).getTest()) +} diff --git a/python/ql/src/Variables/LoopVariableCapture.py b/python/ql/src/Variables/LoopVariableCapture.py new file mode 100644 index 00000000000..4a6abcb8894 --- /dev/null +++ b/python/ql/src/Variables/LoopVariableCapture.py @@ -0,0 +1,18 @@ + +#Make a list of functions to increment their arguments by 0 to 9. 
+def make_incrementers(): + result = [] + for i in range(10): + def incrementer(x): + return x + i + result.append(incrementer) + return result + +#This will fail +def test(): + incs = make_incrementers() + for x in range(10): + for y in range(10): + assert incs[x](y) == x+y + +test() \ No newline at end of file diff --git a/python/ql/src/Variables/LoopVariableCapture.qhelp b/python/ql/src/Variables/LoopVariableCapture.qhelp new file mode 100644 index 00000000000..15f2b185eb9 --- /dev/null +++ b/python/ql/src/Variables/LoopVariableCapture.qhelp @@ -0,0 +1,60 @@ + + + + +

    +Nested functions are a useful feature of Python as they allow a function to access the variables of its enclosing function. +However, the programmer needs to be aware that when an inner function accesses a variable in an outer scope, +it is the variable that is captured, not the value of that variable. +

    +

    +Therefore, care must be taken when the captured variable is a loop variable, since it is the loop variable and +not the value of that variable that is captured. +This will mean that by the time that the inner function executes, +the loop variable will have its final value, not the value when the inner function was created. +

    + +
    + +

    +The simplest way to fix this problem is to add a local variable of the same name as the outer variable and initialize that +using the outer variable as a default. + +for var in seq: + ... + def inner_func(arg): + ... + use(var) + +becomes + +for var in seq: + ... + def inner_func(arg, var=var): + ... + use(var) + +

    + +
    + +

    +In this example, a list of functions is created which should each increment its argument by its index in the list. +However, since i will be 9 when the functions execute, they will each increment their argument by 9. +

    + +

    +This can be fixed by adding the default value as shown below. The default value is computed when the function is created, so the desired effect is achieved. +

    + + + +
    + +
  • The Hitchhiker’s Guide to Python: Late Binding Closures
  • +
  • Python Language Reference: Naming and binding
  • + +
    +
    diff --git a/python/ql/src/Variables/LoopVariableCapture.ql b/python/ql/src/Variables/LoopVariableCapture.ql new file mode 100644 index 00000000000..307da04861d --- /dev/null +++ b/python/ql/src/Variables/LoopVariableCapture.ql @@ -0,0 +1,47 @@ +/** + * @name Loop variable capture + * @description Capture of a loop variable is not the same as capturing the value of a loop variable, and may be erroneous. + * @kind problem + * @tags correctness + * @problem.severity error + * @sub-severity low + * @precision high + * @id py/loop-variable-capture + */ + +import python + +// Gets the scope of the iteration variable of the looping scope +Scope iteration_variable_scope(AstNode loop) { + result = loop.(For).getScope() + or + result = loop.(Comp).getFunction() +} + +predicate capturing_looping_construct(CallableExpr capturing, AstNode loop, Variable var) { + var.getScope() = iteration_variable_scope(loop) and + var.getAnAccess().getScope() = capturing.getInnerScope() and + capturing.getParentNode+() = loop and + ( + loop.(For).getTarget() = var.getAnAccess() + or + var = loop.(Comp).getAnIterationVariable() + ) +} + +predicate escaping_capturing_looping_construct(CallableExpr capturing, AstNode loop, Variable var) { + capturing_looping_construct(capturing, loop, var) + and + // Escapes if used out side of for loop or is a lambda in a comprehension + ( + exists(Expr e, For forloop | forloop = loop and e.refersTo(_, _, capturing) | not forloop.contains(e)) + or + loop.(Comp).getElt() = capturing + or + loop.(Comp).getElt().(Tuple).getAnElt() = capturing + ) +} + +from CallableExpr capturing, AstNode loop, Variable var +where escaping_capturing_looping_construct(capturing, loop, var) +select capturing, "Capture of loop variable '$@'", loop, var.getId() diff --git a/python/ql/src/Variables/LoopVariableCapture2.py b/python/ql/src/Variables/LoopVariableCapture2.py new file mode 100644 index 00000000000..e0b3db76b17 --- /dev/null +++ 
b/python/ql/src/Variables/LoopVariableCapture2.py @@ -0,0 +1,18 @@ + +#Make a list of functions to increment their arguments by 0 to 9. +def make_incrementers(): + result = [] + for i in range(10): + def incrementer(x, i=i): + return x + i + result.append(incrementer) + return result + +#This will pass +def test(): + incs = make_incrementers() + for x in range(10): + for y in range(10): + assert incs[x](y) == x+y + +test() \ No newline at end of file diff --git a/python/ql/src/Variables/MonkeyPatched.qll b/python/ql/src/Variables/MonkeyPatched.qll new file mode 100644 index 00000000000..5ee67edc0d1 --- /dev/null +++ b/python/ql/src/Variables/MonkeyPatched.qll @@ -0,0 +1,25 @@ +import python + + +predicate monkey_patched_builtin(string name) { + exists(AttrNode attr, SubscriptNode subscr, StrConst s | + subscr.isStore() and + subscr.getIndex().getNode() = s and + s.getText() = name and + subscr.getValue() = attr and + attr.getObject("__dict__").refersTo(theBuiltinModuleObject()) + ) + or + exists(CallNode call, ControlFlowNode bltn, StrConst s | + call.getArg(0) = bltn and + bltn.refersTo(theBuiltinModuleObject()) and + call.getArg(1).getNode() = s and + s.getText() = name and + call.getFunction().refersTo(builtin_object("setattr")) + ) + or + exists(AttrNode attr | + attr.isStore() and + attr.getObject(name).refersTo(theBuiltinModuleObject()) + ) +} diff --git a/python/ql/src/Variables/MultiplyDefined.py b/python/ql/src/Variables/MultiplyDefined.py new file mode 100644 index 00000000000..5d9ee98ca4c --- /dev/null +++ b/python/ql/src/Variables/MultiplyDefined.py @@ -0,0 +1,3 @@ +x = 42 +x = 12 +print x \ No newline at end of file diff --git a/python/ql/src/Variables/MultiplyDefined.qhelp b/python/ql/src/Variables/MultiplyDefined.qhelp new file mode 100644 index 00000000000..94200b44e14 --- /dev/null +++ b/python/ql/src/Variables/MultiplyDefined.qhelp @@ -0,0 +1,29 @@ + + + + + +

    Multiple assignments to a single variable without an intervening usage make the first assignment redundant. +Its value is lost. +

    + +
    + +

    Ensure that the second assignment is in fact correct. +Then delete the first assignment (taking care not to delete the right-hand side if it has side effects).

    + +
    + +

    In this example, x is assigned the value of 42 but then the value is changed to 12 +before x is used. This makes the first assignment useless.

    + + +
    + + +
  • Python: Assignment statements.
  • + +
    +
    diff --git a/python/ql/src/Variables/MultiplyDefined.ql b/python/ql/src/Variables/MultiplyDefined.ql new file mode 100644 index 00000000000..14c95acb1fd --- /dev/null +++ b/python/ql/src/Variables/MultiplyDefined.ql @@ -0,0 +1,61 @@ +/** + * @name Variable defined multiple times + * @description Assignment to a variable occurs multiple times without any intermediate use of that variable + * @kind problem + * @tags maintainability + * useless-code + * external/cwe/cwe-563 + * @problem.severity warning + * @sub-severity low + * @precision very-high + * @id py/multiple-definition + */ + +import python +import Definition + +predicate multiply_defined(AstNode asgn1, AstNode asgn2, Variable v) { + /* Must be redefined on all possible paths in the CFG corresponding to the original source. + * For example, splitting may create a path where `def` is unconditionally redefined, even though + * it is not in the original source. */ + forex(Definition def, Definition redef | + def.getVariable() = v and + def = asgn1.getAFlowNode() and + redef = asgn2.getAFlowNode() | + def.isUnused() and + def.getARedef() = redef and + def.isRelevant() + ) +} + +predicate simple_literal(Expr e) { + e.(Num).getN() = "0" or + e instanceof NameConstant or + e instanceof List and not exists(e.(List).getAnElt()) or + e instanceof Tuple and not exists(e.(Tuple).getAnElt()) or + e instanceof Dict and not exists(e.(Dict).getAKey()) or + e.(StrConst).getText() = "" +} + +/** A multiple definition is 'uninteresting' if it sets a variable to a + * simple literal before reassigning it. 
+ * x = None + * if cond: + * x = value1 + * else: + * x = value2 + */ +predicate uninteresting_definition(AstNode asgn1) { + exists(AssignStmt a | + a.getATarget() = asgn1 | + simple_literal(a.getValue()) + ) +} + + +from AstNode asgn1, AstNode asgn2, Variable v +where + multiply_defined(asgn1, asgn2, v) and + forall(Name el | el = asgn1.getParentNode().(Tuple).getAnElt() | multiply_defined(el, _, _)) and + not uninteresting_definition(asgn1) +select asgn1, "This assignment to '" + v.getId() + "' is unnecessary as it is redefined $@ before this value is used.", asgn2 as t, "here" diff --git a/python/ql/src/Variables/ShadowBuiltin.py b/python/ql/src/Variables/ShadowBuiltin.py new file mode 100644 index 00000000000..ab57e30d382 --- /dev/null +++ b/python/ql/src/Variables/ShadowBuiltin.py @@ -0,0 +1,8 @@ +def test(): + int = 1 # Variable should be renamed to avoid + def print_int(): # shadowing the int() built-in function + print int + print_int() + print int + +test() diff --git a/python/ql/src/Variables/ShadowBuiltin.qhelp b/python/ql/src/Variables/ShadowBuiltin.qhelp new file mode 100644 index 00000000000..ff6a02b6951 --- /dev/null +++ b/python/ql/src/Variables/ShadowBuiltin.qhelp @@ -0,0 +1,30 @@ + + + + +

    When a local variable is defined with the same name as a built-in type or function, the local +variable "shadows" or "hides" the built-in object. This can lead to +confusion as a reader of the code may expect the variable to refer to a built-in object. +

    + +
    + + +

    Change the name of the local variable so that it no longer matches the name of a built-in object. +

    + +
    + + + + + + + +
  • Python Standard Library: Built-in Functions, + Built-in Types.
  • + +
    +
    diff --git a/python/ql/src/Variables/ShadowBuiltin.ql b/python/ql/src/Variables/ShadowBuiltin.ql new file mode 100644 index 00000000000..8bf59411b91 --- /dev/null +++ b/python/ql/src/Variables/ShadowBuiltin.ql @@ -0,0 +1,64 @@ +/** + * @name Builtin shadowed by local variable + * @description Defining a local variable with the same name as a built-in object + * makes the built-in object unusable within the current scope and makes the code + * more difficult to read. + * @kind problem + * @tags maintainability + * readability + * @problem.severity recommendation + * @sub-severity low + * @precision medium + * @id py/local-shadows-builtin + */ + +import python +import Shadowing + +/** Holds if `name` is a builtin name that this query deliberately does not report when it is shadowed. */ +predicate white_list(string name) { + /* These are rarely used and thus unlikely to be confusing */ + name = "iter" or + name = "next" or + name = "input" or + name = "file" or + name = "apply" or + name = "slice" or + name = "buffer" or + name = "coerce" or + name = "intern" or + name = "exit" or + name = "quit" or + name = "license" or + /* These are short and/or hard to avoid */ + name = "dir" or + name = "id" or + name = "max" or + name = "min" or + name = "sum" or + name = "cmp" or + name = "chr" or + name = "ord" or + name = "bytes" or + name = "_" +} + +/** Holds if `d`, starting on line `line` in the function scope `scope`, defines a local variable + * called `name`, where `name` is also the name of a builtin object, is not white-listed, + * and `d` is not an optimizing parameter (see `optimizing_parameter`). + */ +predicate shadows(Name d, string name, Scope scope, int line) { + exists(LocalVariable l | d.defines(l) and scope instanceof Function and + l.getId() = name and + exists(builtin_object(l.getId())) + ) and + d.getScope() = scope and + d.getLocation().getStartLine() = line and + not white_list(name) and + not optimizing_parameter(d) +} + +/** Holds if `d` is a shadowing definition of `name` located on the smallest line number + * of all shadowing definitions of `name` in the same scope, so that each shadowed name is + * reported at its earliest line in a scope rather than at every definition. + */ +predicate first_shadowing_definition(Name d, string name) { + exists(int first, Scope scope | + shadows(d, name, scope, first) and + first = min(int line | shadows(_, name, scope, line))) +} + +from Name d, string name +where first_shadowing_definition(d, name) +select d, "Local variable " + name + " shadows a builtin variable." 
diff --git a/python/ql/src/Variables/ShadowGlobal.py b/python/ql/src/Variables/ShadowGlobal.py new file mode 100644 index 00000000000..672463f4bbb --- /dev/null +++ b/python/ql/src/Variables/ShadowGlobal.py @@ -0,0 +1,10 @@ +var = 2 # Global variable + +def test2(): + def print_var(): + var = 3 + print var # Local variable which "shadows" the global variable + print_var() # making it more difficult to determine which "var" + print var # is referenced + +test2() diff --git a/python/ql/src/Variables/ShadowGlobal.qhelp b/python/ql/src/Variables/ShadowGlobal.qhelp new file mode 100644 index 00000000000..52c88337570 --- /dev/null +++ b/python/ql/src/Variables/ShadowGlobal.qhelp @@ -0,0 +1,36 @@ + + + + +

    Python statements can access variables in both the local namespace and in the global namespace. +When a local and a global variable have the same name, the local variable "shadows" or "hides" the +global variable. When the variable is referenced, the variable with local scope is used unless you +explicitly use the global statement to reference the global variable. This can lead to +confusion as a reader of the code may expect the variable to refer to a global. +

    + +
    + + +

    Avoid using the same name for variables in local and global namespaces.

    + +
    + +

    The following simple example shows how a local variable can "shadow" a global variable. The local +variable should be renamed to make the code easier to interpret.

    + + + +
    + + +
  • J. Lusth, The Art and Craft of Programming - Python Edition, Section: Scope. University of Alabama, 2012. (Published online).
  • +
  • New Mexico Tech Computer Center: The global +statement: Declare access to a global name.
  • + + + +
    +
    diff --git a/python/ql/src/Variables/ShadowGlobal.ql b/python/ql/src/Variables/ShadowGlobal.ql new file mode 100644 index 00000000000..2bfb91e5a73 --- /dev/null +++ b/python/ql/src/Variables/ShadowGlobal.ql @@ -0,0 +1,66 @@ +/** + * @name Global shadowed by local variable + * @description Defining a local variable with the same name as a global variable + * makes the global variable unusable within the current scope and makes the code + * more difficult to read. + * @kind problem + * @tags maintainability + * readability + * @problem.severity recommendation + * @sub-severity low + * @precision medium + * @id py/local-shadows-global + */ + +import python +import Shadowing + +predicate shadows(Name d, GlobalVariable g, Scope scope, int line) { + exists(LocalVariable l | d.defines(l) and l.getId() = g.getId() and + scope instanceof Function and g.getScope() = scope.getScope() and + not exists(Import il, Import ig, Name gd | il.contains(d) and gd.defines(g) and ig.contains(gd)) and + not exists(Assign a | a.getATarget() = d and a.getValue() = g.getAnAccess()) + ) and + not exists(builtin_object(g.getId())) and + d.getScope() = scope and + d.getLocation().getStartLine() = line and + exists(Name defn | defn.defines(g) | + not exists(If i | i.isNameEqMain() | + i.contains(defn) + ) + ) and + not optimizing_parameter(d) +} + +/* pytest dynamically populates its namespace so, we cannot look directly for the pytest.fixture function */ +AttrNode pytest_fixture_attr() { + exists(ModuleObject pytest | + result.getObject("fixture").refersTo(pytest) + ) +} + +Object pytest_fixture() { + exists(CallNode call | + call.getFunction() = pytest_fixture_attr() + or + call.getFunction().(CallNode).getFunction() = pytest_fixture_attr() + | + call.refersTo(result) + ) +} + +/* pytest fixtures require that the parameter name is also a global */ +predicate assigned_pytest_fixture(GlobalVariable v) { + exists(NameNode def | def.defines(v) and 
def.(DefinitionNode).getValue().refersTo(pytest_fixture())) +} + +predicate first_shadowing_definition(Name d, GlobalVariable g) { + exists(int first, Scope scope | + shadows(d, g, scope, first) and + first = min(int line | shadows(_, g, scope, line))) +} + +from Name d, GlobalVariable g, Name def +where first_shadowing_definition(d, g) and not exists(Name n | n.deletes(g)) and + def.defines(g) and not assigned_pytest_fixture(g) and not g.getId() = "_" +select d, "Local variable '" + g.getId() + "' shadows a global variable defined $@.", def, "here" diff --git a/python/ql/src/Variables/Shadowing.qll b/python/ql/src/Variables/Shadowing.qll new file mode 100644 index 00000000000..5c56f5cacc2 --- /dev/null +++ b/python/ql/src/Variables/Shadowing.qll @@ -0,0 +1,13 @@ +import python + +/* Parameters with defaults that are used as an optimization. + * E.g. def f(x, len=len): ... + * (In general, this kind of optimization is not recommended.) + */ +predicate optimizing_parameter(Parameter p) { + exists(string name, Name glob | + p.getDefault() = glob | + glob.getId() = name and + p.asName().getId() = name + ) +} diff --git a/python/ql/src/Variables/SuspiciousUnusedLoopIterationVariable.py b/python/ql/src/Variables/SuspiciousUnusedLoopIterationVariable.py new file mode 100644 index 00000000000..7b91ea8a6a4 --- /dev/null +++ b/python/ql/src/Variables/SuspiciousUnusedLoopIterationVariable.py @@ -0,0 +1,6 @@ + +# +def test(): + for t in [TypeA, TypeB]: + x = TypeA() + run_test(x) diff --git a/python/ql/src/Variables/SuspiciousUnusedLoopIterationVariable.qhelp b/python/ql/src/Variables/SuspiciousUnusedLoopIterationVariable.qhelp new file mode 100644 index 00000000000..d537051c9f3 --- /dev/null +++ b/python/ql/src/Variables/SuspiciousUnusedLoopIterationVariable.qhelp @@ -0,0 +1,35 @@ + + + + +

    A for loop iteration variable is not used in the body of the loop, and the loop does not count the number of items in the sequence. +This is suspicious as there is rarely any reason to iterate over a sequence and not use the contents. +Not using the loop variable can often indicate a logical error or typo. +

    + +
    + +

    Check carefully whether the loop variable should be used. +If the variable is genuinely not needed and the code is correct, then rename the variable to _ +or unused to indicate to readers of the code that it is intentionally unused. +

    + +
    + +

    In this example, the for loop iteration variable t is never used in the body of the loop. It appears that the +original test function was used to test TypeA and was subsequently modified to test TypeB as well. +

    + +

    +It is likely that the change from x = TypeA() to x = t() was forgotten. The fixed version is shown below. +

    + + +
    + +
  • Python Language Reference: The for statement.
  • +
  • Python Tutorial: For statements.
  • +
    +
    diff --git a/python/ql/src/Variables/SuspiciousUnusedLoopIterationVariable.ql b/python/ql/src/Variables/SuspiciousUnusedLoopIterationVariable.ql new file mode 100644 index 00000000000..4fbfd1a42a9 --- /dev/null +++ b/python/ql/src/Variables/SuspiciousUnusedLoopIterationVariable.ql @@ -0,0 +1,126 @@ +/** + * @name Suspicious unused loop iteration variable + * @description A loop iteration variable is unused, which suggests an error. + * @kind problem + * @tags maintainability + * correctness + * @problem.severity error + * @sub-severity low + * @precision high + * @id py/unused-loop-variable + */ + +import python +import Definition + +/** Holds if `s` is an augmented assignment whose value is an integer literal, or an assignment + * whose value is a binary expression with the assigned variable on the left and an integer + * literal on the right. + */ +predicate is_increment(Stmt s) { + /* x += n */ + s.(AugAssign).getValue() instanceof IntegerLiteral + or + /* x = x + n */ + exists(Name t, BinaryExpr add | + t = s.(AssignStmt).getTarget(0) and + add = s.(AssignStmt).getValue() and + add.getLeft().(Name).getVariable() = t.getVariable() and + add.getRight() instanceof IntegerLiteral + ) +} + +/** Holds if a statement of `f` satisfies `is_increment`, that is, the loop appears to count. */ +predicate counting_loop(For f) { + is_increment(f.getAStmt()) +} + +/** Holds if the only statement of `f` is a `pass` statement. */ +predicate empty_loop(For f) { + not exists(f.getStmt(1)) and f.getStmt(0) instanceof Pass +} + +/** Holds if `f` contains no `continue` statement and the last item of its body is a `return` or a `break`. */ +predicate one_item_only(For f) { + not exists(Continue c | f.contains(c)) and + exists(Stmt s | + s = f.getBody().getLastItem() | + s instanceof Return + or + s instanceof Break + ) +} + +/** Holds if `f` refers to, or syntactically is, a call to `range` or `xrange`, possibly wrapped in a call to `list`. */ +predicate points_to_call_to_range(ControlFlowNode f) { + /* (x)range is a function in Py2 and a class in Py3, so we must treat it as a plain object */ + exists(Object range, Object call | + range = builtin_object("range") or + range = builtin_object("xrange") + | + f.refersTo(call) and + call.(CallNode).getFunction().refersTo(range) + ) + or + /* In case points-to fails due to 'from six.moves import range' or similar. 
*/ + exists(string range | + f.getNode().(Call).getFunc().(Name).getId() = range | + range = "range" or range = "xrange" + ) + or + /* If range is wrapped in a list it is still a range */ + exists(CallNode call | + f.refersTo(call) and + call = theListType().getACall() and + points_to_call_to_range(call.getArg(0)) + ) +} + +/** Whether n is a use of a variable that is not effectively a constant. */ +predicate use_of_non_constant(Name n) { + exists(Variable var | + n.uses(var) and + /* use is local */ + not n.getScope() instanceof Module and + /* variable is not global */ + not var.getScope() instanceof Module | + /* Defined more than once (dynamically) */ + strictcount(Name def | def.defines(var)) > 1 or + exists(For f, Name def | f.contains(def) and def.defines(var)) or + exists(While w, Name def | w.contains(def) and def.defines(var)) + ) +} + +/** Whether loop body is implicitly repeating something N times. + * E.g. queue.add(None) + */ +predicate implicit_repeat(For f) { + not exists(f.getStmt(1)) and + exists(ImmutableLiteral imm | + f.getStmt(0).contains(imm) + ) and + not exists(Name n | f.getBody().contains(n) and use_of_non_constant(n)) +} + +/** Get the CFG node for the iterable relating to the for-statement `f` in a comprehension. + * The for-statement `f` is the artificial for-statement in a comprehension + * and the result is the iterable in that comprehension. + * E.g. gets `x` from `{ y for y in x }`. 
+ */ +ControlFlowNode get_comp_iterable(For f) { + exists(Comp c | + c.getFunction().getStmt(0) = f | + c.getAFlowNode().getAPredecessor() = result + ) +} + +from For f, Variable v, string msg + +where f.getTarget() = v.getAnAccess() and + not f.getAStmt().contains(v.getAnAccess()) and + not points_to_call_to_range(f.getIter().getAFlowNode()) and + not points_to_call_to_range(get_comp_iterable(f)) and + not name_acceptable_for_unused_variable(v) and + not f.getScope().getName() = "genexpr" and + not empty_loop(f) and + not one_item_only(f) and + not counting_loop(f) and + not implicit_repeat(f) and + if exists(Name del | del.deletes(v) and f.getAStmt().contains(del)) then + msg = "' is deleted, but not used, in the loop body." + else + msg = "' is not used in the loop body." + +select f, "For loop variable '" + v.getId() + msg diff --git a/python/ql/src/Variables/SuspiciousUnusedLoopIterationVariableFixed.py b/python/ql/src/Variables/SuspiciousUnusedLoopIterationVariableFixed.py new file mode 100644 index 00000000000..f28c276a626 --- /dev/null +++ b/python/ql/src/Variables/SuspiciousUnusedLoopIterationVariableFixed.py @@ -0,0 +1,6 @@ + +# +def test(): + for t in [TypeA, TypeB]: + x = t() + run_test(x) diff --git a/python/ql/src/Variables/Undefined.qll b/python/ql/src/Variables/Undefined.qll new file mode 100644 index 00000000000..eca28fe9aa5 --- /dev/null +++ b/python/ql/src/Variables/Undefined.qll @@ -0,0 +1,138 @@ +import python +import Loop +import semmle.python.security.TaintTracking + +/** Marker for "uninitialized". */ +class Uninitialized extends TaintKind { + + Uninitialized() { this = "undefined" } + +} + +/** A source of an uninitialized variable. + * Either the start of the scope or a deletion. 
+ */ +class UninitializedSource extends TaintedDefinition { + + UninitializedSource() { + exists(FastLocalVariable var | + this.getSourceVariable() = var and + not var.escapes() | + this instanceof ScopeEntryDefinition + or + this instanceof DeletionDefinition + ) + } + + override predicate isSourceOf(TaintKind kind) { + kind instanceof Uninitialized + } + +} + +/** A loop where we are guaranteed (or is at least likely) to execute the body at least once. + */ +class AtLeastOnceLoop extends DataFlowExtension::DataFlowVariable { + + AtLeastOnceLoop() { + loop_entry_variables(this, _) + } + + /* If we are guaranteed to iterate over a loop at least once, then we can prune any edges that + * don't pass through the body. + */ + override predicate prunedSuccessor(EssaVariable succ) { + loop_entry_variables(this, succ) + } + +} + +/** Holds if `succ` is the variable of a phi function in a loop block and `pred` is the input + * to that phi function from the block `pb`, where `pb` to the loop block is a loop entry edge. + */ +private predicate loop_entry_variables(EssaVariable pred, EssaVariable succ) { + exists(PhiFunction phi, BasicBlock pb | + loop_entry_edge(pb, phi.getBasicBlock()) and + succ = phi.getVariable() and + pred = phi.getInput(pb) + ) +} + +/** Holds if `pred` is both a predecessor and the immediate dominator of `loop`, and `loop` is the + * basic block of a flow node of a statement for which `loop_probably_executes_at_least_once` holds. + */ +private predicate loop_entry_edge(BasicBlock pred, BasicBlock loop) { + pred = loop.getAPredecessor() and + pred = loop.getImmediateDominator() and + exists(Stmt s | + loop_probably_executes_at_least_once(s) and + s.getAFlowNode().getBasicBlock() = loop + ) +} + +/** A sanitizer for the `Uninitialized` taint kind. */ +class UnitializedSanitizer extends Sanitizer { + + UnitializedSanitizer() { this = "use of variable" } + + override + predicate sanitizingDefinition(TaintKind taint, EssaDefinition def) { + // An assignment cannot leave a variable uninitialized + taint instanceof Uninitialized and + ( + def instanceof AssignmentDefinition + or + def instanceof ExceptionCapture + or + def instanceof ParameterDefinition + or + /* A use is a "sanitizer" of "uninitialized", as any use of an undefined + * variable will raise, making the subsequent code unreachable. 
+ */ + exists(def.(EssaNodeRefinement).getInput().getASourceUse()) + or + exists(def.(PhiFunction).getAnInput().getASourceUse()) + or + exists(def.(EssaEdgeRefinement).getInput().getASourceUse()) + ) + } + + override + predicate sanitizingNode(TaintKind taint, ControlFlowNode node) { + taint instanceof Uninitialized and + exists(EssaVariable v | + v.getASourceUse() = node and + not first_use(node, v) + ) + } + +} + +/** Since any use of a local will raise if it is uninitialized, then + * any use dominated by another use of the same variable must be defined, or is unreachable. + */ +private predicate first_use(NameNode u, EssaVariable v) { + v.getASourceUse() = u and + not exists(NameNode other | + v.getASourceUse() = other and + other.strictlyDominates(u) + ) +} + +/** Holds if `call` is a call of the form `name(...)` or `obj.name(...)` and + * there is a function called `name` that can exit the program. + */ +private predicate maybe_call_to_exiting_function(CallNode call) { + exists(FunctionObject exits, string name | + exits.neverReturns() and exits.getName() = name + | + call.getFunction().(NameNode).getId() = name or + call.getFunction().(AttrNode).getName() = name + ) +} + +/** Prune edges where the predecessor block looks like it might contain a call to an exit function. 
*/ +class ExitFunctionGuardedEdge extends DataFlowExtension::DataFlowVariable { + + /* Annotated `override`, consistent with `AtLeastOnceLoop.prunedSuccessor` above. */ + override predicate prunedSuccessor(EssaVariable succ) { + exists(CallNode exit_call | + succ.(PhiFunction).getInput(exit_call.getBasicBlock()) = this and + maybe_call_to_exiting_function(exit_call) + ) + } + +} + diff --git a/python/ql/src/Variables/UndefinedExport.py b/python/ql/src/Variables/UndefinedExport.py new file mode 100644 index 00000000000..1d1834854f7 --- /dev/null +++ b/python/ql/src/Variables/UndefinedExport.py @@ -0,0 +1,5 @@ +__all__ = ['spamm', 'troll', 'paywall'] + +def spam(): return 'Spam' +def troll(): return 'Troll' +def paywall(): return 'Pay wall' diff --git a/python/ql/src/Variables/UndefinedExport.qhelp b/python/ql/src/Variables/UndefinedExport.qhelp new file mode 100644 index 00000000000..f053de5048d --- /dev/null +++ b/python/ql/src/Variables/UndefinedExport.qhelp @@ -0,0 +1,37 @@ + + + + +

    When a module is imported using import *, all attributes listed in +__all__ are imported. If __all__ includes attributes that +are not defined in the module then an exception is triggered. This usually indicates +a typographic error in the attributes in __all__ or in the name of the +object.

    + +
    + + +

    Correct any typographic errors, either in the name of the object or in the string in +__all__. If there are no typographic errors, either delete the name from +__all__ or add the object to the module.

    + +
    + +

    +In the example, the function name spam has been misspelled in the __all__ list. +This will result in spamm being highlighted as an undefined export. +Correcting the spelling will fix the defect. +

    + + +
    + + +
  • Python Language Reference: The import statement.
  • +
  • Python Tutorial: Importing * from a Package.
  • + + +
    +
    diff --git a/python/ql/src/Variables/UndefinedExport.ql b/python/ql/src/Variables/UndefinedExport.ql new file mode 100644 index 00000000000..7ec2647a209 --- /dev/null +++ b/python/ql/src/Variables/UndefinedExport.ql @@ -0,0 +1,52 @@ +/** + * @name Explicit export is not defined + * @description Including an undefined attribute in __all__ causes an exception when + * the module is imported using '*' + * @kind problem + * @tags reliability + * maintainability + * @problem.severity error + * @sub-severity low + * @precision high + * @id py/undefined-export + */ + +import python + +/** Holds if the string literal `name` is an element of a list that is assigned to the + * global variable `__all__` by an assignment whose scope is the module `m`. + */ +predicate declaredInAll(Module m, StrConst name) +{ + exists(Assign a, GlobalVariable all | + a.defines(all) and a.getScope() = m and + all.getId() = "__all__" and ((List)a.getValue()).getAnElt() = name + ) +} + +/** Holds if the module `m` contains a call to the function given by `theGlobalsFunction()` whose + * result is used as the object of an attribute access or as the value of a subscript store, + * or contains a call to `enum.Enum._convert`. + * NOTE(review): presumably these are treated as ways a module may define names dynamically, + * so that attributes missing statically are not reported -- confirm. + */ +predicate mutates_globals(PythonModuleObject m) { + exists(CallNode globals | + globals = theGlobalsFunction().(FunctionObject).getACall() and + globals.getScope() = m.getModule() | + exists(AttrNode attr | attr.getObject() = globals) + or + exists(SubscriptNode sub | sub.getValue() = globals and sub.isStore()) + ) + or + exists(Object enum_convert | + enum_convert.hasLongName("enum.Enum._convert") and + exists(CallNode call | + call.getScope() = m.getModule() + | + enum_convert.(FunctionObject).getACall() = call or + call.getFunction().refersTo(enum_convert) + ) + ) +} + +from PythonModuleObject m, StrConst name, string exported_name +where declaredInAll(m.getModule(), name) and +exported_name = name.strValue() and +not m.hasAttribute(exported_name) and +not (m.getShortName() = "__init__" and exists(m.getPackage().getModule().getSubModule(exported_name))) and +not exists(ImportStarNode imp | imp.getEnclosingModule() = m.getModule() | not imp.getModule().refersTo(_)) and +not mutates_globals(m) +select name, "The name '" + exported_name + "' is exported by __all__ but is not defined." 
\ No newline at end of file diff --git a/python/ql/src/Variables/UndefinedGlobal.py b/python/ql/src/Variables/UndefinedGlobal.py new file mode 100644 index 00000000000..cfbff8bfacb --- /dev/null +++ b/python/ql/src/Variables/UndefinedGlobal.py @@ -0,0 +1,12 @@ +import math + +angle = 0.01 + +sin(angle) # NameError: name 'sin' is not defined (function imported from 'math') + +math.sin(angle) # 'sin' function now correctly defined + +math.tan(angel) # NameError: name 'angel' not defined (typographic error) + +math.tan(angle) # Global variable now correctly defined + diff --git a/python/ql/src/Variables/UndefinedGlobal.qhelp b/python/ql/src/Variables/UndefinedGlobal.qhelp new file mode 100644 index 00000000000..f77a96aad19 --- /dev/null +++ b/python/ql/src/Variables/UndefinedGlobal.qhelp @@ -0,0 +1,36 @@ + + + + +

    This global variable may not be defined. +If this code is executed and the variable is undefined then a NameError will occur. +

    + +
    + + +

    Check that the name of the global variable is not a typographic error. If the name is correct +then define the variable or import the module that defines the function or method.

    + +

    If this variable is expected to be initialized from another module before it is used, then the NameError may not occur. +Nonetheless, the code will be more robust and clearer if the variable is set to a default value in its own module. +

    + +
    + +

    The following code shows two different examples of undefined "global variables".

    + + + + +
    + + +
  • Python Standard Library: NameError.
  • +
  • The Python Tutorial: Modules.
  • + + +
    +
    diff --git a/python/ql/src/Variables/UndefinedGlobal.ql b/python/ql/src/Variables/UndefinedGlobal.ql new file mode 100644 index 00000000000..6e1b3d36429 --- /dev/null +++ b/python/ql/src/Variables/UndefinedGlobal.ql @@ -0,0 +1,132 @@ +/** + * @name Use of an undefined global variable + * @description Using a global variable before it is initialized causes an exception. + * @kind problem + * @tags reliability + * correctness + * @problem.severity error + * @sub-severity low + * @precision low + * @id py/undefined-global-variable + */ + +import python +import Variables.MonkeyPatched +import Loop +import semmle.python.pointsto.PointsTo + +predicate guarded_against_name_error(Name u) { + exists(Try t | t.getBody().getAnItem().contains(u) | + ((Name)((ExceptStmt)t.getAHandler()).getType()).getId() = "NameError" + ) + or + exists(ConditionBlock guard, BasicBlock controlled, Call globals | + guard.getLastNode().getNode().contains(globals) or + guard.getLastNode().getNode() = globals | + globals.getFunc().(Name).getId() = "globals" and + guard.controls(controlled, _) and + controlled.contains(u.getAFlowNode()) + ) +} + +predicate contains_unknown_import_star(Module m) { + exists(ImportStar imp | imp.getScope() = m | + not exists(ModuleObject imported | imported.importedAs(imp.getImportedModuleName())) + or + exists(ModuleObject imported | + imported.importedAs(imp.getImportedModuleName()) | + not imported.exportsComplete() + ) + ) +} + +predicate undefined_use_in_function(Name u) { + exists(Function f | u.getScope().getScope*() = f and + /* Either function is a method or inner function or it is live at the end of the module scope */ + (not f.getScope() = u.getEnclosingModule() or ((ImportTimeScope)u.getEnclosingModule()).definesName(f.getName())) + and + /* There is a use, but not a definition of this global variable in the function or enclosing scope */ + exists(GlobalVariable v | u.uses(v) | + not exists(Assign a, Scope defnScope | + a.getATarget() = v.getAnAccess() 
and a.getScope() = defnScope | + defnScope = f or + /* Exclude modules as that case is handled more precisely below. */ + (defnScope = f.getScope().getScope*() and not defnScope instanceof Module) + ) + ) + ) + and + not ((ImportTimeScope)u.getEnclosingModule()).definesName(u.getId()) + and + not exists(ModuleObject m | m.getModule() = u.getEnclosingModule() | m.hasAttribute(u.getId())) + and + not globallyDefinedName(u.getId()) + and + not exists(SsaVariable var | var.getAUse().getNode() = u and not var.maybeUndefined()) + and + not guarded_against_name_error(u) + and + not (u.getEnclosingModule().isPackageInit() and u.getId() = "__path__") +} + +predicate undefined_use_in_class_or_module(Name u) { + exists(GlobalVariable v | u.uses(v)) + and + not exists(Function f | u.getScope().getScope*() = f) + and + exists(SsaVariable var | var.getAUse().getNode() = u | var.maybeUndefined()) + and + not guarded_against_name_error(u) + and + not exists(ModuleObject m | m.getModule() = u.getEnclosingModule() | m.hasAttribute(u.getId())) + and + not (u.getEnclosingModule().isPackageInit() and u.getId() = "__path__") + and + not globallyDefinedName(u.getId()) +} + +predicate use_of_exec(Module m) { + exists(Exec exec | exec.getScope() = m) + or + exists(CallNode call, FunctionObject exec | + exec.getACall() = call and call.getScope() = m | + exec = builtin_object("exec") or + exec = builtin_object("execfile") + ) +} + +predicate undefined_use(Name u) { + ( + undefined_use_in_class_or_module(u) + or + undefined_use_in_function(u) + ) and + not monkey_patched_builtin(u.getId()) and + not contains_unknown_import_star(u.getEnclosingModule()) and + not use_of_exec(u.getEnclosingModule()) and + not exists(u.getVariable().getAStore()) and + not u.refersTo(_) and + not probably_defined_in_loop(u) +} + +private predicate first_use_in_a_block(Name use) { + exists(GlobalVariable v, BasicBlock b, int i | + i = min(int j | b.getNode(j).getNode() = v.getALoad()) and b.getNode(i) = 
use.getAFlowNode() + ) +} + +predicate first_undefined_use(Name use) { + undefined_use(use) and + exists(GlobalVariable v | + v.getALoad() = use | + first_use_in_a_block(use) and + not exists(ControlFlowNode other | + other.getNode() = v.getALoad() and + other.getBasicBlock().strictlyDominates(use.getAFlowNode().getBasicBlock()) + ) + ) +} + +from Name u +where first_undefined_use(u) +select u, "This use of global variable '" + u.getId() + "' may be undefined." diff --git a/python/ql/src/Variables/UndefinedPlaceHolder.qhelp b/python/ql/src/Variables/UndefinedPlaceHolder.qhelp new file mode 100644 index 00000000000..726ac86a386 --- /dev/null +++ b/python/ql/src/Variables/UndefinedPlaceHolder.qhelp @@ -0,0 +1,29 @@ + + + + +

    This place-holder variable may not be defined. +If this code is executed and the variable is undefined then a NameError will occur. +

    + +
    + + +

    Check that the name of the place-holder variable is not a typographic error. +If the name is correct, either define a value for the variable, or import the module that defines the function or method that sets the value. +

    + +

    If another module initializes this variable before it is used, then the NameError may not occur. +However, you can make the code more robust and clearer by setting the variable to a default value in its own module. +

    + +
    + + + +
  • Python Standard Library: NameError.
  • + +
    +
    diff --git a/python/ql/src/Variables/UndefinedPlaceHolder.ql b/python/ql/src/Variables/UndefinedPlaceHolder.ql new file mode 100644 index 00000000000..f3eb960045c --- /dev/null +++ b/python/ql/src/Variables/UndefinedPlaceHolder.ql @@ -0,0 +1,54 @@ +/** + * @name Use of an undefined placeholder variable + * @description Using a variable before it is initialized causes an exception. + * @kind problem + * @problem.severity error + * @sub-severity low + * @precision medium + * @id py/undefined-placeholder-variable + */ + +import python +import Variables.MonkeyPatched + +/* Local variable part */ + +/** Holds if `use` is in a function and is a use of an SSA variable for a local variable + * that is not `maybeUndefined()`. + */ +predicate initialized_as_local(PlaceHolder use) { + exists(SsaVariable l, Function f | f = use.getScope() and l.getAUse() = use.getAFlowNode() | + l.getVariable() instanceof LocalVariable and + not l.maybeUndefined() + ) +} + +/* Not a template member */ + +/** Gets a class that has the scope of `use` as one of its methods. */ +Class enclosing_class(PlaceHolder use) { + result.getAMethod() = use.getScope() +} + +/** Holds if the enclosing class of `use`, viewed as an `ImportTimeScope`, defines the name of `use`. */ +predicate template_attribute(PlaceHolder use) { + exists(ImportTimeScope cls | + cls = enclosing_class(use) | + cls.definesName(use.getId()) + ) +} + +/* Global Stuff */ + +/** Holds if the name of `use` is not an attribute of the module object for its enclosing module, + * is not a globally defined name, and is not a monkey-patched builtin. + */ +predicate not_a_global(PlaceHolder use) { + not exists(PythonModuleObject mo | mo.hasAttribute(use.getId()) and mo.getModule() = use.getEnclosingModule()) + and + not globallyDefinedName(use.getId()) and + not monkey_patched_builtin(use.getId()) +} + +from PlaceHolder p +where +not initialized_as_local(p) and +not template_attribute(p) and +not_a_global(p) +select p, "This use of place-holder variable '" + p.getId() + "' may be undefined" + + + diff --git a/python/ql/src/Variables/UninitializedLocal.py b/python/ql/src/Variables/UninitializedLocal.py new file mode 100644 index 00000000000..b079652e953 --- /dev/null +++ b/python/ql/src/Variables/UninitializedLocal.py @@ -0,0 +1,33 @@ +def test(): + var = 1 + def print_var(): + print var # Use variable from outer scope + print_var() + print var + + +def test1(): + var = 2 + 
def print_var(): + print var # Attempt to use variable from local scope. + var = 3 # Since this is not initialized yet, this results + print_var() # in an UnboundLocalError + print var + + +def test2(): + var = 2 + def print_var(): + var = 3 # Initialize local version of the variable + print var # Use variable from local scope. + print_var() # Note that this local variable "shadows" the variable from + print var # outer scope which makes code more difficult to interpret. + + +def test3(): + var = 4 + def print_var(): + nonlocal var # Use non-local variable from outer scope. + print var + print_var() + print var \ No newline at end of file diff --git a/python/ql/src/Variables/UninitializedLocal.qhelp b/python/ql/src/Variables/UninitializedLocal.qhelp new file mode 100644 index 00000000000..e9af644d682 --- /dev/null +++ b/python/ql/src/Variables/UninitializedLocal.qhelp @@ -0,0 +1,41 @@ + + + + + +

    This local variable may be used before it is defined. If a variable is assigned to in a function +and not explicitly declared global or nonlocal then it is assumed to be a +local variable. +If it is used before it is defined then an UnboundLocalError will be raised. +

    + +
    + + +

    Review the code and consider the intended scope of the variable. Determine whether the variable +should be global or local in scope. If a global variable is required then add a global +statement, or in Python 3 you can use a nonlocal statement if the variable occurs in an +enclosing function. Otherwise, ensure that the variable is defined before it is used.

    + +
    + +

    The following code includes different functions that use variables. test1() +fails with an UnboundLocalError because the local variable var is used +before it is initialized.

    + + + + +
    + + +
  • Python Standard Library: Built-in Exceptions: UnboundLocalError.
  • +
  • Python Frequently Asked Questions: Why am I getting an UnboundLocalError when the variable has a value?.
  • +
  • Python Course: Global and Local Variables.
  • +
  • Python Language Reference: The global statement, + The nonlocal statement.
  • + +
    +
    diff --git a/python/ql/src/Variables/UninitializedLocal.ql b/python/ql/src/Variables/UninitializedLocal.ql new file mode 100644 index 00000000000..2e01e2f7c3d --- /dev/null +++ b/python/ql/src/Variables/UninitializedLocal.ql @@ -0,0 +1,42 @@ +/** + * @name Potentially uninitialized local variable + * @description Using a local variable before it is initialized causes an UnboundLocalError. + * @kind problem + * @tags reliability + * correctness + * @problem.severity error + * @sub-severity low + * @precision medium + * @id py/uninitialized-local-variable + */ + +import python +import Undefined + + +predicate uninitialized_local(NameNode use) { + exists(FastLocalVariable local | + use.uses(local) or use.deletes(local) | + not local.escapes() + ) + and + ( + any(Uninitialized uninit).taints(use) + or + not exists(EssaVariable var | var.getASourceUse() = use) + ) +} + +predicate explicitly_guarded(NameNode u) { + exists(Try t | + t.getBody().contains(u.getNode()) and + t.getAHandler().getType().refersTo(theNameErrorType()) + ) +} + + +from NameNode u +where uninitialized_local(u) and not explicitly_guarded(u) +select u.getNode(), "Local variable '" + u.getId() + "' may be used before it is initialized." 
+ + diff --git a/python/ql/src/Variables/UnusedLocalVariable.py b/python/ql/src/Variables/UnusedLocalVariable.py new file mode 100644 index 00000000000..d0d46f0e1df --- /dev/null +++ b/python/ql/src/Variables/UnusedLocalVariable.py @@ -0,0 +1,11 @@ +import random + +def write_random_to_file(): + no = random.randint(1, 10) + with open("random.txt", "w") as file: + file.write(str(no)) + return no + +def write_random(): + random_no = write_random_to_file() + print "A random number was written to random.txt" \ No newline at end of file diff --git a/python/ql/src/Variables/UnusedLocalVariable.qhelp b/python/ql/src/Variables/UnusedLocalVariable.qhelp new file mode 100644 index 00000000000..e24b5d17464 --- /dev/null +++ b/python/ql/src/Variables/UnusedLocalVariable.qhelp @@ -0,0 +1,31 @@ + + + + +

    A local variable is defined (by an assignment) but never used. +

    + + + + +
    + +

    If the variable is included for documentation purposes or is otherwise intentionally unused, then change its name to indicate that it is unused, +otherwise delete the assignment (taking care not to delete the right-hand side if it has side effects).

    + +
    + +

    In this example, the random_no variable is never read but its assignment +has a side effect. Because of this it is important to remove only the left hand side of the +assignment in line 10.

    + + +
    + + +
  • Python: Assignment statements.
  • + +
    +
    diff --git a/python/ql/src/Variables/UnusedLocalVariable.ql b/python/ql/src/Variables/UnusedLocalVariable.ql new file mode 100644 index 00000000000..42f28c5e085 --- /dev/null +++ b/python/ql/src/Variables/UnusedLocalVariable.ql @@ -0,0 +1,34 @@ +/** + * @name Unused local variable + * @description Local variable is defined but not used + * @kind problem + * @tags maintainability + * useless-code + * external/cwe/cwe-563 + * @problem.severity recommendation + * @sub-severity high + * @precision very-high + * @id py/unused-local-variable + */ + +import python +import Definition + +predicate unused_local(Name unused, LocalVariable v) { + forex(Definition def | + def.getNode() = unused | + def.getVariable() = v and + def.isUnused() and + not exists(def.getARedef()) and + def.isRelevant() and + not exists(def.getNode().getParentNode().(FunctionDef).getDefinedFunction().getADecorator()) and + not exists(def.getNode().getParentNode().(ClassDef).getDefinedClass().getADecorator()) + ) +} + + +from Name unused, LocalVariable v +where unused_local(unused, v) and +// If unused is part of a tuple, count it as unused if all elements of that tuple are unused. +forall(Name el | el = unused.getParentNode().(Tuple).getAnElt() | unused_local(el, _)) +select unused, "The value assigned to local variable '" + v.getId() + "' is never used." 
diff --git a/python/ql/src/Variables/UnusedModuleVariable.py b/python/ql/src/Variables/UnusedModuleVariable.py new file mode 100644 index 00000000000..91bc0951848 --- /dev/null +++ b/python/ql/src/Variables/UnusedModuleVariable.py @@ -0,0 +1,9 @@ +import random + +def write_random_to_file(): + no = random.randint(1, 10) + with open("random.txt", "w") as file: + file.write(str(no)) + return no + +random_no = write_random_to_file() \ No newline at end of file diff --git a/python/ql/src/Variables/UnusedModuleVariable.qhelp b/python/ql/src/Variables/UnusedModuleVariable.qhelp new file mode 100644 index 00000000000..b5e57bf55ce --- /dev/null +++ b/python/ql/src/Variables/UnusedModuleVariable.qhelp @@ -0,0 +1,34 @@ + + + + +

    A global (module-level) variable is defined (by an assignment) but never used +and is not explicitly made public by inclusion in the __all__ list. +

    + + + + +
    + +

    If the variable is included for documentation purposes or is otherwise intentionally unused, then change its name to indicate that it is unused, +otherwise delete the assignment (taking care not to delete the right-hand side if it has side effects).

    + +
    + +

    In this example, the random_no variable is never read but its assignment +has a side effect. Because of this it is important to only remove the left hand side of the +assignment in line 9.

    + + +
    + + +
  • Python: Assignment statements, + The import statement.
  • +
  • Python Tutorial: Importing * from a package.
  • + +
    +
    diff --git a/python/ql/src/Variables/UnusedModuleVariable.ql b/python/ql/src/Variables/UnusedModuleVariable.ql new file mode 100644 index 00000000000..888b9546ce1 --- /dev/null +++ b/python/ql/src/Variables/UnusedModuleVariable.ql @@ -0,0 +1,62 @@ +/** + * @name Unused global variable + * @description Global variable is defined but not used + * @kind problem + * @tags efficiency + * useless-code + * external/cwe/cwe-563 + * @problem.severity recommendation + * @sub-severity low + * @precision high + * @id py/unused-global-variable + */ + +import python +import Definition + +/** Whether the module contains an __all__ definition, + * but it is more complex than a simple list of strings */ +predicate complex_all(Module m) { + exists(Assign a, GlobalVariable all | + a.defines(all) and a.getScope() = m and all.getId() = "__all__" | + not a.getValue() instanceof List or + exists(Expr e | + e = a.getValue().(List).getAnElt() | + not e instanceof StrConst + ) + ) + or + exists(Call c, GlobalVariable all | + c.getFunc().(Attribute).getObject() = all.getALoad() and + c.getScope() = m and all.getId() = "__all__" + ) +} + +predicate unused_global(Name unused, GlobalVariable v) { + not exists(ImportingStmt is | is.contains(unused)) and + forex(DefinitionNode defn | + defn.getNode() = unused | + not defn.getValue().getNode() instanceof FunctionExpr and + not defn.getValue().getNode() instanceof ClassExpr and + not exists(Name u | + // A use of the variable + u.uses(v) | + // That is reachable from this definition, directly + defn.strictlyReaches(u.getAFlowNode()) + or // indirectly + defn.getBasicBlock().reachesExit() and u.getScope() != unused.getScope() + ) and + not unused.getEnclosingModule().getAnExport() = v.getId() and + not exists(unused.getParentNode().(ClassDef).getDefinedClass().getADecorator()) and + not exists(unused.getParentNode().(FunctionDef).getDefinedFunction().getADecorator()) and + unused.defines(v) and + not name_acceptable_for_unused_variable(v) and + 
not complex_all(unused.getEnclosingModule()) + ) +} + +from Name unused, GlobalVariable v +where unused_global(unused, v) and +// If unused is part of a tuple, count it as unused if all elements of that tuple are unused. +forall(Name el | el = unused.getParentNode().(Tuple).getAnElt() | unused_global(el, _)) +select unused, "The global variable '" + v.getId() + "' is not used." diff --git a/python/ql/src/Variables/UnusedParameter.py b/python/ql/src/Variables/UnusedParameter.py new file mode 100644 index 00000000000..b0eee5e4f91 --- /dev/null +++ b/python/ql/src/Variables/UnusedParameter.py @@ -0,0 +1,5 @@ +import random + +def write_to_file(text, filename): + with open("log.txt", "w") as file: + file.write(text) diff --git a/python/ql/src/Variables/UnusedParameter.qhelp b/python/ql/src/Variables/UnusedParameter.qhelp new file mode 100644 index 00000000000..4fa34ba65ec --- /dev/null +++ b/python/ql/src/Variables/UnusedParameter.qhelp @@ -0,0 +1,30 @@ + + + + + + +

    A parameter is never used. +

    + + + +
    + + +

    Delete the parameter from the relevant function or method. +If that is not possible (due to overriding or similar), rename the parameter + as described above. + +

    + + +

    In this example, the parameter filename is ignored, which is misleading. + +

    + + +
    +
    diff --git a/python/ql/src/Variables/UnusedParameter.ql b/python/ql/src/Variables/UnusedParameter.ql new file mode 100644 index 00000000000..e27d151b72e --- /dev/null +++ b/python/ql/src/Variables/UnusedParameter.ql @@ -0,0 +1,31 @@ +/** + * @name Unused parameter + * @description Parameter is defined but not used + * @kind problem + * @tags maintainability + * @problem.severity recommendation + * @sub-severity high + * @precision medium + * @id py/unused-parameter + */ + +import python +import Definition + + +predicate unused_parameter(FunctionObject f, LocalVariable v) { + v.isParameter() and + v.getScope() = f.getFunction() and + not name_acceptable_for_unused_variable(v) and + not exists(NameNode u | u.uses(v)) and + not exists(Name inner, LocalVariable iv | inner.uses(iv) and iv.getId() = v.getId() and inner.getScope().getScope() = v.getScope()) +} + +predicate is_abstract(FunctionObject func) { + ((Name)func.getFunction().getADecorator()).getId().matches("%abstract%") +} + +from PyFunctionObject f, LocalVariable v +where v.getId() != "self" and unused_parameter(f, v) and not f.isOverridingMethod() and not f.isOverriddenMethod() and +not is_abstract(f) +select f, "The parameter '" + v.getId() + "' is never used." diff --git a/python/ql/src/Variables/UnusedTuple.qhelp b/python/ql/src/Variables/UnusedTuple.qhelp new file mode 100644 index 00000000000..e1fd8c54a92 --- /dev/null +++ b/python/ql/src/Variables/UnusedTuple.qhelp @@ -0,0 +1,12 @@ + + + + +

    Variables that are defined in a group, for example x, y = func(), are handled collectively. +If they are all unused, then this is reported. Otherwise they are all treated as used. +

    + +
    +
    diff --git a/python/ql/src/Variables/UnusedVariableNaming.qhelp b/python/ql/src/Variables/UnusedVariableNaming.qhelp new file mode 100644 index 00000000000..f02a3fa2e38 --- /dev/null +++ b/python/ql/src/Variables/UnusedVariableNaming.qhelp @@ -0,0 +1,24 @@ + + + + +

    It is sometimes necessary to have a variable which is not used. +These unused variables should have distinctive names, to make it clear to readers of the code that they are deliberately not used. +The most common conventions for indicating this are to name the variable _ or to start the name of the +variable with unused or _unused. +

    + +

    +The query accepts the following names for variables that are intended to be unused: +

    +
      +
    • Any name consisting entirely of underscores.
    • +
    • Any name containing unused.
    • +
    • The names dummy or empty.
    • +
    • Any "special" name of the form __xxx__.
    • +
    + +
    +
    \ No newline at end of file diff --git a/python/ql/src/analysis/AlertSuppression.ql b/python/ql/src/analysis/AlertSuppression.ql new file mode 100644 index 00000000000..56622f005ca --- /dev/null +++ b/python/ql/src/analysis/AlertSuppression.ql @@ -0,0 +1,126 @@ +/** + * @name Alert suppression + * @description Generates information about alert suppressions. + * @kind alert-suppression + * @id py/alert-suppression + */ + +import python + +/** + * An alert suppression comment. + */ +abstract class SuppressionComment extends Comment { + + /** Gets the scope of this suppression. */ + abstract SuppressionScope getScope(); + + /** Gets the suppression annotation in this comment. */ + abstract string getAnnotation(); + + /** + * Holds if this comment applies to the range from column `startcolumn` of line `startline` + * to column `endcolumn` of line `endline` in file `filepath`. + */ + abstract predicate covers(string filepath, int startline, int startcolumn, int endline, int endcolumn); + +} + +/** + * An alert comment that applies to a single line + */ +abstract class LineSuppressionComment extends SuppressionComment { + + LineSuppressionComment() { + exists(string filepath, int l | + this.getLocation().hasLocationInfo(filepath, l, _, _, _) and + any(AstNode a).getLocation().hasLocationInfo(filepath, l, _, _, _) + ) + } + + /** Gets the scope of this suppression. */ + override SuppressionScope getScope() { + result = this + } + + override predicate covers(string filepath, int startline, int startcolumn, int endline, int endcolumn) { + this.getLocation().hasLocationInfo(filepath, startline, _, endline, endcolumn) and + startcolumn = 1 + } + +} + +/** + * An lgtm suppression comment. 
+ */ +class LgtmSuppressionComment extends LineSuppressionComment { + + string annotation; + + LgtmSuppressionComment() { + exists(string all | + all = this.getContents() + | + // match `lgtm[...]` anywhere in the comment + annotation = all.regexpFind("(?i)\\blgtm\\s*\\[[^\\]]*\\]", _, _) + or + // match `lgtm` at the start of the comment and after semicolon + annotation = all.regexpFind("(?i)(?<=^|;)\\s*lgtm(?!\\B|\\s*\\[)", _, _).trim() + ) + } + + /** Gets the suppression annotation in this comment. */ + override string getAnnotation() { + result = annotation + } + +} + +/** + * A noqa suppression comment. Both pylint and pyflakes respect this, so lgtm ought to too. + */ +class NoqaSuppressionComment extends LineSuppressionComment { + + NoqaSuppressionComment() { + this.getContents().toLowerCase().regexpMatch("\\s*noqa\\s*") + } + + override string getAnnotation() { + result = "lgtm" + } + +} + + +/** + * The scope of an alert suppression comment. + */ +class SuppressionScope extends @py_comment { + + SuppressionScope() { + this instanceof SuppressionComment + } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [LGTM locations](https://lgtm.com/help/ql/locations). + */ + predicate hasLocationInfo(string filepath, int startline, int startcolumn, int endline, int endcolumn) { + this.(SuppressionComment).covers(filepath, startline, startcolumn, endline, endcolumn) + } + + /** Gets a textual representation of this element. 
*/ + string toString() { + result = "suppression range" + } + +} + +from SuppressionComment c +select c, // suppression comment + c.getContents(), // text of suppression comment (excluding delimiters) + c.getAnnotation(), // text of suppression annotation + c.getScope() // scope of suppression diff --git a/python/ql/src/analysis/CallGraphEfficiency.ql b/python/ql/src/analysis/CallGraphEfficiency.ql new file mode 100644 index 00000000000..f1556568733 --- /dev/null +++ b/python/ql/src/analysis/CallGraphEfficiency.ql @@ -0,0 +1,25 @@ +/** Compute the total call-graph facts, the total size of the call-graph relation and + * the ratio of the two in relation to the depth of context. + */ + + +import python +import semmle.python.pointsto.PointsTo +import semmle.python.pointsto.PointsToContext + +from int total_facts, int total_size, int depth, float efficiency +where +total_facts = strictcount(ControlFlowNode call, FunctionObject func | + exists(PointsToContext ctx | + call = PointsTo::get_a_call(func, ctx) and + depth = ctx.getDepth() + ) +) +and +total_size = strictcount(ControlFlowNode call, FunctionObject func, PointsToContext ctx | + call = PointsTo::get_a_call(func, ctx) and + depth = ctx.getDepth() +) +and +efficiency = 100.0 * total_facts / total_size +select depth, total_facts, total_size, efficiency diff --git a/python/ql/src/analysis/CallGraphMarginalEfficiency.ql b/python/ql/src/analysis/CallGraphMarginalEfficiency.ql new file mode 100644 index 00000000000..72ca0383d5c --- /dev/null +++ b/python/ql/src/analysis/CallGraphMarginalEfficiency.ql @@ -0,0 +1,29 @@ +/** Compute the marginal increase call-graph facts, the total size of the call-graph relation and + * the ratio of the two in relation to the depth of context. 
+ */ + +import python +import semmle.python.pointsto.PointsTo +import semmle.python.pointsto.PointsToContext + +from int total_facts, int total_size, int depth, float efficiency +where +total_facts = strictcount(ControlFlowNode call, FunctionObject func | + exists(PointsToContext ctx | + call = PointsTo::get_a_call(func, ctx) and + depth = ctx.getDepth() + and not + exists(PointsToContext shallower | + call = PointsTo::get_a_call(func, shallower) and + shallower.getDepth() < depth + ) + ) +) +and +total_size = strictcount(ControlFlowNode call, FunctionObject func, PointsToContext ctx | + call = PointsTo::get_a_call(func, ctx) and + depth = ctx.getDepth() +) +and +efficiency = 100.0 * total_facts / total_size +select depth, total_facts, total_size, efficiency diff --git a/python/ql/src/analysis/ContextEfficiency.ql b/python/ql/src/analysis/ContextEfficiency.ql new file mode 100644 index 00000000000..e25d69715b9 --- /dev/null +++ b/python/ql/src/analysis/ContextEfficiency.ql @@ -0,0 +1,25 @@ +/** Compute the total points-to facts, the total size of the points-to relation and + * the ratio of the two in relation to the depth of context. 
+ */ + + +import python +import semmle.python.pointsto.PointsTo +import semmle.python.pointsto.PointsToContext + +from int total_facts, int total_size, int depth, float efficiency +where +total_facts = strictcount(ControlFlowNode f, Object value, ClassObject cls | + exists(PointsToContext ctx | + PointsTo::points_to(f, ctx, value, cls, _) and + depth = ctx.getDepth() + ) +) +and +total_size = strictcount(ControlFlowNode f, Object value, ClassObject cls, PointsToContext ctx, ControlFlowNode orig | + PointsTo::points_to(f, ctx, value, cls, orig) and + depth = ctx.getDepth() +) +and +efficiency = 100.0 * total_facts / total_size +select depth, total_facts, total_size, efficiency diff --git a/python/ql/src/analysis/ContextMarginalEfficiency.ql b/python/ql/src/analysis/ContextMarginalEfficiency.ql new file mode 100644 index 00000000000..f48e0530123 --- /dev/null +++ b/python/ql/src/analysis/ContextMarginalEfficiency.ql @@ -0,0 +1,32 @@ +/** Compute the marginal increase points-to facts, the total size of the points-to relation and + * the ratio of the two in relation to the depth of context. 
+ */ + +import python +import semmle.python.pointsto.PointsTo +import semmle.python.pointsto.PointsToContext + +int depth(ControlFlowNode f, Object value, ClassObject cls) { + exists(PointsToContext ctx | + PointsTo::points_to(f, ctx, value, cls, _) and + result = ctx.getDepth() + ) +} + +int shallowest(ControlFlowNode f, Object value, ClassObject cls) { + result = min(int x | x = depth(f, value, cls)) +} + +from int total_facts, int total_size, int depth, float efficiency +where +total_facts = strictcount(ControlFlowNode f, Object value, ClassObject cls | + depth = shallowest(f, value, cls) +) +and +total_size = strictcount(ControlFlowNode f, Object value, ClassObject cls, PointsToContext ctx, ControlFlowNode orig | + PointsTo::points_to(f, ctx, value, cls, orig) and + depth = ctx.getDepth() +) +and +efficiency = 100.0 * total_facts / total_size +select depth, total_facts, total_size, efficiency \ No newline at end of file diff --git a/python/ql/src/analysis/CrossProjectDefinitions.qll b/python/ql/src/analysis/CrossProjectDefinitions.qll new file mode 100644 index 00000000000..e09fce9e9a2 --- /dev/null +++ b/python/ql/src/analysis/CrossProjectDefinitions.qll @@ -0,0 +1,115 @@ +/** + * Symbols for crosss-project jump-to-definition resolution. + */ + import python + +import semmle.dataflow.SSA +import semmle.python.pointsto.PointsTo + +private newtype TSymbol = + TModule(Module m) + or + TMember(Symbol outer, string part) { + exists(Object o | + outer.resolvesTo() = o | + o.(ModuleObject).hasAttribute(part) + or + o.(ClassObject).hasAttribute(part) + ) + } + +/** A "symbol" referencing an object in another module + * Symbols are represented by the module name and the dotted name by which the + * object would be referred to in that module. 
+ * For example for the code: + * ``` + * class C: + * def m(self): pass + * ``` + * If the code were in a module `mod`, + * then symbol for the method `m` would be "mod/C.m" + */ +class Symbol extends TSymbol { + + string toString() { + exists(Module m | + this = TModule(m) and result = m.getName() + ) + or + exists(TModule outer, string part | + this = TMember(outer, part) and + outer = TModule(_) and + result = outer.(Symbol).toString() + "/" + part + ) + or + exists(TMember outer, string part | + this = TMember(outer, part) and + outer = TMember(_, _) and + result = outer.(Symbol).toString() + "." + part + ) + } + + /** Finds the `AstNode` that this `Symbol` refers to. + */ + AstNode find() { + this = TModule(result) + or + exists(Symbol s, string name | + this = TMember(s, name) | + exists(ClassObject cls | + s.resolvesTo() = cls and + cls.attributeRefersTo(name, _, result.getAFlowNode()) + ) + or + exists(ModuleObject m | + s.resolvesTo() = m and + m.attributeRefersTo(name, _, result.getAFlowNode()) + ) + ) + } + + /** Find the class or module `Object` that this `Symbol` refers to, if + * this `Symbol` refers to a class or module. + */ + Object resolvesTo() { + this = TModule(result.(ModuleObject).getModule()) + or + exists(Symbol s, string name, Object o | + this = TMember(s, name) and + o = s.resolvesTo() and + result = attribute_in_scope(o, name) + ) + } + + /** Gets the `Module` for the module part of this `Symbol`. + * For example, this would return the `os` module for the `Symbol` "os/environ". + */ + Module getModule() { + this = TModule(result) + or + exists(Symbol outer | + this = TMember(outer, _) and result = outer.getModule() + ) + } + + /** Gets the `Symbol` that is the named member of this `Symbol`. 
+ */ + Symbol getMember(string name) { + result = TMember(this, name) + } + +} + +/* Helper for `Symbol`.resolvesTo() */ +private Object attribute_in_scope(Object obj, string name) { + exists(ClassObject cls | + cls = obj | + cls.lookupAttribute(name) = result and result.(ControlFlowNode).getScope() = cls.getPyClass() + ) + or + exists(ModuleObject mod | + mod = obj | + mod.getAttribute(name) = result and result.(ControlFlowNode).getScope() = mod.getModule() + and not result.(ControlFlowNode).isEntryNode() + ) +} diff --git a/python/ql/src/analysis/DefinitionTracking.qll b/python/ql/src/analysis/DefinitionTracking.qll new file mode 100644 index 00000000000..f3e23270370 --- /dev/null +++ b/python/ql/src/analysis/DefinitionTracking.qll @@ -0,0 +1,483 @@ +/** + * Definition tracking for jump-to-defn query. + */ + import python + +import semmle.dataflow.SSA +import semmle.python.pointsto.PointsTo + +private newtype TDefinition = + TLocalDefinition(AstNode a) { + a instanceof Expr or a instanceof Stmt or a instanceof Module + } + +/** A definition for the purposes of jump-to-definition. 
+ */ +class Definition extends TLocalDefinition { + + + string toString() { + result = "Definition " + this.getAstNode().getLocation().toString() + } + + AstNode getAstNode() { + this = TLocalDefinition(result) + } + + Module getModule() { + result = this.getAstNode().getScope().getEnclosingModule() + } + + Location getLocation() { + result = this.getAstNode().getLocation() + } + +} + +private predicate jump_to_defn(ControlFlowNode use, Definition defn) { + exists(EssaVariable var | + use = var.getASourceUse() and + ssa_variable_defn(var, defn) + ) + or + exists(string name | + use.isLoad() and + jump_to_defn_attribute(use.(AttrNode).getObject(name), name, defn) + ) + or + exists(PythonModuleObject mod | + use.(ImportExprNode).refersTo(mod) and + defn.getAstNode() = mod.getModule() + ) + or + exists(PythonModuleObject mod, string name | + use.(ImportMemberNode).getModule(name).refersTo(mod) and + scope_jump_to_defn_attribute(mod.getModule(), name, defn) + ) + or + exists(PackageObject package | + use.(ImportExprNode).refersTo(package) and + defn.getAstNode() = package.getInitModule().getModule() + ) + or + exists(PackageObject package, string name | + use.(ImportMemberNode).getModule(name).refersTo(package) and + scope_jump_to_defn_attribute(package.getInitModule().getModule(), name, defn) + ) + or + (use instanceof PyFunctionObject or use instanceof ClassObject) and + defn.getAstNode() = use.getNode() +} + +/* Prefer class and functions to class-expressions and function-expressions. 
*/ +private predicate preferred_jump_to_defn(Expr use, Definition def) { + not use instanceof ClassExpr and + not use instanceof FunctionExpr and + jump_to_defn(use.getAFlowNode(), def) +} + +private predicate unique_jump_to_defn(Expr use, Definition def) { + preferred_jump_to_defn(use, def) and + not exists(Definition other | + other != def and + preferred_jump_to_defn(use, other) + ) +} + +private predicate ssa_variable_defn(EssaVariable var, Definition defn) { + ssa_defn_defn(var.getDefinition(), defn) +} + +/** Holds if the phi-function `phi` refers to (`value`, `cls`, `origin`) given the context `context`. */ +private predicate ssa_phi_defn(PhiFunction phi, Definition defn) { + ssa_variable_defn(phi.getAnInput(), defn) +} + +/** Holds if the ESSA defn `def` refers to (`value`, `cls`, `origin`) given the context `context`. */ +private predicate ssa_defn_defn(EssaDefinition def, Definition defn) { + ssa_phi_defn(def, defn) + or + ssa_node_defn(def, defn) + or + ssa_filter_defn(def, defn) + or + ssa_node_refinement_defn(def, defn) +} + +/** Holds if ESSA edge refinement, `def`, is defined by `defn` */ +predicate ssa_filter_defn(PyEdgeRefinement def, Definition defn) { + ssa_variable_defn(def.getInput(), defn) +} + +/** Holds if ESSA defn, `uniphi`,is defined by `defn` */ +predicate uni_edged_phi_defn(SingleSuccessorGuard uniphi, Definition defn) { + ssa_variable_defn(uniphi.getInput(), defn) +} + +pragma [noinline] +private predicate ssa_node_defn(EssaNodeDefinition def, Definition defn) { + assignment_jump_to_defn(def, defn) + or + parameter_defn(def, defn) + or + delete_defn(def, defn) + or + scope_entry_defn(def, defn) + or + implicit_submodule_defn(def, defn) +} + +/* Definition for normal assignments `def = ...` */ +private predicate assignment_jump_to_defn(AssignmentDefinition def, Definition defn) { + defn = TLocalDefinition(def.getValue().getNode()) +} + +pragma [noinline] +private predicate ssa_node_refinement_defn(EssaNodeRefinement def, Definition 
defn) { + method_callsite_defn(def, defn) + or + import_star_defn(def, defn) + or + attribute_assignment_defn(def, defn) + or + callsite_defn(def, defn) + or + argument_defn(def, defn) + or + attribute_delete_defn(def, defn) + or + uni_edged_phi_defn(def, defn) +} + + +/* Definition for parameter. `def foo(param): ...` */ +private predicate parameter_defn(ParameterDefinition def, Definition defn) { + defn.getAstNode() = def.getDefiningNode().getNode() +} + +/* Definition for deletion: `del name` */ +private predicate delete_defn(DeletionDefinition def, Definition defn) { + none() +} + +/* Implicit "defn" of the names of submodules at the start of an `__init__.py` file. + */ +private predicate implicit_submodule_defn(ImplicitSubModuleDefinition def, Definition defn) { + exists(PackageObject package, ModuleObject mod | + package.getInitModule().getModule() = def.getDefiningNode().getScope() and + mod = package.submodule(def.getSourceVariable().getName()) and + defn.getAstNode() = mod.getModule() + ) + +} + +/* Helper for scope_entry_value_transfer(...). Transfer of values from the callsite to the callee, for enclosing variables, but not arguments/parameters */ +private predicate scope_entry_value_transfer_at_callsite(EssaVariable pred_var, ScopeEntryDefinition succ_def) { + exists(CallNode callsite, FunctionObject f | + f.getACall() = callsite and + pred_var.getSourceVariable() = succ_def.getSourceVariable() and + pred_var.getAUse() = callsite and + succ_def.getDefiningNode() = f.getFunction().getEntryNode() + ) +} + +/* Model the transfer of values at scope-entry points. 
Transfer from `pred_var, pred_context` to `succ_def, succ_context` */ +private +predicate scope_entry_value_transfer(EssaVariable pred_var, ScopeEntryDefinition succ_def) { + BaseFlow::scope_entry_value_transfer_from_earlier(pred_var, _, succ_def, _) + or + scope_entry_value_transfer_at_callsite(pred_var, succ_def) + or + class_entry_value_transfer(pred_var, succ_def) +} + +/* Helper for scope_entry_value_transfer */ +private +predicate class_entry_value_transfer(EssaVariable pred_var, ScopeEntryDefinition succ_def) { + exists(ImportTimeScope scope, ControlFlowNode class_def | + class_def = pred_var.getAUse() and + scope.entryEdge(class_def, succ_def.getDefiningNode()) and + pred_var.getSourceVariable() = succ_def.getSourceVariable() + ) +} + +/* Definition for implicit variable declarations at scope-entry. */ +pragma [noinline] +private predicate scope_entry_defn(ScopeEntryDefinition def, Definition defn) { + /* Transfer from another scope */ + exists(EssaVariable var | + scope_entry_value_transfer(var, def) and + ssa_variable_defn(var, defn) + ) +} + +/* Definition for a variable (possibly) redefined by a call: + * Just assume that call does not define variable + */ +pragma [noinline] +private predicate callsite_defn(CallsiteRefinement def, Definition defn) { + ssa_variable_defn(def.getInput(), defn) +} + +/* Pass through for `self` for the implicit re-defn of `self` in `self.foo()` */ +private predicate method_callsite_defn(MethodCallsiteRefinement def, Definition defn) { + /* The value of self remains the same, only the attributes may change */ + ssa_variable_defn(def.getInput(), defn) +} + +/** Helpers for import_star_defn */ +pragma [noinline] +private predicate module_and_name_for_import_star(ModuleObject mod, string name, ImportStarRefinement def) { + exists(ImportStarNode im_star | + im_star = def.getDefiningNode() | + name = def.getSourceVariable().getName() and + im_star.getModule().refersTo(mod) and + mod.exports(name) + ) +} + +/** Holds if `def` is 
technically a defn of `var`, but the `from ... import *` does not in fact define `var` */ +pragma [noinline] +private predicate variable_not_redefined_by_import_star(EssaVariable var, ImportStarRefinement def) { + var = def.getInput() and + exists(ModuleObject mod | + def.getDefiningNode().(ImportStarNode).getModule().refersTo(mod) and + not mod.exports(var.getSourceVariable().getName()) + ) +} + +/* Definition for `from ... import *` */ +private predicate import_star_defn(ImportStarRefinement def, Definition defn) { + exists(ModuleObject mod, string name | + module_and_name_for_import_star(mod, name, def) | + /* Attribute from imported module */ + scope_jump_to_defn_attribute(mod.getModule(), name, defn) + ) + or + exists(EssaVariable var | + /* Retain value held before import */ + variable_not_redefined_by_import_star(var, def) and + ssa_variable_defn(var, defn) + ) +} + +/** Attribute assignments have no effect as far as defn tracking is concerned */ +private predicate attribute_assignment_defn(AttributeAssignment def, Definition defn) { + ssa_variable_defn(def.getInput(), defn) +} + +/** Ignore the effects of calls on their arguments. This is an approximation, but attempting to improve accuracy would be very expensive for very little gain. */ +private predicate argument_defn(ArgumentRefinement def, Definition defn) { + ssa_variable_defn(def.getInput(), defn) +} + +/** Attribute deletions have no effect as far as value tracking is concerned. */ +pragma [noinline] +private predicate attribute_delete_defn(EssaAttributeDeletion def, Definition defn) { + ssa_variable_defn(def.getInput(), defn) +} + +/* Definition flow for attributes. These mirror the "normal" defn predicates. + * For each defn predicate `xxx_defn(XXX def, Definition defn)` + * There is an equivalent predicate that tracks the values in attributes: + * `xxx_jump_to_defn_attribute(XXX def, string name, Definition defn)` + * */ + +/** INTERNAL -- Public for testing only. 
+ * Holds if the attribute `name` of the ssa variable `var` refers to (`value`, `cls`, `origin`) + */ +predicate ssa_variable_jump_to_defn_attribute(EssaVariable var, string name, Definition defn) { + ssa_defn_jump_to_defn_attribute(var.getDefinition(), name, defn) +} + +/** Helper for ssa_variable_jump_to_defn_attribute */ +private predicate ssa_defn_jump_to_defn_attribute(EssaDefinition def, string name, Definition defn) { + ssa_phi_jump_to_defn_attribute(def, name, defn) + or + ssa_node_jump_to_defn_attribute(def, name, defn) + or + ssa_node_refinement_jump_to_defn_attribute(def, name, defn) + or + ssa_filter_jump_to_defn_attribute(def, name, defn) +} + +/** Holds if ESSA edge refinement, `def`, is defined by `defn` of `priority` */ +predicate ssa_filter_jump_to_defn_attribute(PyEdgeRefinement def, string name, Definition defn) { + ssa_variable_jump_to_defn_attribute(def.getInput(), name, defn) +} + +/** Holds if the attribute `name` of the ssa phi-function defn `phi` refers to (`value`, `cls`, `origin`) */ +private predicate ssa_phi_jump_to_defn_attribute(PhiFunction phi, string name, Definition defn) { + ssa_variable_jump_to_defn_attribute(phi.getAnInput(), name, defn) +} + +/** Helper for ssa_defn_jump_to_defn_attribute */ +pragma[noinline] +private predicate ssa_node_jump_to_defn_attribute(EssaNodeDefinition def, string name, Definition defn) { + assignment_jump_to_defn_attribute(def, name, defn) + or + self_parameter_jump_to_defn_attribute(def, name, defn) + or + scope_entry_jump_to_defn_attribute(def, name, defn) +} + +/** Helper for ssa_defn_jump_to_defn_attribute */ +pragma[noinline] +private predicate ssa_node_refinement_jump_to_defn_attribute(EssaNodeRefinement def, string name, Definition defn) { + attribute_assignment_jump_to_defn_attribute(def, name, defn) + or + argument_jump_to_defn_attribute(def, name, defn) +} + +pragma[noinline] +private predicate scope_entry_jump_to_defn_attribute(ScopeEntryDefinition def, string name, Definition defn) { + 
exists(EssaVariable var | + scope_entry_value_transfer(var, def) and + ssa_variable_jump_to_defn_attribute(var, name, defn) + ) +} + +private predicate scope_jump_to_defn_attribute(ImportTimeScope s, string name, Definition defn) { + exists(EssaVariable var | + BaseFlow::reaches_exit(var) and var.getScope() = s and + var.getName() = name + | + ssa_variable_defn(var, defn) + ) +} + +private predicate jump_to_defn_attribute(ControlFlowNode use, string name, Definition defn) { + /* Local attribute */ + exists(EssaVariable var | + use = var.getASourceUse() and + ssa_variable_jump_to_defn_attribute(var, name, defn) + ) + or + /* Instance attributes */ + exists(ClassObject cls | + use.refersTo(_, cls, _) | + scope_jump_to_defn_attribute(cls.getPyClass(), name, defn) + ) + or + /* Super attributes */ + exists(AttrNode f, SuperBoundMethod sbm, Object function | + use = f.getObject(name) and + f.refersTo(sbm) and function = sbm.getFunction(_) and + function.getOrigin() = defn.getAstNode() + ) + or + /* Class or module attribute */ + exists(Object obj, Scope scope | + use.refersTo(obj) and + scope_jump_to_defn_attribute(scope, name, defn) | + obj.(ClassObject).getPyClass() = scope + or + obj.(PythonModuleObject).getModule() = scope + or + obj.(PackageObject).getInitModule().getModule() = scope + ) +} + +pragma[noinline] +private predicate assignment_jump_to_defn_attribute(AssignmentDefinition def, string name, Definition defn) { + jump_to_defn_attribute(def.getValue(), name, defn) +} + +pragma[noinline] +private predicate attribute_assignment_jump_to_defn_attribute(AttributeAssignment def, string name, Definition defn) { + defn.getAstNode() = def.getDefiningNode().getNode() and name = def.getName() + or + ssa_variable_jump_to_defn_attribute(def.getInput(), name, defn) and not name = def.getName() +} + +/** Holds if `def` defines the attribute `name` + * `def` takes the form `setattr(use, "name")` where `use` is the input to the defn. 
+ */ +private predicate sets_attribute(ArgumentRefinement def, string name) { + exists(CallNode call | + call = def.getDefiningNode() and + call.getFunction().refersTo(builtin_object("setattr")) and + def.getInput().getAUse() = call.getArg(0) and + call.getArg(1).getNode().(StrConst).getText() = name + ) +} + +pragma[noinline] +private predicate argument_jump_to_defn_attribute(ArgumentRefinement def, string name, Definition defn) { + if sets_attribute(def, name) then + jump_to_defn(def.getDefiningNode().(CallNode).getArg(2), defn) + else + ssa_variable_jump_to_defn_attribute(def.getInput(), name, defn) +} + +/** Gets the (temporally) preceding variable for "self", e.g. `def` is in method foo() and `result` is in `__init__()`. */ +private EssaVariable preceding_self_variable(ParameterDefinition def) { + def.isSelf() and + exists(Function preceding, Function method | + method = def.getScope() and + // Only methods + preceding.isMethod() and preceding.precedes(method) and + BaseFlow::reaches_exit(result) and result.getSourceVariable().(Variable).isSelf() and + result.getScope() = preceding + ) +} + +pragma [noinline] +private predicate self_parameter_jump_to_defn_attribute(ParameterDefinition def, string name, Definition defn) { + ssa_variable_jump_to_defn_attribute(preceding_self_variable(def), name, defn) +} + +/** Gets a definition for 'use'. + * This exists primarily for testing use `getPreferredDefinition()` instead. + */ +Definition getADefinition(Expr use) { + jump_to_defn(use.getAFlowNode(), result) and + not use instanceof Call and + not use.isArtificial() and + // Not the use itself + not result = TLocalDefinition(use) +} + +/** Gets the unique definition for 'use', if one can be found. + * Helper for the jump-to-definition query. 
+ */ +Definition getUniqueDefinition(Expr use) { + unique_jump_to_defn(use, result) and + not use instanceof Call and + not use.isArtificial() and + // Not the use itself + not result = TLocalDefinition(use) +} + + +/** Helper class to get suitable locations for attributes */ +class NiceLocationExpr extends @py_expr { + + string toString() { + result = this.(Expr).toString() + } + + predicate hasLocationInfo(string f, int bl, int bc, int el, int ec) { + /* Attribute location for x.y is that of 'y' so that url does not overlap with that of 'x' */ + exists(int abl, int abc | + this.(Attribute).getLocation().hasLocationInfo(f, abl, abc, el, ec) | + bl = el and bc = ec - this.(Attribute).getName().length() + 1 + ) + or + this.(Name).getLocation().hasLocationInfo(f, bl, bc, el, ec) + or + /* Show xxx for `xxx` in `from xxx import y` or + * for `import xxx` or for `import xxx as yyy`. */ + this.(ImportExpr).getLocation().hasLocationInfo(f, bl, bc, el, ec) + or + /* Show y for `y` in `from xxx import y` */ + exists(string name | + name = this.(ImportMember).getName() and + this.(ImportMember).getLocation().hasLocationInfo(f, _, _, el, ec) and + bl = el and bc = ec-name.length()+1 + ) + } + +} + + diff --git a/python/ql/src/analysis/Definitions.ql b/python/ql/src/analysis/Definitions.ql new file mode 100644 index 00000000000..b0cf6f01bc1 --- /dev/null +++ b/python/ql/src/analysis/Definitions.ql @@ -0,0 +1,17 @@ +/** + * @name Definitions + * @description Jump to definition helper query. 
+ * @kind definitions + * @id py/jump-to-definition + */ + +import python +import DefinitionTracking + + +from NiceLocationExpr use, Definition defn, string kind, string f, int l +where defn = getUniqueDefinition(use) and kind = "Definition" +and use.hasLocationInfo(f, l, _, _, _) and +// Ignore if the definition is on the same line as the use +not defn.getLocation().hasLocationInfo(f, l, _, _, _) +select use, defn, kind diff --git a/python/ql/src/analysis/Efficiency.ql b/python/ql/src/analysis/Efficiency.ql new file mode 100644 index 00000000000..bbdd3a7506d --- /dev/null +++ b/python/ql/src/analysis/Efficiency.ql @@ -0,0 +1,33 @@ +/** + * Compute the efficiency of the points-to relation. That is the ratio of + * "interesting" facts to total facts. + */ + +import python +import semmle.python.pointsto.PointsTo +import semmle.python.pointsto.PointsToContext + +predicate trivial(ControlFlowNode f) { + exists(Parameter p | p = f.getNode()) + or + f instanceof NameConstantNode + or + f.getNode() instanceof ImmutableLiteral +} + +from int interesting_facts, int interesting_facts_in_source, int total_size,float efficiency +where +interesting_facts = strictcount(ControlFlowNode f, Object value, ClassObject cls | + f.refersTo(value, cls, _) and not trivial(f) +) +and +interesting_facts_in_source = strictcount(ControlFlowNode f, Object value, ClassObject cls | + f.refersTo(value, cls, _) and not trivial(f) and exists(f.getScope().getEnclosingModule().getFile().getRelativePath()) +) +and +total_size = strictcount(ControlFlowNode f, PointsToContext ctx, Object value, ClassObject cls, ControlFlowNode orig | + PointsTo::points_to(f, ctx, value, cls, orig) +) +and +efficiency = 100.0 * interesting_facts_in_source / total_size +select interesting_facts, interesting_facts_in_source, total_size, efficiency diff --git a/python/ql/src/analysis/FailedInference.ql b/python/ql/src/analysis/FailedInference.ql new file mode 100644 index 00000000000..129c17ffd9d --- /dev/null +++ 
b/python/ql/src/analysis/FailedInference.ql @@ -0,0 +1,11 @@ + +import python +import semmle.python.pointsto.PointsTo + +from ClassObject cls, string reason + +where +PointsTo::Types::failed_inference(cls, reason) + +select cls, reason + diff --git a/python/ql/src/analysis/ImportFailure.qhelp b/python/ql/src/analysis/ImportFailure.qhelp new file mode 100644 index 00000000000..2832f217f3b --- /dev/null +++ b/python/ql/src/analysis/ImportFailure.qhelp @@ -0,0 +1,28 @@ + + + +

    Tracing which module is imported by an import statement is very important in ensuring that the whole program is available +for analysis. Failure to determine which module is imported by an import reduces the extent and accuracy of Semmle's analysis. +

    + +

    +Missing imports will degrade the effectiveness of code analysis and may result in errors going undetected. +

    + +
    + +

    +Ensure that all required modules and packages can be found when running the extractor. +

    + + +
    + + +
  • Semmle Tutorial: Basic project creation (Python).
  • + + +
    +
    diff --git a/python/ql/src/analysis/ImportFailure.ql b/python/ql/src/analysis/ImportFailure.ql new file mode 100644 index 00000000000..95fd38a3748 --- /dev/null +++ b/python/ql/src/analysis/ImportFailure.ql @@ -0,0 +1,71 @@ +/** + * @name Unresolved import + * @description An unresolved import may result in reduced coverage and accuracy of analysis. + * @kind problem + * @problem.severity info + * @id py/import-failure + */ + +import python + +/** Gets an import that is the alternative to `ie`: both imports are the value of an alias (or the module of an `ImportMember` alias value), and one is contained in the body of an `if` (or `try`) while the other is contained in the `else` branch (or an exception handler) of the same statement. */ ImportExpr alternative_import(ImportExpr ie) { + exists(Alias thisalias, Alias otheralias | + (thisalias.getValue() = ie or ((ImportMember)thisalias.getValue()).getModule() = ie) + and + (otheralias.getValue() = result or ((ImportMember)otheralias.getValue()).getModule() = result) + and + ( + exists(If i | i.getBody().contains(ie) and i.getOrelse().contains(result)) or + exists(If i | i.getBody().contains(result) and i.getOrelse().contains(ie)) or + exists(Try t | t.getBody().contains(ie) and t.getAHandler().contains(result)) or + exists(Try t | t.getBody().contains(result) and t.getAHandler().contains(ie)) + ) + ) +} + +/** Gets the platform name associated with the module imported by `ie`, judged purely from the imported module name. The result is one of "java", "darwin", "win32", "linux2" or "unsupported"; there is no result for names that match none of the patterns. */ string os_specific_import(ImportExpr ie) { + exists(string name | name = ie.getImportedModuleName() | + name.matches("org.python.%") and result = "java" + or + name.matches("java.%") and result = "java" + or + name.matches("Carbon.%") and result = "darwin" + or + result = "win32" and ( + name = "_winapi" or name = "_win32api" or name = "_winreg" or + name = "nt" or name.matches("win32%") or name = "ntpath" + ) + or + result = "linux2" and ( + name = "posix" or name = "posixpath" + ) + or + result = "unsupported" and ( + name = "__pypy__" or name = "ce" or name.matches("riscos%") + + ) + ) +} + +/** Gets the value of the `sys.platform` flag recorded by `py_flags_versioned` for the current major version. */ string get_os() { + py_flags_versioned("sys.platform", result, major_version().toString()) +} + +/** Holds if failing to resolve `ie` is acceptable: either an alternative import of `ie` refers to something, or `ie` is an OS-specific import for a platform other than `get_os()`. */ predicate ok_to_fail(ImportExpr ie) { + alternative_import(ie).refersTo(_) + or + os_specific_import(ie) != get_os() +} + +from ImportExpr ie +where not ie.refersTo(_) and + exists(Context c | 
c.appliesTo(ie.getAFlowNode())) and + not ok_to_fail(ie) and + /* Exclude imports in a basic block controlled by the branch opposite to the guard's truth value. NOTE(review): presumably such blocks are unreachable for the analysed version - confirm against VersionGuard. */ not exists(VersionGuard guard | + if guard.isTrue() then + guard.controls(ie.getAFlowNode().getBasicBlock(), false) + else + guard.controls(ie.getAFlowNode().getBasicBlock(), true) + ) + +select ie, "Unable to resolve import of '" + ie.getImportedModuleName() + "'." \ No newline at end of file diff --git a/python/ql/src/analysis/KeyPointsToFailure.qhelp b/python/ql/src/analysis/KeyPointsToFailure.qhelp new file mode 100644 index 00000000000..4aaf00a6bbc --- /dev/null +++ b/python/ql/src/analysis/KeyPointsToFailure.qhelp @@ -0,0 +1,11 @@ + + + +

    Points-to analysis underpins type inference and thus most of Semmle's Python analysis. +Failures in points-to undermine type inference and reduce both the coverage and the accuracy of many queries. +

    + +
    +
    diff --git a/python/ql/src/analysis/KeyPointsToFailure.ql b/python/ql/src/analysis/KeyPointsToFailure.ql new file mode 100644 index 00000000000..46b1156dd7b --- /dev/null +++ b/python/ql/src/analysis/KeyPointsToFailure.ql @@ -0,0 +1,31 @@ +/** + * @name Key "points-to" fails for expression. + * @description Expression does not "point-to" an object which prevents further points-to analysis. + * @kind problem + * @problem.severity info + * @id py/key-points-to-failure + */ + +import python + +/** Holds if some control-flow node of `e` does not refer to any object. */ predicate points_to_failure(Expr e) { + exists(ControlFlowNode f | + f = e.getAFlowNode() | + not f.refersTo(_) + ) +} + +/** Holds if `e` is a points-to failure that is not explained by its inputs: no sub-expression of `e` is a points-to failure, no SSA variable used at `e` has an ultimate definition whose node is a points-to failure, and `e` is not the target of an assignment. */ predicate key_points_to_failure(Expr e) { + points_to_failure(e) and not points_to_failure(e.getASubExpression()) + and + not exists(SsaVariable ssa | + ssa.getAUse() = e.getAFlowNode() | + points_to_failure(ssa.getAnUltimateDefinition().getDefinition().getNode()) + ) + and + not exists(Assign a | a.getATarget() = e) +} + +from Attribute e +/* Attributes that are the function of a call are not reported. */ where key_points_to_failure(e) and not exists(Call c | c.getFunc() = e) +select e, "Expression does not 'point-to' any object, but all its sources do." diff --git a/python/ql/src/analysis/PointsToFailure.qhelp b/python/ql/src/analysis/PointsToFailure.qhelp new file mode 100644 index 00000000000..4aaf00a6bbc --- /dev/null +++ b/python/ql/src/analysis/PointsToFailure.qhelp @@ -0,0 +1,11 @@ + + + +

    Points-to analysis underpins type inference and thus most of Semmle's Python analysis. +Failures in points-to undermine type inference and reduce both the coverage and the accuracy of many queries. +

    + +
    +
    diff --git a/python/ql/src/analysis/PointsToFailure.ql b/python/ql/src/analysis/PointsToFailure.ql new file mode 100644 index 00000000000..53c2296c529 --- /dev/null +++ b/python/ql/src/analysis/PointsToFailure.ql @@ -0,0 +1,18 @@ +/** + * @name "points-to" fails for expression. + * @description Expression does not "point-to" an object which prevents type inference. + * @kind problem + * @id py/points-to-failure + * @problem.severity info + * @tags reliability + */ + +import python + +from Expr e +where exists(ControlFlowNode f | + f = e.getAFlowNode() | + not f.refersTo(_) +) + +select e, "Expression does not 'point-to' any object." \ No newline at end of file diff --git a/python/ql/src/analysis/Pruned.ql b/python/ql/src/analysis/Pruned.ql new file mode 100644 index 00000000000..a40d47949e5 --- /dev/null +++ b/python/ql/src/analysis/Pruned.ql @@ -0,0 +1,13 @@ + +import python +import semmle.python.pointsto.PointsTo + +from int size + +where +size = count(ControlFlowNode f | + not PointsTo::Test::reachableBlock(f.getBasicBlock(), _) +) + + +select size diff --git a/python/ql/src/analysis/RatioOfDefinitions.ql b/python/ql/src/analysis/RatioOfDefinitions.ql new file mode 100644 index 00000000000..66e0683eab2 --- /dev/null +++ b/python/ql/src/analysis/RatioOfDefinitions.ql @@ -0,0 +1,27 @@ +/** + * @name Ratio of jump-to-definitions computed + */ + +import python + +import DefinitionTracking + +predicate want_to_have_definition(Expr e) { + /* not builtin object like len, tuple, etc. 
*/ + not exists(Object cobj | e.refersTo(cobj) and cobj.isC()) and + ( + e instanceof Name and e.(Name).getCtx() instanceof Load + or + e instanceof Attribute and e.(Attribute).getCtx() instanceof Load + or + e instanceof ImportMember or + e instanceof ImportExpr + ) +} + +from int yes, int no +where +yes = count(Expr e | want_to_have_definition(e) and exists(getUniqueDefinition(e))) +and +no = count(Expr e | want_to_have_definition(e) and not exists(getUniqueDefinition(e))) +select yes, no, yes*100/(yes+no) + "%" diff --git a/python/ql/src/analysis/Sanity.ql b/python/ql/src/analysis/Sanity.ql new file mode 100644 index 00000000000..113f107ebc9 --- /dev/null +++ b/python/ql/src/analysis/Sanity.ql @@ -0,0 +1,228 @@ +/** + * @name Sanity check + * @description General sanity check to be run on any and all code. Should never produce any results. + * @id py/sanity-check + */ + +import python +import DefinitionTracking + +predicate uniqueness_error(int number, string what, string problem) { + ( + what = "toString" or what = "getLocation" or what = "getNode" or what = "getDefinition" or + what = "getEntryNode" or what = "getOrigin" or what = "getAnInferredType" + ) + and + ( + number = 0 and problem = "no results for " + what + "()" + or + number in [2 .. 
10] and problem = number.toString() + " results for " + what + "()" + ) +} + +predicate ast_sanity(string clsname, string problem, string what) { + exists(AstNode a | + clsname = a.getAQlClass() | + uniqueness_error(count(a.toString()), "toString", problem) and what = "at " + a.getLocation().toString() or + uniqueness_error(strictcount(a.getLocation()), "getLocation", problem) and what = a.getLocation().toString() or + not exists(a.getLocation()) and problem = "no location" and what = a.toString() + ) +} + +predicate location_sanity(string clsname, string problem, string what) { + exists(Location l | + clsname = l.getAQlClass() | + uniqueness_error(count(l.toString()), "toString", problem) and what = "at " + l.toString() or + not exists(l.toString()) and problem = "no toString" and + ( + exists(AstNode thing | + thing.getLocation() = l | + what = "a location of a " + thing.getAQlClass() + ) + or + not exists(AstNode thing | thing.getLocation() = l) and + what = "a location" + ) + or + l.getEndLine() < l.getStartLine() and problem = "end line before start line" and what = "at " + l.toString() + or + l.getEndLine() = l.getStartLine() and l.getEndColumn() < l.getStartColumn() and + problem = "end column before start column" and what = "at " + l.toString() + ) +} + +predicate cfg_sanity(string clsname, string problem, string what) { + exists(ControlFlowNode f | + clsname = f.getAQlClass() | + uniqueness_error(count(f.getNode()), "getNode", problem) and what = "at " + f.getLocation().toString() or + not exists(f.getLocation()) and problem = "no location" and what = f.toString() or + uniqueness_error(count(f.(AttrNode).getObject()), "getValue", problem) and what = "at " + f.getLocation().toString() + ) +} + +predicate scope_sanity(string clsname, string problem, string what) { + exists(Scope s | + clsname = s.getAQlClass() | + uniqueness_error(count(s.getEntryNode()), "getEntryNode", problem) and what = "at " + s.getLocation().toString() or + 
uniqueness_error(count(s.toString()), "toString", problem) and what = "at " + s.getLocation().toString() or + uniqueness_error(strictcount(s.getLocation()), "getLocation", problem) and what = "at " + s.getLocation().toString() or + not exists(s.getLocation()) and problem = "no location" and what = s.toString() + ) +} + +string best_description_builtin_object(Object o) { + o.isBuiltin() and + ( + result = o.toString() + or + not exists(o.toString()) and py_cobjectnames(o, result) + or + not exists(o.toString()) and not py_cobjectnames(o, _) and result = "builtin object of type " + o.getAnInferredType().toString() + or + not exists(o.toString()) and not py_cobjectnames(o, _) and not exists(o.getAnInferredType().toString()) and result = "builtin object" + ) +} + +private predicate introspected_builtin_object(Object o) { + /* Only check objects from the extractor, missing data for objects generated from C source code analysis is OK. + * as it will be ignored if it doesn't match up with the introspected form. 
*/ + py_cobject_sources(o, 0) +} + +predicate builtin_object_sanity(string clsname, string problem, string what) { + exists(Object o | + clsname = o.getAQlClass() and what = best_description_builtin_object(o) and introspected_builtin_object(o) | + not exists(o.getAnInferredType()) and not py_cobjectnames(o, _) and problem = "neither name nor type" + or + uniqueness_error(count(string name | py_cobjectnames(o, name)), "name", problem) + or + not exists(o.getAnInferredType()) and problem = "no results for getAnInferredType" + or + not exists(o.toString()) and problem = "no toString" and + not exists(string name | name.prefix(7) = "_semmle" | py_special_objects(o, name)) and + not o = unknownValue() + ) +} + +predicate source_object_sanity(string clsname, string problem, string what) { + exists(Object o | + clsname = o.getAQlClass() and not o.isBuiltin() | + uniqueness_error(count(o.getOrigin()), "getOrigin", problem) and what = "at " + o.getOrigin().getLocation().toString() + or + not exists(o.getOrigin().getLocation()) and problem = "no location" and what = "??" + or + not exists(o.toString()) and problem = "no toString" and what = "at " + o.getOrigin().getLocation().toString() + or + strictcount(o.toString()) > 1 and problem = "multiple toStrings()" and what = o.toString() + ) +} + +predicate ssa_sanity(string clsname, string problem, string what) { + /* Zero or one definitions of each SSA variable */ + exists(SsaVariable var | + clsname = var.getAQlClass() | + uniqueness_error(strictcount(var.getDefinition()), "getDefinition", problem) and what = var.getId() + ) + or + /* Dominance criterion: Definition *must* dominate *all* uses. 
*/ + exists(SsaVariable var, ControlFlowNode defn, ControlFlowNode use | + defn = var.getDefinition() and use = var.getAUse() | + not defn.strictlyDominates(use) and not defn = use and + /* Phi nodes which share a flow node with a use come *before* the use */ + not (exists(var.getAPhiInput()) and defn = use) and + clsname = var.getAQlClass() and problem = "a definition which does not dominate a use at " + use.getLocation() and what = var.getId() + " at " + var.getLocation() + ) + or + /* Minimality of phi nodes */ + exists(SsaVariable var | + strictcount(var.getAPhiInput()) = 1 and + var.getAPhiInput().getDefinition().getBasicBlock().strictlyDominates(var.getDefinition().getBasicBlock()) + | + clsname = var.getAQlClass() and problem = " a definition which is dominated by the definition of an incoming phi edge." and what = var.getId() + " at " + var.getLocation() + ) +} + +predicate function_object_sanity(string clsname, string problem, string what) { + exists(FunctionObject func | + clsname = func.getAQlClass() | + what = func.getName() and + ( + count(func.descriptiveString()) = 0 and problem = "no descriptiveString()" + or + exists(int c | + c = strictcount(func.descriptiveString()) and c > 1 | + problem = c + "descriptiveString()s" + ) + ) + or + not exists(func.getName()) and what = "?" and problem = "no name" + ) + +} + +predicate multiple_origins_per_object(Object obj) { + not obj.isC() and not obj instanceof ModuleObject and + exists(ControlFlowNode use | strictcount(ControlFlowNode orig | use.refersTo(obj, orig)) > 1) +} + +predicate intermediate_origins(ControlFlowNode use, ControlFlowNode inter, Object obj) { + exists(ControlFlowNode orig | + not inter = orig | + use.refersTo(obj, inter) and + inter.refersTo(obj, orig) and + // It can sometimes happen that two different modules (e.g. cPickle and Pickle) + // have the same attribute, but different origins. 
+ not strictcount(Object val | inter.(AttrNode).getObject().refersTo(val)) > 1 + ) +} + +predicate points_to_sanity(string clsname, string problem, string what) { + exists(Object obj | + multiple_origins_per_object(obj) and clsname = obj.getAQlClass() and + problem = "multiple origins for an object" and what = obj.toString() + ) + or + exists(ControlFlowNode use, ControlFlowNode inter, Object obj | + intermediate_origins(use, inter, obj) and + clsname = use.getAQlClass() and + problem = "has intermediate origin " + inter and + what = use.toString() + ) +} + +predicate jump_to_definition_sanity(string clsname, string problem, string what) { + problem = "multiple (jump-to) definitions" and + exists(Expr use | + strictcount(getUniqueDefinition(use)) > 1 and + clsname = use.getAQlClass() and + what = use.toString() + ) +} + +predicate file_sanity(string clsname, string problem, string what) { + exists(File file, Folder folder | + clsname = file.getAQlClass() and + problem = "has same name as a folder" and + what = file.getName() and + what = folder.getName() + ) or + exists(Container f | + clsname = f.getAQlClass() and + uniqueness_error(count(f.toString()), "toString", problem) and what = "file " + f.getName() + ) +} + +from string clsname, string problem, string what +where +ast_sanity(clsname, problem, what) or +location_sanity(clsname, problem, what)or +scope_sanity(clsname, problem, what) or +cfg_sanity(clsname, problem, what) or +ssa_sanity(clsname, problem, what) or +builtin_object_sanity(clsname, problem, what) or +source_object_sanity(clsname, problem, what) or +function_object_sanity(clsname, problem, what) or +points_to_sanity(clsname, problem, what) or +jump_to_definition_sanity(clsname, problem, what) or +file_sanity(clsname, problem, what) +select clsname + " " + what + " has " + problem diff --git a/python/ql/src/analysis/Summary.ql b/python/ql/src/analysis/Summary.ql new file mode 100644 index 00000000000..ba2fee0b4a8 --- /dev/null +++ 
b/python/ql/src/analysis/Summary.ql @@ -0,0 +1,38 @@ +/** Summarize a snapshot + */ + +import python + +from string key, string value +where +key = "Extractor version" and py_flags_versioned("extractor.version", value, _) +or +key = "Snapshot build time" and exists(date d | snapshotDate(d) and value = d.toString()) +or +key = "Interpreter version" and +exists(string major, string minor | + py_flags_versioned("version.major", major, _) and + py_flags_versioned("version.minor", minor, _) and + value = major + "." + minor +) +or +key = "Build platform" and +exists(string raw | + py_flags_versioned("sys.platform", raw, _) | + if raw = "win32" then + value = "Windows" + else if raw = "linux2" then + value = "Linux" + else if raw = "darwin" then + value = "OSX" + else + value = raw +) +or +key = "Source location" and sourceLocationPrefix(value) +or +key = "Lines of code (source)" and value = sum(ModuleMetrics m | exists(m.getFile().getRelativePath()) | m.getNumberOfLinesOfCode()).toString() +or +key = "Lines of code (total)" and value = sum(ModuleMetrics m | any() | m.getNumberOfLinesOfCode()).toString() + +select key, value diff --git a/python/ql/src/analysis/TypeHierarchyFailure.qhelp b/python/ql/src/analysis/TypeHierarchyFailure.qhelp new file mode 100644 index 00000000000..0f908372272 --- /dev/null +++ b/python/ql/src/analysis/TypeHierarchyFailure.qhelp @@ -0,0 +1,15 @@ + + + +

    In order to analyse uses of a class, all its attributes need to be known. Without the full inheritance hierarchy this is impossible. +This is an informational query only. +

    + +

    +This is an informational query only; it depends on points-to and type inference. +

    + +
    +
    diff --git a/python/ql/src/analysis/TypeHierarchyFailure.ql b/python/ql/src/analysis/TypeHierarchyFailure.ql new file mode 100644 index 00000000000..072eba9ac47 --- /dev/null +++ b/python/ql/src/analysis/TypeHierarchyFailure.ql @@ -0,0 +1,16 @@ +/** + * @name Inheritance hierarchy cannot be inferred for class + * @description Inability to infer the inheritance hierarchy of a class will impair analysis + * @id py/failed-inheritance-inference + * @kind problem + * @problem.severity info + */ + +import python + + +from Class cls +/* Report a class when no ClassObject corresponds to it, or when a corresponding ClassObject reports failed inference. */ where not exists(ClassObject c | c.getPyClass() = cls) +or +exists(ClassObject c | c.getPyClass() = cls | c.failedInference()) +select cls, "Inference of class hierarchy failed for class." \ No newline at end of file diff --git a/python/ql/src/analysis/TypeInferenceFailure.qhelp b/python/ql/src/analysis/TypeInferenceFailure.qhelp new file mode 100644 index 00000000000..3ca947b376d --- /dev/null +++ b/python/ql/src/analysis/TypeInferenceFailure.qhelp @@ -0,0 +1,13 @@ + + + + +

    +Type inference is the key part of Semmle's Python analysis. +Failures in type inference reduce both the coverage and the accuracy of many queries. +

    + +
    +
    diff --git a/python/ql/src/analysis/TypeInferenceFailure.ql b/python/ql/src/analysis/TypeInferenceFailure.ql new file mode 100644 index 00000000000..18744a1a6da --- /dev/null +++ b/python/ql/src/analysis/TypeInferenceFailure.ql @@ -0,0 +1,14 @@ +/** + * @name Type inference fails for 'object' + * @description Type inference fails for 'object' which reduces recall for many queries. + * @kind problem + * @problem.severity info + * @id py/type-inference-failure + */ +import python + + +from ControlFlowNode f, Object o +where f.refersTo(o) and +not exists(ClassObject c | f.refersTo(o, c, _)) +select o, "Type inference fails for 'object'." \ No newline at end of file diff --git a/python/ql/src/external/CodeDuplication.qll b/python/ql/src/external/CodeDuplication.qll new file mode 100644 index 00000000000..7db04663ae0 --- /dev/null +++ b/python/ql/src/external/CodeDuplication.qll @@ -0,0 +1,281 @@ +/** Provides classes for detecting duplicate or similar code. */ + +import python + +/** Gets the relative path of `file`, with backslashes replaced by forward slashes. */ +private +string relativePath(File file) { + result = file.getRelativePath().replaceAll("\\", "/") +} + +/** + * Holds if the `index`-th token of block `copy` is in file `file`, spanning + * column `sc` of line `sl` to column `ec` of line `el`. + * + * For more information, see [LGTM locations](https://lgtm.com/help/ql/locations). + */ +pragma[noinline, nomagic] +private predicate tokenLocation(File file, int sl, int sc, int ec, int el, Copy copy, int index) { + file = copy.sourceFile() and + tokens(copy, index, sl, sc, ec, el) +} + +/** A token block used for detection of duplicate and similar code. */ +class Copy extends @duplication_or_similarity +{ + private + int lastToken() { + result = max(int i | tokens(this, i, _, _, _, _) | i) + } + + /** Gets the index of the token in this block starting at the location `loc`, if any. 
*/ + int tokenStartingAt(Location loc) { + tokenLocation(loc.getFile(), loc.getStartLine(), loc.getStartColumn(), + _, _, this, result) + } + + /** Gets the index of the token in this block ending at the location `loc`, if any. */ + int tokenEndingAt(Location loc) { + tokenLocation(loc.getFile(), _, _, + loc.getEndLine(), loc.getEndColumn(), this, result) + } + + /** Gets the line on which the first token in this block starts. */ + int sourceStartLine() { + tokens(this, 0, result, _, _, _) + } + + /** Gets the column on which the first token in this block starts. */ + int sourceStartColumn() { + tokens(this, 0, _, result, _, _) + } + + /** Gets the line on which the last token in this block ends. */ + int sourceEndLine() { + tokens(this, this.lastToken(), _, _, result, _) + } + + /** Gets the column on which the last token in this block ends. */ + int sourceEndColumn() { + tokens(this, this.lastToken(), _, _, _, result) + } + + /** Gets the number of lines containing at least (part of) one token in this block. */ + int sourceLines() { + result = this.sourceEndLine() + 1 - this.sourceStartLine() + } + + /** Gets an opaque identifier for the equivalence class of this block. */ + int getEquivalenceClass() { + duplicateCode(this, _, result) or similarCode(this, _, result) + } + + /** Gets the source file in which this block appears. */ + File sourceFile() { + exists(string name | + duplicateCode(this, name, _) or similarCode(this, name, _) | + name.replaceAll("\\", "/") = relativePath(result)) + } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [LGTM locations](https://lgtm.com/help/ql/locations). 
+ */ + predicate hasLocationInfo(string filepath, int startline, int startcolumn, int endline, int endcolumn) { + sourceFile().getName() = filepath and + startline = sourceStartLine() and + startcolumn = sourceStartColumn() and + endline = sourceEndLine() and + endcolumn = sourceEndColumn() + } + + /** Gets a textual representation of this element. This base implementation has no result. */ + string toString() { none() } + + /** + * Gets a block that extends this one, that is, its first token is also + * covered by this block, but they are not the same block. + */ + Copy extendingBlock() { + exists(File file, int sl, int sc, int ec, int el | + tokenLocation(file, sl, sc, ec, el, this, _) and + tokenLocation(file, sl, sc, ec, el, result, 0)) and + this != result + } +} + +/** + * Holds if there is a sequence of `SimilarBlock`s `start1, ..., ext1` and another sequence + * `start2, ..., ext2` such that each block extends the previous one and corresponding blocks + * have the same equivalence class, with `start` being the equivalence class of `start1` and + * `start2`, and `ext` the equivalence class of `ext1` and `ext2`. + */ +predicate similar_extension(SimilarBlock start1, SimilarBlock start2, SimilarBlock ext1, SimilarBlock ext2, int start, int ext) { + start1.getEquivalenceClass() = start and + start2.getEquivalenceClass() = start and + ext1.getEquivalenceClass() = ext and + ext2.getEquivalenceClass() = ext and + start1 != start2 and + (ext1 = start1 and ext2 = start2 or + similar_extension(start1.extendingBlock(), start2.extendingBlock(), ext1, ext2, _, ext) + ) +} + +/** + * Holds if there is a sequence of `DuplicateBlock`s `start1, ..., ext1` and another sequence + * `start2, ..., ext2` such that each block extends the previous one and corresponding blocks + * have the same equivalence class, with `start` being the equivalence class of `start1` and + * `start2`, and `ext` the equivalence class of `ext1` and `ext2`. 
+ */ +predicate duplicate_extension(DuplicateBlock start1, DuplicateBlock start2, DuplicateBlock ext1, DuplicateBlock ext2, int start, int ext) { + start1.getEquivalenceClass() = start and + start2.getEquivalenceClass() = start and + ext1.getEquivalenceClass() = ext and + ext2.getEquivalenceClass() = ext and + start1 != start2 and + (ext1 = start1 and ext2 = start2 or + duplicate_extension(start1.extendingBlock(), start2.extendingBlock(), ext1, ext2, _, ext) + ) +} + +/** A block of duplicated code. */ +class DuplicateBlock extends Copy, @duplication +{ + override string toString() { + result = "Duplicate code: " + sourceLines() + " duplicated lines." + } +} + +/** A block of similar code. */ +class SimilarBlock extends Copy, @similarity +{ + override string toString() { + result = "Similar code: " + sourceLines() + " almost duplicated lines." + } +} + +/** + * Holds if `stmt1` and `stmt2` are distinct duplicate statements contained in scopes `scope1` and `scope2`, + * respectively, where `scope1` and `scope2` are not the same. + */ +predicate duplicateStatement(Scope scope1, Scope scope2, Stmt stmt1, Stmt stmt2) { + exists(int equivstart, int equivend, int first, int last | + scope1.contains(stmt1) and + scope2.contains(stmt2) and + duplicateCoversStatement(equivstart, equivend, first, last, stmt1) and + duplicateCoversStatement(equivstart, equivend, first, last, stmt2) and + stmt1 != stmt2 and scope1 != scope2 + ) +} + +/** + * Holds if statement `stmt` is covered by a sequence of `DuplicateBlock`s, where `first` + * is the index of the token in the first block that starts at the beginning of `stmt`, + * while `last` is the index of the token in the last block that ends at the end of `stmt`, + * and `equivstart` and `equivend` are the equivalence classes of the first and the last + * block, respectively. 
+ */ +private +predicate duplicateCoversStatement(int equivstart, int equivend, int first, int last, Stmt stmt) { + exists(DuplicateBlock b1, DuplicateBlock b2, Location startloc, Location endloc | + stmt.getLocation() = startloc and + stmt.getLastStatement().getLocation() = endloc and + first = b1.tokenStartingAt(startloc) and + last = b2.tokenEndingAt(endloc) and + b1.getEquivalenceClass() = equivstart and + b2.getEquivalenceClass() = equivend and + duplicate_extension(b1, _, b2, _, equivstart, equivend) + ) +} + +/** + * Holds if scope `scope1` contains `total` statements, and `duplicate` of those statements + * have a duplicate statement in the different scope `scope2`. + */ +predicate duplicateStatements(Scope scope1, Scope scope2, int duplicate, int total) { + duplicate = strictcount(Stmt stmt | duplicateStatement(scope1, scope2, stmt, _)) and + total = strictcount(Stmt stmt | scope1.contains(stmt)) +} + +/** + * Holds if `percent` (at least 80.0) percent of the statements in `s` are duplicated in `other`; `message` describes the overlap. + */ +predicate duplicateScopes(Scope s, Scope other, float percent, string message) { + exists(int total, int duplicate | + duplicateStatements(s, other, duplicate, total) | + percent = 100.0 * duplicate / total and percent >= 80.0 and + if duplicate = total then + message = "All " + total + " statements in " + s.getName() + " are identical in $@." + else + message = duplicate + " out of " + total + " statements in " + s.getName() + " are duplicated in $@." + ) +} + +/** + * Holds if `stmt1` and `stmt2` are similar statements in function or toplevel `scope1` and `scope2`, + * respectively, where `scope1` and `scope2` are not the same. 
+ */ +private predicate similarStatement(Scope scope1, Scope scope2, Stmt stmt1, Stmt stmt2) { + exists(int start, int end, int first, int last | + scope1.contains(stmt1) and + scope2.contains(stmt2) and + similarCoversStatement(start, end, first, last, stmt1) and + similarCoversStatement(start, end, first, last, stmt2) and + stmt1 != stmt2 and scope1 != scope2 + ) +} + +/** + * Holds if statement `stmt` is covered by a sequence of `SimilarBlock`s, where `first` + * is the index of the token in the first block that starts at the beginning of `stmt`, + * while `last` is the index of the token in the last block that ends at the end of `stmt`, + * and `equivstart` and `equivend` are the equivalence classes of the first and the last + * block, respectively. + */ +private predicate similarCoversStatement(int equivstart, int equivend, int first, int last, Stmt stmt) { + exists(SimilarBlock b1, SimilarBlock b2, Location startloc, Location endloc | + stmt.getLocation() = startloc and + stmt.getLastStatement().getLocation() = endloc and + first = b1.tokenStartingAt(startloc) and + last = b2.tokenEndingAt(endloc) and + b1.getEquivalenceClass() = equivstart and + b2.getEquivalenceClass() = equivend and + similar_extension(b1, _, b2, _, equivstart, equivend) + ) +} + +/** + * Holds if scope `scope1` contains `total` statements, and `similar` of those statements + * have a similar statement in the different scope `scope2`. 
+ */ +private predicate similarStatements(Scope scope1, Scope scope2, int similar, int total) { + similar = strictcount(Stmt stmt | similarStatement(scope1, scope2, stmt, _)) and + total = strictcount(Stmt stmt | scope1.contains(stmt)) +} + +/** + * Holds if `percent` (at least 80.0) percent of the statements in `s` are similar to statements in `other`; `message` describes the overlap. + */ +predicate similarScopes(Scope s, Scope other, float percent, string message) { + exists(int total, int similar | + similarStatements(s, other, similar, total) | + percent = 100.0 * similar / total and percent >= 80.0 and + if similar = total then + message = "All statements in " + s.getName() + " are similar in $@." + else + message = similar + " out of " + total + " statements in " + s.getName() + " are similar in $@." + ) +} + +/** + * Holds if line `line` of file `f` is acceptable as a duplicate. + * This is true for every line on which an import statement starts. + */ +predicate whitelistedLineForDuplication(File f, int line) { + exists(ImportingStmt i | + i.getLocation().getFile() = f and i.getLocation().getStartLine() = line + ) +} diff --git a/python/ql/src/external/DefectFilter.qll b/python/ql/src/external/DefectFilter.qll new file mode 100644 index 00000000000..9504cd08554 --- /dev/null +++ b/python/ql/src/external/DefectFilter.qll @@ -0,0 +1,67 @@ +/** Provides a class for working with defect query results stored in dashboard databases. */ + +import semmle.python.Files + +/** + * Holds if `id` is the opaque identifier of a result reported by query `queryPath`, + * such that `message` is the associated message and the location of the result spans + * column `startcol` of line `startline` to column `endcol` of line `endline` + * in file `filepath`. + * + * For more information, see [LGTM locations](https://lgtm.com/help/ql/locations). + */ +external predicate defectResults(int id, string queryPath, string filepath, int startline, + int startcol, int endline, int endcol, string message); + +/** + * A defect query result stored in a dashboard database. 
+ */ +class DefectResult extends int { + + DefectResult() { defectResults(this, _, _, _, _, _, _, _) } + + /** Gets the path of the query that reported the result. */ + string getQueryPath() { defectResults(this, result, _, _, _, _, _, _) } + + /** Gets the file in which this query result was reported. */ + File getFile() { + exists(string path | defectResults(this, _, path, _, _, _, _, _) and result.getName() = path) + } + + /** Gets the file path in which this query result was reported. */ + string getFilePath() { defectResults(this, _, result, _, _, _, _, _) } + + /** Gets the line on which the location of this query result starts. */ + int getStartLine() { defectResults(this, _, _, result, _, _, _, _) } + + /** Gets the column on which the location of this query result starts. */ + int getStartColumn() { defectResults(this, _, _, _, result, _, _, _) } + + /** Gets the line on which the location of this query result ends. */ + int getEndLine() { defectResults(this, _, _, _, _, result, _, _) } + + /** Gets the column on which the location of this query result ends. */ + int getEndColumn() { defectResults(this, _, _, _, _, _, result, _) } + + /** Gets the message associated with this query result. */ + string getMessage() { defectResults(this, _, _, _, _, _, _, result) } + + /** Holds if this result spans column `sc` of line `sl` to column `ec` of line `el` in the file with path `path`. */ predicate hasLocationInfo(string path, int sl, int sc, int el, int ec) { + defectResults(this, _, path, sl, sc, el, ec, _) + } + + /** Gets the URL corresponding to the location of this query result. 
*/ + string getURL() { + result = "file://" + getFile().getName() + ":" + getStartLine() + ":" + getStartColumn() + ":" + getEndLine() + ":" + getEndColumn() + } + +} + +// Holds if `l` and `res` share a file path and the line range of `l` encloses that of `res` (crude containment: columns are ignored) +predicate contains(Location l, DefectResult res) { + exists(string path, int bl1, int el1, int bl2, int el2 | + l.hasLocationInfo(path, bl1, _, el1, _) + and res.hasLocationInfo(path, bl2, _, el2, _) + and bl1 <= bl2 and el1 >= el2 + ) +} diff --git a/python/ql/src/external/DuplicateBlock.qhelp b/python/ql/src/external/DuplicateBlock.qhelp new file mode 100644 index 00000000000..b1e12aa3ef9 --- /dev/null +++ b/python/ql/src/external/DuplicateBlock.qhelp @@ -0,0 +1,31 @@ + + + + + +

    Blocks of code that are duplicated verbatim in several places in the code are candidates for +refactoring into functions. The severity of this anti-pattern is higher for longer blocks than for short blocks.

    + +
    + +

    Code duplication is undesirable for a range of reasons: The artificially +inflated amount of code hinders comprehension, and ranges of similar but subtly different lines +can mask the real purpose or intention behind a function. There's also a risk of +update anomalies, where only one of several copies of the code is updated to address a defect or +add a feature.

    + +

    In the case of code block duplication, how to address the issue depends on the blocks of code themselves. +It may be possible to extract the block of code into its own function and call that instead of duplicating the code.

    + +
    + + +
  • Elmar Juergens, Florian Deissenboeck, Benjamin Hummel, and Stefan Wagner. 2009. +Do code clones matter? In Proceedings of the 31st International Conference on +Software Engineering (ICSE '09). IEEE Computer Society, Washington, DC, USA, +485-495.
  • + +
    +
    diff --git a/python/ql/src/external/DuplicateBlock.ql b/python/ql/src/external/DuplicateBlock.ql new file mode 100644 index 00000000000..1a892b87900 --- /dev/null +++ b/python/ql/src/external/DuplicateBlock.ql @@ -0,0 +1,33 @@ +/** + * @name Duplicate code block + * @description This block of code is duplicated elsewhere. If possible, the shared code should be refactored so there is only one occurrence left. It may not always be possible to address these issues; other duplicate code checks (such as duplicate function, duplicate class) give subsets of the results with higher confidence. + * @kind problem + * @problem.severity recommendation + * @sub-severity low + * @tags testability + * maintainability + * useless-code + * duplicate-code + * statistical + * non-attributable + * @deprecated + * @precision medium + * @id py/duplicate-block + */ +import CodeDuplication + +/** Holds if `x` sorts strictly before `y`: by start line when both are in the same file, otherwise by file name. */ predicate sorted_by_location(DuplicateBlock x, DuplicateBlock y) { + if x.sourceFile() = y.sourceFile() then + x.sourceStartLine() < y.sourceStartLine() + else + x.sourceFile().getName() < y.sourceFile().getName() +} + +from DuplicateBlock d, DuplicateBlock other +where d.sourceLines() > 10 and + other.getEquivalenceClass() = d.getEquivalenceClass() and + sorted_by_location(other, d) +select + d, + "Duplicate code: " + d.sourceLines() + " lines are duplicated at " + + other.sourceFile().getShortName() + ":" + other.sourceStartLine().toString() diff --git a/python/ql/src/external/DuplicateFunction.qhelp b/python/ql/src/external/DuplicateFunction.qhelp new file mode 100644 index 00000000000..c7ae7965ace --- /dev/null +++ b/python/ql/src/external/DuplicateFunction.qhelp @@ -0,0 +1,43 @@ + + + + + +

    A function should never be duplicated verbatim in several places in the code. Of course, +the severity of this anti-pattern is higher for longer functions than for extremely short +functions of one or two statements, but there are usually better ways of achieving the same +effect.

    + +
    + +

    Code duplication in general is highly undesirable for a range of reasons: The artificially +inflated amount of code hinders comprehension, and ranges of similar but subtly different lines +can mask the real purpose or intention behind a function. There's also an omnipresent risk of +update anomalies, where only one of several copies of the code is updated to address a defect or +add a feature.

    + +

    In the case of function duplication, how to address the issue depends on the functions themselves +and on the precise classes or modules in which the duplication occurs. At its simplest, the duplication can +be addressed by simply removing all but one of the duplicate function definitions and making +callers of the removed functions refer to the (now canonical) single remaining definition +instead.

    + +

    This may not be possible for reasons of accessibility. A common example might +be where two classes implement the same functionality but neither is a subtype of the other, +so it is not possible to inherit a single method definition. In such cases, introducing a +common superclass to share the duplicated code is a viable option. Alternatively, if the methods +don't need access to private object state, they can be moved to a module-level function.

    + + +
    + + +
  • Elmar Juergens, Florian Deissenboeck, Benjamin Hummel, and Stefan Wagner. 2009. +Do code clones matter? In Proceedings of the 31st International Conference on +Software Engineering (ICSE '09). IEEE Computer Society, Washington, DC, USA, +485-495.
  • + +
    +
    diff --git a/python/ql/src/external/DuplicateFunction.ql b/python/ql/src/external/DuplicateFunction.ql new file mode 100644 index 00000000000..ddf587caf68 --- /dev/null +++ b/python/ql/src/external/DuplicateFunction.ql @@ -0,0 +1,31 @@ +/** + * @name Duplicate function + * @description There is another identical implementation of this function. Extract the code to a common file or superclass to improve sharing. + * @kind problem + * @tags testability + * useless-code + * maintainability + * duplicate-code + * statistical + * non-attributable + * @problem.severity recommendation + * @sub-severity high + * @precision high + * @id py/duplicate-function + */ +import python +import CodeDuplication + +/** Holds if `m` has more than 5 lines of code, so that trivial functions are not reported. */ predicate relevant(Function m) { + m.getMetrics().getNumberOfLinesOfCode() > 5 +} + +from Function m, Function other, string message, float percent +where duplicateScopes(m, other, percent, message) + and relevant(m) + and percent > 95.0 + and not duplicateScopes(m.getEnclosingModule(), other.getEnclosingModule(), _, _) + and not duplicateScopes(m.getScope(), other.getScope(), _, _) +select m, message, + other, + other.getName() diff --git a/python/ql/src/external/ExternalArtifact.qll b/python/ql/src/external/ExternalArtifact.qll new file mode 100644 index 00000000000..9c2fe6a9b66 --- /dev/null +++ b/python/ql/src/external/ExternalArtifact.qll @@ -0,0 +1,103 @@ +import python + +/** A defect recorded in the `externalDefects` relation. */ class ExternalDefect extends @externalDefect { + + /** Gets the path of the reporting query, with backslashes replaced by forward slashes. */ string getQueryPath() { + exists(string path | + externalDefects(this, path, _, _, _) and + result = path.replaceAll("\\", "/") + ) + } + + string getMessage() { + externalDefects(this, _, _, result, _) + } + + float getSeverity() { + externalDefects(this, _, _, _, result) + } + + Location getLocation() { + externalDefects(this,_,result,_,_) + } + + string toString() { + result = getQueryPath() + ": " + getLocation() + " - " + getMessage() + } +} + +/** A metric value recorded in the `externalMetrics` relation. */ class ExternalMetric extends @externalMetric { + + string getQueryPath() { + externalMetrics(this, 
result, _, _) + } + + float getValue() { + externalMetrics(this, _, _, result) + } + + Location getLocation() { + externalMetrics(this,_,result,_) + } + + string toString() { + result = getQueryPath() + ": " + getLocation() + " - " + getValue() + } +} + +/** A tuple of indexed string fields recorded in the `externalData` relation. */ class ExternalData extends @externalDataElement { + + /** Gets the data path recorded for this tuple. */ string getDataPath() { + externalData(this, result, _, _) + } + + /** Gets the data path with its final extension replaced by `.ql`. */ string getQueryPath() { + result = getDataPath().regexpReplaceAll("\\.[^.]*$", ".ql") + } + + /** Gets the number of fields, computed as one more than the largest field index. */ int getNumFields() { + result = 1 + max(int i | externalData(this, _, i, _) | i) + } + + /** Gets the value of the field at position `index`. */ string getField(int index) { + externalData(this, _, index, result) + } + + int getFieldAsInt(int index) { + result = getField(index).toInt() + } + + float getFieldAsFloat(int index) { + result = getField(index).toFloat() + } + + date getFieldAsDate(int index) { + result = getField(index).toDate() + } + + string toString() { + result = getQueryPath() + ": " + buildTupleString(0) + } + + /** Gets the fields from position `start` to the last one, joined by commas. */ private string buildTupleString(int start) { + (start = getNumFields() - 1 and result = getField(start)) + or + (start < getNumFields() - 1 and result = getField(start) + "," + buildTupleString(start+1)) + } + +} + +/** + * External data with a location, and a message, as produced by tools that used to produce QLDs. + * Field 0 must look like a URL ending in four colon-separated numbers, and there must be exactly two fields. + */ +class DefectExternalData extends ExternalData { + DefectExternalData() { + this.getField(0).regexpMatch("\\w+://.*:[0-9]+:[0-9]+:[0-9]+:[0-9]+$") and + this.getNumFields() = 2 + } + + string getURL() { result = getField(0) } + + string getMessage() { result = getField(1) } +} + diff --git a/python/ql/src/external/MostlyDuplicateClass.qhelp b/python/ql/src/external/MostlyDuplicateClass.qhelp new file mode 100644 index 00000000000..e7e5a0dc50f --- /dev/null +++ b/python/ql/src/external/MostlyDuplicateClass.qhelp @@ -0,0 +1,31 @@ + + + +

    If two classes share a lot of code then there is a lot of unnecessary code +duplication. This makes it difficult to make changes in future and makes the classes less easy to +read.

    + +
    + +

    While completely duplicated classes are rare, they are usually a sign of a simple oversight. +Usually the required action is to remove all but one of them. A common exception to this rule may +arise from generated code that simply occurs in several places in the source tree; the check can be +adapted to exclude such results.

    + +

    It is far more common to see duplication of many methods between two classes, leaving just a few +that are actually different. Consider such situations carefully. Are the differences deliberate or +a result of an inconsistent update to one of the clones? If the latter, then treating the classes +as completely duplicate and eliminating one (while preserving any corrections or new features that +may have been introduced) is the best course. If the two classes serve different purposes then it +is possible there is a missing level of abstraction. Consider creating a common superclass of the +duplicate classes.

    + +
    + + +
  • E. Juergens, F. Deissenboeck, B. Hummel and S. Wagner, Do Code Clones Matter?, 2009. (available online).
  • + +
    +
    diff --git a/python/ql/src/external/MostlyDuplicateClass.ql b/python/ql/src/external/MostlyDuplicateClass.ql new file mode 100644 index 00000000000..7a6f0b7587d --- /dev/null +++ b/python/ql/src/external/MostlyDuplicateClass.ql @@ -0,0 +1,24 @@ +/** + * @name Mostly duplicate class + * @description More than 80% of the methods in this class are duplicated in another class. Create a common supertype to improve code sharing. + * @kind problem + * @tags testability + * maintainability + * useless-code + * duplicate-code + * statistical + * non-attributable + * @problem.severity recommendation + * @sub-severity high + * @precision high + * @id py/mostly-duplicate-class + */ +import python +import CodeDuplication + +from Class c, Class other, string message +where duplicateScopes(c, other, _, message) + and count(c.getAStmt()) > 3 + and not duplicateScopes(c.getEnclosingModule(), _, _, _) +select c, message, other, other.getName() + diff --git a/python/ql/src/external/MostlyDuplicateFile.qhelp b/python/ql/src/external/MostlyDuplicateFile.qhelp new file mode 100644 index 00000000000..80035aef7f6 --- /dev/null +++ b/python/ql/src/external/MostlyDuplicateFile.qhelp @@ -0,0 +1,31 @@ + + + +

    If two files share a lot of code then there is a lot of unnecessary code duplication. +This makes it difficult to make changes in future and makes the code less easy to read.

    + +
    + +

    While completely duplicated files are rare, they are usually a sign of a simple oversight. +Usually the required action is to remove all but one of them. A common exception to this rule may +arise from generated code that simply occurs in several places in the source tree; the check can be +adapted to exclude such results.

    + +

    It is far more common to see duplication of many lines between two files, leaving just a few that +are actually different. Consider such situations carefully. Are the differences deliberate or a +result of an inconsistent update to one of the clones? If the latter, then treating the files as +completely duplicate and eliminating one (while preserving any corrections or new features that may +have been introduced) is the best course. If two files serve genuinely different purposes but almost +all of their lines are the same, that can be a sign that there is a missing level of abstraction. +Look for ways to share the functionality, by creating a new module for the common parts and +importing that module into the original module.

    + +
    + + +
  • E. Juergens, F. Deissenboeck, B. Hummel and S. Wagner, Do Code Clones Matter?, 2009. (available online).
  • + +
    +
    diff --git a/python/ql/src/external/MostlyDuplicateFile.ql b/python/ql/src/external/MostlyDuplicateFile.ql new file mode 100644 index 00000000000..57178d8846e --- /dev/null +++ b/python/ql/src/external/MostlyDuplicateFile.ql @@ -0,0 +1,21 @@ +/** + * @name Mostly duplicate module + * @description There is another file that shares a lot of the code with this file. Merge the two files to improve maintainability. + * @kind problem + * @tags testability + * maintainability + * useless-code + * duplicate-code + * statistical + * non-attributable + * @problem.severity recommendation + * @sub-severity high + * @precision high + * @id py/mostly-duplicate-file + */ +import python +import CodeDuplication + +from Module m, Module other, string message +where duplicateScopes(m, other, _, message) +select m, message, other, other.getName() diff --git a/python/ql/src/external/MostlySimilarFile.qhelp b/python/ql/src/external/MostlySimilarFile.qhelp new file mode 100644 index 00000000000..978c8f4450e --- /dev/null +++ b/python/ql/src/external/MostlySimilarFile.qhelp @@ -0,0 +1,25 @@ + + + +

    This rule identifies two files that have a lot of the same lines but with different variable and +method names. This makes it difficult to make changes in future and makes the code less easy to read. +

    + +
    + +

    It is important to determine why there are small differences in the files. Sometimes the files +might have been duplicates but an update was only applied to one copy. If this is the case it should +be simple to merge the files, preserving any changes.

    + +

    If the files are intentionally different then it could be a good idea to consider extracting some +of the shared code into its own module and importing that module into the original files.

    + +
    + + +
  • E. Juergens, F. Deissenboeck, B. Hummel and S. Wagner, Do Code Clones Matter?, 2009. (available online).
  • + +
    +
    diff --git a/python/ql/src/external/MostlySimilarFile.ql b/python/ql/src/external/MostlySimilarFile.ql new file mode 100644 index 00000000000..4bdcce626c9 --- /dev/null +++ b/python/ql/src/external/MostlySimilarFile.ql @@ -0,0 +1,21 @@ +/** + * @name Mostly similar module + * @description There is another module that shares a lot of the code with this module. Notice that names of variables and types may have been changed. Merge the two modules to improve maintainability. + * @kind problem + * @tags testability + * maintainability + * useless-code + * duplicate-code + * statistical + * non-attributable + * @problem.severity recommendation + * @sub-severity low + * @precision high + * @id py/mostly-similar-file + */ +import python +import CodeDuplication + +from Module m, Module other, string message +where similarScopes(m, other, _, message) +select m, message, other, other.getName() diff --git a/python/ql/src/external/SimilarFunction.qhelp b/python/ql/src/external/SimilarFunction.qhelp new file mode 100644 index 00000000000..5f8d0bdb7e9 --- /dev/null +++ b/python/ql/src/external/SimilarFunction.qhelp @@ -0,0 +1,31 @@ + + + +

    If two functions share a lot of code then there is a lot of unnecessary code +duplication. This makes it difficult to make changes in future and makes the code less easy to read. +

    + +
    + +

    While completely duplicated functions are rare, they are usually a sign of a simple oversight. +Usually the required action is to remove all but one of them. A common exception to this rule may +arise from generated code that simply occurs in several places in the source tree; the check can be +adapted to exclude such results.

    + +

    It is far more common to see duplication of many lines between two functions, leaving just a few +that are actually different. Consider such situations carefully. Are the differences deliberate or a +result of an inconsistent update to one of the clones? If the latter, then treating the functions as +completely duplicate and eliminating one (while preserving any corrections or new features that may +have been introduced) is the best course. If two functions serve genuinely different purposes but +almost all of their lines are the same, then consider extracting the shared lines into a separate function. +

    + +
    + + +
  • E. Juergens, F. Deissenboeck, B. Hummel and S. Wagner, Do Code Clones Matter?, 2009. (available online).
  • + +
    +
    diff --git a/python/ql/src/external/SimilarFunction.ql b/python/ql/src/external/SimilarFunction.ql new file mode 100644 index 00000000000..9d0a3f72cfb --- /dev/null +++ b/python/ql/src/external/SimilarFunction.ql @@ -0,0 +1,35 @@ +/** + * @name Similar function + * @description There is another function that is very similar to this one. Extract the common code to a common function to improve sharing. + * @kind problem + * @tags testability + * maintainability + * useless-code + * duplicate-code + * statistical + * non-attributable + * @problem.severity recommendation + * @sub-severity low + * @precision very-high + * @id py/similar-function + */ +import python +import CodeDuplication + +/** Holds if `m` has more than 10 lines of code, so that short functions are not reported. */ predicate relevant(Function m) { + m.getMetrics().getNumberOfLinesOfCode() > 10 +} + +from Function m, Function other, string message, float percent +where similarScopes(m, other, percent, message) and + relevant(m) and + percent > 95.0 and + not duplicateScopes(m, other, _, _) and + not duplicateScopes(m.getEnclosingModule(), other.getEnclosingModule(), _, _) and + not duplicateScopes(m.getScope(), other.getScope(), _, _) +select m, message, + other, + other.getName() + + + + diff --git a/python/ql/src/external/Thrift.qll b/python/ql/src/external/Thrift.qll new file mode 100644 index 00000000000..a10d5aab506 --- /dev/null +++ b/python/ql/src/external/Thrift.qll @@ -0,0 +1,320 @@ +/** + * Provides classes for working with Apache Thrift IDL files. + * This code is under development and may change without warning. 
+ */ + + +import external.ExternalArtifact + +/** An item in the parse tree of the IDL file */ +class ThriftElement extends ExternalData { + + string kind; + + ThriftElement() { + this.getDataPath() = "thrift-" + kind + } + + string getKind() { + result = kind + } + + string getId() { + result = getField(0) + } + + int getIndex() { + result = getFieldAsInt(1) + } + + ThriftElement getParent() { + result.getId() = this.getField(2) + } + + string getValue() { + result = this.getField(3) + } + + ThriftElement getChild(int n) { + result.getIndex() = n and result.getParent() = this + } + + ThriftElement getAChild() { + result = this.getChild(_) + } + + override string toString() { + result = this.getKind() + } + + string getPath() { + result = this.getField(4) + } + + private int line() { + result = this.getFieldAsInt(5) + } + + private int column() { + result = this.getFieldAsInt(6) + } + + predicate hasLocationInfo(string fp, int bl, int bc, int el, int ec) { + fp = this.getPath() and + bl = this.line() and + bc = this.column() and + el = this.line() and + ec = this.column() + this.getValue().length()-1 + or + exists(ThriftElement first, ThriftElement last | + first = this.getChild(min(int l | exists(this.getChild(l)))) and + last = this.getChild(max(int l | exists(this.getChild(l)))) and + first.hasLocationInfo(fp, bl, bc, _, _) and + last.hasLocationInfo(fp, _, _, el, ec) + ) + } + + File getFile() { + this.hasLocationInfo(result.getAbsolutePath(), _, _, _, _) + } + +} + +abstract class ThriftNamedElement extends ThriftElement { + + abstract ThriftElement getNameElement(); + + final string getName() { + result = this.getNameElement().getValue() + } + + override string toString() { + result = this.getKind() + " " + this.getName() + or + not exists(this.getName()) and result = this.getKind() + " ???" 
+ } + + override predicate hasLocationInfo(string fp, int bl, int bc, int el, int ec) { + exists(ThriftElement first | + first = this.getChild(min(int l | exists(this.getChild(l)))) and + first.hasLocationInfo(fp, bl, bc, _, _) and + this.getNameElement().hasLocationInfo(fp, _, _, el, ec) + ) + } + +} + +class ThriftType extends ThriftNamedElement { + + ThriftType() { + kind.matches("%type") + } + + override ThriftElement getNameElement() { + result = this.getChild(0) + or + result = this.getChild(0).(ThriftType).getNameElement() + } + + override string toString() { + result = "type " + this.getName() + } + + predicate references(ThriftStruct struct) { + this.getName() = struct.getName() and + exists(string path | + this.hasLocationInfo(path, _, _, _, _) and + struct.hasLocationInfo(path, _, _, _, _) + ) + } + +} + +/** A thrift typedef */ +class ThriftTypeDef extends ThriftNamedElement { + + ThriftTypeDef() { + kind.matches("typedef") + } + + override ThriftElement getNameElement() { + result = this.getChild(2).getChild(0) + } +} + +/** A thrift enum declaration */ +class ThriftEnum extends ThriftNamedElement { + + ThriftEnum() { + kind.matches("enum") + } + + override ThriftElement getNameElement() { + result = this.getChild(0).getChild(0) + } + +} + +/** A thrift enum field */ +class ThriftEnumField extends ThriftNamedElement { + + ThriftEnumField() { + kind.matches("enumfield") + } + + override ThriftElement getNameElement() { + result = this.getChild(0).getChild(0) + } + +} + +/** A thrift service declaration */ +class ThriftService extends ThriftNamedElement { + + ThriftService() { + kind.matches("service") + } + + override ThriftElement getNameElement() { + result = this.getChild(0).getChild(0) + } + + ThriftFunction getAFunction() { + result = this.getChild(_) + } + + ThriftFunction getFunction(string name) { + result.getName() = name and + result = this.getAFunction() + } + +} + +/** A thrift function declaration */ +class ThriftFunction extends 
ThriftNamedElement { + + ThriftFunction() { + kind.matches("function") + } + + override ThriftElement getNameElement() { + result = this.getChild(2).getChild(0) + } + + ThriftField getArgument(int n) { + result = this.getChild(n+3) + } + + ThriftField getAnArgument() { + result = this.getArgument(_) + } + + private ThriftThrows getAllThrows() { + result = this.getChild(_) + } + + ThriftField getAThrows() { + result = this.getAllThrows().getAChild() + } + + ThriftType getReturnType() { + result = this.getChild(1).getChild(0) + } + + override predicate hasLocationInfo(string fp, int bl, int bc, int el, int ec) { + this.getChild(1).hasLocationInfo(fp, bl, bc, _, _) and + this.getChild(2).hasLocationInfo(fp, _, _, el, ec) + } + + ThriftService getService() { + result.getAFunction() = this + } + + string getQualifiedName() { + result = this.getService().getName() + "." + this.getName() + } + +} + +class ThriftField extends ThriftNamedElement { + + ThriftField() { + kind.matches("field") + } + + override ThriftElement getNameElement() { + result = this.getChild(4) + } + + ThriftType getType() { + result = this.getChild(2) + } + +} + +class ThriftStruct extends ThriftNamedElement { + + ThriftStruct() { + kind.matches("struct") + } + + override ThriftElement getNameElement() { + result = this.getChild(0).getChild(0) + } + + ThriftField getMember(int n) { + result = this.getChild(n+1) + } + + ThriftField getAMember() { + result = this.getMember(_) + } + +} + + +class ThriftException extends ThriftNamedElement { + + ThriftException() { + kind.matches("exception") + } + + override ThriftElement getNameElement() { + result = this.getChild(0).getChild(0) + } + + ThriftField getMember(int n) { + result = this.getChild(n+1) + } + + ThriftField getAMember() { + result = this.getMember(_) + } + +} + + +class ThriftThrows extends ThriftElement { + + ThriftThrows() { + kind.matches("throws") + } + + ThriftField getAThrows() { + result = this.getChild(_) + } + +} + +/** A parse tree 
element that holds a primitive value */ +class ThriftValue extends ThriftElement { + + ThriftValue() { + exists(this.getValue()) + } + + override string toString() { + result = this.getKind() + " " + this.getValue() + } + +} diff --git a/python/ql/src/external/VCS.qll b/python/ql/src/external/VCS.qll new file mode 100644 index 00000000000..6b665dde510 --- /dev/null +++ b/python/ql/src/external/VCS.qll @@ -0,0 +1,92 @@ +import python + +class Commit extends @svnentry { + + Commit() { + svnaffectedfiles(this, _, _) and + exists(date svnDate, date snapshotDate | + svnentries(this, _, _, svnDate, _) and + snapshotDate(snapshotDate) and + svnDate <= snapshotDate + ) + } + + string toString() { result = this.getRevisionName() } + + string getRevisionName() { svnentries(this, result, _, _, _) } + + string getAuthor() { svnentries(this, _, result, _, _) } + + date getDate() { svnentries(this, _, _, result, _) } + + int getChangeSize() { svnentries(this, _, _, _, result) } + + string getMessage() { svnentrymsg(this, result) } + + string getAnAffectedFilePath(string action) { + exists(File rawFile | svnaffectedfiles(this, rawFile, action) | + result = rawFile.getName() + ) + } + + string getAnAffectedFilePath() { result = getAnAffectedFilePath(_) } + + File getAnAffectedFile(string action) { + svnaffectedfiles(this,result,action) + } + + File getAnAffectedFile() { exists(string action | result = this.getAnAffectedFile(action)) } + + predicate isRecent() { recentCommit(this) } + + int daysToNow() { + exists(date now | snapshotDate(now) | + result = getDate().daysTo(now) and result >= 0 + ) + } + + int getRecentAdditionsForFile(File f) { + svnchurn(this, f, result, _) + } + + int getRecentDeletionsForFile(File f) { + svnchurn(this, f, _, result) + } + + int getRecentChurnForFile(File f) { + result = getRecentAdditionsForFile(f) + getRecentDeletionsForFile(f) + } + +} + +class Author extends string { + Author() { exists(Commit e | this = e.getAuthor()) } + + Commit getACommit() 
{ result.getAuthor() = this } + + File getAnEditedFile() { result = this.getACommit().getAnAffectedFile() } + +} + +predicate recentCommit(Commit e) { + exists(date snapshotDate, date commitDate, int days | + snapshotDate(snapshotDate) and + e.getDate() = commitDate and + days = commitDate.daysTo(snapshotDate) and + days >= 0 and days <= 60 + ) +} + +date firstChange(File f) { + result = min(Commit e, date toMin | (f = e.getAnAffectedFile()) and (toMin = e.getDate()) | toMin) +} + +predicate firstCommit(Commit e) { + not exists(File f | f = e.getAnAffectedFile() | + firstChange(f) < e.getDate() + ) +} + +predicate artificialChange(Commit e) { + firstCommit(e) or e.getChangeSize() >= 50000 +} \ No newline at end of file diff --git a/python/ql/src/python.qll b/python/ql/src/python.qll new file mode 100644 index 00000000000..cdf33c8019f --- /dev/null +++ b/python/ql/src/python.qll @@ -0,0 +1,40 @@ +import semmle.python.Files +import semmle.python.Operations +import semmle.python.Variables +import semmle.python.AstGenerated +import semmle.python.AstExtended +import semmle.python.AST +import semmle.python.Function +import semmle.python.Module +import semmle.python.Class +import semmle.python.Import +import semmle.python.Stmts +import semmle.python.Exprs +import semmle.python.Keywords +import semmle.python.Comprehensions +import semmle.python.Lists +import semmle.python.Flow +import semmle.python.Metrics +import semmle.python.Constants +import semmle.python.Scope +import semmle.python.Comment +import semmle.python.GuardedControlFlow +import semmle.python.types.ImportTime +import semmle.python.types.Object +import semmle.python.types.ClassObject +import semmle.python.types.FunctionObject +import semmle.python.types.ModuleObject +import semmle.python.types.Version +import semmle.python.types.Descriptors +import semmle.python.protocols +import semmle.python.SSA +import semmle.python.Assigns +import semmle.python.SelfAttribute +import semmle.python.types.Properties +import 
semmle.python.xml.XML +import semmle.dataflow.SSA +import semmle.python.pointsto.Base +import semmle.python.pointsto.Context +import semmle.python.pointsto.CallGraph + +import site diff --git a/python/ql/src/queries.xml b/python/ql/src/queries.xml new file mode 100644 index 00000000000..27449f34263 --- /dev/null +++ b/python/ql/src/queries.xml @@ -0,0 +1 @@ + diff --git a/python/ql/src/semmle/crypto/Crypto.qll b/python/ql/src/semmle/crypto/Crypto.qll new file mode 100644 index 00000000000..12e81a393ce --- /dev/null +++ b/python/ql/src/semmle/crypto/Crypto.qll @@ -0,0 +1,202 @@ +/** + * Provides classes for modeling cryptographic libraries. + */ + +/* The following information is copied from `/semmlecode-javascript-queries/semmle/javascript/frameworks/CryptoLibraries.qll` + * which should be considered the definitive version (as of Feb 2018) + */ + + +/** + * Names of cryptographic algorithms, separated into strong and weak variants. + * + * The names are normalized: upper-case, no spaces, dashes or underscores. + * + * The names are inspired by the names used in real world crypto libraries. 
+ * + */ +private module AlgorithmNames { + predicate isStrongHashingAlgorithm(string name) { + name = "DSA" or + name = "ED25519" or + name = "ES256" or name = "ECDSA256" or + name = "ES384" or name = "ECDSA384" or + name = "ES512" or name = "ECDSA512" or + name = "SHA2" or + name = "SHA224" or + name = "SHA256" or + name = "SHA384" or + name = "SHA512" or + name = "SHA3" + } + + predicate isWeakHashingAlgorithm(string name) { + name = "HAVEL128" or + name = "MD2" or + name = "MD4" or + name = "MD5" or + name = "PANAMA" or + name = "RIPEMD" or + name = "RIPEMD128" or + name = "RIPEMD256" or + name = "RIPEMD160" or + name = "RIPEMD320" or + name = "SHA0" or + name = "SHA1" + } + + predicate isStrongEncryptionAlgorithm(string name) { + name = "AES" or + name = "AES128" or + name = "AES192" or + name = "AES256" or + name = "AES512" or + name = "RSA" or + name = "RABBIT" or + name = "BLOWFISH" + + } + + predicate isWeakEncryptionAlgorithm(string name) { + name = "DES" or + name = "3DES" or name = "TRIPLEDES" or name = "TDEA" or name = "TRIPLEDEA" or + name = "ARC2" or name = "RC2" or + name = "ARC4" or name = "RC4" or name = "ARCFOUR" or + name = "ARC5" or name = "RC5" + } + + predicate isStrongPasswordHashingAlgorithm(string name) { + name = "ARGON2" or + name = "PBKDF2" or + name = "BCRYPT" or + name = "SCRYPT" + } + + predicate isWeakPasswordHashingAlgorithm(string name) { + none() + } + + /** + * Normalizes `name`: upper-case, no spaces, dashes or underscores. + * + * All names of this module are in this normalized form. + */ + bindingset[name] string normalizeName(string name) { + result = name.toUpperCase().regexpReplaceAll("[-_ ]", "") + } + +} +private import AlgorithmNames + + +/** + * A cryptographic algorithm. 
+ */ +private newtype TCryptographicAlgorithm = +MkHashingAlgorithm(string name, boolean isWeak) { + (isStrongHashingAlgorithm(name) and isWeak = false) or + (isWeakHashingAlgorithm(name) and isWeak = true) +} +or +MkEncryptionAlgorithm(string name, boolean isWeak) { + (isStrongEncryptionAlgorithm(name) and isWeak = false) or + (isWeakEncryptionAlgorithm(name) and isWeak = true) +} +or +MkPasswordHashingAlgorithm(string name, boolean isWeak) { + (isStrongPasswordHashingAlgorithm(name) and isWeak = false) or + (isWeakPasswordHashingAlgorithm(name) and isWeak = true) +} + +/** + * A cryptographic algorithm. + */ +abstract class CryptographicAlgorithm extends TCryptographicAlgorithm { + + /** Gets a textual representation of this element. */ + string toString() { + result = getName() + } + + /** + * Gets the name of the algorithm. + */ + abstract string getName(); + + /** + * Holds if this algorithm is weak. + */ + abstract predicate isWeak(); + +} + +/** + * A hashing algorithm such as `MD5` or `SHA512`. + */ +class HashingAlgorithm extends MkHashingAlgorithm, CryptographicAlgorithm { + + string name; + + boolean isWeak; + + HashingAlgorithm() { + this = MkHashingAlgorithm(name, isWeak) + } + + override string getName() { + result = name + } + + override predicate isWeak() { + isWeak = true + } + +} + +/** + * An encryption algorithm such as `DES` or `AES512`. + */ +class EncryptionAlgorithm extends MkEncryptionAlgorithm, CryptographicAlgorithm { + + string name; + + boolean isWeak; + + EncryptionAlgorithm() { + this = MkEncryptionAlgorithm(name, isWeak) + } + + override string getName() { + result = name + } + + override predicate isWeak() { + isWeak = true + } + +} + +/** + * A password hashing algorithm such as `PBKDF2` or `SCRYPT`. 
+ */ +class PasswordHashingAlgorithm extends MkPasswordHashingAlgorithm, CryptographicAlgorithm { + + string name; + + boolean isWeak; + + PasswordHashingAlgorithm() { + this = MkPasswordHashingAlgorithm(name, isWeak) + } + + override string getName() { + result = name + } + + override predicate isWeak() { + isWeak = true + } +} + + + diff --git a/python/ql/src/semmle/dataflow/SSA.qll b/python/ql/src/semmle/dataflow/SSA.qll new file mode 100755 index 00000000000..0338b6a2e0a --- /dev/null +++ b/python/ql/src/semmle/dataflow/SSA.qll @@ -0,0 +1,554 @@ +/** + * Library for SSA representation (Static Single Assignment form). + */ + +import python +private import SsaCompute + +/* The general intent of this code is to assume only the following interfaces, + * although several Python-specific parts may have crept in. + * + * SsaSourceVariable { ... } // See interface below + * + * + * BasicBlock { + * + * ControlFlowNode getNode(int n); + * + * BasicBlock getImmediateDominator(); + * + * BasicBlock getAPredecessor(); + * + * BasicBlock getATrueSuccessor(); + * + * BasicBlock getAFalseSuccessor(); + * + * predicate dominanceFrontier(BasicBlock other); + * + * predicate strictlyDominates(BasicBlock other); + * + * predicate hasLocationInfo(string f, int bl, int bc, int el, int ec); + * + * } + * + * ControlFlowNode { + * + * Location getLocation(); + * + * BasicBlock getBasicBlock(); + * + * } + * + */ + + + /** A source language variable, to be converted into a set of SSA variables. */ +abstract class SsaSourceVariable extends @py_variable { + + /** Gets the name of this variable */ + abstract string getName(); + + string toString() { + result = "SsaSourceVariable " + this.getName() + } + + /** Gets a use of this variable, either explicit or implicit. */ + abstract ControlFlowNode getAUse(); + + /** Holds if `def` defines an ESSA variable for this variable. 
*/ + abstract predicate hasDefiningNode(ControlFlowNode def); + + /** Holds if the edge `pred`->`succ` defines an ESSA variable for this variable. */ + abstract predicate hasDefiningEdge(BasicBlock pred, BasicBlock succ); + + /** Holds if `def` defines an ESSA variable for this variable in such a way + * that the new variable is a refinement in some way of the variable used at `use`. + */ + abstract predicate hasRefinement(ControlFlowNode use, ControlFlowNode def); + + /** Holds if the edge `pred`->`succ` defines an ESSA variable for this variable in such a way + * that the new variable is a refinement in some way of the variable used at `use`. + */ + abstract predicate hasRefinementEdge(ControlFlowNode use, BasicBlock pred, BasicBlock succ); + + /** Gets a use of this variable that corresponds to an explicit use in the source. */ + abstract ControlFlowNode getASourceUse(); + +} + +/** An (enhanced) SSA variable derived from `SsaSourceVariable`. */ +class EssaVariable extends TEssaDefinition { + + /** Gets the (unique) definition of this variable. */ + EssaDefinition getDefinition() { + this = result + } + + /** Gets a use of this variable, where a "use" is defined by + * `SsaSourceVariable.getAUse()`. + * Note that this differs from `EssaVariable.getASourceUse()`. + */ + ControlFlowNode getAUse() { + result = this.getDefinition().getAUse() + } + + /** Gets the source variable from which this variable is derived. */ + SsaSourceVariable getSourceVariable() { + result = this.getDefinition().getSourceVariable() + } + + /** Gets the name of this variable. */ + string getName() { + result = this.getSourceVariable().getName() + } + + string toString() { + result = "SSA variable " + this.getName() + } + + /** Gets a string representation of this variable. + * WARNING: The format of this may change and it may be very inefficient to compute. + * To be used for debugging and testing only. 
+ */ + string getRepresentation() { + result = this.getSourceVariable().getName() + "_" + var_rank(this) + } + + /** Gets a use of this variable, where a "use" is defined by + * `SsaSourceVariable.getASourceUse()`. + * Note that this differs from `EssaVariable.getAUse()`. + */ + ControlFlowNode getASourceUse() { + result = this.getAUse() and + result = this.getSourceVariable().getASourceUse() + } + + /** Gets the scope of this variable. */ + Scope getScope() { + result = this.getDefinition().getScope() + } + +} + +/* Helper for location_string + * NOTE: This is Python specific, to make `getRepresentation()` portable will require further work. + */ +private int exception_handling(BasicBlock b) { + b.reachesExit() and result = 0 + or + not b.reachesExit() and result = 1 +} + +/* Helper for var_index. Come up with a (probably) unique string per location. */ +pragma[noinline] +private string location_string(EssaVariable v) { + exists(EssaDefinition def, BasicBlock b, int index, int line, int col | + def = v.getDefinition() and + (if b.getNode(0).isNormalExit() then + line = 100000 and col = 0 + else + b.hasLocationInfo(_, line, col, _, _) + ) and + /* Add large numbers to values to prevent 1000 sorting before 99 */ + result = (line + 100000) + ":" + (col*2 + 10000 + exception_handling(b)) + ":" + (index + 100003) + | + def = TEssaNodeDefinition(_, b, index) + or + def = TEssaEdgeDefinition(_, _, b) and index = piIndex() + or + def = TPhiFunction(_, b) and index = phiIndex() + ) +} + +/* Helper to compute an index for this SSA variable. 
*/ +private int var_index(EssaVariable v) { + location_string(v) = rank[result](string s | exists(EssaVariable x | location_string(x) = s) | s) +} + +/* Helper for `v.getRepresentation()` */ +private int var_rank(EssaVariable v) { + exists(int r, SsaSourceVariable var | + var = v.getSourceVariable() and + var_index(v) = rank[r](EssaVariable x | x.getSourceVariable() = var | var_index(x)) and + result = r-1 + ) +} + +/** Underlying IPA type for EssaDefinition and EssaVariable. */ +private cached newtype TEssaDefinition = + TEssaNodeDefinition(SsaSourceVariable v, BasicBlock b, int i) { + EssaDefinitions::variableUpdate(v, _, b, _, i) + } + or + TEssaEdgeDefinition(SsaSourceVariable v, BasicBlock pred, BasicBlock succ) { + EssaDefinitions::piNode(v, pred, succ) + } + or + TPhiFunction(SsaSourceVariable v, BasicBlock b) { + EssaDefinitions::phiNode(v, b) + } + +/** Definition of an extended-SSA (ESSA) variable. + * There is exactly one definition for each variable, + * and exactly one variable for each definition. + */ +abstract class EssaDefinition extends TEssaDefinition { + + string toString() { + result = "EssaDefinition" + } + + /** Gets the source variable for which this is a definition, either explicit or implicit. */ + abstract SsaSourceVariable getSourceVariable(); + + /** Gets a use of this definition as defined by the `SsaSourceVariable` class. */ + abstract ControlFlowNode getAUse(); + + /** Holds if this definition reaches the end of `b`. */ + abstract predicate reachesEndOfBlock(BasicBlock b); + + /** Gets the location of a control flow node that is indicative of this definition. + * Since definitions may occur on edges of the control flow graph, the given location may + * be imprecise. + * Distinct `EssaDefinitions` may return the same ControlFlowNode even for + * the same variable. + */ + abstract Location getLocation(); + + /** Gets a representation of this SSA definition for debugging purposes. 
+ * Since this is primarily for debugging and testing, performance may be poor. */ + abstract string getRepresentation(); + + abstract Scope getScope(); + + EssaVariable getVariable() { + result.getDefinition() = this + } + +} + +/** An ESSA definition corresponding to an edge refinement of the underlying variable. + * For example, the edges leaving a test on a variable both represent refinements of that + * variable. On one edge the test is true, on the other it is false. + */ +class EssaEdgeRefinement extends EssaDefinition, TEssaEdgeDefinition { + + override string toString() { + result = "SSA filter definition" + } + + boolean getSense() { + this.getPredecessor().getATrueSuccessor() = this.getSuccessor() and result = true + or + this.getPredecessor().getAFalseSuccessor() = this.getSuccessor() and result = false + } + + override SsaSourceVariable getSourceVariable() { + this = TEssaEdgeDefinition(result, _, _) + } + + /** Gets the basic block preceding the edge on which this refinement occurs. */ + BasicBlock getPredecessor() { + this = TEssaEdgeDefinition(_, result, _) + } + + /** Gets the basic block succeeding the edge on which this refinement occurs. */ + BasicBlock getSuccessor() { + this = TEssaEdgeDefinition(_, _, result) + } + + override ControlFlowNode getAUse() { + SsaDefinitions::reachesUse(this.getSourceVariable(), this.getSuccessor(), piIndex(), result) + } + + override predicate reachesEndOfBlock(BasicBlock b) { + SsaDefinitions::reachesEndOfBlock(this.getSourceVariable(), this.getSuccessor(), piIndex(), b) + } + + override Location getLocation() { + result = this.getSuccessor().getNode(0).getLocation() + } + + /** Gets the SSA variable to which this refinement applies. 
*/ + EssaVariable getInput() { + exists(SsaSourceVariable var , EssaDefinition def | + var = this.getSourceVariable() and + var = def.getSourceVariable() and + def.reachesEndOfBlock(this.getPredecessor()) and + result.getDefinition() = def + ) + } + + override string getRepresentation() { + result = this.getAQlClass() + "(" + this.getInput().getRepresentation() + ")" + } + + /** Gets the scope of the variable defined by this definition. */ + override Scope getScope() { + result = this.getPredecessor().getScope() + } + +} + +/** A Phi-function as specified in classic SSA form. */ +class PhiFunction extends EssaDefinition, TPhiFunction { + + override ControlFlowNode getAUse() { + SsaDefinitions::reachesUse(this.getSourceVariable(), this.getBasicBlock(), phiIndex(), result) + } + + override predicate reachesEndOfBlock(BasicBlock b) { + SsaDefinitions::reachesEndOfBlock(this.getSourceVariable(), this.getBasicBlock(), phiIndex(), b) + } + + override SsaSourceVariable getSourceVariable() { + this = TPhiFunction(result, _) + } + + /** Gets an input refinement that exists on one of the incoming edges to this phi node. */ + private EssaEdgeRefinement inputEdgeRefinement(BasicBlock pred) { + result.getSourceVariable() = this.getSourceVariable() and + result.getSuccessor() = this.getBasicBlock() and + result.getPredecessor() = pred + } + + private BasicBlock nonPiInput() { + result = this.getBasicBlock().getAPredecessor() and + not exists(this.inputEdgeRefinement(result)) + } + + /** Gets another definition of the same source variable that reaches this definition. */ + private EssaDefinition reachingDefinition(BasicBlock pred) { + result.getScope() = this.getScope() and + result.getSourceVariable() = this.getSourceVariable() and + pred = this.nonPiInput() and + result.reachesEndOfBlock(pred) + } + + /** Gets the input variable for this phi node on the edge `pred` -> `this.getBasicBlock()`, if any. 
*/ + pragma [noinline] + EssaVariable getInput(BasicBlock pred) { + result.getDefinition() = this.reachingDefinition(pred) + or + result.getDefinition() = this.inputEdgeRefinement(pred) + } + + /** Gets an input variable for this phi node. */ + EssaVariable getAnInput() { + result = this.getInput(_) + } + + /** Holds if forall incoming edges in the flow graph, there is an input variable */ + predicate isComplete() { + forall(BasicBlock pred | + pred = this.getBasicBlock().getAPredecessor() | + exists(this.getInput(pred)) + ) + } + + override string toString() { + result = "SSA Phi Function" + } + + /** Gets the basic block that succeeds this phi node. */ + BasicBlock getBasicBlock() { + this = TPhiFunction(_, result) + } + + override Location getLocation() { + result = this.getBasicBlock().getNode(0).getLocation() + } + + /** Helper for `argList(n)`. */ + private int rankInput(EssaVariable input) { + input = this.getAnInput() and + var_index(input) = rank[result](EssaVariable v | v = this.getAnInput() | var_index(v)) + } + + /** Helper for `argList()`. */ + private string argList(int n) { + exists(EssaVariable input | + n = this.rankInput(input) + | + n = 1 and result = input.getRepresentation() + or + n > 1 and result = this.argList(n-1) + ", " + input.getRepresentation() + ) + } + + /** Helper for `getRepresentation()`. 
*/ + private string argList() { + exists(int last | + last = (max(int x | x = this.rankInput(_))) and + result = this.argList(last) + ) + } + + override string getRepresentation() { + not exists(this.getAnInput()) and result = "phi()" + or + result = "phi(" + this.argList() + ")" + or + exists(this.getAnInput()) and not exists(this.argList()) and + result = "phi(" + this.getSourceVariable().getName() + "??)" + } + + override Scope getScope() { + result = this.getBasicBlock().getScope() + } + + private EssaEdgeRefinement piInputDefinition(EssaVariable input) { + input = this.getAnInput() and + result = input.getDefinition() + or + input = this.getAnInput() and result = input.getDefinition().(PhiFunction).piInputDefinition(_) + } + + /** Gets the variable which is the common and complete input to all pi-nodes that are themselves + * inputs to this phi-node. + * For example: + * ``` + * x = y() + * if complicated_test(x): + * do_a() + * else: + * do_b() + * phi + * ``` + * Which gives us the ESSA form: + * x0 = y() + * x1 = pi(x0, complicated_test(x0)) + * x2 = pi(x0, not complicated_test(x0)) + * x3 = phi(x1, x2) + * However we may not be able to track the value of `x` through `complicated_test` + * meaning that we cannot track `x` from `x0` to `x3`. + * By using `getShortCircuitInput()` we can do so, since the short-circuit input of `x3` is `x0`. 
+ */ + pragma [noinline] + EssaVariable getShortCircuitInput() { + exists(BasicBlock common | + forall(EssaVariable input | + input = this.getAnInput() | + common = this.piInputDefinition(input).getPredecessor() + ) + and + forall(BasicBlock succ | + succ = common.getASuccessor() | + succ = this.piInputDefinition(_).getSuccessor() + ) + and + exists(EssaEdgeRefinement ref | + ref = this.piInputDefinition(_) and + ref.getPredecessor() = common and + ref.getInput() = result + ) + ) + } +} + +library class EssaNode extends EssaDefinition, TEssaNodeDefinition { + + override string toString() { + result = "Essa node definition" + } + + /** Gets the ControlFlowNode corresponding to this definition */ + ControlFlowNode getDefiningNode() { + this.definedBy(_, result) + } + + override Location getLocation() { + result = this.getDefiningNode().getLocation() + } + + override ControlFlowNode getAUse() { + exists(SsaSourceVariable v, BasicBlock b, int i | + this = TEssaNodeDefinition(v, b, i) and + SsaDefinitions::reachesUse(v, b, i, result) + ) + } + + override predicate reachesEndOfBlock(BasicBlock b) { + exists(BasicBlock defb, int i | + this = TEssaNodeDefinition(_, defb, i) and + SsaDefinitions::reachesEndOfBlock(this.getSourceVariable(), defb, i, b) + ) + } + + override SsaSourceVariable getSourceVariable() { + this = TEssaNodeDefinition(result, _, _) + } + + override string getRepresentation() { + result = this.getDefiningNode().toString() + } + + override Scope getScope() { + exists(BasicBlock defb | + this = TEssaNodeDefinition(_, defb, _) and + result = defb.getScope() + ) + } + + predicate definedBy(SsaSourceVariable v, ControlFlowNode def) { + exists(BasicBlock b, int i | + def = b.getNode(i) | + this = TEssaNodeDefinition(v, b, i+i) + or + this = TEssaNodeDefinition(v, b, i+i+1) + ) + } + +} + +/** A definition of an ESSA variable that is not directly linked to + * another ESSA variable. 
+ */ +class EssaNodeDefinition extends EssaNode { + + EssaNodeDefinition() { + this.getSourceVariable().hasDefiningNode(this.getDefiningNode()) + } + +} + +/** A definition of an ESSA variable that takes another ESSA variable as an input. + */ +class EssaNodeRefinement extends EssaNode { + + EssaNodeRefinement() { + exists(SsaSourceVariable v, ControlFlowNode def | + this.definedBy(v, def) and + v.hasRefinement(_, def) + ) + } + + override string toString() { + result = "SSA filter definition" + } + + /** Gets the SSA variable to which this refinement applies. */ + EssaVariable getInput() { + result = potential_input(this) and + not result = potential_input(potential_input(this).getDefinition()) + } + + override string getRepresentation() { + result = this.getAQlClass() + "(" + this.getInput().getRepresentation() + ")" + } + +} + +pragma[noopt] +private EssaVariable potential_input(EssaNodeRefinement ref) { + exists(EssaNode node, ControlFlowNode use, SsaSourceVariable var, ControlFlowNode def | + var.hasRefinement(use, def) and + use = result.getAUse() and + var = result.getSourceVariable() and + def = node.getDefiningNode() and + var = node.getSourceVariable() and + ref = (EssaNodeRefinement)node + ) +} + + diff --git a/python/ql/src/semmle/dataflow/SsaCompute.qll b/python/ql/src/semmle/dataflow/SsaCompute.qll new file mode 100644 index 00000000000..dbdd51e115c --- /dev/null +++ b/python/ql/src/semmle/dataflow/SsaCompute.qll @@ -0,0 +1,314 @@ +/** Provides predicates for computing Enhanced SSA form + * Computation of ESSA form is identical to plain SSA form, + * but what counts as a use of definition differs. + * + * ## Language independent data-flow graph construction + * + * Construction of the data-flow graph is based on the principles behind SSA variables. + * + * The definition of an SSA variable is that (statically): + * + * * Each variable has exactly one definition + * * A variable's definition dominates all its uses. 
+ * + * SSA form was originally designed for compiler use and thus a "definition" of an SSA variable is + * the same as a definition of the underlying source-code variable. For register allocation this is + * sufficient to treat the variable as equivalent to the value held in the variable. + * + * However, this doesn't always work the way we want it for data-flow analysis. + * + * When we start to consider attribute assignment, tests on the value referred to by a variable, + * escaping variables, implicit definitions, etc., we need something finer grained. + * + * A data-flow variable has the same properties as a normal SSA variable, but it also has the property that + * *anything* that may change the way we view an object referred to by a variable should be treated as a definition of that variable. + * + * For example, tests are treated as definitions, so for the following Python code: + * ```python + * x = None + * if not x: + * x = True + * ``` + * The data-flow graph (for `x`) is: + * ``` + * x0 = None + * x1 = pi(x0, not x) + * x2 = True + * x3 = phi(x1, x2) + * ``` + * from which it is possible to infer that `x3` may not be None. + * [ Phi functions are standard SSA, a Pi function is a filter or guard on the possible values that a variable + * may hold] + * + * Attribute assignments are also treated as definitions, so for the following Python code: + * ```python + * x = C() + * x.a = 1 + * y = C() + * y.b = 2 + * ``` + * The data-flow graph is: + * ``` + * x0 = C() + * x1 = attr-assign(x0, .a = 1) + * y0 = C() + * y1 = attr-assign(y0, .b = 2) + * ``` + * From which we can infer that `x1.a` is `1` but we know nothing about `y0.a` despite it being the same type. + * + * We can also insert "definitions" for transfers of values (say in global variables) where we do not yet know the call-graph. 
For example, + * ```python + * def foo(): + * global g + * g = 1 + * + * def bar(): + * foo() + * g + * ``` + * It should be clear in the above code that the use of `g` will have a value of `1`. + * The data-flow graph looks like: + * ```python + * def foo(): + * g0 = scope-entry(g) + * g1 = 1 + * + * def bar(): + * g2 = scope-entry(g) + * foo() + * g3 = call-site(g, foo()) + * ``` + * Once we have established that `foo()` calls `foo`, then it is possible to link `call-site(g, foo())` to the final value of `g` in `foo`, i.e. `g1`, so effectively `g3 = call-site(g, foo())` becomes `g3 = g1` and the global data-flow graph for `g` effectively becomes: + * ``` + * g0 = scope-entry(g) + * g1 = 1 + * g2 = scope-entry(g) + * g3 = g1 + * ``` + * and thus it falls out that `g3` must be `1`. + * + */ + + +import python +import semmle.dataflow.SSA + + +private cached module SsaComputeImpl { + + cached module EssaDefinitionsImpl { + + /** Whether `n` is a live update that is a definition of the variable `v`. */ + cached predicate variableUpdate(SsaSourceVariable v, ControlFlowNode n, BasicBlock b, int rankix, int i) { + SsaComputeImpl::variableDef(v, n, b, i) and + SsaComputeImpl::defUseRank(v, b, rankix, i) and + ( + SsaComputeImpl::defUseRank(v, b, rankix+1, _) and not SsaComputeImpl::defRank(v, b, rankix+1, _) + or + not SsaComputeImpl::defUseRank(v, b, rankix+1, _) and Liveness::liveAtExit(v, b) + ) + } + + /** Holds if `def` is a pi-node for `v` on the edge `pred` -> `succ` */ + cached predicate piNode(SsaSourceVariable v, BasicBlock pred, BasicBlock succ) { + v.hasRefinementEdge(_, pred, succ) and + Liveness::liveAtEntry(v, succ) + } + + /** A phi node for `v` at the beginning of basic block `b`. 
*/ + cached predicate phiNode(SsaSourceVariable v, BasicBlock b) { + ( + exists(BasicBlock def | def.dominanceFrontier(b) | + SsaComputeImpl::ssaDef(v, def) + ) + or + piNode(v, _, b) and strictcount(b.getAPredecessor()) > 1 + ) and + Liveness::liveAtEntry(v, b) + } + } + + cached predicate variableDef(SsaSourceVariable v, ControlFlowNode n, BasicBlock b, int i) { + (v.hasDefiningNode(n) or v.hasRefinement(_, n)) + and + exists(int j | + n = b.getNode(j) and + i = j*2 + 1 + ) + } + + /** + * A ranking of the indices `i` at which there is an SSA definition or use of + * `v` in the basic block `b`. + * + * Basic block indices are translated to rank indices in order to skip + * irrelevant indices at which there is no definition or use when traversing + * basic blocks. + */ + cached predicate defUseRank(SsaSourceVariable v, BasicBlock b, int rankix, int i) { + i = rank[rankix](int j | variableDef(v, _, b, j) or variableUse(v, _, b, j)) + } + + /** A definition of a variable occurring at the specified rank index in basic block `b`. */ + cached predicate defRank(SsaSourceVariable v, BasicBlock b, int rankix, int i) { + variableDef(v, _, b, i) and + defUseRank(v, b, rankix, i) + } + + /** A `VarAccess` `use` of `v` in `b` at index `i`. */ + cached predicate variableUse(SsaSourceVariable v, ControlFlowNode use, BasicBlock b, int i) { + (v.getAUse() = use or v.hasRefinement(use, _)) and + exists(int j | + b.getNode(j) = use and + i = 2*j + ) + } + + /** + * A definition of an SSA variable occurring at the specified position. + * This is either a phi node, a `VariableUpdate`, or a parameter. + */ + cached predicate ssaDef(SsaSourceVariable v, BasicBlock b) { + EssaDefinitions::phiNode(v, b) + or + EssaDefinitions::variableUpdate(v, _, b, _, _) + or + EssaDefinitions::piNode(v, _, b) + } + + /* + * The construction of SSA form ensures that each use of a variable is + * dominated by its definition. 
A definition of an SSA variable therefore + * reaches a `ControlFlowNode` if it is the _closest_ SSA variable definition + * that dominates the node. If two definitions dominate a node then one must + * dominate the other, so the definition of _closest_ is given by the + * dominator tree. Thus, reaching definitions can be calculated in terms of + * dominance. + */ + + /** The maximum rank index for the given variable and basic block, or 0 if the block has no ranked definition or use of `v` but does have a phi node or pi node for it. */ + cached int lastRank(SsaSourceVariable v, BasicBlock b) { + result = max(int rankix | defUseRank(v, b, rankix, _)) + or + not defUseRank(v, b, _, _) and (EssaDefinitions::phiNode(v, b) or EssaDefinitions::piNode(v, _, b)) and result = 0 + } + + private predicate ssaDefRank(SsaSourceVariable v, BasicBlock b, int rankix, int i) { + EssaDefinitions::variableUpdate(v, _, b, rankix, i) + or + EssaDefinitions::phiNode(v, b) and rankix = 0 and i = phiIndex() + or + EssaDefinitions::piNode(v, _, b) and EssaDefinitions::phiNode(v, b) and rankix = -1 and i = piIndex() + or + EssaDefinitions::piNode(v, _, b) and not EssaDefinitions::phiNode(v, b) and rankix = 0 and i = piIndex() + } + + /** The SSA definition of `v` at index `i` reaches the rank index `rankix` in its own basic block `b`. */ + cached predicate ssaDefReachesRank(SsaSourceVariable v, BasicBlock b, int i, int rankix) { + ssaDefRank(v, b, rankix, i) or + ssaDefReachesRank(v, b, i, rankix-1) and rankix <= lastRank(v, b) and not ssaDefRank(v, b, rankix, _) + } + + /** + * The SSA definition of `v` at index `i` in `b` reaches `use` in the same basic block + * without crossing another SSA definition of `v`. 
+ */ + cached predicate ssaDefReachesUseWithinBlock(SsaSourceVariable v, BasicBlock b, int i, ControlFlowNode use) { + exists(int rankix, int useix | + ssaDefReachesRank(v, b, i, rankix) and + defUseRank(v, b, rankix, useix) and + variableUse(v, use, b, useix) + ) + } + + cached module LivenessImpl { + + cached predicate liveAtExit(SsaSourceVariable v, BasicBlock b) { + liveAtEntry(v, b.getASuccessor()) + } + + cached predicate liveAtEntry(SsaSourceVariable v, BasicBlock b) { + SsaComputeImpl::defUseRank(v, b, 1, _) and not SsaComputeImpl::defRank(v, b, 1, _) + or + not SsaComputeImpl::defUseRank(v, b, _, _) and liveAtExit(v, b) + } + + } + + cached module SsaDefinitionsImpl { + + /** + * The SSA definition of `v` at `(defbb, defindex)` reaches the end of a basic block `b`, at + * which point it is still live, without crossing another SSA definition of `v`. + */ + cached + predicate reachesEndOfBlock(SsaSourceVariable v, BasicBlock defbb, int defindex, BasicBlock b) { + Liveness::liveAtExit(v, b) and + ( + defbb = b and SsaComputeImpl::ssaDefReachesRank(v, defbb, defindex, SsaComputeImpl::lastRank(v, b)) + or + exists(BasicBlock idom | + idom = b.getImmediateDominator() and + // It is sufficient to traverse the dominator graph, cf. discussion above. + reachesEndOfBlock(v, defbb, defindex, idom) and + not SsaComputeImpl::ssaDef(v, b) + ) + ) + } + + /** + * The SSA definition of `v` at `(defbb, defindex)` reaches `use` without crossing another + * SSA definition of `v`. 
+ */ + cached + predicate reachesUse(SsaSourceVariable v, BasicBlock defbb, int defindex, ControlFlowNode use) { + SsaComputeImpl::ssaDefReachesUseWithinBlock(v, defbb, defindex, use) or + exists(BasicBlock b | + SsaComputeImpl::variableUse(v, use, b, _) and + reachesEndOfBlock(v, defbb, defindex, b.getAPredecessor()) and + not SsaComputeImpl::ssaDefReachesUseWithinBlock(v, b, _, use) + ) + } + + /** + * Holds if `(defbb, defindex)` is an SSA definition of `v` that reaches an exit without crossing another + * SSA definition of `v`. + */ + cached + predicate reachesExit(SsaSourceVariable v, BasicBlock defbb, int defindex) { + exists(BasicBlock last, ControlFlowNode use, int index | + not Liveness::liveAtExit(v, last) and + reachesUse(v, defbb, defindex, use) and + SsaComputeImpl::defUseRank(v, last, SsaComputeImpl::lastRank(v, last), index) and + SsaComputeImpl::variableUse(v, use, last, index) + ) + } + + } + +} + +import SsaComputeImpl::SsaDefinitionsImpl as SsaDefinitions +import SsaComputeImpl::EssaDefinitionsImpl as EssaDefinitions +import SsaComputeImpl::LivenessImpl as Liveness + +/* This is exported primarily for testing */ + + +/* A note on numbering + * In order to create an SSA graph, we need an order of definitions and uses within a basic block. + * To do this we index definitions and uses as follows: + * Phi-functions have an index of -1, so precede all normal uses and definitions in a block. + * Pi-functions (on edges) have an index of -2 in the successor block, so precede all other uses and definitions, including phi-functions. + * A use of a variable at a CFG node is assumed to occur before any definition at the same node, so: + * * a use at the `j`th node of a block is given the index `2*j` and + * * a definition at the `j`th node of a block is given the index `2*j + 1`. 
+ */ + +pragma [inline] +int phiIndex() { result = -1 } + +pragma [inline] +int piIndex() { result = -2 } + + diff --git a/python/ql/src/semmle/files/FileSystem.qll b/python/ql/src/semmle/files/FileSystem.qll new file mode 100644 index 00000000000..4ec67c7c2e6 --- /dev/null +++ b/python/ql/src/semmle/files/FileSystem.qll @@ -0,0 +1,2 @@ +/** Provides classes for working with files and folders. */ +import semmle.python.Files diff --git a/python/ql/src/semmle/python/AST.qll b/python/ql/src/semmle/python/AST.qll new file mode 100644 index 00000000000..0ac8db03e32 --- /dev/null +++ b/python/ql/src/semmle/python/AST.qll @@ -0,0 +1,57 @@ +import python + +/** Syntactic node (Class, Function, Module, Expr, Stmt or Comprehension) corresponding to a flow node */ +abstract class AstNode extends AstNode_ { + + /** Gets the scope that this node occurs in */ + abstract Scope getScope(); + + /** Gets a flow node corresponding directly to this node. + * NOTE: For some statements and other purely syntactic elements, + * there may not be a `ControlFlowNode` */ + ControlFlowNode getAFlowNode() { + py_flow_bb_node(result, this, _, _) + } + + /** Gets the location for this AST node */ + Location getLocation() { + none() + } + + /** Whether this syntactic element is artificial, that is it is generated + * by the compiler and is not present in the source */ + predicate isArtificial() { + none() + } + + /** Gets a child node of this node in the AST. This predicate exists to aid exploration of the AST + * and other experiments. The child-parent relation may not be meaningful. + * For a more meaningful relation in terms of dependency use + * Expr.getASubExpression(), Stmt.getASubStatement(), Stmt.getASubExpression() or + * Scope.getAStmt(). + */ + abstract AstNode getAChildNode(); + + /** Gets the parent node of this node in the AST. This predicate exists to aid exploration of the AST + * and other experiments. The child-parent relation may not be meaningful. 
+ * For a more meaningful relation in terms of dependency use + * Expr.getASubExpression(), Stmt.getASubStatement(), Stmt.getASubExpression() or + * Scope.getAStmt() applied to the parent. + */ + AstNode getParentNode() { + result.getAChildNode() = this + } + + /** Whether this contains `inner` syntactically */ + predicate contains(AstNode inner) { + this.getAChildNode+() = inner + } + + /** Whether this contains `inner` syntactically and `inner` has the same scope as `this` */ + predicate containsInScope(AstNode inner) { + this.contains(inner) and + this.getScope() = inner.getScope() and + not inner instanceof Scope + } + +} diff --git a/python/ql/src/semmle/python/Assigns.qll b/python/ql/src/semmle/python/Assigns.qll new file mode 100644 index 00000000000..ad72645ffd5 --- /dev/null +++ b/python/ql/src/semmle/python/Assigns.qll @@ -0,0 +1,19 @@ +/** + * In order to handle data flow and other analyses efficiently the extractor transforms various statements which perform binding in assignments. + * These classes provide a wrapper to provide a more 'natural' interface to the syntactic elements transformed to assignments. 
+ */ + +import python + + +/** An assignment statement: an `Assign` that is neither a `FunctionDef` nor a `ClassDef` */ +class AssignStmt extends Assign { + + AssignStmt() { + not this instanceof FunctionDef and not this instanceof ClassDef + } + + override string toString() { + result = "AssignStmt" + } +} diff --git a/python/ql/src/semmle/python/AstExtended.qll b/python/ql/src/semmle/python/AstExtended.qll new file mode 100644 index 00000000000..b109fda18e2 --- /dev/null +++ b/python/ql/src/semmle/python/AstExtended.qll @@ -0,0 +1,118 @@ +import python + +/* Parents */ + +/** Internal implementation class */ +library class FunctionParent extends FunctionParent_ { + +} + +/** Internal implementation class */ +library class ArgumentsParent extends ArgumentsParent_ { + +} + +/** Internal implementation class */ +library class ExprListParent extends ExprListParent_ { + +} + +/** Internal implementation class */ +library class ExprContextParent extends ExprContextParent_ { + +} + +/** Internal implementation class */ +library class StmtListParent extends StmtListParent_ { + +} + +/** Internal implementation class */ +library class StrListParent extends StrListParent_ { + +} + +/** Internal implementation class */ +library class ExprParent extends ExprParent_ { + +} + +library class DictItem extends DictItem_, AstNode { + + override string toString() { + result = DictItem_.super.toString() + } + + override AstNode getAChildNode() { none() } + + override Scope getScope() { none() } + +} + +/** A comprehension part, the 'for a in seq' part of [ a * a for a in seq ] */ +class Comprehension extends Comprehension_, AstNode { + + /** Gets the scope of this comprehension */ + override Scope getScope() { + /* Comprehensions exist only in Python 2 list comprehensions, so their scope is that of the list comp. 
*/ + exists(ListComp l | + this = l.getAGenerator() | + result = l.getScope() + ) + } + + override string toString() { + result = "Comprehension" + } + + override Location getLocation() { + result = Comprehension_.super.getLocation() + } + + override AstNode getAChildNode() { + result = this.getASubExpression() + } + + Expr getASubExpression() { + result = this.getIter() or + result = this.getAnIf() or + result = this.getTarget() + } + +} + +class BytesOrStr extends BytesOrStr_ { + +} + +/** Part of a string literal formed by implicit concatenation. + * For example the string literal "abc" expressed in the source as `"a" "b" "c"` + * would be composed of three `StringPart`s. + * + */ +class StringPart extends StringPart_, AstNode { + + override Scope getScope() { + exists(Bytes b | this = b.getAnImplicitlyConcatenatedPart() | result = b.getScope()) + or + exists(Unicode u | this = u.getAnImplicitlyConcatenatedPart() | result = u.getScope()) + } + + override AstNode getAChildNode() { + none() + } + + override string toString() { + result = StringPart_.super.toString() + } + + override Location getLocation() { + result = StringPart_.super.getLocation() + } + +} + +class StringPartList extends StringPartList_ { + +} + diff --git a/python/ql/src/semmle/python/AstGenerated.qll b/python/ql/src/semmle/python/AstGenerated.qll new file mode 100644 index 00000000000..d75744398f0 --- /dev/null +++ b/python/ql/src/semmle/python/AstGenerated.qll @@ -0,0 +1,2791 @@ +import python + +library class Add_ extends @py_Add, Operator { + + override string toString() { + result = "Add" + } + +} + +library class And_ extends @py_And, Boolop { + + override string toString() { + result = "And" + } + +} + +library class AnnAssign_ extends @py_AnnAssign, Stmt { + + + /** Gets the value of this annotated assignment. */ + Expr getValue() { + py_exprs(result, _, this, 1) + } + + + /** Gets the annotation of this annotated assignment. 
*/ + Expr getAnnotation() { + py_exprs(result, _, this, 2) + } + + + /** Gets the target of this annotated assignment. */ + Expr getTarget() { + py_exprs(result, _, this, 3) + } + + override string toString() { + result = "AnnAssign" + } + +} + +library class Assert_ extends @py_Assert, Stmt { + + + /** Gets the value being tested of this assert statement. */ + Expr getTest() { + py_exprs(result, _, this, 1) + } + + + /** Gets the failure message of this assert statement. */ + Expr getMsg() { + py_exprs(result, _, this, 2) + } + + override string toString() { + result = "Assert" + } + +} + +library class Assign_ extends @py_Assign, Stmt { + + + /** Gets the value of this assignment statement. */ + Expr getValue() { + py_exprs(result, _, this, 1) + } + + + /** Gets the targets of this assignment statement. */ + ExprList getTargets() { + py_expr_lists(result, this, 2) + } + + + /** Gets the nth target of this assignment statement. */ + Expr getTarget(int index) { + result = this.getTargets().getItem(index) + } + + /** Gets a target of this assignment statement. */ + Expr getATarget() { + result = this.getTargets().getAnItem() + } + + override string toString() { + result = "Assign" + } + +} + +library class Attribute_ extends @py_Attribute, Expr { + + + /** Gets the object of this attribute expression. */ + Expr getValue() { + py_exprs(result, _, this, 2) + } + + + /** Gets the attribute name of this attribute expression. */ + string getAttr() { + py_strs(result, this, 3) + } + + + /** Gets the context of this attribute expression. */ + ExprContext getCtx() { + py_expr_contexts(result, _, this) + } + + override string toString() { + result = "Attribute" + } + +} + +library class AugAssign_ extends @py_AugAssign, Stmt { + + + /** Gets the operation of this augmented assignment statement. 
*/ + BinaryExpr getOperation() { + py_exprs(result, _, this, 1) + } + + override string toString() { + result = "AugAssign" + } + +} + +library class AugLoad_ extends @py_AugLoad, ExprContext { + + override string toString() { + result = "AugLoad" + } + +} + +library class AugStore_ extends @py_AugStore, ExprContext { + + override string toString() { + result = "AugStore" + } + +} + +library class Await_ extends @py_Await, Expr { + + + /** Gets the expression waited upon of this await expression. */ + Expr getValue() { + py_exprs(result, _, this, 2) + } + + override string toString() { + result = "Await" + } + +} + +library class BinaryExpr_ extends @py_BinaryExpr, Expr { + + + /** Gets the left sub-expression of this binary expression. */ + Expr getLeft() { + py_exprs(result, _, this, 2) + } + + + /** Gets the operator of this binary expression. */ + Operator getOp() { + py_operators(result, _, this) + } + + + /** Gets the right sub-expression of this binary expression. */ + Expr getRight() { + py_exprs(result, _, this, 4) + } + + override ExprParent getParent() { + py_exprs(this, _, result, _) + } + + override string toString() { + result = "BinaryExpr" + } + +} + +library class BitAnd_ extends @py_BitAnd, Operator { + + override string toString() { + result = "BitAnd" + } + +} + +library class BitOr_ extends @py_BitOr, Operator { + + override string toString() { + result = "BitOr" + } + +} + +library class BitXor_ extends @py_BitXor, Operator { + + override string toString() { + result = "BitXor" + } + +} + +library class BoolExpr_ extends @py_BoolExpr, Expr { + + + /** Gets the operator of this boolean expression. */ + Boolop getOp() { + py_boolops(result, _, this) + } + + + /** Gets the sub-expressions of this boolean expression. */ + ExprList getValues() { + py_expr_lists(result, this, 3) + } + + + /** Gets the nth sub-expression of this boolean expression. 
*/ + Expr getValue(int index) { + result = this.getValues().getItem(index) + } + + /** Gets a sub-expression of this boolean expression. */ + Expr getAValue() { + result = this.getValues().getAnItem() + } + + override string toString() { + result = "BoolExpr" + } + +} + +library class Break_ extends @py_Break, Stmt { + + override string toString() { + result = "Break" + } + +} + +library class Bytes_ extends @py_Bytes, Expr { + + + /** Gets the value of this bytes expression. */ + string getS() { + py_bytes(result, this, 2) + } + + + /** Gets the prefix of this bytes expression. */ + string getPrefix() { + py_bytes(result, this, 3) + } + + + /** Gets the implicitly_concatenated_parts of this bytes expression. */ + StringPartList getImplicitlyConcatenatedParts() { + py_StringPart_lists(result, this) + } + + + /** Gets the nth implicitly_concatenated_part of this bytes expression. */ + StringPart getImplicitlyConcatenatedPart(int index) { + result = this.getImplicitlyConcatenatedParts().getItem(index) + } + + /** Gets an implicitly_concatenated_part of this bytes expression. */ + StringPart getAnImplicitlyConcatenatedPart() { + result = this.getImplicitlyConcatenatedParts().getAnItem() + } + + override string toString() { + result = "Bytes" + } + +} + +library class BytesOrStr_ extends @py_Bytes_or_Str { + + string toString() { + result = "BytesOrStr" + } + +} + +library class Call_ extends @py_Call, Expr { + + + /** Gets the callable of this call expression. */ + Expr getFunc() { + py_exprs(result, _, this, 2) + } + + + /** Gets the positional arguments of this call expression. */ + ExprList getPositionalArgs() { + py_expr_lists(result, this, 3) + } + + + /** Gets the nth positional argument of this call expression. */ + Expr getPositionalArg(int index) { + result = this.getPositionalArgs().getItem(index) + } + + /** Gets a positional argument of this call expression. 
*/ + Expr getAPositionalArg() { + result = this.getPositionalArgs().getAnItem() + } + + + /** Gets the named arguments of this call expression. */ + DictItemList getNamedArgs() { + py_dict_item_lists(result, this) + } + + + /** Gets the nth named argument of this call expression. */ + DictItem getNamedArg(int index) { + result = this.getNamedArgs().getItem(index) + } + + /** Gets a named argument of this call expression. */ + DictItem getANamedArg() { + result = this.getNamedArgs().getAnItem() + } + + override string toString() { + result = "Call" + } + +} + +library class Class_ extends @py_Class { + + + /** Gets the name of this class. */ + string getName() { + py_strs(result, this, 0) + } + + + /** Gets the body of this class. */ + StmtList getBody() { + py_stmt_lists(result, this, 1) + } + + + /** Gets the nth statement of this class. */ + Stmt getStmt(int index) { + result = this.getBody().getItem(index) + } + + /** Gets a statement of this class. */ + Stmt getAStmt() { + result = this.getBody().getAnItem() + } + + ClassExpr getParent() { + py_Classes(this, result) + } + + string toString() { + result = "Class" + } + +} + +library class ClassExpr_ extends @py_ClassExpr, Expr { + + + /** Gets the name of this class definition. */ + string getName() { + py_strs(result, this, 2) + } + + + /** Gets the bases of this class definition. */ + ExprList getBases() { + py_expr_lists(result, this, 3) + } + + + /** Gets the nth base of this class definition. */ + Expr getBase(int index) { + result = this.getBases().getItem(index) + } + + /** Gets a base of this class definition. */ + Expr getABase() { + result = this.getBases().getAnItem() + } + + + /** Gets the keyword arguments of this class definition. */ + DictItemList getKeywords() { + py_dict_item_lists(result, this) + } + + + /** Gets the nth keyword argument of this class definition. 
*/ + DictItem getKeyword(int index) { + result = this.getKeywords().getItem(index) + } + + /** Gets a keyword argument of this class definition. */ + DictItem getAKeyword() { + result = this.getKeywords().getAnItem() + } + + + /** Gets the class scope of this class definition. */ + Class getInnerScope() { + py_Classes(result, this) + } + + override string toString() { + result = "ClassExpr" + } + +} + +library class Compare_ extends @py_Compare, Expr { + + + /** Gets the left sub-expression of this compare expression. */ + Expr getLeft() { + py_exprs(result, _, this, 2) + } + + + /** Gets the comparison operators of this compare expression. */ + CmpopList getOps() { + py_cmpop_lists(result, this) + } + + + /** Gets the nth comparison operator of this compare expression. */ + Cmpop getOp(int index) { + result = this.getOps().getItem(index) + } + + /** Gets a comparison operator of this compare expression. */ + Cmpop getAnOp() { + result = this.getOps().getAnItem() + } + + + /** Gets the right sub-expressions of this compare expression. */ + ExprList getComparators() { + py_expr_lists(result, this, 4) + } + + + /** Gets the nth right sub-expression of this compare expression. */ + Expr getComparator(int index) { + result = this.getComparators().getItem(index) + } + + /** Gets a right sub-expression of this compare expression. */ + Expr getAComparator() { + result = this.getComparators().getAnItem() + } + + override string toString() { + result = "Compare" + } + +} + +library class Continue_ extends @py_Continue, Stmt { + + override string toString() { + result = "Continue" + } + +} + +library class Del_ extends @py_Del, ExprContext { + + override string toString() { + result = "Del" + } + +} + +library class Delete_ extends @py_Delete, Stmt { + + + /** Gets the targets of this delete statement. */ + ExprList getTargets() { + py_expr_lists(result, this, 1) + } + + + /** Gets the nth target of this delete statement. 
*/ + Expr getTarget(int index) { + result = this.getTargets().getItem(index) + } + + /** Gets a target of this delete statement. */ + Expr getATarget() { + result = this.getTargets().getAnItem() + } + + override string toString() { + result = "Delete" + } + +} + +library class Dict_ extends @py_Dict, Expr { + + + /** Gets the items of this dictionary expression. */ + DictItemList getItems() { + py_dict_item_lists(result, this) + } + + + /** Gets the nth item of this dictionary expression. */ + DictItem getItem(int index) { + result = this.getItems().getItem(index) + } + + /** Gets an item of this dictionary expression. */ + DictItem getAnItem() { + result = this.getItems().getAnItem() + } + + override string toString() { + result = "Dict" + } + +} + +library class DictComp_ extends @py_DictComp, Expr { + + + /** Gets the implementation of this dictionary comprehension. */ + Function getFunction() { + py_Functions(result, this) + } + + + /** Gets the iterable of this dictionary comprehension. */ + Expr getIterable() { + py_exprs(result, _, this, 3) + } + + override string toString() { + result = "DictComp" + } + +} + +library class DictUnpacking_ extends @py_DictUnpacking, DictItem { + + + /** Gets the location of this dictionary unpacking. */ + override Location getLocation() { + py_locations(result, this) + } + + + /** Gets the value of this dictionary unpacking. */ + Expr getValue() { + py_exprs(result, _, this, 1) + } + + override string toString() { + result = "DictUnpacking" + } + +} + +library class Div_ extends @py_Div, Operator { + + override string toString() { + result = "Div" + } + +} + +library class Ellipsis_ extends @py_Ellipsis, Expr { + + override string toString() { + result = "Ellipsis" + } + +} + +library class Eq_ extends @py_Eq, Cmpop { + + override string toString() { + result = "Eq" + } + +} + +library class ExceptStmt_ extends @py_ExceptStmt, Stmt { + + + /** Gets the type of this except block. 
*/ + Expr getType() { + py_exprs(result, _, this, 1) + } + + + /** Gets the name of this except block. */ + Expr getName() { + py_exprs(result, _, this, 2) + } + + + /** Gets the body of this except block. */ + StmtList getBody() { + py_stmt_lists(result, this, 3) + } + + + /** Gets the nth statement of this except block. */ + Stmt getStmt(int index) { + result = this.getBody().getItem(index) + } + + /** Gets a statement of this except block. */ + Stmt getAStmt() { + result = this.getBody().getAnItem() + } + + override string toString() { + result = "ExceptStmt" + } + +} + +library class Exec_ extends @py_Exec, Stmt { + + + /** Gets the body of this exec statement. */ + Expr getBody() { + py_exprs(result, _, this, 1) + } + + + /** Gets the globals of this exec statement. */ + Expr getGlobals() { + py_exprs(result, _, this, 2) + } + + + /** Gets the locals of this exec statement. */ + Expr getLocals() { + py_exprs(result, _, this, 3) + } + + override string toString() { + result = "Exec" + } + +} + +library class ExprStmt_ extends @py_Expr_stmt, Stmt { + + + /** Gets the value of this expr statement. */ + Expr getValue() { + py_exprs(result, _, this, 1) + } + + override string toString() { + result = "ExprStmt" + } + +} + +library class Filter_ extends @py_Filter, Expr { + + + /** Gets the filtered value of this template filter expression. */ + Expr getValue() { + py_exprs(result, _, this, 2) + } + + + /** Gets the filter of this template filter expression. */ + Expr getFilter() { + py_exprs(result, _, this, 3) + } + + override string toString() { + result = "Filter" + } + +} + +library class FloorDiv_ extends @py_FloorDiv, Operator { + + override string toString() { + result = "FloorDiv" + } + +} + +library class For_ extends @py_For, Stmt { + + + /** Gets the target of this for statement. */ + Expr getTarget() { + py_exprs(result, _, this, 1) + } + + + /** Gets the iterable of this for statement. 
*/ + Expr getIter() { + py_exprs(result, _, this, 2) + } + + + /** Gets the body of this for statement. */ + StmtList getBody() { + py_stmt_lists(result, this, 3) + } + + + /** Gets the nth statement of this for statement. */ + Stmt getStmt(int index) { + result = this.getBody().getItem(index) + } + + /** Gets a statement of this for statement. */ + Stmt getAStmt() { + result = this.getBody().getAnItem() + } + + + /** Gets the else block of this for statement. */ + StmtList getOrelse() { + py_stmt_lists(result, this, 4) + } + + + /** Gets the nth else statement of this for statement. */ + Stmt getOrelse(int index) { + result = this.getOrelse().getItem(index) + } + + /** Gets an else statement of this for statement. */ + Stmt getAnOrelse() { + result = this.getOrelse().getAnItem() + } + + + /** Whether the async property of this for statement is true. */ + predicate isAsync() { + py_bools(this, 5) + } + + override string toString() { + result = "For" + } + +} + +library class FormattedValue_ extends @py_FormattedValue, Expr { + + + /** Gets the expression to be formatted of this formatted value. */ + Expr getValue() { + py_exprs(result, _, this, 2) + } + + + /** Gets the type conversion of this formatted value. */ + string getConversion() { + py_strs(result, this, 3) + } + + + /** Gets the format specifier of this formatted value. */ + Fstring getFormatSpec() { + py_exprs(result, _, this, 4) + } + + override string toString() { + result = "FormattedValue" + } + +} + +library class Function_ extends @py_Function { + + + /** Gets the name of this function. */ + string getName() { + py_strs(result, this, 0) + } + + + /** Gets the positional parameter list of this function. */ + ParameterList getArgs() { + py_parameter_lists(result, this) + } + + + /** Gets the nth positional parameter of this function. */ + Parameter getArg(int index) { + result = this.getArgs().getItem(index) + } + + /** Gets a positional parameter of this function. 
*/ + Parameter getAnArg() { + result = this.getArgs().getAnItem() + } + + + /** Gets the tuple (*) parameter of this function. */ + Expr getVararg() { + py_exprs(result, _, this, 2) + } + + + /** Gets the keyword-only parameter list of this function. */ + ExprList getKwonlyargs() { + py_expr_lists(result, this, 3) + } + + + /** Gets the nth keyword-only parameter of this function. */ + Expr getKwonlyarg(int index) { + result = this.getKwonlyargs().getItem(index) + } + + /** Gets a keyword-only parameter of this function. */ + Expr getAKwonlyarg() { + result = this.getKwonlyargs().getAnItem() + } + + + /** Gets the dictionary (**) parameter of this function. */ + Expr getKwarg() { + py_exprs(result, _, this, 4) + } + + + /** Gets the body of this function. */ + StmtList getBody() { + py_stmt_lists(result, this, 5) + } + + + /** Gets the nth statement of this function. */ + Stmt getStmt(int index) { + result = this.getBody().getItem(index) + } + + /** Gets a statement of this function. */ + Stmt getAStmt() { + result = this.getBody().getAnItem() + } + + + /** Whether the async property of this function is true. */ + predicate isAsync() { + py_bools(this, 6) + } + + FunctionParent getParent() { + py_Functions(this, result) + } + + string toString() { + result = "Function" + } + +} + +library class FunctionExpr_ extends @py_FunctionExpr, Expr { + + + /** Gets the name of this function definition. */ + string getName() { + py_strs(result, this, 2) + } + + + /** Gets the parameters of this function definition. */ + Arguments getArgs() { + py_arguments(result, this) + } + + + /** Gets the return annotation of this function definition. */ + Expr getReturns() { + py_exprs(result, _, this, 4) + } + + + /** Gets the function scope of this function definition. 
*/ + Function getInnerScope() { + py_Functions(result, this) + } + + override string toString() { + result = "FunctionExpr" + } + +} + +library class FunctionParent_ extends @py_Function_parent { + + string toString() { + result = "FunctionParent" + } + +} + +library class GeneratorExp_ extends @py_GeneratorExp, Expr { + + + /** Gets the implementation of this generator expression. */ + Function getFunction() { + py_Functions(result, this) + } + + + /** Gets the iterable of this generator expression. */ + Expr getIterable() { + py_exprs(result, _, this, 3) + } + + override string toString() { + result = "GeneratorExp" + } + +} + +library class Global_ extends @py_Global, Stmt { + + + /** Gets the names of this global statement. */ + StringList getNames() { + py_str_lists(result, this) + } + + + /** Gets the nth name of this global statement. */ + string getName(int index) { + result = this.getNames().getItem(index) + } + + /** Gets a name of this global statement. */ + string getAName() { + result = this.getNames().getAnItem() + } + + override string toString() { + result = "Global" + } + +} + +library class Gt_ extends @py_Gt, Cmpop { + + override string toString() { + result = "Gt" + } + +} + +library class GtE_ extends @py_GtE, Cmpop { + + override string toString() { + result = "GtE" + } + +} + +library class If_ extends @py_If, Stmt { + + + /** Gets the test of this if statement. */ + Expr getTest() { + py_exprs(result, _, this, 1) + } + + + /** Gets the if-true block of this if statement. */ + StmtList getBody() { + py_stmt_lists(result, this, 2) + } + + + /** Gets the nth if-true statement of this if statement. */ + Stmt getStmt(int index) { + result = this.getBody().getItem(index) + } + + /** Gets an if-true statement of this if statement. */ + Stmt getAStmt() { + result = this.getBody().getAnItem() + } + + + /** Gets the if-false block of this if statement. 
*/ + StmtList getOrelse() { + py_stmt_lists(result, this, 3) + } + + + /** Gets the nth if-false statement of this if statement. */ + Stmt getOrelse(int index) { + result = this.getOrelse().getItem(index) + } + + /** Gets an if-false statement of this if statement. */ + Stmt getAnOrelse() { + result = this.getOrelse().getAnItem() + } + + override string toString() { + result = "If" + } + +} + +library class IfExp_ extends @py_IfExp, Expr { + + + /** Gets the test of this if expression. */ + Expr getTest() { + py_exprs(result, _, this, 2) + } + + + /** Gets the if-true expression of this if expression. */ + Expr getBody() { + py_exprs(result, _, this, 3) + } + + + /** Gets the if-false expression of this if expression. */ + Expr getOrelse() { + py_exprs(result, _, this, 4) + } + + override string toString() { + result = "IfExp" + } + +} + +library class Import_ extends @py_Import, Stmt { + + + /** Gets the alias list of this import statement. */ + AliasList getNames() { + py_alias_lists(result, this) + } + + + /** Gets the nth alias of this import statement. */ + Alias getName(int index) { + result = this.getNames().getItem(index) + } + + /** Gets an alias of this import statement. */ + Alias getAName() { + result = this.getNames().getAnItem() + } + + override string toString() { + result = "Import" + } + +} + +library class ImportExpr_ extends @py_ImportExpr, Expr { + + + /** Gets the level of this import expression. */ + int getLevel() { + py_ints(result, this) + } + + + /** Gets the name of this import expression. */ + string getName() { + py_strs(result, this, 3) + } + + + /** Whether the top level property of this import expression is true. */ + predicate isTop() { + py_bools(this, 4) + } + + override string toString() { + result = "ImportExpr" + } + +} + +library class ImportStar_ extends @py_ImportStar, Stmt { + + + /** Gets the module of this import * statement. 
*/ + Expr getModule() { + py_exprs(result, _, this, 1) + } + + override string toString() { + result = "ImportStar" + } + +} + +library class ImportMember_ extends @py_ImportMember, Expr { + + + /** Gets the module of this from import. */ + Expr getModule() { + py_exprs(result, _, this, 2) + } + + + /** Gets the name of this from import. */ + string getName() { + py_strs(result, this, 3) + } + + override string toString() { + result = "ImportMember" + } + +} + +library class In_ extends @py_In, Cmpop { + + override string toString() { + result = "In" + } + +} + +library class Invert_ extends @py_Invert, Unaryop { + + override string toString() { + result = "Invert" + } + +} + +library class Is_ extends @py_Is, Cmpop { + + override string toString() { + result = "Is" + } + +} + +library class IsNot_ extends @py_IsNot, Cmpop { + + override string toString() { + result = "IsNot" + } + +} + +library class Fstring_ extends @py_Fstring, Expr { + + + /** Gets the values of this formatted string literal. */ + ExprList getValues() { + py_expr_lists(result, this, 2) + } + + + /** Gets the nth value of this formatted string literal. */ + Expr getValue(int index) { + result = this.getValues().getItem(index) + } + + /** Gets a value of this formatted string literal. */ + Expr getAValue() { + result = this.getValues().getAnItem() + } + + override ExprParent getParent() { + py_exprs(this, _, result, _) + } + + override string toString() { + result = "Fstring" + } + +} + +library class KeyValuePair_ extends @py_KeyValuePair, DictItem { + + + /** Gets the location of this key-value pair. */ + override Location getLocation() { + py_locations(result, this) + } + + + /** Gets the value of this key-value pair. */ + Expr getValue() { + py_exprs(result, _, this, 1) + } + + + /** Gets the key of this key-value pair. 
*/ + Expr getKey() { + py_exprs(result, _, this, 2) + } + + override string toString() { + result = "KeyValuePair" + } + +} + +library class LShift_ extends @py_LShift, Operator { + + override string toString() { + result = "LShift" + } + +} + +library class Lambda_ extends @py_Lambda, Expr { + + + /** Gets the arguments of this lambda expression. */ + Arguments getArgs() { + py_arguments(result, this) + } + + + /** Gets the function scope of this lambda expression. */ + Function getInnerScope() { + py_Functions(result, this) + } + + override string toString() { + result = "Lambda" + } + +} + +library class List_ extends @py_List, Expr { + + + /** Gets the element list of this list expression. */ + ExprList getElts() { + py_expr_lists(result, this, 2) + } + + + /** Gets the nth element of this list expression. */ + Expr getElt(int index) { + result = this.getElts().getItem(index) + } + + /** Gets an element of this list expression. */ + Expr getAnElt() { + result = this.getElts().getAnItem() + } + + + /** Gets the context of this list expression. */ + ExprContext getCtx() { + py_expr_contexts(result, _, this) + } + + override string toString() { + result = "List" + } + +} + +library class ListComp_ extends @py_ListComp, Expr { + + + /** Gets the implementation of this list comprehension. */ + Function getFunction() { + py_Functions(result, this) + } + + + /** Gets the iterable of this list comprehension. */ + Expr getIterable() { + py_exprs(result, _, this, 3) + } + + + /** Gets the generators of this list comprehension. */ + ComprehensionList getGenerators() { + py_comprehension_lists(result, this) + } + + + /** Gets the nth generator of this list comprehension. */ + Comprehension getGenerator(int index) { + result = this.getGenerators().getItem(index) + } + + /** Gets a generator of this list comprehension. */ + Comprehension getAGenerator() { + result = this.getGenerators().getAnItem() + } + + + /** Gets the elements of this list comprehension. 
*/ + Expr getElt() { + py_exprs(result, _, this, 5) + } + + override string toString() { + result = "ListComp" + } + +} + +library class Load_ extends @py_Load, ExprContext { + + override string toString() { + result = "Load" + } + +} + +library class Lt_ extends @py_Lt, Cmpop { + + override string toString() { + result = "Lt" + } + +} + +library class LtE_ extends @py_LtE, Cmpop { + + override string toString() { + result = "LtE" + } + +} + +library class MatMult_ extends @py_MatMult, Operator { + + override string toString() { + result = "MatMult" + } + +} + +library class Mod_ extends @py_Mod, Operator { + + override string toString() { + result = "Mod" + } + +} + +library class Module_ extends @py_Module { + + + /** Gets the name of this module. */ + string getName() { + py_strs(result, this, 0) + } + + + /** Gets the hash (not populated) of this module. */ + string getHash() { + py_strs(result, this, 1) + } + + + /** Gets the body of this module. */ + StmtList getBody() { + py_stmt_lists(result, this, 2) + } + + + /** Gets the nth statement of this module. */ + Stmt getStmt(int index) { + result = this.getBody().getItem(index) + } + + /** Gets a statement of this module. */ + Stmt getAStmt() { + result = this.getBody().getAnItem() + } + + + /** Gets the kind of this module. */ + string getKind() { + py_strs(result, this, 3) + } + + string toString() { + result = "Module" + } + +} + +library class Mult_ extends @py_Mult, Operator { + + override string toString() { + result = "Mult" + } + +} + +library class Name_ extends @py_Name, Expr { + + + /** Gets the variable of this name expression. */ + Variable getVariable() { + py_variables(result, this) + } + + + /** Gets the context of this name expression. 
*/ + ExprContext getCtx() { + py_expr_contexts(result, _, this) + } + + override ExprParent getParent() { + py_exprs(this, _, result, _) + } + + override string toString() { + result = "Name" + } + +} + +library class Nonlocal_ extends @py_Nonlocal, Stmt { + + + /** Gets the names of this nonlocal statement. */ + StringList getNames() { + py_str_lists(result, this) + } + + + /** Gets the nth name of this nonlocal statement. */ + string getName(int index) { + result = this.getNames().getItem(index) + } + + /** Gets a name of this nonlocal statement. */ + string getAName() { + result = this.getNames().getAnItem() + } + + override string toString() { + result = "Nonlocal" + } + +} + +library class Not_ extends @py_Not, Unaryop { + + override string toString() { + result = "Not" + } + +} + +library class NotEq_ extends @py_NotEq, Cmpop { + + override string toString() { + result = "NotEq" + } + +} + +library class NotIn_ extends @py_NotIn, Cmpop { + + override string toString() { + result = "NotIn" + } + +} + +library class Num_ extends @py_Num, Expr { + + + /** Gets the value of this numeric literal. */ + string getN() { + py_numbers(result, this, 2) + } + + + /** Gets the text of this numeric literal. */ + string getText() { + py_numbers(result, this, 3) + } + + override string toString() { + result = "Num" + } + +} + +library class Or_ extends @py_Or, Boolop { + + override string toString() { + result = "Or" + } + +} + +library class Param_ extends @py_Param, ExprContext { + + override string toString() { + result = "Param" + } + +} + +library class Pass_ extends @py_Pass, Stmt { + + override string toString() { + result = "Pass" + } + +} + +library class PlaceHolder_ extends @py_PlaceHolder, Expr { + + + /** Gets the variable of this template place-holder expression. */ + Variable getVariable() { + py_variables(result, this) + } + + + /** Gets the context of this template place-holder expression. 
*/ + ExprContext getCtx() { + py_expr_contexts(result, _, this) + } + + override string toString() { + result = "PlaceHolder" + } + +} + +library class Pow_ extends @py_Pow, Operator { + + override string toString() { + result = "Pow" + } + +} + +library class Print_ extends @py_Print, Stmt { + + + /** Gets the destination of this print statement. */ + Expr getDest() { + py_exprs(result, _, this, 1) + } + + + /** Gets the values of this print statement. */ + ExprList getValues() { + py_expr_lists(result, this, 2) + } + + + /** Gets the nth value of this print statement. */ + Expr getValue(int index) { + result = this.getValues().getItem(index) + } + + /** Gets a value of this print statement. */ + Expr getAValue() { + result = this.getValues().getAnItem() + } + + + /** Whether the new line property of this print statement is true. */ + predicate isNl() { + py_bools(this, 3) + } + + override string toString() { + result = "Print" + } + +} + +library class RShift_ extends @py_RShift, Operator { + + override string toString() { + result = "RShift" + } + +} + +library class Raise_ extends @py_Raise, Stmt { + + + /** Gets the exception of this raise statement. */ + Expr getExc() { + py_exprs(result, _, this, 1) + } + + + /** Gets the cause of this raise statement. */ + Expr getCause() { + py_exprs(result, _, this, 2) + } + + + /** Gets the type of this raise statement. */ + Expr getType() { + py_exprs(result, _, this, 3) + } + + + /** Gets the instance of this raise statement. */ + Expr getInst() { + py_exprs(result, _, this, 4) + } + + + /** Gets the traceback of this raise statement. */ + Expr getTback() { + py_exprs(result, _, this, 5) + } + + override string toString() { + result = "Raise" + } + +} + +library class Repr_ extends @py_Repr, Expr { + + + /** Gets the value of this backtick expression. 
*/ + Expr getValue() { + py_exprs(result, _, this, 2) + } + + override string toString() { + result = "Repr" + } + +} + +library class Return_ extends @py_Return, Stmt { + + + /** Gets the value of this return statement. */ + Expr getValue() { + py_exprs(result, _, this, 1) + } + + override string toString() { + result = "Return" + } + +} + +library class Set_ extends @py_Set, Expr { + + + /** Gets the elements of this set expression. */ + ExprList getElts() { + py_expr_lists(result, this, 2) + } + + + /** Gets the nth element of this set expression. */ + Expr getElt(int index) { + result = this.getElts().getItem(index) + } + + /** Gets an element of this set expression. */ + Expr getAnElt() { + result = this.getElts().getAnItem() + } + + override string toString() { + result = "Set" + } + +} + +library class SetComp_ extends @py_SetComp, Expr { + + + /** Gets the implementation of this set comprehension. */ + Function getFunction() { + py_Functions(result, this) + } + + + /** Gets the iterable of this set comprehension. */ + Expr getIterable() { + py_exprs(result, _, this, 3) + } + + override string toString() { + result = "SetComp" + } + +} + +library class Slice_ extends @py_Slice, Expr { + + + /** Gets the start of this slice. */ + Expr getStart() { + py_exprs(result, _, this, 2) + } + + + /** Gets the stop of this slice. */ + Expr getStop() { + py_exprs(result, _, this, 3) + } + + + /** Gets the step of this slice. */ + Expr getStep() { + py_exprs(result, _, this, 4) + } + + override string toString() { + result = "Slice" + } + +} + +library class Starred_ extends @py_Starred, Expr { + + + /** Gets the value of this starred expression. */ + Expr getValue() { + py_exprs(result, _, this, 2) + } + + + /** Gets the context of this starred expression. 
*/ + ExprContext getCtx() { + py_expr_contexts(result, _, this) + } + + override string toString() { + result = "Starred" + } + +} + +library class Store_ extends @py_Store, ExprContext { + + override string toString() { + result = "Store" + } + +} + +library class Str_ extends @py_Str, Expr { + + + /** Gets the text of this string literal. */ + string getS() { + py_strs(result, this, 2) + } + + + /** Gets the prefix of this string literal. */ + string getPrefix() { + py_strs(result, this, 3) + } + + + /** Gets the implicitly_concatenated_parts of this string literal. */ + StringPartList getImplicitlyConcatenatedParts() { + py_StringPart_lists(result, this) + } + + + /** Gets the nth implicitly_concatenated_part of this string literal. */ + StringPart getImplicitlyConcatenatedPart(int index) { + result = this.getImplicitlyConcatenatedParts().getItem(index) + } + + /** Gets an implicitly_concatenated_part of this string literal. */ + StringPart getAnImplicitlyConcatenatedPart() { + result = this.getImplicitlyConcatenatedParts().getAnItem() + } + + override string toString() { + result = "Str" + } + +} + +library class StringPart_ extends @py_StringPart { + + + /** Gets the text of this implicitly concatenated part. */ + string getText() { + py_strs(result, this, 0) + } + + + /** Gets the location of this implicitly concatenated part. 
*/ + Location getLocation() { + py_locations(result, this) + } + + StringPartList getParent() { + py_StringParts(this, result, _) + } + + string toString() { + result = "StringPart" + } + +} + +library class StringPartList_ extends @py_StringPart_list { + + BytesOrStr getParent() { + py_StringPart_lists(this, result) + } + + /** Gets an item of this implicitly concatenated part list */ + StringPart getAnItem() { + py_StringParts(result, this, _) + } + + /** Gets the nth item of this implicitly concatenated part list */ + StringPart getItem(int index) { + py_StringParts(result, this, index) + } + + string toString() { + result = "StringPartList" + } + +} + +library class Sub_ extends @py_Sub, Operator { + + override string toString() { + result = "Sub" + } + +} + +library class Subscript_ extends @py_Subscript, Expr { + + + /** Gets the value of this subscript expression. */ + Expr getValue() { + py_exprs(result, _, this, 2) + } + + + /** Gets the index of this subscript expression. */ + Expr getIndex() { + py_exprs(result, _, this, 3) + } + + + /** Gets the context of this subscript expression. */ + ExprContext getCtx() { + py_expr_contexts(result, _, this) + } + + override string toString() { + result = "Subscript" + } + +} + +library class TemplateDottedNotation_ extends @py_TemplateDottedNotation, Expr { + + + /** Gets the object of this template dotted notation expression. */ + Expr getValue() { + py_exprs(result, _, this, 2) + } + + + /** Gets the attribute name of this template dotted notation expression. */ + string getAttr() { + py_strs(result, this, 3) + } + + + /** Gets the context of this template dotted notation expression. */ + ExprContext getCtx() { + py_expr_contexts(result, _, this) + } + + override string toString() { + result = "TemplateDottedNotation" + } + +} + +library class TemplateWrite_ extends @py_TemplateWrite, Stmt { + + + /** Gets the value of this template write statement. 
*/ + Expr getValue() { + py_exprs(result, _, this, 1) + } + + override string toString() { + result = "TemplateWrite" + } + +} + +library class Try_ extends @py_Try, Stmt { + + + /** Gets the body of this try statement. */ + StmtList getBody() { + py_stmt_lists(result, this, 1) + } + + + /** Gets the nth statement of this try statement. */ + Stmt getStmt(int index) { + result = this.getBody().getItem(index) + } + + /** Gets a statement of this try statement. */ + Stmt getAStmt() { + result = this.getBody().getAnItem() + } + + + /** Gets the else block of this try statement. */ + StmtList getOrelse() { + py_stmt_lists(result, this, 2) + } + + + /** Gets the nth else statement of this try statement. */ + Stmt getOrelse(int index) { + result = this.getOrelse().getItem(index) + } + + /** Gets an else statement of this try statement. */ + Stmt getAnOrelse() { + result = this.getOrelse().getAnItem() + } + + + /** Gets the exception handlers of this try statement. */ + StmtList getHandlers() { + py_stmt_lists(result, this, 3) + } + + + /** Gets the nth exception handler of this try statement. */ + Stmt getHandler(int index) { + result = this.getHandlers().getItem(index) + } + + /** Gets an exception handler of this try statement. */ + Stmt getAHandler() { + result = this.getHandlers().getAnItem() + } + + + /** Gets the finally block of this try statement. */ + StmtList getFinalbody() { + py_stmt_lists(result, this, 4) + } + + + /** Gets the nth finally statement of this try statement. */ + Stmt getFinalstmt(int index) { + result = this.getFinalbody().getItem(index) + } + + /** Gets a finally statement of this try statement. */ + Stmt getAFinalstmt() { + result = this.getFinalbody().getAnItem() + } + + override string toString() { + result = "Try" + } + +} + +library class Tuple_ extends @py_Tuple, Expr { + + + /** Gets the elements of this tuple expression. 
*/ + ExprList getElts() { + py_expr_lists(result, this, 2) + } + + + /** Gets the nth element of this tuple expression. */ + Expr getElt(int index) { + result = this.getElts().getItem(index) + } + + /** Gets an element of this tuple expression. */ + Expr getAnElt() { + result = this.getElts().getAnItem() + } + + + /** Gets the context of this tuple expression. */ + ExprContext getCtx() { + py_expr_contexts(result, _, this) + } + + override ExprParent getParent() { + py_exprs(this, _, result, _) + } + + override string toString() { + result = "Tuple" + } + +} + +library class UAdd_ extends @py_UAdd, Unaryop { + + override string toString() { + result = "UAdd" + } + +} + +library class USub_ extends @py_USub, Unaryop { + + override string toString() { + result = "USub" + } + +} + +library class UnaryExpr_ extends @py_UnaryExpr, Expr { + + + /** Gets the operator of this unary expression. */ + Unaryop getOp() { + py_unaryops(result, _, this) + } + + + /** Gets the operand of this unary expression. */ + Expr getOperand() { + py_exprs(result, _, this, 3) + } + + override string toString() { + result = "UnaryExpr" + } + +} + +library class While_ extends @py_While, Stmt { + + + /** Gets the test of this while statement. */ + Expr getTest() { + py_exprs(result, _, this, 1) + } + + + /** Gets the body of this while statement. */ + StmtList getBody() { + py_stmt_lists(result, this, 2) + } + + + /** Gets the nth statement of this while statement. */ + Stmt getStmt(int index) { + result = this.getBody().getItem(index) + } + + /** Gets a statement of this while statement. */ + Stmt getAStmt() { + result = this.getBody().getAnItem() + } + + + /** Gets the else block of this while statement. */ + StmtList getOrelse() { + py_stmt_lists(result, this, 3) + } + + + /** Gets the nth else statement of this while statement. */ + Stmt getOrelse(int index) { + result = this.getOrelse().getItem(index) + } + + /** Gets an else statement of this while statement. 
*/ + Stmt getAnOrelse() { + result = this.getOrelse().getAnItem() + } + + override string toString() { + result = "While" + } + +} + +library class With_ extends @py_With, Stmt { + + + /** Gets the context manager of this with statement. */ + Expr getContextExpr() { + py_exprs(result, _, this, 1) + } + + + /** Gets the optional variable of this with statement. */ + Expr getOptionalVars() { + py_exprs(result, _, this, 2) + } + + + /** Gets the body of this with statement. */ + StmtList getBody() { + py_stmt_lists(result, this, 3) + } + + + /** Gets the nth statement of this with statement. */ + Stmt getStmt(int index) { + result = this.getBody().getItem(index) + } + + /** Gets a statement of this with statement. */ + Stmt getAStmt() { + result = this.getBody().getAnItem() + } + + + /** Whether the async property of this with statement is true. */ + predicate isAsync() { + py_bools(this, 4) + } + + override string toString() { + result = "With" + } + +} + +library class Yield_ extends @py_Yield, Expr { + + + /** Gets the value of this yield expression. */ + Expr getValue() { + py_exprs(result, _, this, 2) + } + + override string toString() { + result = "Yield" + } + +} + +library class YieldFrom_ extends @py_YieldFrom, Expr { + + + /** Gets the value of this yield-from expression. */ + Expr getValue() { + py_exprs(result, _, this, 2) + } + + override string toString() { + result = "YieldFrom" + } + +} + +library class Alias_ extends @py_alias { + + + /** Gets the value of this alias. */ + Expr getValue() { + py_exprs(result, _, this, 0) + } + + + /** Gets the name of this alias. 
*/ + Expr getAsname() { + py_exprs(result, _, this, 1) + } + + AliasList getParent() { + py_aliases(this, result, _) + } + + string toString() { + result = "Alias" + } + +} + +library class AliasList_ extends @py_alias_list { + + Import getParent() { + py_alias_lists(this, result) + } + + /** Gets an item of this alias list */ + Alias getAnItem() { + py_aliases(result, this, _) + } + + /** Gets the nth item of this alias list */ + Alias getItem(int index) { + py_aliases(result, this, index) + } + + string toString() { + result = "AliasList" + } + +} + +library class Arguments_ extends @py_arguments { + + + /** Gets the keyword default values of this parameters definition. */ + ExprList getKwDefaults() { + py_expr_lists(result, this, 0) + } + + + /** Gets the nth keyword default value of this parameters definition. */ + Expr getKwDefault(int index) { + result = this.getKwDefaults().getItem(index) + } + + /** Gets a keyword default value of this parameters definition. */ + Expr getAKwDefault() { + result = this.getKwDefaults().getAnItem() + } + + + /** Gets the default values of this parameters definition. */ + ExprList getDefaults() { + py_expr_lists(result, this, 1) + } + + + /** Gets the nth default value of this parameters definition. */ + Expr getDefault(int index) { + result = this.getDefaults().getItem(index) + } + + /** Gets a default value of this parameters definition. */ + Expr getADefault() { + result = this.getDefaults().getAnItem() + } + + + /** Gets the annotations of this parameters definition. */ + ExprList getAnnotations() { + py_expr_lists(result, this, 2) + } + + + /** Gets the nth annotation of this parameters definition. */ + Expr getAnnotation(int index) { + result = this.getAnnotations().getItem(index) + } + + /** Gets an annotation of this parameters definition. */ + Expr getAnAnnotation() { + result = this.getAnnotations().getAnItem() + } + + + /** Gets the *arg annotation of this parameters definition. 
*/ + Expr getVarargannotation() { + py_exprs(result, _, this, 3) + } + + + /** Gets the **kwarg annotation of this parameters definition. */ + Expr getKwargannotation() { + py_exprs(result, _, this, 4) + } + + + /** Gets the kw_annotations of this parameters definition. */ + ExprList getKwAnnotations() { + py_expr_lists(result, this, 5) + } + + + /** Gets the nth kw_annotation of this parameters definition. */ + Expr getKwAnnotation(int index) { + result = this.getKwAnnotations().getItem(index) + } + + /** Gets a kw_annotation of this parameters definition. */ + Expr getAKwAnnotation() { + result = this.getKwAnnotations().getAnItem() + } + + ArgumentsParent getParent() { + py_arguments(this, result) + } + + string toString() { + result = "Arguments" + } + +} + +library class ArgumentsParent_ extends @py_arguments_parent { + + string toString() { + result = "ArgumentsParent" + } + +} + +library class AstNode_ extends @py_ast_node { + + string toString() { + result = "AstNode" + } + +} + +library class BoolParent_ extends @py_bool_parent { + + string toString() { + result = "BoolParent" + } + +} + +library class Boolop_ extends @py_boolop { + + BoolExpr getParent() { + py_boolops(this, _, result) + } + + string toString() { + result = "Boolop" + } + +} + +library class Cmpop_ extends @py_cmpop { + + CmpopList getParent() { + py_cmpops(this, _, result, _) + } + + string toString() { + result = "Cmpop" + } + +} + +library class CmpopList_ extends @py_cmpop_list { + + Compare getParent() { + py_cmpop_lists(this, result) + } + + /** Gets an item of this comparison operator list */ + Cmpop getAnItem() { + py_cmpops(result, _, this, _) + } + + /** Gets the nth item of this comparison operator list */ + Cmpop getItem(int index) { + py_cmpops(result, _, this, index) + } + + string toString() { + result = "CmpopList" + } + +} + +library class Comprehension_ extends @py_comprehension { + + + /** Gets the location of this comprehension. 
*/ + Location getLocation() { + py_locations(result, this) + } + + + /** Gets the iterable of this comprehension. */ + Expr getIter() { + py_exprs(result, _, this, 1) + } + + + /** Gets the target of this comprehension. */ + Expr getTarget() { + py_exprs(result, _, this, 2) + } + + + /** Gets the conditions of this comprehension. */ + ExprList getIfs() { + py_expr_lists(result, this, 3) + } + + + /** Gets the nth condition of this comprehension. */ + Expr getIf(int index) { + result = this.getIfs().getItem(index) + } + + /** Gets a condition of this comprehension. */ + Expr getAnIf() { + result = this.getIfs().getAnItem() + } + + ComprehensionList getParent() { + py_comprehensions(this, result, _) + } + + string toString() { + result = "Comprehension" + } + +} + +library class ComprehensionList_ extends @py_comprehension_list { + + ListComp getParent() { + py_comprehension_lists(this, result) + } + + /** Gets an item of this comprehension list */ + Comprehension getAnItem() { + py_comprehensions(result, this, _) + } + + /** Gets the nth item of this comprehension list */ + Comprehension getItem(int index) { + py_comprehensions(result, this, index) + } + + string toString() { + result = "ComprehensionList" + } + +} + +library class DictItem_ extends @py_dict_item { + + DictItemList getParent() { + py_dict_items(this, _, result, _) + } + + string toString() { + result = "DictItem" + } + +} + +library class DictItemList_ extends @py_dict_item_list { + + DictItemListParent getParent() { + py_dict_item_lists(this, result) + } + + /** Gets an item of this dict_item list */ + DictItem getAnItem() { + py_dict_items(result, _, this, _) + } + + /** Gets the nth item of this dict_item list */ + DictItem getItem(int index) { + py_dict_items(result, _, this, index) + } + + string toString() { + result = "DictItemList" + } + +} + +library class DictItemListParent_ extends @py_dict_item_list_parent { + + string toString() { + result = "DictItemListParent" + } + +} + +library 
class Expr_ extends @py_expr { + + + /** Gets the location of this expression. */ + Location getLocation() { + py_locations(result, this) + } + + + /** Whether the parenthesised property of this expression is true. */ + predicate isParenthesised() { + py_bools(this, 1) + } + + ExprParent getParent() { + py_exprs(this, _, result, _) + } + + string toString() { + result = "Expr" + } + +} + +library class ExprContext_ extends @py_expr_context { + + ExprContextParent getParent() { + py_expr_contexts(this, _, result) + } + + string toString() { + result = "ExprContext" + } + +} + +library class ExprContextParent_ extends @py_expr_context_parent { + + string toString() { + result = "ExprContextParent" + } + +} + +library class ExprList_ extends @py_expr_list { + + ExprListParent getParent() { + py_expr_lists(this, result, _) + } + + /** Gets an item of this expression list */ + Expr getAnItem() { + py_exprs(result, _, this, _) + } + + /** Gets the nth item of this expression list */ + Expr getItem(int index) { + py_exprs(result, _, this, index) + } + + string toString() { + result = "ExprList" + } + +} + +library class ExprListParent_ extends @py_expr_list_parent { + + string toString() { + result = "ExprListParent" + } + +} + +library class ExprOrStmt_ extends @py_expr_or_stmt { + + string toString() { + result = "ExprOrStmt" + } + +} + +library class ExprParent_ extends @py_expr_parent { + + string toString() { + result = "ExprParent" + } + +} + +library class Keyword_ extends @py_keyword, DictItem { + + + /** Gets the location of this keyword argument. */ + override Location getLocation() { + py_locations(result, this) + } + + + /** Gets the value of this keyword argument. */ + Expr getValue() { + py_exprs(result, _, this, 1) + } + + + /** Gets the arg of this keyword argument. 
*/ + string getArg() { + py_strs(result, this, 2) + } + + override string toString() { + result = "Keyword" + } + +} + +library class LocationParent_ extends @py_location_parent { + + string toString() { + result = "LocationParent" + } + +} + +library class Operator_ extends @py_operator { + + BinaryExpr getParent() { + py_operators(this, _, result) + } + + string toString() { + result = "Operator" + } + +} + +library class Parameter_ extends @py_parameter { + + string toString() { + result = "Parameter" + } + +} + +library class Scope_ extends @py_scope { + + string toString() { + result = "Scope" + } + +} + +library class Stmt_ extends @py_stmt { + + + /** Gets the location of this statement. */ + Location getLocation() { + py_locations(result, this) + } + + StmtList getParent() { + py_stmts(this, _, result, _) + } + + string toString() { + result = "Stmt" + } + +} + +library class StmtList_ extends @py_stmt_list { + + StmtListParent getParent() { + py_stmt_lists(this, result, _) + } + + /** Gets an item of this statement list */ + Stmt getAnItem() { + py_stmts(result, _, this, _) + } + + /** Gets the nth item of this statement list */ + Stmt getItem(int index) { + py_stmts(result, _, this, index) + } + + string toString() { + result = "StmtList" + } + +} + +library class StmtListParent_ extends @py_stmt_list_parent { + + string toString() { + result = "StmtListParent" + } + +} + +library class StringList_ extends @py_str_list { + + StrListParent getParent() { + py_str_lists(this, result) + } + + /** Gets an item of this string list */ + string getAnItem() { + py_strs(result, this, _) + } + + /** Gets the nth item of this string list */ + string getItem(int index) { + py_strs(result, this, index) + } + + string toString() { + result = "StringList" + } + +} + +library class StrListParent_ extends @py_str_list_parent { + + string toString() { + result = "StrListParent" + } + +} + +library class StrParent_ extends @py_str_parent { + + string toString() { + result = 
"StrParent" + } + +} + +library class Unaryop_ extends @py_unaryop { + + UnaryExpr getParent() { + py_unaryops(this, _, result) + } + + string toString() { + result = "Unaryop" + } + +} + +library class VariableParent_ extends @py_variable_parent { + + string toString() { + result = "VariableParent" + } + +} + diff --git a/python/ql/src/semmle/python/Class.qll b/python/ql/src/semmle/python/Class.qll new file mode 100644 index 00000000000..1f754536bdd --- /dev/null +++ b/python/ql/src/semmle/python/Class.qll @@ -0,0 +1,219 @@ +import python + + +/** An (artificial) expression corresponding to a class definition. + * It is recommended to use `ClassDef` instead. + */ +class ClassExpr extends ClassExpr_ { + + /** Gets the metaclass expression */ + Expr getMetaClass() { + if major_version() = 3 then + exists(Keyword metacls | this.getAKeyword() = metacls and metacls.getArg() = "metaclass" and result = metacls.getValue()) + else + exists(Assign a | a = this.getInnerScope().getAStmt() and ((Name)a.getATarget()).getId() = "__metaclass__" and result = a.getValue()) + } + + + /** Gets the nth keyword argument of this class definition. */ + override DictUnpackingOrKeyword getKeyword(int index) { + result = this.getKeywords().getItem(index) + } + + /** Gets a keyword argument of this class definition. 
*/ + override DictUnpackingOrKeyword getAKeyword() { + result = this.getKeywords().getAnItem() + } + + override Expr getASubExpression() { + result = this.getABase() or + result = this.getAKeyword().getValue() or + result = this.getKwargs() or + result = this.getStarargs() + } + + Call getADecoratorCall() { + result.getArg(0) = this or + result.getArg(0) = this.getADecoratorCall() + } + + /** Gets a decorator of this function expression */ + Expr getADecorator() { + result = this.getADecoratorCall().getFunc() + } + + override AstNode getAChildNode() { + result = this.getASubExpression() + or + result = this.getInnerScope() + } + + /** Gets a tuple (*) argument of this class definition. */ + Expr getStarargs() { + result = this.getABase().(Starred).getValue() + } + + /** Gets a dictionary (**) argument of this class definition. */ + Expr getKwargs() { + result = this.getAKeyword().(DictUnpacking).getValue() + } + +} + +/** A class statement. Note that ClassDef extends Assign as a class definition binds the newly created class */ +class ClassDef extends Assign { + + ClassDef() { + /* This is an artificial assignment the rhs of which is a (possibly decorated) ClassExpr */ + exists(ClassExpr c | this.getValue() = c or this.getValue() = c.getADecoratorCall()) + } + + override string toString() { + result = "ClassDef" + } + + /** Gets the class for this statement */ + Class getDefinedClass() { + exists(ClassExpr c | + this.getValue() = c or this.getValue() = c.getADecoratorCall() | + result = c.getInnerScope() + ) + } + + override Stmt getLastStatement() { + result = this.getDefinedClass().getLastStatement() + } + +} + +/** The scope of a class. 
This is the scope of all the statements within the class definition */ +class Class extends Class_, Scope, AstNode { + + /** Use getADecorator() instead of getDefinition().getADecorator() + * Use getMetaClass() instead of getDefinition().getMetaClass() + */ + deprecated ClassExpr getDefinition() { + result = this.getParent() + } + + /** Gets a defined init method of this class */ + Function getInitMethod() { + result.getScope() = this and result.isInitMethod() + } + + /** Gets a method defined in this class */ + Function getAMethod() { + result.getScope() = this + } + + override Location getLocation() { + py_scope_location(result, this) + } + + /** Gets the scope (module, class or function) in which this class is defined */ + override Scope getEnclosingScope() { + result = this.getParent().getScope() + } + + /** Use getEnclosingScope() instead */ + override Scope getScope() { + result = this.getParent().getScope() + } + + override string toString() { + result = "Class " + this.getName() + } + + /** Gets the statements forming the body of this class */ + override StmtList getBody() { + result = Class_.super.getBody() + } + + /** Gets the nth statement in the class */ + override Stmt getStmt(int index) { + result = Class_.super.getStmt(index) + } + + /** Gets a statement in the class */ + override Stmt getAStmt() { + result = Class_.super.getAStmt() + } + + /** Gets the name used to define this class */ + override string getName() { + result = Class_.super.getName() + } + + predicate hasSideEffects() { + any() + } + + /** Whether this is probably a mixin (has 'mixin' or similar in name or docstring) */ + predicate isProbableMixin() { + (this.getName().toLowerCase().matches("%mixin%") + or + this.getDocString().getText().toLowerCase().matches("%mixin%") + or + this.getDocString().getText().toLowerCase().matches("%mix-in%") + ) + } + + override AstNode getAChildNode() { + result = this.getAStmt() + } + + Expr getADecorator() { + result = 
this.getParent().getADecorator() + } + + /** Gets the metaclass expression */ + Expr getMetaClass() { + result = this.getParent().getMetaClass() + } + + /** Gets the ClassObject corresponding to this class */ + ClassObject getClassObject() { + result.getOrigin() = this.getParent() + } + + /** Gets the nth base of this class definition. */ + Expr getBase(int index) { + result = this.getParent().getBase(index) + } + + /** Gets a base of this class definition. */ + Expr getABase() { + result = this.getParent().getABase() + } + + /** Gets the metrics for this class */ + ClassMetrics getMetrics() { + result = this + } + + /** Gets the qualified name for this class. + * Should return the same name as the `__qualname__` attribute on classes in Python 3. + */ + string getQualifiedName() { + this.getScope() instanceof Module and result = this.getName() + or + exists(string enclosing_name | + enclosing_name = this.getScope().(Function).getQualifiedName() + or + enclosing_name = this.getScope().(Class).getQualifiedName() | + result = enclosing_name + "." 
+ this.getName() + ) + } + + override + predicate containsInScope(AstNode inner) { + Scope.super.containsInScope(inner) + } + + override + predicate contains(AstNode inner) { + Scope.super.contains(inner) + } + +} + diff --git a/python/ql/src/semmle/python/Comment.qll b/python/ql/src/semmle/python/Comment.qll new file mode 100644 index 00000000000..c40de34478b --- /dev/null +++ b/python/ql/src/semmle/python/Comment.qll @@ -0,0 +1,110 @@ +import python + +/** A source code comment */ +class Comment extends @py_comment { + + /** Gets the full text of the comment including the leading '#' */ + string getText() { + py_comments(this, result, _) + } + + /** Gets the contents of the comment excluding the leading '#' */ + string getContents() { + result = this.getText().suffix(1) + } + + Location getLocation() { + py_comments(this, _, result) + + } + + string toString() { + result = "Comment " + this.getText() + } + + /** Gets this immediately following comment. + * Blanks line are allowed between this comment and the following comment, + * but code or other comments are not. 
+ */ + Comment getFollowing() { + exists(File f, int n | + this.file_line(f, n) | + result.file_line(f, n+1) + or + result.file_line(f, n+2) and f.emptyLine(n+1) + or + result.file_line(f, n+3) and f.emptyLine(n+2) and f.emptyLine(n+1) + ) + } + + private predicate file_line(File f, int n) { + this.getLocation().getFile() = f and + this.getLocation().getStartLine() = n + } + +} + +private predicate comment_block_part(Comment start, Comment part, int i) { + not exists(Comment prev | prev.getFollowing() = part) and + exists(Comment following | part.getFollowing() = following) and + start = part and i = 1 + or + exists(Comment prev | + comment_block_part(start, prev, i-1) and + part = prev.getFollowing() + ) +} + +/** A block of consecutive comments */ +class CommentBlock extends @py_comment { + + CommentBlock() { + comment_block_part(this, _, _) + } + + private Comment last() { + comment_block_part(this, result, this.length()) + } + + string toString() { + result = "Comment block" + } + + /** The length of this comment block (in comments) */ + int length() { + result = max(int i | comment_block_part(this, _, i)) + } + + predicate hasLocationInfo(string filepath, int bl, int bc, int el, int ec) { + ((Comment)this).getLocation().hasLocationInfo(filepath, bl, bc, _, _) + and + exists(Comment end | + end = this.last() | + end.getLocation().hasLocationInfo(_, _, _, el, ec) + ) + } + + predicate contains(Comment c) { + comment_block_part(this, c, _) + or + this = c + } + + string getContents() { + result = concat(Comment c,int i | + comment_block_part(this, c, i) or this = c and i = 0 | + c.getContents() order by i + ) + } + +} + +/** A type-hint comment. 
Any comment that starts with `# type:` */ +class TypeHintComment extends Comment { + + TypeHintComment() { + this.getText().regexpMatch("# +type:.*") + } + +} + diff --git a/python/ql/src/semmle/python/Comparisons.qll b/python/ql/src/semmle/python/Comparisons.qll new file mode 100644 index 00000000000..4967456e7be --- /dev/null +++ b/python/ql/src/semmle/python/Comparisons.qll @@ -0,0 +1,482 @@ + +import python + +/* A class representing the six comparison operators, ==, !=, <, <=, > and >=. + * */ +class CompareOp extends int { + + CompareOp() { + this in [1..6] + } + + /** Gets the logical inverse operator */ + CompareOp invert() { + this = eq() and result = ne() or + this = ne() and result = eq() or + this = lt() and result = ge() or + this = gt() and result = le() or + this = le() and result = gt() or + this = ge() and result = lt() + } + + /** Gets the reverse operator (swapping the operands) */ + CompareOp reverse() { + this = eq() and result = eq() or + this = ne() and result = ne() or + this = lt() and result = gt() or + this = gt() and result = lt() or + this = le() and result = ge() or + this = ge() and result = le() + } + + string repr() { + this = eq() and result = "==" or + this = ne() and result = "!=" or + this = lt() and result = "<" or + this = gt() and result = ">" or + this = le() and result = "<=" or + this = ge() and result = ">=" + } + + predicate forOp(Cmpop op) { + op instanceof Eq and this = eq() or + op instanceof NotEq and this = ne() or + op instanceof Lt and this = lt() or + op instanceof LtE and this = le() or + op instanceof Gt and this = gt() or + op instanceof GtE and this = ge() + } + + /** Return this if isTrue is true, otherwise returns the inverse */ + CompareOp conditional(boolean isTrue) { + result = this and isTrue = true + or + result = this.invert() and isTrue = false + } + +} + +CompareOp eq() { result = 1 } +CompareOp ne() { result = 2 } +CompareOp lt() { result = 3 } +CompareOp le() { result = 4 } +CompareOp gt() { 
result = 5 } +CompareOp ge() { result = 6 } + +/* Workaround precision limits in floating point numbers */ +bindingset[x] private predicate ok_magnitude(float x) { + x > -9007199254740992.0 // -2**53 + and + x < 9007199254740992.0 // 2**53 +} + +bindingset[x,y] private float add(float x, float y) { + ok_magnitude(x) and + ok_magnitude(y) and + ok_magnitude(result) and + result = x + y +} + +bindingset[x,y] private float sub(float x, float y) { + ok_magnitude(x) and + ok_magnitude(y) and + ok_magnitude(result) and + result = x - y +} + +/** Normalise equality cmp into the form `left op right + k`. */ +private predicate test(ControlFlowNode cmp, ControlFlowNode left, CompareOp op, ControlFlowNode right, float k) { + simple_test(cmp, left, op, right) and k = 0 + or + add_test(cmp, left, op, right, k) + or + not_test(cmp, left, op, right, k) + or + subtract_test(cmp, left, op, right, k) + or + exists(float c | test(cmp, right, op.reverse(), left, c) and k = -c) +} + +/** Various simple tests in left op right + k form. 
*/ +private predicate simple_test(CompareNode cmp, ControlFlowNode l, CompareOp cmpop, ControlFlowNode r) { + exists(Cmpop op | + cmp.operands(l, op, r) and cmpop.forOp(op) + ) +} + +private predicate add_test_left(CompareNode cmp, ControlFlowNode l, CompareOp op, ControlFlowNode r, float k) { + exists(BinaryExprNode lhs, float c, float x, Num n | + lhs.getNode().getOp() instanceof Add and + test(cmp, lhs, op, r, c) and x = n.getN().toFloat() and k = sub(c, x) | + l = lhs.getLeft() and n = lhs.getRight().getNode() + or + l = lhs.getRight() and n = lhs.getLeft().getNode() + ) +} + +private predicate add_test_right(CompareNode cmp, ControlFlowNode l, CompareOp op, ControlFlowNode r, float k) { + exists(BinaryExprNode rhs, float c, float x, Num n | + rhs.getNode().getOp() instanceof Add and + test(cmp, l, op, rhs, c) and x = n.getN().toFloat() and k = add(c, x) | + r = rhs.getLeft() and n = rhs.getRight().getNode() + or + r = rhs.getRight() and n = rhs.getLeft().getNode() + ) +} + +/* left + x op right + c => left op right + (c-x) + left op (right + x) + c => left op right + (c+x) */ +private predicate add_test(CompareNode cmp, ControlFlowNode l, CompareOp op, ControlFlowNode r, float k) { + add_test_left(cmp, l, op, r, k) + or + add_test_right(cmp, l, op ,r, k) +} + +private predicate subtract_test_left(CompareNode cmp, ControlFlowNode l, CompareOp op, ControlFlowNode r, float k) { + exists(BinaryExprNode lhs, float c, float x, Num n | + lhs.getNode().getOp() instanceof Sub and + test(cmp, lhs, op, r, c) and + l = lhs.getLeft() and n = lhs.getRight().getNode() and + x = n.getN().toFloat() | + k = add(c, x) + ) +} + +private predicate subtract_test_right(CompareNode cmp, ControlFlowNode l, CompareOp op, ControlFlowNode r, float k) { + exists(BinaryExprNode rhs, float c, float x, Num n | + rhs.getNode().getOp() instanceof Sub and + test(cmp, l, op, rhs, c) and + r = rhs.getRight() and n = rhs.getLeft().getNode() and + x = n.getN().toFloat() | + k = sub(c, x) + ) +} + 
+/* left - x op right + c => left op right + (c+x) + left op (right - x) + c => left op right + (c-x) */ +private predicate subtract_test(CompareNode cmp, ControlFlowNode l, CompareOp op, ControlFlowNode r, float k) { + subtract_test_left(cmp, l, op, r, k) + or + subtract_test_right(cmp, l, op, r, k) +} + +private predicate not_test(UnaryExprNode u, ControlFlowNode l, CompareOp op, ControlFlowNode r, float k) { + u.getNode().getOp() instanceof Not + and + test(u.getOperand(), l, op.invert(), r, k) +} + + +/** A comparison which can be simplified to the canonical form `x OP y + k` where `x` and `y` are `ControlFlowNode`s, + * `k` is a floating point constant and `OP` is one of `==`, `!=`, `<`, `<=`, `>` or `>=` (see `CompareOp`). + */ +class Comparison extends ControlFlowNode { + + Comparison() { + test(this, _, _, _, _) + } + + /** Whether this condition tests `l op r + k` */ + predicate tests(ControlFlowNode l, CompareOp op, ControlFlowNode r, float k) { + test(this, l, op, r, k) + } + + /** Whether this condition tests `l op k` */ + predicate tests(ControlFlowNode l, CompareOp op, float k) { + exists(ControlFlowNode r, float x, float c | + test(this, l, op, r, c) | + x = r.getNode().(Num).getN().toFloat() and + k = add(c, x) + ) + } + + /* The following predicates determine whether this test, when its result is `thisIsTrue`, + * is equivalent to the predicate `v OP k` or `v1 OP v2 + k`. + * For example, the test `x <= y` being false, is equivalent to the predicate `x > y`. 
+ */ + + private predicate equivalentToEq(boolean thisIsTrue, SsaVariable v, float k) { + this.tests(v.getAUse(), eq().conditional(thisIsTrue), k) + } + + private predicate equivalentToNotEq(boolean thisIsTrue, SsaVariable v, float k) { + this.tests(v.getAUse(), ne().conditional(thisIsTrue), k) + } + + private predicate equivalentToLt(boolean thisIsTrue, SsaVariable v, float k) { + this.tests(v.getAUse(), lt().conditional(thisIsTrue), k) + } + + private predicate equivalentToLtEq(boolean thisIsTrue, SsaVariable v, float k) { + this.tests(v.getAUse(), le().conditional(thisIsTrue), k) + } + + private predicate equivalentToGt(boolean thisIsTrue, SsaVariable v, float k) { + this.tests(v.getAUse(), gt().conditional(thisIsTrue), k) + } + + private predicate equivalentToGtEq(boolean thisIsTrue, SsaVariable v, float k) { + this.tests(v.getAUse(), ge().conditional(thisIsTrue), k) + } + + private predicate equivalentToEq(boolean thisIsTrue, SsaVariable v1, SsaVariable v2, float k) { + this.tests(v1.getAUse(), eq().conditional(thisIsTrue), v2.getAUse(), k) + } + + private predicate equivalentToNotEq(boolean thisIsTrue, SsaVariable v1, SsaVariable v2, float k) { + this.tests(v1.getAUse(), ne().conditional(thisIsTrue), v2.getAUse(), k) + } + + private predicate equivalentToLt(boolean thisIsTrue, SsaVariable v1, SsaVariable v2, float k) { + this.tests(v1.getAUse(), lt().conditional(thisIsTrue), v2.getAUse(), k) + } + + private predicate equivalentToLtEq(boolean thisIsTrue, SsaVariable v1, SsaVariable v2, float k) { + this.tests(v1.getAUse(), le().conditional(thisIsTrue), v2.getAUse(), k) + } + + private predicate equivalentToGt(boolean thisIsTrue, SsaVariable v1, SsaVariable v2, float k) { + this.tests(v1.getAUse(), gt().conditional(thisIsTrue), v2.getAUse(), k) + } + + private predicate equivalentToGtEq(boolean thisIsTrue, SsaVariable v1, SsaVariable v2, float k) { + this.tests(v1.getAUse(), ge().conditional(thisIsTrue), v2.getAUse(), k) + } + + /** Whether the result of this 
comparison being `thisIsTrue` implies that the result of `that` is `thatIsTrue`. + * In other words, does the predicate that is equivalent to the result of `this` being `thisIsTrue` + * imply the predicate that is equivalent to the result of `that` being `thatIsTrue`. + * For example, assume that there are two tests, which when normalised have the form `x < y` and `x > y + 1`. + * Then the test `x < y` having a true result, implies that the test `x > y + 1` will have a false result. + * (`x < y` having a false result implies nothing about `x > y + 1`) + */ + predicate impliesThat(boolean thisIsTrue, Comparison that, boolean thatIsTrue) { + /* `v == k` => `v == k` */ + exists(SsaVariable v, float k1, float k2 | + this.equivalentToEq(thisIsTrue, v, k1) and + that.equivalentToEq(thatIsTrue, v, k2) and + eq(k1, k2) + or + this.equivalentToNotEq(thisIsTrue, v, k1) and + that.equivalentToNotEq(thatIsTrue, v, k2) and + eq(k1, k2) + ) + or + exists(SsaVariable v, float k1, float k2 | + /* `v < k1` => `v != k2` iff k1 <= k2 */ + this.equivalentToLt(thisIsTrue, v, k1) and + that.equivalentToNotEq(thatIsTrue, v, k2) and + le(k1, k2) + or + /* `v <= k1` => `v != k2` iff k1 < k2 */ + this.equivalentToLtEq(thisIsTrue, v, k1) and + that.equivalentToNotEq(thatIsTrue, v, k2) and + lt(k1, k2) + or + /* `v > k1` => `v != k2` iff k1 >= k2 */ + this.equivalentToGt(thisIsTrue, v, k1) and + that.equivalentToNotEq(thatIsTrue, v, k2) and + ge(k1, k2) + or + /* `v >= k1` => `v != k2` iff k1 > k2 */ + this.equivalentToGtEq(thisIsTrue, v, k1) and + that.equivalentToNotEq(thatIsTrue, v, k2) and + gt(k1, k2) + ) + or + exists(SsaVariable v, float k1, float k2 | + /* `v < k1` => `v < k2` iff k1 <= k2 */ + this.equivalentToLt(thisIsTrue, v, k1) and + that.equivalentToLt(thatIsTrue, v, k2) and + le(k1, k2) + or + /* `v < k1` => `v <= k2` iff k1 <= k2 */ + this.equivalentToLt(thisIsTrue, v, k1) and + that.equivalentToLtEq(thatIsTrue, v, k2) and + le(k1, k2) + or + /* `v <= k1` => `v < k2` iff k1 < 
k2 */ + this.equivalentToLtEq(thisIsTrue, v, k1) and + that.equivalentToLt(thatIsTrue, v, k2) and + lt(k1, k2) + or + /* `v <= k1` => `v <= k2` iff k1 <= k2 */ + this.equivalentToLtEq(thisIsTrue, v, k1) and + that.equivalentToLtEq(thatIsTrue, v, k2) and + le(k1, k2) + ) + or + exists(SsaVariable v, float k1, float k2 | + /* `v > k1` => `v > k2` iff k1 >= k2 */ + this.equivalentToGt(thisIsTrue, v, k1) and + that.equivalentToGt(thatIsTrue, v, k2) and + ge(k1, k2) + or + /* `v > k1` => `v >= k2` iff k1 >= k2 */ + this.equivalentToGt(thisIsTrue, v, k1) and + that.equivalentToGtEq(thatIsTrue, v, k2) and + ge(k1, k2) + or + /* `v >= k1` => `v > k2` iff k1 > k2 */ + this.equivalentToGtEq(thisIsTrue, v, k1) and + that.equivalentToGt(thatIsTrue, v, k2) and + gt(k1, k2) + or + /* `v >= k1` => `v >= k2` iff k1 >= k2 */ + this.equivalentToGtEq(thisIsTrue, v, k1) and + that.equivalentToGtEq(thatIsTrue, v, k2) and + ge(k1, k2) + ) + or + exists(SsaVariable v1, SsaVariable v2, float k | + /* `v1 == v2 + k` => `v1 == v2 + k` */ + this.equivalentToEq(thisIsTrue, v1, v2, k) and + that.equivalentToEq(thatIsTrue, v1, v2, k) + or + this.equivalentToNotEq(thisIsTrue, v1, v2, k) and + that.equivalentToNotEq(thatIsTrue, v1, v2, k) + ) + or + exists(SsaVariable v1, SsaVariable v2, float k1, float k2 | + /* `v1 < v2 + k1` => `v1 != v2 + k2` iff k1 <= k2 */ + this.equivalentToLt(thisIsTrue, v1, v2, k1) and + that.equivalentToNotEq(thatIsTrue, v1, v2, k2) and + le(k1, k2) + or + /* `v1 <= v2 + k1` => `v1 != v2 + k2` iff k1 < k2 */ + this.equivalentToLtEq(thisIsTrue, v1, v2, k1) and + that.equivalentToNotEq(thatIsTrue, v1, v2, k2) and + lt(k1, k2) + or + /* `v1 > v2 + k1` => `v1 != v2 + k2` iff k1 >= k2 */ + this.equivalentToGt(thisIsTrue, v1, v2, k1) and + that.equivalentToNotEq(thatIsTrue, v1, v2, k2) and + ge(k1, k2) + or + /* `v1 >= v2 + k1` => `v1 != v2 + k2` iff k1 > k2 */ + this.equivalentToGtEq(thisIsTrue, v1, v2, k1) and + that.equivalentToNotEq(thatIsTrue, v1, v2, k2) and + gt(k1, 
k2) + ) + or + exists(SsaVariable v1, SsaVariable v2, float k1, float k2 | + /* `v1 < v2 + k1` => `v1 < v2 + k2` iff k1 <= k2 (mirrors the single-variable `<` case above) */ + this.equivalentToLt(thisIsTrue, v1, v2, k1) and + that.equivalentToLt(thatIsTrue, v1, v2, k2) and + le(k1, k2) + or + /* `v1 < v2 + k1` => `v1 <= v2 + k2` iff k1 <= k2 */ + this.equivalentToLt(thisIsTrue, v1, v2, k1) and + that.equivalentToLtEq(thatIsTrue, v1, v2, k2) and + le(k1, k2) + or + /* `v1 <= v2 + k1` => `v1 < v2 + k2` iff k1 < k2 */ + this.equivalentToLtEq(thisIsTrue, v1, v2, k1) and + that.equivalentToLt(thatIsTrue, v1, v2, k2) and + lt(k1, k2) + or + /* `v1 <= v2 + k1` => `v1 <= v2 + k2` iff k1 <= k2 */ + this.equivalentToLtEq(thisIsTrue, v1, v2, k1) and + that.equivalentToLtEq(thatIsTrue, v1, v2, k2) and + le(k1, k2) + ) + or + exists(SsaVariable v1, SsaVariable v2, float k1, float k2 | + /* `v1 > v2 + k1` => `v1 > v2 + k2` iff k1 >= k2 */ + this.equivalentToGt(thisIsTrue, v1, v2, k1) and + that.equivalentToGt(thatIsTrue, v1, v2, k2) and + ge(k1, k2) + or + /* `v1 > v2 + k1` => `v1 >= v2 + k2` iff k1 >= k2 */ + this.equivalentToGt(thisIsTrue, v1, v2, k1) and + that.equivalentToGtEq(thatIsTrue, v1, v2, k2) and + ge(k1, k2) + or + /* `v1 >= v2 + k1` => `v1 > v2 + k2` iff k1 > k2 */ + this.equivalentToGtEq(thisIsTrue, v1, v2, k1) and + that.equivalentToGt(thatIsTrue, v1, v2, k2) and + gt(k1, k2) + or + /* `v1 >= v2 + k1` => `v1 >= v2 + k2` iff k1 >= k2 */ + this.equivalentToGtEq(thisIsTrue, v1, v2, k1) and + that.equivalentToGtEq(thatIsTrue, v1, v2, k2) and + ge(k1, k2) + ) + } + +} + +/* Work around differences in floating-point comparisons between Python and QL */ +private predicate is_zero(float x) { + x = 0.0 + or + x = -0.0 +} + +bindingset[x,y] private predicate lt(float x, float y) { + if is_zero(x) then + y > 0 + else + x < y +} + +bindingset[x,y] private predicate eq(float x, float y) { + if is_zero(x) then + is_zero(y) + else + x = y +} + +bindingset[x,y] private predicate gt(float x, float y) { + lt(y, x) +} + 
+bindingset[x,y] private predicate le(float x, float y) { + lt(x, y) or eq(x, y) +} + +bindingset[x,y] private predicate ge(float x, float y) { + lt(y, x) or eq(x, y) +} + + +/** A basic block which terminates in a condition, splitting the subsequent control flow, + * in which the condition is an instance of `Comparison` + */ +class ComparisonControlBlock extends ConditionBlock { + + ComparisonControlBlock() { + this.getLastNode() instanceof Comparison + } + + /** Whether this conditional guard determines that `l op r + k` holds in block `b`: either the test is `l op r + k` and `b` is controlled by its true outcome, or the test is the inverse of `op` and `b` is controlled by its false outcome. */ + predicate controls(ControlFlowNode l, CompareOp op, ControlFlowNode r, float k, BasicBlock b) { + exists(boolean control | + this.controls(b, control) and this.getTest().tests(l, op, r, k) and control = true + or + this.controls(b, control) and this.getTest().tests(l, op.invert(), r, k) and control = false + ) + } + + /** Whether this conditional guard determines that `l op k` holds in block `b`: either the test is `l op k` and `b` is controlled by its true outcome, or the test is the inverse of `op` and `b` is controlled by its false outcome. */ + predicate controls(ControlFlowNode l, CompareOp op, float k, BasicBlock b) { + exists(boolean control | + this.controls(b, control) and this.getTest().tests(l, op, k) and control = true + or + this.controls(b, control) and this.getTest().tests(l, op.invert(), k) and control = false + ) + } + + Comparison getTest() { + this.getLastNode() = result + } + + /** Whether this conditional guard implies that, in block `b`, the result of `that` is `thatIsTrue` */ + predicate impliesThat(BasicBlock b, Comparison that, boolean thatIsTrue) { + exists(boolean controlSense | + this.controls(b, controlSense) and + this.getTest().impliesThat(controlSense, that, thatIsTrue) + ) + } + +} diff --git a/python/ql/src/semmle/python/Comprehensions.qll b/python/ql/src/semmle/python/Comprehensions.qll new file mode 100644 index 00000000000..eec5dd372f6 --- /dev/null +++ b/python/ql/src/semmle/python/Comprehensions.qll @@ -0,0 +1,154 @@ +import python + 
+/** Base class for list, set and dictionary comprehensions, and generator expressions. */ +abstract class Comp extends Expr { + + abstract Function getFunction(); + + /** Gets the iteration variable for the nth generator of this comprehension, where n = 0 is the outermost loop */ + Variable getIterationVariable(int n) { + result.getAnAccess() = this.getNthInnerLoop(n).getTarget() + } + + private For getNthInnerLoop(int n) { + n = 0 and result = this.getFunction().getStmt(0) + or + result = this.getNthInnerLoop(n-1).getStmt(0) + } + + /** Gets the iteration variable for a generator of this comprehension */ + Variable getAnIterationVariable() { + result = this.getIterationVariable(_) + } + + /** Gets the scope in which the body of this comprehension evaluates. */ + Scope getEvaluatingScope() { + result = this.getFunction() + } + + /** Gets the expression for elements of this comprehension. */ + Expr getElt() { + exists(Yield yield, Stmt body | + result = yield.getValue() and + body = this.getNthInnerLoop(_).getAStmt() | + yield = body.(ExprStmt).getValue() + or + yield = body.(If).getStmt(0).(ExprStmt).getValue() + ) + } + +} + +/** A list comprehension, such as `[ chr(x) for x in range(ord('A'), ord('Z')+1) ]` */ +class ListComp extends ListComp_, Comp { + + override Expr getASubExpression() { + result = this.getAGenerator().getASubExpression() or + result = this.getElt() or + result = this.getIterable() + } + + override AstNode getAChildNode() { + result = this.getAGenerator() or + result = this.getIterable() or + result = this.getFunction() + } + + override predicate hasSideEffects() { + any() + } + + /** Gets the scope in which the body of this list comprehension evaluates: the enclosing scope for Python 2, the comprehension's own function for Python 3. 
*/ + override Scope getEvaluatingScope() { + major_version() = 2 and result = this.getScope() + or + major_version() = 3 and result = this.getFunction() + } + + /** Gets the iteration variable for the nth generator of this list comprehension, where n = 0 is the outermost loop */ + override Variable getIterationVariable(int n) { + result = Comp.super.getIterationVariable(n) + } + + override Function getFunction() { + result = ListComp_.super.getFunction() + } + + override string toString() { + result = ListComp_.super.toString() + } + + override Expr getElt() { + result = Comp.super.getElt() + } + +} + + +/** A set comprehension such as `{ v for v in "0123456789" }` */ +class SetComp extends SetComp_, Comp { + + override Expr getASubExpression() { + result = this.getIterable() + } + + override AstNode getAChildNode() { + result = this.getASubExpression() or + result = this.getFunction() + } + + override predicate hasSideEffects() { + any() + } + + override Function getFunction() { + result = SetComp_.super.getFunction() + } + +} + +/** A dictionary comprehension, such as `{ k:v for k, v in enumerate("0123456789") }` */ +class DictComp extends DictComp_, Comp { + + override Expr getASubExpression() { + result = this.getIterable() + } + + override AstNode getAChildNode() { + result = this.getASubExpression() or + result = this.getFunction() + } + + override predicate hasSideEffects() { + any() + } + + override Function getFunction() { + result = DictComp_.super.getFunction() + } + +} + + +/** A generator expression, such as `(var for var in iterable)` */ +class GeneratorExp extends GeneratorExp_, Comp { + + override Expr getASubExpression() { + result = this.getIterable() + } + + override AstNode getAChildNode() { + result = this.getASubExpression() or + result = this.getFunction() + } + + override predicate hasSideEffects() { + any() + } + + override Function getFunction() { + result = GeneratorExp_.super.getFunction() + } + +} + diff --git a/python/ql/src/semmle/python/Constants.qll 
b/python/ql/src/semmle/python/Constants.qll new file mode 100644 index 00000000000..c95f6bf9dc5 --- /dev/null +++ b/python/ql/src/semmle/python/Constants.qll @@ -0,0 +1,39 @@ +/** Standard builtin types and modules */ + +import python + +/** Gets the Python major version number */ +int major_version() { + explicit_major_version(result) + or + not explicit_major_version(_) and + /* If there is more than one version, prefer 2 for backwards compatibility */ + ( + if py_flags_versioned("version.major", "2", "2") then + result = 2 + else + result = 3 + ) +} + +/** Gets the Python minor version number */ +int minor_version() { + exists(string v | py_flags_versioned("version.minor", v, major_version().toString()) | + result = v.toInt()) + +} + +/** Gets the Python micro version number */ +int micro_version() { + exists(string v | py_flags_versioned("version.micro", v, major_version().toString()) | + result = v.toInt()) +} + +private predicate explicit_major_version(int v) { + exists(string version | + py_flags_versioned("language.version", version, _) | + version.charAt(0) = "2" and v = 2 + or + version.charAt(0) = "3" and v = 3 + ) +} diff --git a/python/ql/src/semmle/python/Exprs.qll b/python/ql/src/semmle/python/Exprs.qll new file mode 100644 index 00000000000..30f34b0a549 --- /dev/null +++ b/python/ql/src/semmle/python/Exprs.qll @@ -0,0 +1,854 @@ +import python +private import semmle.python.pointsto.PointsTo + +/** An expression */ +class Expr extends Expr_, AstNode { + + /** Gets the scope of this expression */ + override Scope getScope() { + py_scopes(this, result) + } + + override string toString() { + result = "Expression" + } + + /** Gets the module in which this expression occurs */ + Module getEnclosingModule() { + result = this.getScope().getEnclosingModule() + } + + /** Whether this expression defines variable `v` + * If doing dataflow, then consider using SsaVariable.getDefinition() for more precision. 
*/ + predicate defines(Variable v) { + this.getASubExpression+().defines(v) + } + + /** Whether this expression may have a side effect (as determined purely from its syntax) */ + predicate hasSideEffects() { + /* If an exception raised by this expression is handled, count that as a side effect */ + this.getAFlowNode().getASuccessor().getNode() instanceof ExceptStmt + or + this.getASubExpression().hasSideEffects() + } + + /** Whether this expression is a constant */ + predicate isConstant() { + not this.isVariable() + } + + /** Use isParenthesized instead. */ + override deprecated predicate isParenthesised() { + this.isParenthesized() + } + + /** Whether the parenthesized property of this expression is true. */ + predicate isParenthesized() { + Expr_.super.isParenthesised() + } + + private predicate isVariable() { + this.hasSideEffects() or + this instanceof Name or + exists(Expr e | e = this.getASubExpression() and e.isVariable()) + } + + override Location getLocation() { + result = Expr_.super.getLocation() + } + + /** Gets an immediate (non-nested) sub-expression of this expression */ + Expr getASubExpression() { + none() + } + + /** Use StrConst.getText() instead */ + deprecated string strValue() { + none() + } + + override AstNode getAChildNode() { + result = this.getASubExpression() + } + + /** Gets what this expression might "refer-to". Performs a combination of localized (intra-procedural) points-to + * analysis and global module-level analysis. This points-to analysis favours precision over recall. It is highly + * precise, but may not provide information for a significant number of flow-nodes. + * If the class is unimportant then use `refersTo(value)` or `refersTo(value, origin)` instead. + * NOTE: For complex dataflow, involving multiple stages of points-to analysis, it may be more precise to use + * `ControlFlowNode.refersTo(...)` instead. 
+ */ + predicate refersTo(Object value, ClassObject cls, AstNode origin) { + not py_special_objects(cls, "_semmle_unknown_type") + and + not value = unknownValue() + and + PointsTo::points_to(this.getAFlowNode(), _, value, cls, origin.getAFlowNode()) + } + + /** Gets what this expression might "refer-to" in the given `context`. + */ + predicate refersTo(Context context, Object value, ClassObject cls, AstNode origin) { + not py_special_objects(cls, "_semmle_unknown_type") + and + PointsTo::points_to(this.getAFlowNode(), context, value, cls, origin.getAFlowNode()) + } + + /** Whether this expression might "refer-to" to `value` which is from `origin` + * Unlike `this.refersTo(value, _, origin)`, this predicate includes results + * where the class cannot be inferred. + */ + predicate refersTo(Object value, AstNode origin) { + PointsTo::points_to(this.getAFlowNode(), _, value, _, origin.getAFlowNode()) + and + not value = unknownValue() + } + + /** Equivalent to `this.refersTo(value, _)` */ + predicate refersTo(Object value) { + PointsTo::points_to(this.getAFlowNode(), _, value, _, _) + and + not value = unknownValue() + } + +} + +/** An attribute expression, such as `value.attr` */ +class Attribute extends Attribute_ { + + override Expr getASubExpression() { + result = this.getObject() + } + + override AttrNode getAFlowNode() { result = super.getAFlowNode() } + + + /** Gets the name of this attribute. That is the `name` in `obj.name` */ + string getName() { + result = Attribute_.super.getAttr() + } + + /** Gets the object of this attribute. That is the `obj` in `obj.name` */ + Expr getObject() { + result = Attribute_.super.getValue() + } + + /** Gets the expression corresponding to the object of the attribute, if the name of the attribute is `name`. + * Equivalent to `this.getObject() and this.getName() = name`. 
*/ + Expr getObject(string name) { + result = Attribute_.super.getValue() and + name = Attribute_.super.getAttr() + } + +} + +/** A subscript expression, such as `value[slice]` */ +class Subscript extends Subscript_ { + + override Expr getASubExpression() { + result = this.getIndex() + or + result = this.getObject() + } + + Expr getObject() { + result = Subscript_.super.getValue() + } + + override SubscriptNode getAFlowNode() { result = super.getAFlowNode() } +} + +/** A call expression, such as `func(...)` */ +class Call extends Call_ { + + override Expr getASubExpression() { + result = this.getAPositionalArg() or + result = this.getAKeyword().getValue() or + result = this.getFunc() + } + + override predicate hasSideEffects() { + any() + } + + override string toString() { + result = this.getFunc().toString() + "()" + } + + override CallNode getAFlowNode() { result = super.getAFlowNode() } + + /** Gets a tuple (*) argument of this call expression. */ + Expr getStarargs() { + result = this.getAPositionalArg().(Starred).getValue() + } + + /** Gets a dictionary (**) argument of this call expression. */ + Expr getKwargs() { + result = this.getANamedArg().(DictUnpacking).getValue() + } + + /* Backwards compatibility */ + + /** Gets the nth keyword argument of this call expression, provided it is not preceded by a double-starred argument. + * This exists primarily for backwards compatibility. You are recommended to use + * Call.getNamedArg(index) instead. + * */ + Keyword getKeyword(int index) { + result = this.getNamedArg(index) and not exists(DictUnpacking d, int lower | d = this.getNamedArg(lower) and lower < index) + } + + /** Gets a keyword argument of this call expression, provided it is not preceded by a double-starred argument. + * This exists primarily for backwards compatibility. You are recommended to use + * Call.getANamedArg() instead. 
+ * */ + Keyword getAKeyword() { + result = this.getKeyword(_) + } + + /** Gets the positional argument at `index`, provided it is not preceded by a starred argument. + * This exists primarily for backwards compatibility. You are recommended to use + * Call.getPositionalArg(index) instead. + */ + Expr getArg(int index) { + result = this.getPositionalArg(index) and + not result instanceof Starred and + not exists(Starred s, int lower | s = this.getPositionalArg(lower) and lower < index) + } + + /** Gets a positional argument, provided it is not preceded by a starred argument. + * This exists primarily for backwards compatibility. You are recommended to use + * Call.getAPositionalArg() instead. + */ + Expr getAnArg() { + result = this.getArg(_) + } + + override AstNode getAChildNode() { + result = this.getAPositionalArg() or + result = this.getANamedArg() or + result = this.getFunc() + } + + /** Gets the name of a named argument, including those passed in dict literals. */ + string getANamedArgumentName() { + result = this.getAKeyword().getArg() + or + result = this.getKwargs().(Dict).getAKey().(StrConst).getText() + } + +} + +/** A conditional expression such as, `body if test else orelse` */ +class IfExp extends IfExp_ { + + override Expr getASubExpression() { + result = this.getTest() or result = this.getBody() or result = this.getOrelse() + } + + override IfExprNode getAFlowNode() { result = super.getAFlowNode() } +} + +/** A starred expression, such as the `*rest` in the assignment `first, *rest = seq` */ +class Starred extends Starred_ { + + override Expr getASubExpression() { + result = this.getValue() + } + +} + + +/** A yield expression, such as `yield value` */ +class Yield extends Yield_ { + + override Expr getASubExpression() { + result = this.getValue() + } + + override predicate hasSideEffects() { + any() + } + +} + +/** A yield expression, such as `yield from value` */ +class YieldFrom extends YieldFrom_ { + + override Expr getASubExpression() { + 
result = this.getValue() + } + + override predicate hasSideEffects() { + any() + } + +} + +/** A repr (backticks) expression, such as `` `value` `` */ +class Repr extends Repr_ { + + override Expr getASubExpression() { + result = this.getValue() + } + + override predicate hasSideEffects() { + any() + } + +} + +/* Constants */ + +/** A bytes constant, such as `b'ascii'`. Note that unadorned string constants such as + `"hello"` are treated as Bytes for Python2, but Unicode for Python3. */ +class Bytes extends StrConst { + + Bytes() { + not this.isUnicode() + } + + override Object getLiteralObject() { + py_cobjecttypes(result, theBytesType()) and + py_cobjectnames(result, this.quotedString()) + } + + /** The extractor puts quotes into the name of each string (to prevent "0" clashing with 0). + * The following predicate helps us match up a string/bytes literal in the source + * with the equivalent object. + */ + private string quotedString() { + exists(string b_unquoted | + b_unquoted = this.getS() | + result = "b'" + b_unquoted + "'" + ) + } + +} + +/** An ellipsis expression, such as `...` */ +class Ellipsis extends Ellipsis_ { + + override Expr getASubExpression() { + none() + } + +} + +/** Immutable literal expressions (except tuples). + * Consists of string (both unicode and byte) literals + * and numeric literals. + */ +abstract class ImmutableLiteral extends Expr { + + abstract Object getLiteralObject(); + + abstract boolean booleanValue(); + +} + +/** A numerical constant expression, such as `7` or `4.2` */ +abstract class Num extends Num_, ImmutableLiteral { + + override Expr getASubExpression() { + none() + } + + /* We want to declare this abstract, but currently we cannot. 
*/ + override string toString() { + none() + } + +} + +/** An integer numeric constant, such as `7` or `0x9` */ +class IntegerLiteral extends Num { + + IntegerLiteral() { + not this instanceof FloatLiteral and not this instanceof ImaginaryLiteral + } + + /** Gets the (integer) value of this constant. Will not return a result if the value does not fit into + a 32 bit signed value */ + int getValue() { + result = this.getN().toInt() + } + + override string toString() { + result = "IntegerLiteral" + } + + override Object getLiteralObject() { + py_cobjecttypes(result, theIntType()) and py_cobjectnames(result, this.getN()) + or + py_cobjecttypes(result, theLongType()) and py_cobjectnames(result, this.getN()) + } + + override boolean booleanValue() { + this.getValue() = 0 and result = false + or + this.getValue() != 0 and result = true + } + +} + +/** A floating point numeric constant, such as `0.4` or `4e3` */ +class FloatLiteral extends Num { + + FloatLiteral() { + not this instanceof ImaginaryLiteral and + exists(string n | n = this.getN() | n.charAt(_) = "." 
or n.charAt(_) = "e" or n.charAt(_) = "E") + } + + float getValue() { + result = this.getN().toFloat() + } + + override string toString() { + result = "FloatLiteral" + } + + override Object getLiteralObject() { + py_cobjecttypes(result, theFloatType()) and py_cobjectnames(result, this.getN()) + } + + override boolean booleanValue() { + this.getValue() = 0.0 and result = false + or + // In QL 0.0 != -0.0 + this.getValue() = -0.0 and result = false + or + this.getValue() != 0.0 and this.getValue() != -0.0 and result = true + } + +} + +/** An imaginary numeric constant, such as `3j` */ +class ImaginaryLiteral extends Num { + + ImaginaryLiteral() { + exists(string n | n = this.getN() | n.charAt(_) = "j") + } + + /** Gets the value of this constant as a floating point value */ + float getValue() { + exists(string s, int j | s = this.getN() and s.charAt(j) = "j" | + result = s.prefix(j).toFloat()) + } + + override string toString() { + result = "ImaginaryLiteral" + } + + override Object getLiteralObject() { + py_cobjecttypes(result, theComplexType()) and py_cobjectnames(result, this.getN()) + } + + override boolean booleanValue() { + this.getValue() = 0.0 and result = false + or + // In QL 0.0 != -0.0 + this.getValue() = -0.0 and result = false + or + this.getValue() != 0.0 and this.getValue() != -0.0 and result = true + } + +} + +/** A unicode string expression, such as `u"\u20ac"`. Note that unadorned string constants such as + "hello" are treated as Bytes for Python2, but Unicode for Python3. */ +class Unicode extends StrConst { + + Unicode() { + this.isUnicode() + } + + override Object getLiteralObject() { + py_cobjecttypes(result, theUnicodeType()) and + py_cobjectnames(result, this.quotedString()) + } + + /** The extractor puts quotes into the name of each string (to prevent "0" clashing with 0). + * The following predicate helps us match up a string/bytes literal in the source + * with the equivalent object. 
+ */ + string quotedString() { + exists(string u_unquoted | + u_unquoted = this.getS() | + result = "u'" + u_unquoted + "'" + ) + } + +} + + +/* Compound Values */ + +/** A dictionary expression, such as `{'key':'value'}` */ +class Dict extends Dict_ { + + /** Gets the value of an item of this dict display */ + Expr getAValue() { + result = this.getAnItem().(DictDisplayItem).getValue() + } + + /** Gets the key of an item of this dict display, for those items that have keys + * E.g, in {'a':1, **b} this returns only 'a' + */ + Expr getAKey() { + result = this.getAnItem().(KeyValuePair).getKey() + } + + override Expr getASubExpression() { + result = this.getAValue() or result = this.getAKey() + } + + override AstNode getAChildNode() { + result = this.getAnItem() + } + +} + +/** A list expression, such as `[ 1, 3, 5, 7, 9 ]` */ +class List extends List_ { + + override Expr getASubExpression() { + result = this.getAnElt() + } + +} + +/** A set expression such as `{ 1, 3, 5, 7, 9 }` */ +class Set extends Set_ { + + override Expr getASubExpression() { + result = this.getAnElt() + } + +} + +class PlaceHolder extends PlaceHolder_ { + + string getId() { + result = this.getVariable().getId() + } + + override Expr getASubExpression() { + none() + } + + override string toString() { + result = "$" + this.getId() + } + + override NameNode getAFlowNode() { result = super.getAFlowNode() } +} + +/** A tuple expression such as `( 1, 3, 5, 7, 9 )` */ +class Tuple extends Tuple_ { + + override Expr getASubExpression() { + result = this.getAnElt() + } + +} + +/** A (plain variable) name expression, such as `var`. + * `None`, `True` and `False` are excluded. 
+ */ +class Name extends Name_ { + + string getId() { + result = this.getVariable().getId() + } + + /** Whether this expression is a definition */ + predicate isDefinition() { + py_expr_contexts(_, 5, this) or + /* Treat Param as a definition (which it is) */ + py_expr_contexts(_, 4, this) or + /* The target in an augmented assignment is also a definition (and a use) */ + exists(AugAssign aa | aa.getTarget() = this) + } + + /** Whether this expression defines variable `v` + * If doing dataflow, then consider using SsaVariable.getDefinition() for more precision. */ + override predicate defines(Variable v) { + this.isDefinition() + and + v = this.getVariable() + } + + /** Whether this expression is a deletion */ + predicate isDeletion() { + py_expr_contexts(_, 2, this) + } + + /** Whether this expression deletes variable `v`. + * If doing dataflow, then consider using SsaVariable.getDefinition() for more precision. */ + predicate deletes(Variable v) { + this.isDeletion() + and + v = this.getVariable() + } + + /** Whether this expression is a use */ + predicate isUse() { + py_expr_contexts(_, 3, this) + } + + /** Whether this expression is a use of variable `v` + * If doing dataflow, then consider using SsaVariable.getAUse() for more precision. */ + predicate uses(Variable v) { + this.isUse() + and + v = this.getVariable() + } + + override predicate isConstant() { + none() + } + + override Expr getASubExpression() { + none() + } + + override string toString() { + result = this.getId() + } + + override NameNode getAFlowNode() { result = super.getAFlowNode() } + + override predicate isArtificial() { + /* Artificial variable names in comprehensions all start with "." */ + this.getId().charAt(0) = "." + } + +} + +class Filter extends Filter_ { + + override Expr getASubExpression() { + result = this.getFilter() + or + result = this.getValue() + } + +} + + +/** A slice. 
E.g `0:1` in the expression `x[0:1]` */ +class Slice extends Slice_ { + + override Expr getASubExpression() { + result = this.getStart() or + result = this.getStop() or + result = this.getStep() + } + +} + +/** A string constant. */ +class StrConst extends Str_, ImmutableLiteral { + + predicate isUnicode() { + this.getPrefix().charAt(_) = "u" + or + this.getPrefix().charAt(_) = "U" + or + not this.getPrefix().charAt(_) = "b" and major_version() = 3 + or + not this.getPrefix().charAt(_) = "b" and this.getEnclosingModule().hasFromFuture("unicode_literals") + } + + override + string strValue() { + result = this.getS() + } + + override Expr getASubExpression() { + none() + } + + override AstNode getAChildNode() { + result = this.getAnImplicitlyConcatenatedPart() + } + + /** Gets the text of this str constant */ + string getText() { + result = this.getS() + } + + /** Whether this is a docstring */ + predicate isDocString() { + exists(Scope s | s.getDocString() = this) + } + + override boolean booleanValue() { + this.getText() = "" and result = false + or + this.getText() != "" and result = true + } + + override Object getLiteralObject() { none() } + +} + +private predicate name_consts(Name_ n, string id) { + exists(Variable v | + py_variables(v, n) and id = v.getId() | + id = "True" or id = "False" or id = "None" + ) +} + +/** A named constant, one of `None`, `True` or `False` */ +abstract class NameConstant extends Name, ImmutableLiteral { + + NameConstant() { + name_consts(this, _) + } + + override Expr getASubExpression() { + none() + } + + override string toString() { + name_consts(this, result) + } + + override predicate isConstant() { + any() + } + + override NameConstantNode getAFlowNode() { result = Name.super.getAFlowNode() } + + override predicate isArtificial() { + none() + } + +} + +/** A boolean named constant, either `True` or `False` */ +abstract class BooleanLiteral extends NameConstant { + +} + +/** The boolean named constant `True` */ +class True 
extends BooleanLiteral { + + True() { + name_consts(this, "True") + } + + override Object getLiteralObject() { + name_consts(this, "True") and result = theTrueObject() + } + + override boolean booleanValue() { + result = true + } + +} + +/** The boolean named constant `False` */ +class False extends BooleanLiteral { + + False() { + name_consts(this, "False") + } + + override Object getLiteralObject() { + name_consts(this, "False") and result = theFalseObject() + } + + override boolean booleanValue() { + result = false + } + +} + +/** The named constant `None` */ +class None extends NameConstant { + + None() { + name_consts(this, "None") + } + + override Object getLiteralObject() { + name_consts(this, "None") and result = theNoneObject() + } + + override boolean booleanValue() { + result = false + } + +} + +/** An await expression such as `await coro`. */ +class Await extends Await_ { + + override Expr getASubExpression() { + result = this.getValue() + } + +} + +/** A formatted string literal expression, such as `f'hello {world!s}'` */ +class Fstring extends Fstring_ { + + override Expr getASubExpression() { + result = this.getAValue() + } + +} + +/** A formatted value (within a formatted string literal). + * For example, in the string `f'hello {world!s}'` the formatted value is `world!s`. + */ +class FormattedValue extends FormattedValue_ { + + override Expr getASubExpression() { + result = this.getValue() or + result = this.getFormatSpec() + } + + +} + +/* Expression Contexts */ + +/** A context in which an expression is used */ +class ExprContext extends ExprContext_ { + +} + +/** Load context, the context of var in len(var) */ +class Load extends Load_ { + +} + +/** Store context, the context of var in var = 0 */ +class Store extends Store_ { + +} + +/** Delete context, the context of var in del var */ +class Del extends Del_ { + +} + +/** This is an artifact of the Python grammar which includes an AugLoad context, even though it is never used. 
*/ +library class AugLoad extends AugLoad_ { + +} + +/** Augmented store context, the context of var in var += 1 */ +class AugStore extends AugStore_ { + +} + +/** Parameter context, the context of var in def f(var): pass */ +class Param extends Param_ { + +} + + diff --git a/python/ql/src/semmle/python/Files.qll b/python/ql/src/semmle/python/Files.qll new file mode 100644 index 00000000000..c743b0373c5 --- /dev/null +++ b/python/ql/src/semmle/python/Files.qll @@ -0,0 +1,500 @@ + +import python + +/** A file */ +class File extends Container { + + File() { + files(this, _, _, _, _) + } + + /** DEPRECATED: Use `getAbsolutePath` instead. */ + override string getName() { + files(this, result, _, _, _) + } + + /** DEPRECATED: Use `getAbsolutePath` instead. */ + string getFullName() { + result = getName() + } + + predicate hasLocationInfo(string filepath, int bl, int bc, int el, int ec) { + this.getName() = filepath and bl = 0 and bc = 0 and el = 0 and ec = 0 + } + + /** Whether this file is a source code file. */ + predicate fromSource() { + /* If we start to analyse .pyc files, then this will have to change. */ + any() + } + + /** Gets a short name for this file (just the file name) */ + string getShortName() { + exists(string simple, string ext | files(this, _, simple, ext, _) | + result = simple + ext) + } + + private int lastLine() { + result = max(int i | exists(Location l | l.getFile() = this and l.getEndLine() = i)) + } + + /** Whether line n is empty (it contains neither code nor comment). */ + predicate emptyLine(int n) { + n in [0..this.lastLine()] + and + not occupied_line(this, n) + } + + string getSpecifiedEncoding() { + exists(Comment c, Location l | + l = c.getLocation() and l.getFile() = this | + l.getStartLine() < 3 and + result = c.getText().regexpCapture(".*coding[:=]\\s*([-\\w.]+).*", 1) + ) + } + + override string getAbsolutePath() { + files(this, result, _, _, _) + } + + /** Gets the URL of this file. 
*/ + override string getURL() { + result = "file://" + this.getAbsolutePath() + ":0:0:0:0" + } + + override Container getImportRoot(int n) { + /* File stem must be a legal Python identifier */ + this.getStem().regexpMatch("[^\\d\\W]\\w*") and + result = this.getParent().getImportRoot(n) + } + + /** Gets the contents of this file as a string. + * This will only work for those non-python files that + * are specified to be extracted. + */ + string getContents() { + file_contents(this, result) + } + +} + +private predicate occupied_line(File f, int n) { + exists(Location l | + l.getFile() = f | + l.getStartLine() = n + or + exists(StrConst s | s.getLocation() = l | + n in [l.getStartLine() .. l.getEndLine()] + ) + ) +} + +/** A folder (directory) */ +class Folder extends Container { + + Folder() { + folders(this, _, _) + } + + /** DEPRECATED: Use `getAbsolutePath` instead. */ + override string getName() { + folders(this, result, _) + } + + /** DEPRECATED: Use `getBaseName` instead. */ + string getSimple() { + folders(this, _, result) + } + + predicate hasLocationInfo(string filepath, int bl, int bc, int el, int ec) { + this.getName() = filepath and bl = 0 and bc = 0 and el = 0 and ec = 0 + } + + override string getAbsolutePath() { + folders(this, result, _) + } + + /** Gets the URL of this folder. */ + override string getURL() { + result = "folder://" + this.getAbsolutePath() + } + + override Container getImportRoot(int n) { + this.isImportRoot(n) and result = this + or + /* Folder must be a legal Python identifier */ + this.getBaseName().regexpMatch("[^\\d\\W]\\w*") and + result = this.getParent().getImportRoot(n) + } + +} + +/** A container is an abstract representation of a file system object that can + hold elements of interest. 
*/ +abstract class Container extends @container { + + Container getParent() { + containerparent(result, this) + } + + /** Gets a child of this container */ + deprecated Container getChild() { + containerparent(this, result) + } + + /** + * Gets a textual representation of the path of this container. + * + * This is the absolute path of the container. + */ + string toString() { + result = this.getAbsolutePath() + } + + /** Gets the name of this container */ + abstract string getName(); + + /** + * Gets the relative path of this file or folder from the root folder of the + * analyzed source location. The relative path of the root folder itself is + * the empty string. + * + * This has no result if the container is outside the source root, that is, + * if the root folder is not a reflexive, transitive parent of this container. + */ + string getRelativePath() { + exists (string absPath, string pref | + absPath = this.getAbsolutePath() and sourceLocationPrefix(pref) | + absPath = pref and result = "" + or + absPath = pref.regexpReplaceAll("/$", "") + "/" + result and + not result.matches("/%") + ) + } + + /** Whether this file or folder is part of the standard library */ + predicate inStdlib() { + this.inStdlib(_, _) + } + + /** Whether this file or folder is part of the standard library + * for version `major.minor` + */ + predicate inStdlib(int major, int minor) { + exists(Module m | + m.getPath() = this and + m.inStdLib(major, minor) + ) + } + + /* Standard cross-language API */ + + /** Gets a file or sub-folder in this container. */ + Container getAChildContainer() { + containerparent(this, result) + } + + /** Gets a file in this container. */ + File getAFile() { + result = this.getAChildContainer() + } + + /** Gets a sub-folder in this container. */ + Folder getAFolder() { + result = this.getAChildContainer() + } + + /** + * Gets the absolute, canonical path of this container, using forward slashes + * as path separator. 
+ * + * The path starts with a _root prefix_ followed by zero or more _path + * segments_ separated by forward slashes. + * + * The root prefix is of one of the following forms: + * + * 1. A single forward slash `/` (Unix-style) + * 2. An upper-case drive letter followed by a colon and a forward slash, + * such as `C:/` (Windows-style) + * 3. Two forward slashes, a computer name, and then another forward slash, + * such as `//FileServer/` (UNC-style) + * + * Path segments are never empty (that is, absolute paths never contain two + * contiguous slashes, except as part of a UNC-style root prefix). Also, path + * segments never contain forward slashes, and no path segment is of the + * form `.` (one dot) or `..` (two dots). + * + * Note that an absolute path never ends with a forward slash, except if it is + * a bare root prefix, that is, the path has no path segments. A container + * whose absolute path has no segments is always a `Folder`, not a `File`. + */ + abstract string getAbsolutePath(); + + /** + * Gets the base name of this container including extension, that is, the last + * segment of its absolute path, or the empty string if it has no segments. + * + * Here are some examples of absolute paths and the corresponding base names + * (surrounded with quotes to avoid ambiguity): + * + * + * + * + * + * + * + * + * + *
    Absolute path | Base name
    "/tmp/tst.py" | "tst.py"
    "C:/Program Files (x86)" | "Program Files (x86)"
    "/" | ""
    "C:/" | ""
    "D:/" | ""
    "//FileServer/" | ""
    + */ + string getBaseName() { + result = getAbsolutePath().regexpCapture(".*/(([^/]*?)(?:\\.([^.]*))?)", 1) + } + + /** + * Gets the extension of this container, that is, the suffix of its base name + * after the last dot character, if any. + * + * In particular, + * + * - if the name does not include a dot, there is no extension, so this + * predicate has no result; + * - if the name ends in a dot, the extension is the empty string; + * - if the name contains multiple dots, the extension follows the last dot. + * + * Here are some examples of absolute paths and the corresponding extensions + * (surrounded with quotes to avoid ambiguity): + * + * + * + * + * + * + * + * + *
    Absolute path | Extension
    "/tmp/tst.py" | "py"
    "/tmp/.gitignore" | "gitignore"
    "/bin/bash" | not defined
    "/tmp/tst2." | ""
    "/tmp/x.tar.gz" | "gz"
    + */ + string getExtension() { + result = getAbsolutePath().regexpCapture(".*/([^/]*?)(\\.([^.]*))?", 3) + } + + /** + * Gets the stem of this container, that is, the prefix of its base name up to + * (but not including) the last dot character if there is one, or the entire + * base name if there is not. + * + * Here are some examples of absolute paths and the corresponding stems + * (surrounded with quotes to avoid ambiguity): + * + * + * + * + * + * + * + * + *
    Absolute path | Stem
    "/tmp/tst.py" | "tst"
    "/tmp/.gitignore" | ""
    "/bin/bash" | "bash"
    "/tmp/tst2." | "tst2"
    "/tmp/x.tar.gz" | "x.tar"
    + */ + string getStem() { + result = getAbsolutePath().regexpCapture(".*/([^/]*?)(?:\\.([^.]*))?", 1) + } + + File getFile(string baseName) { + result = this.getAFile() and + result.getBaseName() = baseName + } + + Folder getFolder(string baseName) { + result = this.getAFolder() and + result.getBaseName() = baseName + } + + Container getParentContainer() { + this = result.getAChildContainer() + } + + Container getChildContainer(string baseName) { + result = this.getAChildContainer() and + result.getBaseName() = baseName + } + + /** + * Gets a URL representing the location of this container. + * + * For more information see https://lgtm.com/help/ql/locations#providing-urls. + */ + abstract string getURL(); + + /** Holds if this folder is on the import path. */ + predicate isImportRoot() { + this.isImportRoot(_) + } + + /** Holds if this folder is on the import path, at index `n` in the list of + * paths. The list of paths is composed of the paths passed to the extractor and + * `sys.path`. */ + predicate isImportRoot(int n) { + this.getName() = import_path_element(n) + } + + /** Holds if this folder is the root folder for the standard library for version `major.minor`, that is, an import root whose base name is exactly `python<major>.<minor>`. */ + predicate isStdLibRoot(int major, int minor) { + allowable_version(major, minor) and + this.isImportRoot() and + this.getBaseName().regexpMatch("python" + major + "\\." + minor) + } + + /** Gets the path element from which this container would be loaded. */ + Container getImportRoot() { + exists(int n | + result = this.getImportRoot(n) and + not exists(int m | + exists(this.getImportRoot(m)) and + m < n + ) + ) + } + + /** Gets the path element from which this container would be loaded, given the index into the list of possible paths `n`. 
*/ + abstract Container getImportRoot(int n); + +} + +private string import_path_element(int n) { + exists(string path, string pathsep, int k | + path = get_path("extractor.path") and k = 0 + or + path = get_path("sys.path") and k = count(get_path("extractor.path").splitAt(pathsep)) + | + py_flags_versioned("os.pathsep", pathsep, _) and + result = path.splitAt(pathsep, n-k).replaceAll("\\", "/") + ) +} + +private string get_path(string name) { + py_flags_versioned(name, result, _) +} + +private predicate allowable_version(int major, int minor) { + major = 2 and minor in [6..7] + or + major = 3 and minor in [3..6] +} + +class Location extends @location { + + /** Gets the file for this location */ + File getFile() { + locations_default(this, result, _, _, _, _) + or + exists(Module m | locations_ast(this, m, _, _, _, _) | + result = m.getFile() + ) + } + + /** Gets the start line of this location */ + int getStartLine() { + locations_default(this, _, result, _, _, _) + or locations_ast(this,_,result,_,_,_) + } + + /** Gets the start column of this location */ + int getStartColumn() { + locations_default(this, _, _, result, _, _) + or locations_ast(this, _, _, result, _, _) + } + + /** Gets the end line of this location */ + int getEndLine() { + locations_default(this, _, _, _, result, _) + or locations_ast(this, _, _, _, result, _) + } + + /** Gets the end column of this location */ + int getEndColumn() { + locations_default(this, _, _, _, _, result) + or locations_ast(this, _, _, _, _, result) + } + + string toString() { + result = this.getFile().getName() + ":" + this.getStartLine().toString() + } + + predicate hasLocationInfo(string filepath, int bl, int bc, int el, int ec) { + exists(File f | f.getName() = filepath | + locations_default(this, f, bl, bc, el, ec) + or + exists(Module m | m.getFile() = f | + locations_ast(this, m, bl, bc, el, ec)) + ) + } + +} + +/** A non-empty line in the source code */ +class Line extends @py_line { + + predicate 
hasLocationInfo(string filepath, int bl, int bc, int el, int ec) { + exists(Module m | m.getFile().getName() = filepath and + el = bl and bc = 1 and + py_line_lengths(this, m, bl, ec)) + } + + string toString() { + exists(Module m | py_line_lengths(this, m, _, _) | + result = m.getFile().getShortName() + ":" + this.getLineNumber().toString() + ) + } + + /** Gets the line number of this line */ + int getLineNumber() { + py_line_lengths(this, _, result, _) + } + + /** Gets the length of this line */ + int getLength() { + py_line_lengths(this, _, _, result) + } + + /** Gets the module containing this line */ + Module getModule() { + py_line_lengths(this, result, _, _) + } + +} + +/** A syntax error. Note that if there is a syntax error in a module, + much information about that module will be lost */ +class SyntaxError extends Location { + + SyntaxError() { + py_syntax_error_versioned(this, _, major_version().toString()) + } + + override string toString() { + result = "Syntax Error" + } + + /** Gets the message corresponding to this syntax error */ + string getMessage() { + py_syntax_error_versioned(this, result, major_version().toString()) + } + +} + +/** An encoding error. 
Note that if there is an encoding error in a module, + much information about that module will be lost */ +class EncodingError extends SyntaxError { + + EncodingError() { + /* Leave spaces around 'decode' in unlikely event it occurs as a name in a syntax error */ + this.getMessage().toLowerCase().matches("% decode %") + } + + override string toString() { + result = "Encoding Error" + } + +} + + diff --git a/python/ql/src/semmle/python/Flow.qll b/python/ql/src/semmle/python/Flow.qll new file mode 100755 index 00000000000..a465d88533a --- /dev/null +++ b/python/ql/src/semmle/python/Flow.qll @@ -0,0 +1,1047 @@ +import python +import semmle.python.flow.NameNode +private import semmle.python.pointsto.PointsTo + + +/* Note about matching parent and child nodes and CFG splitting: + * + * As a result of CFG splitting a single AST node may have multiple CFG nodes. + * Therefore, when matching CFG nodes to children, we need to make sure that + * we don't match the child of one CFG node to the wrong parent. + * We do this by checking dominance. If the CFG node for the parent precedes that of + * the child, then the child node matches the parent node if it is dominated by it. + * Vice versa for child nodes that precede the parent. + */ + + +private predicate augstore(ControlFlowNode load, ControlFlowNode store) { + exists(Expr load_store | exists(AugAssign aa | aa.getTarget() = load_store) | + toAst(load) = load_store and + toAst(store) = load_store and + load.strictlyDominates(store) + ) +} + +/** A non-dispatched getNode() to avoid negative recursion issues */ +private AstNode toAst(ControlFlowNode n) { + py_flow_bb_node(n, result, _, _) +} + +/** A control flow node. Control flow nodes have a many-to-one relation with syntactic nodes, + * although most syntactic nodes have only one corresponding control flow node. +* Edges between control flow nodes include exceptional as well as normal control flow. 
+*/ +class ControlFlowNode extends @py_flow_node { + + /** Whether this control flow node is a load (including those in augmented assignments) */ + predicate isLoad() { + exists(Expr e | e = toAst(this) | py_expr_contexts(_, 3, e) and not augstore(_, this)) + } + + /** Whether this control flow node is a store (including those in augmented assignments) */ + predicate isStore() { + exists(Expr e | e = toAst(this) | py_expr_contexts(_, 5, e) or augstore(_, this)) + } + + /** Whether this control flow node is a delete */ + predicate isDelete() { + exists(Expr e | e = toAst(this) | py_expr_contexts(_, 2, e)) + } + + /** Whether this control flow node is a parameter */ + predicate isParameter() { + exists(Expr e | e = toAst(this) | py_expr_contexts(_, 4, e)) + } + + /** Whether this control flow node is a store in an augmented assignment */ + predicate isAugStore() { + augstore(_, this) + } + + /** Whether this control flow node is a load in an augmented assignment */ + predicate isAugLoad() { + augstore(this, _) + } + + /** Whether this flow node corresponds to a literal */ + predicate isLiteral() { + toAst(this) instanceof Bytes + or + toAst(this) instanceof Dict + or + toAst(this) instanceof DictComp + or + toAst(this) instanceof Set + or + toAst(this) instanceof SetComp + or + toAst(this) instanceof Ellipsis + or + toAst(this) instanceof GeneratorExp + or + toAst(this) instanceof Lambda + or + toAst(this) instanceof ListComp + or + toAst(this) instanceof List + or + toAst(this) instanceof Num + or + toAst(this) instanceof Tuple + or + toAst(this) instanceof Unicode + or + toAst(this) instanceof NameConstant + } + + /** Use NameNode.isLoad() instead */ + deprecated predicate isUse() { + toAst(this) instanceof Name and this.isLoad() + } + + /** Use NameNode.isStore() */ + deprecated predicate isDefinition() { + toAst(this) instanceof Name and this.isStore() + } + + /** Whether this flow node corresponds to an attribute expression */ + predicate isAttribute() { + 
toAst(this) instanceof Attribute + } + + /** Use AttrNode.isLoad() instead */ + deprecated predicate isAttributeLoad() { + toAst(this) instanceof Attribute and this.isLoad() + } + + /** Use AttrNode.isStore() instead */ + deprecated predicate isAttributeStore() { + toAst(this) instanceof Attribute and this.isStore() + } + + /** Whether this flow node corresponds to a subscript expression */ + predicate isSubscript() { + toAst(this) instanceof Subscript + } + + /** Use SubscriptNode.isLoad() instead */ + deprecated predicate isSubscriptLoad() { + toAst(this) instanceof Subscript and this.isLoad() + } + + /** Use SubscriptNode.isStore() instead */ + deprecated predicate isSubscriptStore() { + toAst(this) instanceof Subscript and this.isStore() + } + + /** Whether this flow node corresponds to an import member */ + predicate isImportMember() { + toAst(this) instanceof ImportMember + } + + /** Whether this flow node corresponds to a call */ + predicate isCall() { + toAst(this) instanceof Call + } + + /** Whether this flow node is the first in a module */ + predicate isModuleEntry() { + this.isEntryNode() and toAst(this) instanceof Module + } + + /** Whether this flow node corresponds to an import */ + predicate isImport() { + toAst(this) instanceof ImportExpr + } + + /** Whether this flow node corresponds to a conditional expression */ + predicate isIfExp() { + toAst(this) instanceof IfExp + } + + /** Whether this flow node corresponds to a function definition expression */ + predicate isFunction() { + toAst(this) instanceof FunctionExpr + } + + /** Whether this flow node corresponds to a class definition expression */ + predicate isClass() { + toAst(this) instanceof ClassExpr + } + + /** Gets a predecessor of this flow node */ + ControlFlowNode getAPredecessor() { + py_successors(result, this) + } + + /** Gets a successor of this flow node */ + ControlFlowNode getASuccessor() { + py_successors(this, result) + } + + /** Gets the immediate dominator of this flow node 
*/ + ControlFlowNode getImmediateDominator() { + py_idoms(this, result) + } + + /** Gets the syntactic element corresponding to this flow node */ + AstNode getNode() { + py_flow_bb_node(this, result, _, _) + } + + string toString() { + exists(Scope s | s.getEntryNode() = this | + result = "Entry node for " + s.toString() + ) + or + exists(Scope s | s.getANormalExit() = this | + result = "Exit node for " + s.toString() + ) + or + not exists(Scope s | s.getEntryNode() = this or s.getANormalExit() = this) and + result = "ControlFlowNode for " + this.getNode().toString() + } + + /** Gets the location of this ControlFlowNode */ + Location getLocation() { + result = this.getNode().getLocation() + } + + /** Whether this flow node is the first in its scope */ + predicate isEntryNode() { + py_scope_flow(this, _, -1) + } + + /** Use ControlFlowNode.refersTo() instead. */ + deprecated Object pointsTo() { + this.refersTo(result) + } + + /** Gets what this flow node might "refer-to". Performs a combination of localized (intra-procedural) points-to + * analysis and global module-level analysis. This points-to analysis favours precision over recall. It is highly + * precise, but may not provide information for a significant number of flow-nodes. + * If the class is unimportant then use `refersTo(value)` or `refersTo(value, origin)` instead. + */ + predicate refersTo(Object value, ClassObject cls, ControlFlowNode origin) { + not py_special_objects(cls, "_semmle_unknown_type") + and + not value = unknownValue() + and + PointsTo::points_to(this, _, value, cls, origin) + } + + /** Gets what this expression might "refer-to" in the given `context`. 
+ */ + predicate refersTo(Context context, Object value, ClassObject cls, ControlFlowNode origin) { + not py_special_objects(cls, "_semmle_unknown_type") + and + PointsTo::points_to(this, context, value, cls, origin) + } + + /** Whether this flow node might "refer-to" to `value` which is from `origin` + * Unlike `this.refersTo(value, _, origin)` this predicate includes results + * where the class cannot be inferred. + */ + predicate refersTo(Object value, ControlFlowNode origin) { + PointsTo::points_to(this, _, value, _, origin) + and + not value = unknownValue() + } + + /** Equivalent to `this.refersTo(value, _)` */ + predicate refersTo(Object value) { + PointsTo::points_to(this, _, value, _, _) + and + not value = unknownValue() + } + + /** Gets the basic block containing this flow node */ + BasicBlock getBasicBlock() { + result.contains(this) + } + + /** Gets the scope containing this flow node */ + Scope getScope() { + if this.getNode() instanceof Scope then + /* Entry or exit node */ + result = this.getNode() + else + result = this.getNode().getScope() + } + + /** Gets the enclosing module */ + Module getEnclosingModule() { + result = this.getScope().getEnclosingModule() + } + + /** Gets a successor for this node if the relevant condition is True. */ + ControlFlowNode getATrueSuccessor() { + py_true_successors(this, result) + } + + /** Gets a successor for this node if the relevant condition is False. */ + ControlFlowNode getAFalseSuccessor() { + py_false_successors(this, result) + } + + /** Gets a successor for this node if an exception is raised. */ + ControlFlowNode getAnExceptionalSuccessor() { + py_exception_successors(this, result) + } + + /** Gets a successor for this node if no exception is raised. 
*/ + ControlFlowNode getANormalSuccessor() { + py_successors(this, result) and not + py_exception_successors(this, result) + } + + /** Whether the scope may be exited as a result of this node raising an exception */ + predicate isExceptionalExit(Scope s) { + py_scope_flow(this, s, 1) + } + + /** Whether this node is a normal (non-exceptional) exit */ + predicate isNormalExit() { + py_scope_flow(this, _, 0) or py_scope_flow(this, _, 2) + } + + /** Whether it is unlikely that this ControlFlowNode can be reached */ + predicate unlikelyReachable() { + not start_bb_likely_reachable(this.getBasicBlock()) + or + exists(BasicBlock b | + start_bb_likely_reachable(b) and + not end_bb_likely_reachable(b) and + /* If there is an unlikely successor edge earlier in the BB + * than this node, then this node must be unreachable */ + exists(ControlFlowNode p, int i, int j | + p.(RaisingNode).unlikelySuccessor(_) and + p = b.getNode(i) and + this = b.getNode(j) and + i < j + ) + ) + } + + /** Check whether this control-flow node has complete points-to information. + * This would mean that the analysis managed to infer an over approximation + * of possible values at runtime. + */ + predicate hasCompletePointsToSet() { + ( + // If the tracking failed, then `this` will be its own "origin". In that + // case, we want to exclude nodes for which there is also a different + // origin, as that would indicate that some paths failed and some did not. + this.refersTo(_, _, this) and + not exists(ControlFlowNode other | other != this and this.refersTo(_, _, other)) + ) or ( + // If `this` is a use of a variable, then we must have complete points-to + // for that variable. + exists(SsaVariable v | v.getAUse() = this | + varHasCompletePointsToSet(v) + ) + ) + } + + /** Whether this strictly dominates other. */ + pragma [inline] predicate strictlyDominates(ControlFlowNode other) { + // This predicate is gigantic, so it must be inlined. + // About 1.4 billion tuples for OpenStack Cinder. 
+ this.getBasicBlock().strictlyDominates(other.getBasicBlock()) + or + exists(BasicBlock b, int i, int j | + this = b.getNode(i) and other = b.getNode(j) and i < j + ) + } + + /** Whether this dominates other. + * Note that all nodes dominate themselves. + */ + pragma [inline] predicate dominates(ControlFlowNode other) { + // This predicate is gigantic, so it must be inlined. + this.getBasicBlock().strictlyDominates(other.getBasicBlock()) + or + exists(BasicBlock b, int i, int j | + this = b.getNode(i) and other = b.getNode(j) and i <= j + ) + } + + /** Whether this strictly reaches other. */ + pragma [inline] predicate strictlyReaches(ControlFlowNode other) { + // This predicate is gigantic, even larger than strictlyDominates, + // so it must be inlined. + this.getBasicBlock().strictlyReaches(other.getBasicBlock()) + or + exists(BasicBlock b, int i, int j | + this = b.getNode(i) and other = b.getNode(j) and i < j + ) + } + + /** Holds if this CFG node is a branch */ + predicate isBranch() { + py_true_successors(this, _) or py_false_successors(this, _) + } + + /** Gets a CFG node that corresponds to a child of the AST node for this node */ + pragma [noinline] + ControlFlowNode getAChild() { + this.getNode().getAChildNode() = result.getNode() and + result.getBasicBlock().dominates(this.getBasicBlock()) + } + +} + + +/* This class exists to provide an implementation over ControlFlowNode.getNode() + * that subsumes all the others in a way that's obvious to the optimiser. + * This avoids wasting time on the trivial overrides on the ControlFlowNode subclasses. + */ +private class AnyNode extends ControlFlowNode { + + override AstNode getNode() { + result = super.getNode() + } +} + + +/** Check whether an SSA variable has complete points-to information. + * This would mean that the analysis managed to infer an overapproximation + * of possible values at runtime. 
+ */ +private predicate varHasCompletePointsToSet(SsaVariable var) { + // Global variables may be modified non-locally or concurrently. + not var.getVariable() instanceof GlobalVariable and + ( + // If we have complete points-to information on the definition of + // this variable, then the variable has complete information. + var.getDefinition().(DefinitionNode).getValue().hasCompletePointsToSet() + or + // If this variable is a phi output, then we have complete + // points-to information about it if all phi inputs had complete + // information. + forex(SsaVariable phiInput | phiInput = var.getAPhiInput() | + varHasCompletePointsToSet(phiInput) + ) + ) +} + +/** A control flow node corresponding to a call expression, such as `func(...)` */ +class CallNode extends ControlFlowNode { + + CallNode() { + toAst(this) instanceof Call + } + + /** Gets the flow node corresponding to the function expression for the call corresponding to this flow node */ + ControlFlowNode getFunction() { + exists(Call c | this.getNode() = c and c.getFunc() = result.getNode() and + result.getBasicBlock().dominates(this.getBasicBlock())) + } + + /** Gets the flow node corresponding to the nth argument of the call corresponding to this flow node */ + ControlFlowNode getArg(int n) { + exists(Call c | this.getNode() = c and c.getArg(n) = result.getNode() and + result.getBasicBlock().dominates(this.getBasicBlock())) + } + + /** Gets the flow node corresponding to the named argument of the call corresponding to this flow node */ + ControlFlowNode getArgByName(string name) { + exists(Call c, Keyword k | this.getNode() = c and k = c.getAKeyword() and + k.getValue() = result.getNode() and k.getArg() = name and + result.getBasicBlock().dominates(this.getBasicBlock())) + } + + /** Gets the flow node corresponding to an argument of the call corresponding to this flow node */ + ControlFlowNode getAnArg() { + exists(int n | result = this.getArg(n)) + or + exists(string name | result = 
this.getArgByName(name)) + } + + override Call getNode() { result = super.getNode() } + +} + +/** A control flow node corresponding to an attribute expression, such as `value.attr` */ +class AttrNode extends ControlFlowNode { + AttrNode() { + toAst(this) instanceof Attribute + } + + /** Gets the flow node corresponding to the object of the attribute expression corresponding to this flow node */ + ControlFlowNode getObject() { + exists(Attribute a | this.getNode() = a and a.getObject() = result.getNode() and + result.getBasicBlock().dominates(this.getBasicBlock())) + } + + /** Use getObject() instead */ + deprecated ControlFlowNode getValue() { + result = this.getObject() + } + + /** Use getObject(name) instead */ + deprecated ControlFlowNode getValue(string name) { + result = this.getObject(name) + } + + /** Gets the flow node corresponding to the object of the attribute expression corresponding to this flow node, + with the matching name */ + ControlFlowNode getObject(string name) { + exists(Attribute a | + this.getNode() = a and a.getObject() = result.getNode() and + a.getName() = name and + result.getBasicBlock().dominates(this.getBasicBlock())) + } + + /** Gets the attribute name of the attribute expression corresponding to this flow node */ + string getName() { + exists(Attribute a | this.getNode() = a and a.getName() = result) + } + + override Attribute getNode() { result = super.getNode() } + +} + +/** A control flow node corresponding to a `from ... 
import ...` expression */ +class ImportMemberNode extends ControlFlowNode { + ImportMemberNode() { + toAst(this) instanceof ImportMember + } + + /** Gets the flow node corresponding to the module in the import-member expression corresponding to this flow node, + with the matching name*/ + ControlFlowNode getModule(string name) { + exists(ImportMember i | + this.getNode() = i and i.getModule() = result.getNode() | + i.getName() = name and + result.getBasicBlock().dominates(this.getBasicBlock()) + ) + } + + override ImportMember getNode() { result = super.getNode() } +} + + +/** A control flow node corresponding to an artificial expression representing an import */ +class ImportExprNode extends ControlFlowNode { + + ImportExprNode() { + toAst(this) instanceof ImportExpr + } + + override ImportExpr getNode() { result = super.getNode() } + +} + +/** A control flow node corresponding to a `from ... import *` statement */ +class ImportStarNode extends ControlFlowNode { + + ImportStarNode() { + toAst(this) instanceof ImportStar + } + + /** Gets the flow node corresponding to the module in the import-star corresponding to this flow node */ + ControlFlowNode getModule() { + exists(ImportStar i | + this.getNode() = i and i.getModuleExpr() = result.getNode() | + result.getBasicBlock().dominates(this.getBasicBlock()) + ) + } + + override ImportStar getNode() { result = super.getNode() } + +} + +/** A control flow node corresponding to a subscript expression, such as `value[slice]` */ +class SubscriptNode extends ControlFlowNode { + SubscriptNode() { + toAst(this) instanceof Subscript + } + + /** DEPRECATED: Use `getObject()` instead. 
+ * This will be formally deprecated before the end of 2018 and removed in 2019.*/ + ControlFlowNode getValue() { + exists(Subscript s | this.getNode() = s and s.getObject() = result.getNode() and + result.getBasicBlock().dominates(this.getBasicBlock())) + } + + /** Gets the flow node corresponding to the value of the sequence in a subscript operation */ + ControlFlowNode getObject() { + exists(Subscript s | this.getNode() = s and s.getObject() = result.getNode() and + result.getBasicBlock().dominates(this.getBasicBlock())) + } + + /** Gets the flow node corresponding to the index in a subscript operation */ + ControlFlowNode getIndex() { + exists(Subscript s | this.getNode() = s and s.getIndex() = result.getNode() and + result.getBasicBlock().dominates(this.getBasicBlock())) + } + + override Subscript getNode() { result = super.getNode() } +} + +/** A control flow node corresponding to a comparison operation, such as `x DeletionNode -> NameNode('b') -> AttrNode('y') -> DeletionNode`. + */ +class DeletionNode extends ControlFlowNode { + + DeletionNode() { + toAst(this) instanceof Delete + } + + /** Gets the unique target of this deletion node. 
*/ + ControlFlowNode getTarget() { + result.getASuccessor() = this + } + +} + +/** A control flow node corresponding to a sequence (tuple or list) literal */ +abstract class SequenceNode extends ControlFlowNode { + SequenceNode() { + toAst(this) instanceof Tuple + or + toAst(this) instanceof List + } + + /** Gets the control flow node for an element of this sequence */ + ControlFlowNode getAnElement() { + result = this.getElement(_) + } + + /** Gets the control flow node for the nth element of this sequence */ + abstract ControlFlowNode getElement(int n); + +} + +/** A control flow node corresponding to a tuple expression such as `( 1, 3, 5, 7, 9 )` */ +class TupleNode extends SequenceNode { + TupleNode() { + toAst(this) instanceof Tuple + } + + override ControlFlowNode getElement(int n) { + exists(Tuple t | this.getNode() = t and result.getNode() = t.getElt(n)) and + ( + result.getBasicBlock().dominates(this.getBasicBlock()) + or + this.getBasicBlock().dominates(result.getBasicBlock()) + ) + } +} + +/** A control flow node corresponding to a list expression, such as `[ 1, 3, 5, 7, 9 ]` */ +class ListNode extends SequenceNode { + ListNode() { + toAst(this) instanceof List + } + + override ControlFlowNode getElement(int n) { + exists(List l | this.getNode() = l and result.getNode() = l.getElt(n)) and + ( + result.getBasicBlock().dominates(this.getBasicBlock()) + or + this.getBasicBlock().dominates(result.getBasicBlock()) + ) + } + +} + +class SetNode extends ControlFlowNode { + + SetNode() { + toAst(this) instanceof Set + } + + ControlFlowNode getAnElement() { + exists(Set s | this.getNode() = s and result.getNode() = s.getElt(_)) and + ( + result.getBasicBlock().dominates(this.getBasicBlock()) + or + this.getBasicBlock().dominates(result.getBasicBlock()) + ) + } + +} + +/** A control flow node corresponding to a dictionary literal, such as `{ 'a': 1, 'b': 2 }` */ +class DictNode extends ControlFlowNode { + + DictNode() { + toAst(this) instanceof Dict + } + + /** 
Gets a key of this dictionary literal node, for those items that have keys + * E.g, in {'a':1, **b} this returns only 'a' + */ + ControlFlowNode getAKey() { + exists(Dict d | this.getNode() = d and result.getNode() = d.getAKey()) and + result.getBasicBlock().dominates(this.getBasicBlock()) + } + + /** Gets a value of this dictionary literal node*/ + ControlFlowNode getAValue() { + exists(Dict d | this.getNode() = d and result.getNode() = d.getAValue()) and + result.getBasicBlock().dominates(this.getBasicBlock()) + } + +} + +private Expr assigned_value(Expr lhs) { + /* lhs = result */ + exists(Assign a | a.getATarget() = lhs and result = a.getValue()) + or + /* import result as lhs */ + exists(Alias a | a.getAsname() = lhs and result = a.getValue()) + or + /* lhs += x => result = (lhs + x) */ + exists(AugAssign a, BinaryExpr b | b = a.getOperation() and result = b and lhs = b.getLeft()) + or + /* ..., lhs, ... = ..., result, ... */ + exists(Assign a, Tuple target, Tuple values, int index | + a.getATarget() = target and + a.getValue() = values and + lhs = target.getElt(index) and + result = values.getElt(index) + ) +} + +/** A flow node for a `for` statement. 
*/ +class ForNode extends ControlFlowNode { + + ForNode() { + toAst(this) instanceof For + } + + override For getNode() { result = super.getNode() } + + /** Whether this `for` statement causes iteration over `sequence` storing each step of the iteration in `target` */ + predicate iterates(ControlFlowNode target, ControlFlowNode sequence) { + exists(For for | + toAst(this) = for and + for.getTarget() = target.getNode() and + for.getIter() = sequence.getNode() | + sequence.getBasicBlock().dominates(this.getBasicBlock()) and + sequence.getBasicBlock().dominates(target.getBasicBlock()) + ) + } + +} + +/** A flow node for a `raise` statement */ +class RaiseStmtNode extends ControlFlowNode { + + RaiseStmtNode() { + toAst(this) instanceof Raise + } + + /** Gets the control flow node for the exception raised by this raise statement */ + ControlFlowNode getException() { + exists(Raise r | + r = toAst(this) and + r.getException() = toAst(result) and + result.getBasicBlock().dominates(this.getBasicBlock()) + ) + } + +} + +private +predicate defined_by(NameNode def, Variable v) { + def.defines(v) or + exists(NameNode p | defined_by(p, v) and p.getASuccessor() = def and not p.defines(v)) +} + +/** A basic block (ignoring exceptional flow edges to scope exit) */ +class BasicBlock extends @py_flow_node { + + BasicBlock() { + py_flow_bb_node(_, _, this, _) + } + + /** Whether this basic block contains the specified node */ + predicate contains(ControlFlowNode node) { + py_flow_bb_node(node, _, this, _) + } + + /** Gets the nth node in this basic block */ + ControlFlowNode getNode(int n) { + py_flow_bb_node(result, _, this, n) + } + + string toString() { + result = "BasicBlock" + } + + /** Whether this basic block strictly dominates the other */ + pragma[nomagic] predicate strictlyDominates(BasicBlock other) { + other.getImmediateDominator+() = this + } + + /** Whether this basic block dominates the other */ + pragma[nomagic] predicate dominates(BasicBlock other) { + this = other + 
or + this.strictlyDominates(other) + } + + BasicBlock getImmediateDominator() { + this.firstNode().getImmediateDominator().getBasicBlock() = result + } + + /** Dominance frontier of a node x is the set of all nodes `other` such that `this` dominates a predecessor + * of `other` but does not strictly dominate `other` */ + predicate dominanceFrontier(BasicBlock other) { + this.dominates(other.getAPredecessor()) and not this.strictlyDominates(other) + } + + private ControlFlowNode firstNode() { + result = this + } + + /** Gets the last node in this basic block */ + ControlFlowNode getLastNode() { + exists(int i | + this.getNode(i) = result and + i = max(int j | py_flow_bb_node(_, _, this, j)) + ) + } + + private predicate oneNodeBlock() { + this.firstNode() = this.getLastNode() + } + + private predicate startLocationInfo(string file, int line, int col) { + if this.firstNode().getNode() instanceof Scope then + this.firstNode().getASuccessor().getLocation().hasLocationInfo(file, line, col, _, _) + else + this.firstNode().getLocation().hasLocationInfo(file, line, col, _, _) + } + + private predicate endLocationInfo(int endl, int endc) { + if (this.getLastNode().getNode() instanceof Scope and not this.oneNodeBlock()) then + this.getLastNode().getAPredecessor().getLocation().hasLocationInfo(_, _, _, endl, endc) + else + this.getLastNode().getLocation().hasLocationInfo(_, _, _, endl, endc) + } + + /** Gets a successor to this basic block */ + BasicBlock getASuccessor() { + result = this.getLastNode().getASuccessor().getBasicBlock() + } + + /** Gets a predecessor to this basic block */ + BasicBlock getAPredecessor() { + result.getASuccessor() = this + } + + /** Whether flow from this basic block reaches a normal exit from its scope */ + predicate reachesExit() { + exists(Scope s | s.getANormalExit().getBasicBlock() = this) + or + this.getASuccessor().reachesExit() + } + + predicate hasLocationInfo(string file, int line, int col, int endl, int endc) { + 
this.startLocationInfo(file, line, col) + and + this.endLocationInfo(endl, endc) + } + + /** Gets a true successor to this basic block */ + BasicBlock getATrueSuccessor() { + result = this.getLastNode().getATrueSuccessor().getBasicBlock() + } + + /** Gets a false successor to this basic block */ + BasicBlock getAFalseSuccessor() { + result = this.getLastNode().getAFalseSuccessor().getBasicBlock() + } + + /** Gets the scope of this block */ + pragma [nomagic] Scope getScope() { + exists(ControlFlowNode n | + n.getBasicBlock() = this | + /* Take care not to use an entry or exit node as that node's scope will be the outer scope */ + not py_scope_flow(n, _, -1) and + not py_scope_flow(n, _, 0) and + not py_scope_flow(n, _, 2) and + result = n.getScope() + or + py_scope_flow(n, result, _) + ) + } + + /** Whether (as inferred by type inference) it is highly unlikely (or impossible) for control to flow from this to succ. + */ + predicate unlikelySuccessor(BasicBlock succ) { + this.getLastNode().(RaisingNode).unlikelySuccessor(succ.firstNode()) + or + not end_bb_likely_reachable(this) and succ = this.getASuccessor() + } + + /** Holds if this basic block strictly reaches the other. Is the start of other reachable from the end of this. */ + predicate strictlyReaches(BasicBlock other) { + this.getASuccessor+() = other + } + + /** Holds if this basic block reaches the other. Is the start of other reachable from the end of this. */ + predicate reaches(BasicBlock other) { + this = other or this.strictlyReaches(other) + } + + /** Whether (as inferred by type inference) this basic block is likely to be reachable. 
+ */ + predicate likelyReachable() { + start_bb_likely_reachable(this) + } +} + +private predicate start_bb_likely_reachable(BasicBlock b) { + exists(Scope s | s.getEntryNode() = b.getNode(_)) + or + exists(BasicBlock pred | + pred = b.getAPredecessor() and + end_bb_likely_reachable(pred) and + not pred.getLastNode().(RaisingNode).unlikelySuccessor(b) + ) +} + +private predicate end_bb_likely_reachable(BasicBlock b) { + start_bb_likely_reachable(b) and + not exists(ControlFlowNode p, ControlFlowNode s | + p.(RaisingNode).unlikelySuccessor(s) and + p = b.getNode(_) and + s = b.getNode(_) and + not p = b.getLastNode() + ) +} + diff --git a/python/ql/src/semmle/python/Function.qll b/python/ql/src/semmle/python/Function.qll new file mode 100644 index 00000000000..b2c1678b24d --- /dev/null +++ b/python/ql/src/semmle/python/Function.qll @@ -0,0 +1,377 @@ +import python + +/** A function, independent of defaults and binding. + It is the syntactic entity that is compiled to a code object. */ +class Function extends Function_, Scope, AstNode { + + /** The expression defining this function */ + CallableExpr getDefinition() { + result = this.getParent() + } + + /** The scope in which this function occurs, will be a class for a method, + * another function for nested functions, generator expressions or comprehensions, + * or a module for a plain function. 
*/ + override Scope getEnclosingScope() { + result = this.getParent().(Expr).getScope() + } + + override Scope getScope() { + result = this.getEnclosingScope() + } + + /** Whether this function is declared in a class */ + predicate isMethod() { + exists(Class cls | this.getEnclosingScope() = cls) + } + + /** Whether this is a special method, that is does its name have the form `__xxx__` (except `__init__`) */ + predicate isSpecialMethod() { + this.isMethod() and + exists(string name | this.getName() = name | + name.matches("\\_\\_%\\_\\_") and + name != "__init__") + } + + /** Whether this function is a generator function, + that is whether it contains a yield or yield-from expression */ + predicate isGenerator() { + exists(Yield y | y.getScope() = this) + or + exists(YieldFrom y | y.getScope() = this) + } + + /** Whether this function is declared in a class and is named "__init__" */ + predicate isInitMethod() { + this.isMethod() and this.getName() = "__init__" + } + + /** Gets a decorator of this function */ + Expr getADecorator() { + result = ((FunctionExpr)this.getDefinition()).getADecorator() + } + + /** Gets the name of the nth argument (for simple arguments) */ + string getArgName(int index) { + result = ((Name)this.getArg(index)).getId() + } + + Parameter getArgByName(string name) { + result = this.getAnArg() and + result.(Name).getId() = name + } + + override Location getLocation() { + py_scope_location(result, this) + } + + override string toString() { + result = "Function " + this.getName() + } + + /** Gets the statements forming the body of this function */ + override StmtList getBody() { + result = Function_.super.getBody() + } + + /** Gets the nth statement in the function */ + override Stmt getStmt(int index) { + result = Function_.super.getStmt(index) + } + + /** Gets a statement in the function */ + override Stmt getAStmt() { + result = Function_.super.getAStmt() + } + + /** Gets the name used to define this function */ + override string getName() 
{ + result = Function_.super.getName() + } + + /** Gets the metrics for this function */ + FunctionMetrics getMetrics() { + result = this + } + + /** Gets the FunctionObject corresponding to this function */ + FunctionObject getFunctionObject() { + result.getOrigin() = this.getDefinition() + } + + /** Whether this function is a procedure, that is, it has no explicit return statement and always returns None. + * Note that generator and async functions are not procedures as they return generators and coroutines respectively. */ + predicate isProcedure() { + not exists(this.getReturnNode()) and exists(this.getFallthroughNode()) and not this.isGenerator() and not this.isAsync() + } + + /** Gets the number of positional parameters */ + int getPositionalParameterCount() { + result = count(this.getAnArg()) + } + + /** Gets the number of keyword-only parameters */ + int getKeywordOnlyParameterCount() { + result = count(this.getAKwonlyarg()) + } + + /** Whether this function accepts a variable number of arguments. That is, whether it has a starred (*arg) parameter. */ + predicate hasVarArg() { + exists(this.getVararg()) + } + + /** Whether this function accepts arbitrary keyword arguments. That is, whether it has a double-starred (**kwarg) parameter. */ + predicate hasKwArg() { + exists(this.getKwarg()) + } + + override AstNode getAChildNode() { + result = this.getAStmt() or + result = this.getAnArg() or + result = this.getVararg() or + result = this.getKwarg() + } + + /** Gets the qualified name for this function. + * Should return the same name as the `__qualname__` attribute on functions in Python 3. + */ + string getQualifiedName() { + this.getEnclosingScope() instanceof Module and result = this.getName() + or + exists(string enclosing_name | + enclosing_name = this.getEnclosingScope().(Function).getQualifiedName() + or + enclosing_name = this.getEnclosingScope().(Class).getQualifiedName() | + result = enclosing_name + "." 
+ this.getName() + ) + } + + /** Gets the nth keyword-only parameter of this function. */ + Name getKeywordOnlyArg(int n) { + result = Function_.super.getKwonlyarg(n) + } + + /** Gets a keyword-only parameter of this function. */ + Name getAKeywordOnlyArg() { + result = this.getKeywordOnlyArg(_) + } + + override Scope getEvaluatingScope() { + major_version() = 2 and exists(Comp comp | comp.getFunction() = this | result = comp.getEvaluatingScope()) + or + not exists(Comp comp | comp.getFunction() = this) and result = this + or + major_version() = 3 and result = this + } + + override + predicate containsInScope(AstNode inner) { + Scope.super.containsInScope(inner) + } + + override + predicate contains(AstNode inner) { + Scope.super.contains(inner) + } + +} + +/** A def statement. Note that FunctionDef extends Assign as a function definition binds the newly created function */ +class FunctionDef extends Assign { + + FunctionDef() { + /* This is an artificial assignment the rhs of which is a (possibly decorated) FunctionExpr */ + exists(FunctionExpr f | this.getValue() = f or this.getValue() = f.getADecoratorCall()) + } + + override string toString() { + result = "FunctionDef" + } + + /** Gets the function for this statement */ + Function getDefinedFunction() { + exists(FunctionExpr func | this.containsInScope(func) and result = func.getInnerScope()) + } + + override Stmt getLastStatement() { + result = this.getDefinedFunction().getLastStatement() + } + +} + +class FastLocalsFunction extends Function { + + /** A function that uses 'fast' locals, stored in the frame not in a dictionary. */ + FastLocalsFunction () { + not exists(ImportStar i | i.getScope() = this) + and + not exists(Exec e | e.getScope() = this) + } + +} + +/** A parameter. 
Either a Tuple or a Name (always a Name for Python 3) */ +class Parameter extends Parameter_ { + + Parameter() { + /* Parameter_ is just defined as a Name or Tuple, narrow to actual parameters */ + exists(ParameterList pl | py_exprs(this, _, pl, _)) + } + + Location getLocation() { + result = this.asName().getLocation() + or + result = this.asTuple().getLocation() + } + + /** Gets this parameter if it is a Name (not a Tuple) */ + Name asName() { + result = this + } + + /** Gets this parameter if it is a Tuple (not a Name) */ + Tuple asTuple() { + result = this + } + + Expr getDefault() { + exists(Function f, int n, int c, int d, Arguments args | + args = f.getDefinition().getArgs() | + f.getArg(n) = this and + c = count(f.getAnArg()) and + d = count(args.getADefault()) and + result = args.getDefault(d-c+n) + ) + } + + Variable getVariable() { + result.getAnAccess() = this.asName() + } + + /** Gets the position of this parameter */ + int getPosition() { + exists(Function f | + f.getArg(result) = this + ) + } + + /** Gets the name of this parameter */ + string getName() { + result = this.asName().getId() + } + + /** Holds if this parameter is the first parameter of a method. It is not necessarily called "self" */ + predicate isSelf() { + exists(Function f | + f.getArg(0) = this and + f.isMethod() + ) + } + + /** Holds if this parameter is a 'varargs' parameter. + * The `varargs` in `f(a, b, *varargs)`. + */ + predicate isVarargs() { + exists(Function func | func.getVararg() = this) + } + + /** Holds if this parameter is a 'kwargs' parameter. + * The `kwargs` in `f(a, b, **kwargs)`. + */ + predicate isKwargs() { + exists(Function func | func.getKwarg() = this) + } + +} + +/** An expression that generates a callable object, either a function expression or a lambda */ +abstract class CallableExpr extends Expr { + + /** Gets the parameters of this callable. + * This predicate is called getArgs(), rather than getParameters() for compatibility with Python's AST module. 
*/ + abstract Arguments getArgs(); + + /** Gets the function scope of this code expression. */ + abstract Function getInnerScope(); + +} + +/** An (artificial) expression corresponding to a function definition. */ +class FunctionExpr extends FunctionExpr_, CallableExpr { + + override Expr getASubExpression() { + result = this.getArgs().getASubExpression() or + result = this.getReturns() + } + + override predicate hasSideEffects() { + any() + } + + Call getADecoratorCall() { + result.getArg(0) = this or + result.getArg(0) = this.getADecoratorCall() + } + + /** Gets a decorator of this function expression */ + Expr getADecorator() { + result = this.getADecoratorCall().getFunc() + } + + override AstNode getAChildNode() { + result = this.getASubExpression() + or + result = this.getInnerScope() + } + + override Function getInnerScope() { + result = FunctionExpr_.super.getInnerScope() + } + + override Arguments getArgs() { + result = FunctionExpr_.super.getArgs() + } + +} + +/** A lambda expression, such as lambda x:x*x */ +class Lambda extends Lambda_, CallableExpr { + + /** Gets the expression to the right of the colon in this lambda expression */ + Expr getExpression() { + exists(Return ret | ret = this.getInnerScope().getStmt(0) | + result = ret.getValue()) + } + + override Expr getASubExpression() { + result = this.getArgs().getASubExpression() + } + + override AstNode getAChildNode() { + result = this.getASubExpression() or + result = this.getInnerScope() + } + + override Function getInnerScope() { + result = Lambda_.super.getInnerScope() + } + + override Arguments getArgs() { + result = Lambda_.super.getArgs() + } + +} + +/** The arguments in a function definition */ +class Arguments extends Arguments_ { + + Expr getASubExpression() { + result = this.getAKwDefault() or + result = this.getAnAnnotation() or + result = this.getKwargannotation() or + result = this.getVarargannotation() or + result = this.getADefault() + } +} + + diff --git 
a/python/ql/src/semmle/python/GuardedControlFlow.qll b/python/ql/src/semmle/python/GuardedControlFlow.qll new file mode 100644 index 00000000000..d51eb1b741e --- /dev/null +++ b/python/ql/src/semmle/python/GuardedControlFlow.qll @@ -0,0 +1,67 @@ +import python + +/** A basic block which terminates in a condition, splitting the subsequent control flow */ +class ConditionBlock extends BasicBlock { + + ConditionBlock() { + exists(ControlFlowNode succ | succ = this.getATrueSuccessor() or succ = this.getAFalseSuccessor()) + } + + /** Basic blocks controlled by this condition, i.e. those BBs for which the condition is testIsTrue */ + predicate controls(BasicBlock controlled, boolean testIsTrue) { + /* For this block to control the block 'controlled' with 'testIsTrue' the following must be true: + Execution must have passed through the test i.e. 'this' must strictly dominate 'controlled'. + Execution must have passed through the 'testIsTrue' edge leaving 'this'. + + Although "passed through the true edge" implies that this.getATrueSuccessor() dominates 'controlled', + the reverse is not true, as flow may have passed through another edge to get to this.getATrueSuccessor() + so we need to assert that this.getATrueSuccessor() dominates 'controlled' *and* that + all predecessors of this.getATrueSuccessor() are either this or dominated by this.getATrueSuccessor(). + + For example, in the following python snippet: + + if x: + controlled + false_successor + uncontrolled + + false_successor dominates uncontrolled, but not all of its predecessors are this (if x) + or dominated by itself. Whereas in the following code: + + if x: + while controlled: + also_controlled + false_successor + uncontrolled + + the block 'while controlled' is controlled because all of its predecessors are this (if x) + or (in the case of 'also_controlled') dominated by itself. 
+ + The additional constraint on the predecessors of the test successor implies + that `this` strictly dominates `controlled` so that isn't necessary to check + directly. + */ + exists(BasicBlock succ | + testIsTrue = true and succ = this.getATrueSuccessor() + or + testIsTrue = false and succ = this.getAFalseSuccessor() + | + succ.dominates(controlled) and + forall(BasicBlock pred | pred.getASuccessor() = succ | + pred = this or succ.dominates(pred) + ) + ) + } + + /** Holds if this condition controls the edge `pred->succ`, i.e. those edges for which the condition is `testIsTrue`. */ + predicate controlsEdge(BasicBlock pred, BasicBlock succ, boolean testIsTrue) { + this.controls(pred, testIsTrue) and succ = pred.getASuccessor() + or + pred = this and ( + testIsTrue = true and succ = this.getATrueSuccessor() + or + testIsTrue = false and succ = this.getAFalseSuccessor() + ) + } + +} diff --git a/python/ql/src/semmle/python/Import.qll b/python/ql/src/semmle/python/Import.qll new file mode 100644 index 00000000000..0668c97bdcf --- /dev/null +++ b/python/ql/src/semmle/python/Import.qll @@ -0,0 +1,275 @@ +import python + + +/** An alias in an import statement, the `mod as name` part of `import mod as name`. 
May be artificial; + `import x` is transformed into `import x as x` */ +class Alias extends Alias_ { + + Location getLocation() { + result = this.getValue().getLocation() + } + +} + +private predicate valid_module_name(string name) { + exists(Module m | m.getName() = name) + or + exists(Object cmod | py_cobjecttypes(cmod, theModuleType()) and py_cobjectnames(cmod, name)) +} + +/** An artificial expression representing an import */ +class ImportExpr extends ImportExpr_ { + + + private string basePackageName(int n) { + n = 1 and result = this.getEnclosingModule().getPackageName() + or + exists(string bpnm1 | bpnm1 = this.basePackageName(n-1) and + bpnm1.matches("%.%") and + result = bpnm1.regexpReplaceAll("\\.[^.]*$", "") + ) + } + + private predicate implicitRelativeImportsAllowed() { + // relative imports are no longer allowed in Python 3 + major_version() < 3 and + // and can be explicitly turned off in later versions of Python 2 + not getEnclosingModule().hasFromFuture("absolute_import") + } + + /** The language specifies level as -1 if relative imports are to be tried first, 0 for absolute imports, + and level > 0 for explicit relative imports. */ + override int getLevel() { + exists(int l | l = super.getLevel() | + l > 0 and result = l + or + /* The extractor may set level to 0 even though relative imports apply */ + l = 0 and ( + if this.implicitRelativeImportsAllowed() then + result = -1 + else + result = 0 + ) + ) + } + + /** + * If this import is relative, and relative imports are allowed, compute + * the name of the topmost module that will be imported. + */ + private string relativeTopName() { + getLevel() = -1 and + result = basePackageName(1) + "." 
+ this.getTopName() and + valid_module_name(result) + } + + private string qualifiedTopName() { + if (this.getLevel() <= 0) then ( + result = this.getTopName() + ) else ( + result = basePackageName(this.getLevel()) and + valid_module_name(result) + ) + } + + /** Gets the name by which the lowest level module or package is imported. + * NOTE: This is the name that is used to import the module, + * which may not be the name of the module. */ + string bottomModuleName() { + result = relativeTopName() + this.remainderOfName() + or + ( + not exists(relativeTopName()) and + result = this.qualifiedTopName() + this.remainderOfName() + ) + } + + /** Gets the name of the topmost module or package being imported */ + string topModuleName() { + result = relativeTopName() + or + ( + not exists(relativeTopName()) and + result = this.qualifiedTopName() + ) + } + + /** Gets the full name of the module resulting from evaluating this import. + * NOTE: This is the name that is used to import the module, + * which may not be the name of the module. */ + string getImportedModuleName() { + exists(string bottomName | bottomName = this.bottomModuleName() | + if this.isTop() then + result = topModuleName() + else + result = bottomName + ) + } + + /** Gets the names of the modules that may be imported by this import. + * For example this predicate would return 'x' and 'x.y' for `import x.y` + */ + string getAnImportedModuleName() { + result = this.bottomModuleName() + or + result = this.getAnImportedModuleName().regexpReplaceAll("\\.[^.]*$", "") + } + + override Expr getASubExpression() { + none() + } + + override predicate hasSideEffects() { + any() + } + + private string getTopName() { + result = this.getName().regexpReplaceAll("\\..*", "") + } + + private string remainderOfName() { + not exists(this.getName()) and result = "" or + this.getLevel() <= 0 and result = this.getName().regexpReplaceAll("^[^\\.]*", "") or + this.getLevel() > 0 and result = "." 
+ this.getName() + } + + /** Whether this import is relative, that is, not absolute. + * See https://www.python.org/dev/peps/pep-0328/ */ + predicate isRelative() { + /* Implicit */ + exists(this.relativeTopName()) + or + /* Explicit */ + this.getLevel() > 0 + } + +} + +/** A `from ... import ...` expression */ +class ImportMember extends ImportMember_ { + + override Expr getASubExpression() { + result = this.getModule() + } + + override predicate hasSideEffects() { + /* Strictly this only has side-effects if the module is a package */ + any() + } + + /** Gets the full name of the module resulting from evaluating this import. + * NOTE: This is the name that is used to import the module, + * which may not be the name of the module. */ + string getImportedModuleName() { + result = this.getModule().(ImportExpr).getImportedModuleName() + "." + this.getName() + } + + override ImportMemberNode getAFlowNode() { result = super.getAFlowNode() } +} + +/** An import statement */ +class Import extends Import_ { + + private ImportExpr getAModuleExpr() { + result = this.getAName().getValue() + or + result = ((ImportMember)this.getAName().getValue()).getModule() + } + + /** Use getAnImportedModuleName(), + * possibly combined with ModuleObject.importedAs() + * Gets a module imported by this import statement */ + deprecated Module getAModule() { + result.getName() = this.getAnImportedModuleName() + } + + /** Whether this is a `from ... import ...` statement */ + predicate isFromImport() { + this.getAName().getValue() instanceof ImportMember + } + + override Expr getASubExpression() { + result = this.getAModuleExpr() or + result = this.getAName().getAsname() or + result = this.getAName().getValue() + } + + override Stmt getASubStatement() { + none() + } + + /** Gets the name of an imported module. + * For example, for the import statement `import bar` which + * is a relative import in package "foo", this would return + * "foo.bar". 
+ * The import statement `from foo import bar` would return + * `foo` and `foo.bar` + * */ + string getAnImportedModuleName() { + result = this.getAModuleExpr().getAnImportedModuleName() + or + exists(ImportMember m, string modname | + m = this.getAName().getValue() and + modname = m.getModule().(ImportExpr).getImportedModuleName() | + result = modname + or + result = modname + "." + m.getName() + ) + } + +} + +/** An import * statement */ +class ImportStar extends ImportStar_ { + + ImportExpr getModuleExpr() { + result = this.getModule() + or + result = ((ImportMember)this.getModule()).getModule() + } + + override string toString() { + result = "from " + this.getModuleExpr().getName() + " import *" + } + + /** Use getImportedModuleName(), + * possibly combined with ModuleObject.importedAs() + * Gets the module imported by this import * statement + */ + deprecated Module getTheModule() { + result.getName() = this.getImportedModuleName() + } + + override Expr getASubExpression() { + result = this.getModule() + } + + override Stmt getASubStatement() { + none() + } + + /** Gets the name of the imported module. */ + string getImportedModuleName() { + result = this.getModuleExpr().getImportedModuleName() + } + +} + +/** A statement that imports a module. This can be any statement that includes the `import` keyword, + * such as `import sys`, `from sys import version` or `from sys import *`. */ +class ImportingStmt extends Stmt { + + ImportingStmt() { + this instanceof Import + or + this instanceof ImportStar + } + + /** Gets the name of an imported module. 
*/ + string getAnImportedModuleName() { + result = this.(Import).getAnImportedModuleName() + or + result = this.(ImportStar).getImportedModuleName() + } + +} diff --git a/python/ql/src/semmle/python/Keywords.qll b/python/ql/src/semmle/python/Keywords.qll new file mode 100644 index 00000000000..3be9311d081 --- /dev/null +++ b/python/ql/src/semmle/python/Keywords.qll @@ -0,0 +1,101 @@ +import python + +class KeyValuePair extends KeyValuePair_, DictDisplayItem { + + override Location getLocation() { + result = KeyValuePair_.super.getLocation() + } + + override string toString() { + result = KeyValuePair_.super.toString() + } + + /** Gets the value of this key-value pair. */ + override Expr getValue() { + result = KeyValuePair_.super.getValue() + } + + override Scope getScope() { + result = this.getValue().getScope() + } + + override AstNode getAChildNode() { + result = this.getKey() + or + result = this.getValue() + } + +} + +/** A double-starred expression in a call or dict literal. */ +class DictUnpacking extends DictUnpacking_, DictUnpackingOrKeyword, DictDisplayItem { + + override Location getLocation() { + result = DictUnpacking_.super.getLocation() + } + + override string toString() { + result = DictUnpacking_.super.toString() + } + + /** Gets the value of this dictionary unpacking. */ + override Expr getValue() { + result = DictUnpacking_.super.getValue() + } + + override Scope getScope() { + result = this.getValue().getScope() + } + + override AstNode getAChildNode() { + result = this.getValue() + } + +} + +abstract class DictUnpackingOrKeyword extends DictItem { + + abstract Expr getValue(); + + override string toString() { + none() + } + +} + +abstract class DictDisplayItem extends DictItem { + + abstract Expr getValue(); + + override string toString() { + none() + } + +} + +/** A keyword argument in a call. 
For example `arg=expr` in `foo(0, arg=expr)` */ +class Keyword extends Keyword_, DictUnpackingOrKeyword { + + override Location getLocation() { + result = Keyword_.super.getLocation() + } + + override string toString() { + result = Keyword_.super.toString() + } + + /** Gets the value of this keyword argument. */ + override Expr getValue() { + result = Keyword_.super.getValue() + } + + override Scope getScope() { + result = this.getValue().getScope() + } + + override AstNode getAChildNode() { + result = this.getValue() + } + +} + diff --git a/python/ql/src/semmle/python/Lists.qll b/python/ql/src/semmle/python/Lists.qll new file mode 100644 index 00000000000..cb6bbb1e3f5 --- /dev/null +++ b/python/ql/src/semmle/python/Lists.qll @@ -0,0 +1,55 @@ +import python + +/** A parameter list */ +class ParameterList extends @py_parameter_list { + + Function getParent() { + py_parameter_lists(this, result) + } + + /** Gets a parameter */ + Parameter getAnItem() { + /* Item can be a Name or a Tuple, both of which are expressions */ + py_exprs(result, _, this, _) + } + + /** Gets the nth parameter */ + Parameter getItem(int index) { + /* Item can be a Name or a Tuple, both of which are expressions */ + py_exprs(result, _, this, index) + } + + string toString() { + result = "ParameterList" + } +} + +/** A list of Comprehensions (for generating parts of a set, list or dictionary comprehension) */ +class ComprehensionList extends ComprehensionList_ { + +} + +/** A list of expressions */ +class ExprList extends ExprList_ { + +} + + +library class DictItemList extends DictItemList_ { + +} + +library class DictItemListParent extends DictItemListParent_ { + +} + +/** A list of strings (the primitive type string not Bytes or Unicode) */ +class StringList extends StringList_ { + +} + +/** A list of aliases in an import statement */ +class AliasList extends AliasList_ { + +} + diff --git a/python/ql/src/semmle/python/Metrics.qll b/python/ql/src/semmle/python/Metrics.qll new file mode 
100644 index 00000000000..dde48db0fc8 --- /dev/null +++ b/python/ql/src/semmle/python/Metrics.qll @@ -0,0 +1,388 @@ +import python + +/** The metrics for a function */ +class FunctionMetrics extends Function { + + /** Gets the total number of lines (including blank lines) + from the definition to the end of the function */ + int getNumberOfLines() { + py_alllines(this, result) + } + + /** Gets the number of lines of code in the function */ + int getNumberOfLinesOfCode() { + py_codelines(this, result) + } + + /** Gets the number of lines of comments in the function */ + int getNumberOfLinesOfComments() { + py_commentlines(this, result) + } + + /** Gets the number of lines of docstring in the function */ + int getNumberOfLinesOfDocStrings() { + py_docstringlines(this, result) + } + + /** Cyclomatic complexity: + * The number of linearly independent paths through the source code. + * Computed as E - N + 2P, + * where + * E = the number of edges of the graph. + * N = the number of nodes of the graph. + * P = the number of connected components, which for a single function is 1. + */ + int getCyclomaticComplexity() { + exists(int E, int N | + N = count(BasicBlock b | b = this.getABasicBlock() and b.likelyReachable()) + and + E = count(BasicBlock b1, BasicBlock b2 | + b1 = this.getABasicBlock() and b1.likelyReachable() and + b2 = this.getABasicBlock() and b2.likelyReachable() and + b2 = b1.getASuccessor() and not b1.unlikelySuccessor(b2) + ) + | + result = E - N + 2 + ) + } + + private BasicBlock getABasicBlock() { + result = this.getEntryNode().getBasicBlock() + or + exists(BasicBlock mid | mid = this.getABasicBlock() and result = mid.getASuccessor()) + } + + /** Dependency of Callables + One callable "this" depends on another callable "result" + if "this" makes some call to a method that may end up being "result". 
+ */ + FunctionMetrics getADependency() { + result != this and + not non_coupling_method(result) and + exists(Call call | + call.getScope() = this | + exists(FunctionObject callee | + callee.getFunction() = result | + call.getAFlowNode().getFunction().refersTo(callee) + ) + or + exists(Attribute a | + call.getFunc() = a | + unique_root_method(result, a.getName()) or + exists(Name n | a.getObject() = n and n.getId() = "self" | + result.getScope() = this.getScope() and + result.getName() = a.getName() + ) + ) + ) + } + + /** Afferent Coupling + the number of callables that depend on this method. + This is sometimes called the "fan-in" of a method. + */ + int getAfferentCoupling() { + result = count(FunctionMetrics m | m.getADependency() = this ) + } + + /** Efferent Coupling + the number of methods that this method depends on + This is sometimes called the "fan-out" of a method. + */ + int getEfferentCoupling() { + result = count(FunctionMetrics m | this.getADependency() = m) + } + + int getNumberOfParametersWithoutDefault() { + result = this.getPositionalParameterCount() - + count(((FunctionExpr)this.getDefinition()).getArgs().getADefault()) + } + + int getStatementNestingDepth() { + result = max(Stmt s | s.getScope() = this | getNestingDepth(s)) + } + + int getNumberOfCalls() { + result = count(Call c | c.getScope() = this) + } + +} + +/** The metrics for a class */ +class ClassMetrics extends Class { + + /** Gets the total number of lines (including blank lines) + from the definition to the end of the class */ + int getNumberOfLines() { + py_alllines(this, result) + } + + /** Gets the number of lines of code in the class */ + int getNumberOfLinesOfCode() { + py_codelines(this, result) + } + + /** Gets the number of lines of comments in the class */ + int getNumberOfLinesOfComments() { + py_commentlines(this, result) + } + + /** Gets the number of lines of docstrings in the class */ + int getNumberOfLinesOfDocStrings() { + py_docstringlines(this, result) + } + + 
private predicate dependsOn(Class other) { + other != this and + ( + exists(FunctionMetrics f1, FunctionMetrics f2 | + f1.getADependency() = f2 | + f1.getScope() = this and f2.getScope() = other + ) + or + exists(Function f, Call c, ClassObject cls | + c.getScope() = f and f.getScope() = this | + c.getFunc().refersTo(cls) and + cls.getPyClass() = other + ) + ) + } + + /** The afferent coupling of a class is the number of classes that + * directly depend on it. + */ + int getAfferentCoupling() { + result = count(ClassMetrics t | t.dependsOn(this)) + } + + /** The efferent coupling of a class is the number of classes that + * it directly depends on. + */ + int getEfferentCoupling() { + result = count(ClassMetrics t | this.dependsOn(t)) + } + + int getInheritanceDepth() { + exists(ClassObject cls | + cls.getPyClass() = this | + result = max(classInheritanceDepth(cls)) + ) + } + + /* -------- CHIDAMBER AND KEMERER LACK OF COHESION IN METHODS ------------ */ + + /* The aim of this metric is to try and determine whether a class + represents one abstraction (good) or multiple abstractions (bad). + If a class represents multiple abstractions, it should be split + up into multiple classes. + + In the Chidamber and Kemerer method, this is measured as follows: + n1 = number of pairs of distinct methods in a class that do *not* + have at least one commonly accessed field + n2 = number of pairs of distinct methods in a class that do + have at least one commonly accessed field + lcom = ((n1 - n2)/2 max 0) + + We divide by 2 because each pair (m1,m2) is counted twice in n1 and n2. + + */ + + /** should function f be excluded from the cohesion computation? 
*/ + predicate ignoreLackOfCohesion(Function f) { + f.isInitMethod() or f.isSpecialMethod() + } + + private predicate methodPair(Function m1, Function m2) { + m1.getScope() = this and + m2.getScope() = this and + not this.ignoreLackOfCohesion(m1) and + not this.ignoreLackOfCohesion(m2) and + m1 != m2 + } + + private predicate one_accesses_other(Function m1, Function m2) { + this.methodPair(m1, m2) and + ( + exists(SelfAttributeRead sa | + sa.getName() = m1.getName() and + sa.getScope() = m2 + ) + or + exists(SelfAttributeRead sa | + sa.getName() = m2.getName() and + sa.getScope() = m1 + ) + ) + } + + + /** do m1 and m2 access a common field or one calls the other? */ + private predicate shareField(Function m1, Function m2) { + this.methodPair(m1, m2) and + exists(string name | + exists(SelfAttributeRead sa | + sa.getName() = name and + sa.getScope() = m1 + ) + and + exists(SelfAttributeRead sa | + sa.getName() = name and + sa.getScope() = m2 + ) + ) + } + + private int similarMethodPairs() { + result = count(Function m1, Function m2 | + this.methodPair(m1, m2) and + (this.shareField(m1, m2) or this.one_accesses_other(m1, m2)) + ) / 2 + } + + private int methodPairs() { + result = count(Function m1, Function m2 | this.methodPair(m1, m2)) / 2 + } + + /** return Chidamber and Kemerer Lack of Cohesion */ + int getLackOfCohesionCK() { + exists(int n | + n = this.methodPairs() - 2 * this.similarMethodPairs() + and + result = n.maximum(0) + ) + } + + private predicate similarMethodPairDag(Function m1, Function m2, int line) { + (this.shareField(m1, m2) or this.one_accesses_other(m1, m2)) and + line = m1.getLocation().getStartLine() and + line < m2.getLocation().getStartLine() + } + + private predicate subgraph(Function m, int line) { + this.similarMethodPairDag(m, _, line) and not this.similarMethodPairDag(_, m, _) + or + exists(Function other | this.subgraph(other, line) | + this.similarMethodPairDag(other, m, _) or + this.similarMethodPairDag(m, other, _) + ) + } + + 
predicate unionSubgraph(Function m, int line) { + line = min(int l | this.subgraph(m, l)) + } + + /** return Hitz and Montazeri Lack of Cohesion */ + int getLackOfCohesionHM() { + result = count(int line | + this.unionSubgraph(_, line) + ) + } + +} + +private int classInheritanceDepth(ClassObject cls) { + /* Prevent run-away recursion in case of circular inheritance */ + not cls.getASuperType() = cls + and + ( + exists(ClassObject sup | + cls.getABaseType() = sup | + result = classInheritanceDepth(sup) + 1 + ) + or + not exists(cls.getABaseType()) and ( + major_version() = 2 and result = 0 + or + major_version() > 2 and result = 1 + ) + ) +} + +class ModuleMetrics extends Module { + + /** Gets the total number of lines (including blank lines) in the module */ + int getNumberOfLines() { + py_alllines(this, result) + } + + /** Gets the number of lines of code in the module */ + int getNumberOfLinesOfCode() { + py_codelines(this, result) + } + + /** Gets the number of lines of comments in the module */ + int getNumberOfLinesOfComments() { + py_commentlines(this, result) + } + + /** Gets the number of lines of docstrings in the module */ + int getNumberOfLinesOfDocStrings() { + py_docstringlines(this, result) + } + + /** The afferent coupling of a module is the number of modules that + * directly depend on it. + */ + int getAfferentCoupling() { + result = count(ModuleMetrics t | t.dependsOn(this)) + } + + /** The efferent coupling of a module is the number of modules that + * it directly depends on. 
+ */ + int getEfferentCoupling() { + result = count(ModuleMetrics t | this.dependsOn(t)) + } + + private predicate dependsOn(Module other) { + other != this and + ( + exists(FunctionMetrics f1, FunctionMetrics f2 | + f1.getADependency() = f2 | + f1.getEnclosingModule() = this and f2.getEnclosingModule() = other + ) + or + exists(Function f, Call c, ClassObject cls | + c.getScope() = f and f.getScope() = this | + c.getFunc().refersTo(cls) and + cls.getPyClass().getEnclosingModule() = other + ) + ) + } + +} + +/** Helpers for coupling */ + +predicate unique_root_method(Function func, string name) { + name = func.getName() and + not exists(FunctionObject f, FunctionObject other | + f.getFunction() = func and + other.getName() = name | + not other.overrides(f) + ) +} + +predicate non_coupling_method(Function f) { + f.isSpecialMethod() or + f.isInitMethod() or + f.getName() = "close" or + f.getName() = "write" or + f.getName() = "read" or + f.getName() = "get" or + f.getName() = "set" +} + +private int getNestingDepth(Stmt s) { + not exists(Stmt outer | outer.getASubStatement() = s) and result = 1 + or + exists(Stmt outer | + outer.getASubStatement() = s | + if s.(If).isElif() or s instanceof ExceptStmt then + /* If statement is an `elif` or `except` then it is not indented relative to its parent */ + result = getNestingDepth(outer) + else + result = getNestingDepth(outer) + 1 + ) +} + diff --git a/python/ql/src/semmle/python/Module.qll b/python/ql/src/semmle/python/Module.qll new file mode 100644 index 00000000000..d42fe53bbc5 --- /dev/null +++ b/python/ql/src/semmle/python/Module.qll @@ -0,0 +1,231 @@ +import python +private import semmle.python.pointsto.PointsTo + +/** A module. This is the top level element in an AST, corresponding to a source file. + * It is also a Scope; the scope of global variables. 
*/ +class Module extends Module_, Scope, AstNode { + + override string toString() { + result = this.getKind() + " " + this.getName() + or + /* No name is defined, which means that this is not on an import path. So it must be a script */ + not exists(this.getName()) and not this.isPackage() and + result = "Script " + this.getFile().getShortName() + } + + /** This method will be deprecated in the next release. Please use `getEnclosingScope()` instead. + * The enclosing scope of this module (always none) */ + override Scope getScope() { + none() + } + + /** The enclosing scope of this module (always none) */ + override Scope getEnclosingScope() { + none() + } + + /** Gets the statements forming the body of this module */ + override StmtList getBody() { + result = Module_.super.getBody() + } + + /** Gets the nth statement of this module */ + override Stmt getStmt(int n) { + result = Module_.super.getStmt(n) + } + + /** Gets a top-level statement in this module */ + override Stmt getAStmt() { + result = Module_.super.getAStmt() + } + + /** Gets the name of this module */ + override string getName() { + result = Module_.super.getName() and legalDottedName(result) + or + not exists(Module_.super.getName()) and + result = moduleNameFromFile(this.getPath()) + } + + /** Gets this module */ + override Module getEnclosingModule() { + result = this + } + + /** Gets the __init__ module of this module if the module is a package and it has an __init__ module */ + Module getInitModule() { + /* this.isPackage() and */ result.getName() = this.getName() + ".__init__" + } + + /** Whether this module is a package initializer */ + predicate isPackageInit() { + this.getName().matches("%\\_\\_init\\_\\_") and not this.isPackage() + } + + /** Gets a name exported by this module, that is the names that will be added to a namespace by 'from this-module import *' */ + string getAnExport() { + py_exports(this, result) + or + not PointsTo::module_defines_name(this, "__all__") and 
PointsTo::module_defines_name(this, result) + } + + /** Gets the source file for this module */ + File getFile() { + py_module_path(this, result) + } + + /** Gets the source file or folder for this module or package */ + Container getPath() { + py_module_path(this, result) + } + + /** Whether this is a package */ + predicate isPackage() { + this.getPath() instanceof Folder + } + + /** Gets the package containing this module (or parent package if this is a package) */ + Module getPackage() { + this.getName().matches("%.%") and + result.getName() = getName().regexpReplaceAll("\\.[^.]*$", "") + } + + /** Gets the name of the package containing this module */ + string getPackageName() { + this.getName().matches("%.%") and + result = getName().regexpReplaceAll("\\.[^.]*$", "") + } + + /** Gets the metrics for this module */ + ModuleMetrics getMetrics() { + result = this + } + + /** Use ModuleObject.getAnImportedModule() instead. + * Gets a module imported by this module */ + deprecated Module getAnImportedModule() { + result.getName() = this.getAnImportedModuleName() + } + + string getAnImportedModuleName() { + exists(Import i | i.getEnclosingModule() = this | result = i.getAnImportedModuleName()) + or + exists(ImportStar i | i.getEnclosingModule() = this | result = i.getImportedModuleName()) + } + + override Location getLocation() { + py_scope_location(result, this) + } + + /** Gets a child module or package of this package */ + Module getSubModule(string name) { + result.getPackage() = this and + name = result.getName().regexpReplaceAll(".*\\.", "") + } + + /** Whether name is declared in the __all__ list of this module */ + predicate declaredInAll(string name) + { + exists(AssignStmt a, GlobalVariable all | + a.defines(all) and a.getScope() = this and + all.getId() = "__all__" and ((List)a.getValue()).getAnElt().(StrConst).getText() = name + ) + } + + override AstNode getAChildNode() { + result = this.getAStmt() + } + + predicate hasFromFuture(string attr) { + 
exists(Import i, ImportMember im, ImportExpr ie, Alias a, Name name | + im.getModule() = ie and ie.getName() = "__future__" and + a.getAsname() = name and name.getId() = attr and + i.getASubExpression() = im and + i.getAName() = a and + i.getEnclosingModule() = this + ) + } + + /** Gets the path element from which this module was loaded. */ + Container getLoadPath() { + result = this.getPath().getImportRoot() + } + + /** Holds if this module is in the standard library for version `major.minor` */ + predicate inStdLib(int major, int minor) { + this.getLoadPath().isStdLibRoot(major, minor) + } + + /** Holds if this module is in the standard library */ + predicate inStdLib() { + this.inStdLib(_, _) + } + + override + predicate containsInScope(AstNode inner) { + Scope.super.containsInScope(inner) + } + + override + predicate contains(AstNode inner) { + Scope.super.contains(inner) + } + + /** Gets the kind of this module. */ + override string getKind() { + if this.isPackage() then + result = "Package" + else ( + not exists(Module_.super.getKind()) and result = "Module" + or + result = Module_.super.getKind() + ) + } + +} + +bindingset[name] +private predicate legalDottedName(string name) { + name.regexpMatch("(\\p{L}|_)(\\p{L}|\\d|_)*(\\.(\\p{L}|_)(\\p{L}|\\d|_)*)*") +} + +bindingset[name] +private predicate legalShortName(string name) { + name.regexpMatch("(\\p{L}|_)(\\p{L}|\\d|_)*") +} + +/** Holds if `f` is potentially a source package. + * Does it have an __init__.py file and is it within the source archive? 
+ */ +private predicate isPotentialSourcePackage(Folder f) { + f.getRelativePath() != "" and + exists(f.getFile("__init__.py")) +} + +private string moduleNameFromBase(Container file) { + file instanceof Folder and result = file.getBaseName() + or + file instanceof File and result = file.getStem() +} + +private string moduleNameFromFile(Container file) { + exists(string basename | + basename = moduleNameFromBase(file) and + legalShortName(basename) + | + result = moduleNameFromFile(file.getParent()) + "." + basename + or + isPotentialSourcePackage(file) and result = file.getStem() and + (not isPotentialSourcePackage(file.getParent()) or not legalShortName(file.getParent().getBaseName())) + or + result = file.getStem() and file.getParent() = file.getImportRoot() + or + result = file.getStem() and isStubRoot(file.getParent()) + ) +} + +private predicate isStubRoot(Folder f) { + not f.getParent*().isImportRoot() and + f.getAbsolutePath().matches("%/data/python/stubs") +} + diff --git a/python/ql/src/semmle/python/Operations.qll b/python/ql/src/semmle/python/Operations.qll new file mode 100644 index 00000000000..3ea0287dac7 --- /dev/null +++ b/python/ql/src/semmle/python/Operations.qll @@ -0,0 +1,344 @@ +import python + +/** Base class for operators */ +class Operator extends Operator_ { + + /** Gets the name of the special method used to implement this operator */ + string getSpecialMethodName() { none() } + +} + +/* Unary Expression and its operators */ + +/** A unary expression: (`+x`), (`-x`) or (`~x`) */ +class UnaryExpr extends UnaryExpr_ { + + override Expr getASubExpression() { + result = this.getOperand() + } + +} + +/** A unary operator: `+`, `-`, `~` or `not` */ +class Unaryop extends Unaryop_ { + + /** Gets the name of the special method used to implement this operator */ + string getSpecialMethodName() { none() } + +} + +/** An invert (`~`) unary operator */ +class Invert extends Invert_ { + + override string getSpecialMethodName() { result = "__invert__" 
} + +} + +/** A positive (`+`) unary operator */ +class UAdd extends UAdd_ { + + override string getSpecialMethodName() { result = "__pos__" } + +} + +/** A negation (`-`) unary operator */ +class USub extends USub_ { + + override string getSpecialMethodName() { result = "__neg__" } + +} + +/** A `not` unary operator */ +class Not extends Not_ { + + override string getSpecialMethodName() { none() } + +} + + +/* Binary Operation and its operators */ + +/** A binary expression, such as `x + y` */ +class BinaryExpr extends BinaryExpr_ { + + override Expr getASubExpression() { + result = this.getLeft() or result = this.getRight() + } + +} + +/** A power (`**`) binary operator */ +class Pow extends Pow_ { + + override string getSpecialMethodName() { result = "__pow__" } + +} + +/** A right shift (`>>`) binary operator */ +class RShift extends RShift_ { + + override string getSpecialMethodName() { result = "__rshift__" } + +} + +/** A subtract (`-`) binary operator */ +class Sub extends Sub_ { + + override string getSpecialMethodName() { result = "__sub__" } + +} + +/** A bitwise and (`&`) binary operator */ +class BitAnd extends BitAnd_ { + + override string getSpecialMethodName() { result = "__and__" } + +} + +/** A bitwise or (`|`) binary operator */ +class BitOr extends BitOr_ { + + override string getSpecialMethodName() { result = "__or__" } + +} + +/** A bitwise exclusive-or (`^`) binary operator */ +class BitXor extends BitXor_ { + + override string getSpecialMethodName() { result = "__xor__" } + +} + +/** An add (`+`) binary operator */ +class Add extends Add_ { + + override string getSpecialMethodName() { result = "__add__" } +} + +/** A (true) divide (`/`) binary operator */ +class Div extends Div_ { + + override string getSpecialMethodName() { + result = "__truediv__" + or + major_version() = 2 and result = "__div__" + } +} + +/** A floor divide (`//`) binary operator */ +class FloorDiv extends FloorDiv_ { + + override string getSpecialMethodName() { result 
= "__floordiv__" } + +} + +/** A left shift (`<<`) binary operator */ +class LShift extends LShift_ { + + override string getSpecialMethodName() { result = "__lshift__" } + +} + +/** A modulo (`%`) binary operator, which includes string formatting */ +class Mod extends Mod_ { + + override string getSpecialMethodName() { result = "__mod__" } + +} + +/** A multiplication (`*`) binary operator */ +class Mult extends Mult_ { + + override string getSpecialMethodName() { result = "__mul__" } + +} + +/** A matrix multiplication (`@`) binary operator */ +class MatMult extends MatMult_ { + + override string getSpecialMethodName() { result = "__matmul__" } + +} + +/* Comparison Operation and its operators */ + +/** A greater than (`>`) comparison operator */ +class Gt extends Gt_ { + + override string getSymbol() { + result = ">" + } + + override string getSpecialMethodName() { result = "__gt__" } + +} + +/** A greater than or equals (`>=`) comparison operator */ +class GtE extends GtE_ { + + override string getSymbol() { + result = ">=" + } + + override string getSpecialMethodName() { result = "__ge__" } + +} + +/** An `in` comparison operator */ +class In extends In_ { + + override string getSymbol() { + result = "in" + } + +} + +/** An `is` comparison operator */ +class Is extends Is_ { + + override string getSymbol() { + result = "is" + } + +} + +/** An `is not` comparison operator */ +class IsNot extends IsNot_ { + + override string getSymbol() { + result = "is not" + } + +} + +/** An equals (`==`) comparison operator */ +class Eq extends Eq_ { + + override string getSymbol() { + result = "==" + } + + override string getSpecialMethodName() { result = "__eq__" } + +} + +/** A less than (`<`) comparison operator */ +class Lt extends Lt_ { + + override string getSymbol() { + result = "<" + } + + override string getSpecialMethodName() { result = "__lt__" } + +} + +/** A less than or equals (`<=`) comparison operator */ +class LtE extends LtE_ { + + override 
string getSymbol() { + result = "<=" + } + + override string getSpecialMethodName() { result = "__le__" } + +} + +/** A not equals (`!=`) comparison operator */ +class NotEq extends NotEq_ { + + override string getSymbol() { + result = "!=" + } + + override string getSpecialMethodName() { result = "__ne__" } + +} + +/** A `not in` comparison operator */ +class NotIn extends NotIn_ { + + override string getSymbol() { + result = "not in" + } + +} + +/* Boolean Operation (and/or) and its operators */ + +/** A boolean shortcut (and/or) operation */ +class BoolExpr extends BoolExpr_ { + + override Expr getASubExpression() { + result = this.getAValue() + } + + string getOperator() { + this.getOp() instanceof And and result = "and" + or + this.getOp() instanceof Or and result = "or" + } + + /** Whether part evaluates to partIsTrue if this evaluates to wholeIsTrue */ + predicate impliesValue(Expr part, boolean partIsTrue, boolean wholeIsTrue) { + if this.getOp() instanceof And then ( + wholeIsTrue = true and partIsTrue = true and part = this.getAValue() + or + wholeIsTrue = true and ((BoolExpr)this.getAValue()).impliesValue(part, partIsTrue, true) + ) else ( + wholeIsTrue = false and partIsTrue = false and part = this.getAValue() + or + wholeIsTrue = false and ((BoolExpr)this.getAValue()).impliesValue(part, partIsTrue, false) + ) + } + +} + +/** A short circuit boolean operator, and/or */ +class Boolop extends Boolop_ { + +} + +/** An `and` boolean operator */ +class And extends And_ { + +} + +/** An `or` boolean operator */ +class Or extends Or_ { + +} diff --git a/python/ql/src/semmle/python/SSA.qll b/python/ql/src/semmle/python/SSA.qll new file mode 100644 index 00000000000..0471a673bf4 --- /dev/null +++ b/python/ql/src/semmle/python/SSA.qll @@ -0,0 +1,240 @@ +/** SSA library */ + +import python + +/** A static single assignment (SSA) variable. + * An SSA variable is a variable which is only assigned once (statically). 
+ * SSA variables can be defined as normal variables or by a phi node which can occur at joins in the flow graph. + * Definitions without uses do not have a SSA variable. + */ +class SsaVariable extends @py_ssa_var{ + + SsaVariable() { + py_ssa_var(this, _) + } + + /** Gets the source variable */ + Variable getVariable() { + py_ssa_var(this, result) + } + + /** Gets a use of this variable */ + ControlFlowNode getAUse() { + py_ssa_use(result, this) + } + + /** Gets the definition (which may be a deletion) of this SSA variable */ + ControlFlowNode getDefinition() { + py_ssa_defn(this, result) + } + + /** Gets an argument of the phi function defining this variable. + * This predicate uses the raw SSA form produced by the extractor. + * In general, you should use `getAPrunedPhiInput()` instead. */ + SsaVariable getAPhiInput() { + py_ssa_phi(this, result) + } + + /** Gets the edge(s) (result->this.getDefinition()) on which the SSA variable 'input' defines this SSA variable. + * For each incoming edge `X->B`, where `B` is the basic block containing this phi-node, only one of the input SSA variables + * for this phi-node is live. This predicate returns the predecessor block such that the variable 'input' + * is the live variable on the edge result->B. + */ + BasicBlock getPredecessorBlockForPhiArgument(SsaVariable input) { + input = this.getAPhiInput() and + result = this.getAPredecessorBlockForPhi() and + input.getDefinition().getBasicBlock().dominates(result) and + /* Beware the case where an SSA variable that is an input on one edge dominates another edge. + * Consider (in SSA form): + * x0 = 0 + * if cond: + * x1 = 1 + * x2 = phi(x0, x1) + * use(x2) + * + * The definition of x0 dominates the exit from the block x1=1, even though it does not reach it. + * Hence we need to check that no other definition dominates the edge and actually reaches it. + * Note that if a dominates c and b dominates c, then either a dominates b or vice-versa. 
+ */ + not exists(SsaVariable other, BasicBlock other_def | + not other = input and + other = this.getAPhiInput() and + other_def = other.getDefinition().getBasicBlock() + | + other_def.dominates(result) and + input.getDefinition().getBasicBlock().strictlyDominates(other_def) + ) + } + + /** Gets an argument of the phi function defining this variable, pruned of unlikely edges. */ + SsaVariable getAPrunedPhiInput() { + result = this.getAPhiInput() and + exists(BasicBlock incoming | incoming = this.getPredecessorBlockForPhiArgument(result) | + not incoming.getLastNode().(RaisingNode).unlikelySuccessor(this.getDefinition()) + ) + } + + /** Gets a variable that ultimately defines this variable and is not itself defined by another variable */ + SsaVariable getAnUltimateDefinition() { + result = this and not exists(this.getAPhiInput()) + or + result = this.getAPhiInput().getAnUltimateDefinition() + } + + string toString() { + result = "SSA Variable " + this.getId() + } + + Location getLocation() { + result = this.getDefinition().getLocation() + } + + /** Gets the id (name) of this variable */ + string getId() { + result = this.getVariable().getId() + } + + /** Gets the incoming edges for a Phi node. */ + private BasicBlock getAPredecessorBlockForPhi() { + exists(getAPhiInput()) and + result.getASuccessor() = this.getDefinition().getBasicBlock() + } + + /** Gets the incoming edges for a Phi node, pruned of unlikely edges. 
*/ + private BasicBlock getAPrunedPredecessorBlockForPhi() { + result = this.getAPredecessorBlockForPhi() and + not result.unlikelySuccessor(this.getDefinition().getBasicBlock()) + } + + /** Whether it is possible to reach a use of this variable without passing a definition */ + predicate reachableWithoutDefinition() { + not exists(this.getDefinition()) and not py_ssa_phi(this, _) + or + exists(SsaVariable var | var = this.getAPhiInput() | var.reachableWithoutDefinition()) + or + /* For phi-nodes, there must be a corresponding phi-input for each control-flow + * predecessor. Otherwise, the variable will be undefined on that incoming edge. + * WARNING: the same phi-input may cover multiple predecessors, so this check + * cannot be done by counting. + */ + exists(BasicBlock incoming | + incoming = this.getAPredecessorBlockForPhi() and + not this.getAPhiInput().getDefinition().getBasicBlock().dominates(incoming) + ) + } + + /** Whether this variable may be undefined */ + predicate maybeUndefined() { + not exists(this.getDefinition()) and not py_ssa_phi(this, _) and not this.implicitlyDefined() + or + this.getDefinition().isDelete() + or + exists(SsaVariable var | var = this.getAPrunedPhiInput() | var.maybeUndefined()) + or + /* For phi-nodes, there must be a corresponding phi-input for each control-flow + * predecessor. Otherwise, the variable will be undefined on that incoming edge. + * WARNING: the same phi-input may cover multiple predecessors, so this check + * cannot be done by counting. 
+ */ + exists(BasicBlock incoming | + reaches_end(incoming) and + incoming = this.getAPrunedPredecessorBlockForPhi() and + not this.getAPhiInput().getDefinition().getBasicBlock().dominates(incoming) + ) + } + + /** Holds if this variable has neither a definition nor a phi node, and its global variable either has a globally defined name or is `__path__` in a module for which `isPackageInit()` holds. */ private predicate implicitlyDefined() { + not exists(this.getDefinition()) and not py_ssa_phi(this, _) and + exists(GlobalVariable var | this.getVariable() = var | + globallyDefinedName(var.getId()) or + var.getId() = "__path__" and ((Module)var.getScope()).isPackageInit() + ) + } + + /** Gets the global variable that is accessed if this local is undefined. + * Only applies to local variables in class scopes. + */ + GlobalVariable getFallbackGlobal() { + exists(LocalVariable local, Class cls | this.getVariable() = local | + local.getScope() = cls and + result.getScope() = cls.getScope() and + result.getId() = local.getId() and + not exists(this.getDefinition()) + ) + } + + /** Holds if this SSA variable is the first parameter of a method + * (regardless of whether it is actually called self or not) + */ + predicate isSelf() { + exists(Function func | + func.isMethod() + and + this.getDefinition().getNode() = func.getArg(0) + ) + } +} + +/** Holds if `b` does not exit early and is either an entry block (it has no predecessor) or has a predecessor for which `reaches_end` holds. */ private predicate reaches_end(BasicBlock b) { + not exits_early(b) + and + ( + /* Entry point */ + not exists(BasicBlock prev | prev.getASuccessor() = b) + or + exists(BasicBlock prev | prev.getASuccessor() = b | + reaches_end(prev) + ) + ) +} + +/** Holds if `b` contains a call to a function that never returns. */ private predicate exits_early(BasicBlock b) { + exists(FunctionObject f | + f.neverReturns() and + f.getACall().getBasicBlock() = b + ) +} + +/** Holds if there is a module named `gettext`. */ private predicate gettext_installed() { + // Good enough (and fast) approximation + exists(Module m | m.getName() = "gettext") +} + +/** Holds if `name` names a builtin object, is `WindowsError`, or is `_` while `gettext_installed()` holds. */ private predicate builtin_constant(string name) { + exists(builtin_object(name)) + or + name = "WindowsError" + or + name = "_" and gettext_installed() +} + +/** Holds if `name` is `__file__`, `__builtins__` or `__name__`. */ private predicate auto_name(string name) { + name = "__file__" or name = "__builtins__" or name = "__name__" +} + +/** Whether this name is (almost) always 
defined, ie. it is a builtin or VM defined name */ +predicate globallyDefinedName(string name) { + builtin_constant(name) or auto_name(name) +} + +/** An SSA variable that is backed by a global variable */ +class GlobalSsaVariable extends EssaVariable { + + GlobalSsaVariable() { + this.getSourceVariable() instanceof GlobalVariable + } + + /** Gets the global variable that is the source variable of this SSA variable. */ GlobalVariable getVariable() { + result = this.getSourceVariable() + } + + /** Gets the id (name) of the underlying global variable. */ string getId() { + result = this.getVariable().getId() + } + + /** Gets a textual representation of this variable: "GSSA Variable " followed by its id. */ override string toString() { + result = "GSSA Variable " + this.getId() + } + + +} diff --git a/python/ql/src/semmle/python/Scope.qll b/python/ql/src/semmle/python/Scope.qll new file mode 100755 index 00000000000..54274623216 --- /dev/null +++ b/python/ql/src/semmle/python/Scope.qll @@ -0,0 +1,181 @@ +import python + +/** A Scope. A scope is the lexical extent over which all identifiers with the same name refer to the same variable. + * Modules, Classes and Functions are all Scopes. There are no other scopes. + * The scopes for expressions that create new scopes, lambdas and comprehensions, are handled by creating an anonymous Function. */ +class Scope extends Scope_ { + + /** Gets the module enclosing this scope, defined here as the enclosing module of the enclosing scope. */ Module getEnclosingModule() { + result = this.getEnclosingScope().getEnclosingModule() + } + + /** This method will be deprecated in the next release. Please use `getEnclosingScope()` instead. + * The reason for this is to avoid confusion around use of `x.getScope+()` where `x` might be an + * `AstNode` or a `Variable`. Forcing the users to write `x.getScope().getEnclosingScope*()` ensures that + * the apparent semantics and the actual semantics coincide. 
+ * [ Gets the scope enclosing this scope (modules have no enclosing scope) ] + */ + Scope getScope() { + none() + } + + /** Gets the scope enclosing this scope (modules have no enclosing scope) */ + Scope getEnclosingScope() { + none() + } + + /** Gets the statements forming the body of this scope */ + StmtList getBody() { + none() + } + + /** Gets the nth statement of this scope */ + Stmt getStmt(int n) { + none() + } + + /** Gets a top-level statement in this scope */ + Stmt getAStmt() { + none() + } + + /** Gets the location of this scope; this base definition has no result. */ Location getLocation() { + none() + } + + /** Gets the name of this scope */ + string getName() { + py_strs(result, this, 0) + } + + /** Gets the docstring for this scope: the string constant that is the value of its first statement, when that statement is an expression statement */ + StrConst getDocString() { + result = ((ExprStmt)this.getStmt(0)).getValue() + } + + /** Gets the entry point into this Scope's control flow graph */ + ControlFlowNode getEntryNode() { + py_scope_flow(result, this, -1) + } + + /** Gets the non-explicit exit from this Scope's control flow graph */ + ControlFlowNode getFallthroughNode() { + py_scope_flow(result, this, 0) + } + + /** Gets the exit of this scope following from a return statement */ + ControlFlowNode getReturnNode() { + py_scope_flow(result, this, 2) + } + + /** Gets an exit from this Scope's control flow graph */ + ControlFlowNode getAnExitNode() { + exists (int i | py_scope_flow(result, this, i) and i >= 0) + } + + /** Gets an exit from this Scope's control flow graph, + * that does not result from an exception */ + ControlFlowNode getANormalExit() { + result = this.getFallthroughNode() + or + result = this.getReturnNode() + } + + /** Holds if this is a top-level (non-nested) class or function, that is, its enclosing scope is its enclosing module */ + predicate isTopLevel() { + this.getEnclosingModule() = this.getEnclosingScope() + } + + /** Holds if this scope is deemed to be public */ + predicate isPublic() { + /* Not inside a function */ + not this.getEnclosingScope() instanceof Function and + /* Not implicitly private */ + this.getName().charAt(0) != "_" and + ( + this 
instanceof Module + or + exists(Module m | + m = this.getEnclosingScope() and m.isPublic() | + /* If the module has any exports (an `__all__`), this scope's name must be one of them */ + not exists(m.getAnExport()) + or + m.getAnExport() = this.getName() + ) + or + exists(Class c | c = this.getEnclosingScope() | + this instanceof Function and + c.isPublic() + ) + ) + } + + /** Holds if `a` is contained in the body of this scope, or in a scope (transitively) enclosed by this scope. */ predicate contains(AstNode a) { + this.getBody().contains(a) + or + exists(Scope inner | inner.getEnclosingScope() = this | inner.contains(a)) + } + + /** Holds if this scope can be expected to execute before `other`. + * Modules precede functions and methods in those modules. + * `__init__` precedes other methods. `__enter__` precedes `__exit__`. + * NOTE that this is context-insensitive, so a module "precedes" a function + * in that module, even if that function is called from the module scope. + */ + predicate precedes(Scope other) { + exists(Function f, string name | + f = other and name = f.getName() | + if f.isMethod() then ( + // The __init__ method is preceded by the enclosing module + this = f.getEnclosingModule() and name = "__init__" + or + exists(Class c, string pred_name | + // __init__ -> __enter__ -> __exit__ + // __init__ -> other-methods + f.getScope() = c and ( + pred_name = "__init__" and not name = "__init__" and not name = "__exit__" + or + pred_name = "__enter__" and name = "__exit__" + ) + | + this.getScope() = c and + pred_name = this.(Function).getName() + or + not exists(Function pre_func | + pre_func.getName() = pred_name and + pre_func.getScope() = c + ) and this = other.getEnclosingModule() + ) + ) else ( + // Normal functions are preceded by the enclosing module + this = f.getEnclosingModule() + ) + ) + } + + /** Gets the evaluation scope for code in this (lexical) scope. + * This is usually the scope itself, but may be an enclosing scope. + * Notably, for list comprehensions in Python 2. 
+ */ + Scope getEvaluatingScope() { + result = this + } + + /** Holds if this scope is in the source archive, + * that is, it is part of the code specified, not library code + */ + predicate inSource() { + exists(this.getEnclosingModule().getFile().getRelativePath()) + } + + /** Gets the last statement of this scope: the last statement of the last item of its body. */ Stmt getLastStatement() { + result = this.getBody().getLastItem().getLastStatement() + } + + /** Holds if this scope contains `inner` syntactically and `inner` has the same scope as `this` */ + predicate containsInScope(AstNode inner) { + this.getBody().contains(inner) and + this = inner.getScope() + } + +} diff --git a/python/ql/src/semmle/python/SelfAttribute.qll b/python/ql/src/semmle/python/SelfAttribute.qll new file mode 100644 index 00000000000..9e165a4bb54 --- /dev/null +++ b/python/ql/src/semmle/python/SelfAttribute.qll @@ -0,0 +1,241 @@ +/** Utilities to support queries about instance attribute accesses of + * the form `self.attr`. + */ + +import python +private import semmle.python.pointsto.PointsTo +private import semmle.python.pointsto.Filters + +/** An attribute access where the left hand side of the attribute expression + * is `self`. 
+ */ +class SelfAttribute extends Attribute { + + SelfAttribute() { + self_attribute(this, _) + } + + /** Gets a class that (transitively) encloses the method in which this access occurs. */ Class getClass() { + self_attribute(this, result) + } + +} + +/** Holds if variable `self` is a parameter, and an access to it is the first argument of the method `method` */ +private predicate self_variable(Function method, Variable self) { + self.isParameter() and + method.isMethod() and + method.getArg(0).asName() = self.getAnAccess() +} + +/** Holds if `attr` is an access of the form `self.attr` in a method that is (transitively) enclosed by the class `cls` */ +private predicate self_attribute(Attribute attr, Class cls) { + exists(Function f, Variable self | + self_variable(f, self) | + self.getAnAccess() = attr.getObject() and + cls = f.getScope+() + ) +} + +/** Helper class for UndefinedClassAttribute.ql & MaybeUndefinedClassAttribute.ql */ +class SelfAttributeRead extends SelfAttribute { + + SelfAttributeRead() { + this.getCtx() instanceof Load and + /* Be stricter for loads. + * We want to be generous as to what is defined (i.e. stores), + * but strict as to what needs to be defined (i.e. loads). 
+ */ + exists(ClassObject cls, FunctionObject func | + cls.declaredAttribute(_) = func | + func.getFunction() = this.getScope() and + cls.getPyClass() = this.getClass() + ) + } + + /** Holds if this read is strictly dominated by a node `n` for which `hasattr(n, use, name)` holds, where `use` is a use of the variable this attribute is read from and `name` is the attribute's name. */ predicate guardedByHasattr() { + exists(Variable var, ControlFlowNode n | + var.getAUse() = this.getObject().getAFlowNode() and + hasattr(n, var.getAUse(), this.getName()) and + n.strictlyDominates(this.getAFlowNode()) + ) + } + + /** Holds if a `SelfAttributeStore` with the same name and in the same scope strictly dominates this read. */ pragma [noinline] predicate locallyDefined() { + exists(SelfAttributeStore store | + this.getName() = store.getName() and + this.getScope() = store.getScope() | + store.getAFlowNode().strictlyDominates(this.getAFlowNode()) + ) + } + +} + +/** An attribute access of the form `self.attr` whose context is a `Store`. */ class SelfAttributeStore extends SelfAttribute { + + SelfAttributeStore() { + this.getCtx() instanceof Store + } + + /** Gets the value of an assignment that has this attribute as a target. */ Expr getAssignedValue() { + exists(Assign a | a.getATarget() = this | + result = a.getValue() + ) + } + +} + +/** Gets the `__getattribute__` member of `theObjectType()` for the current major version. */ private Object object_getattribute() { + py_cmembers_versioned(theObjectType(), "__getattribute__", result, major_version().toString()) +} + +/** Helper class for UndefinedClassAttribute.ql and MaybeUndefinedClassAttribute.ql */ +class CheckClass extends ClassObject { + + /** Holds if this class should be checked: it is public and none of the exclusions listed in the body apply. */ private predicate ofInterest() { + not this.unknowableAttributes() and + not this.getPyClass().isProbableMixin() and + this.getPyClass().isPublic() and + not this.getPyClass().getScope() instanceof Function and + not this.probablyAbstract() and + not this.declaresAttribute("__new__") and + not this.selfDictAssigns() and + not this.lookupAttribute("__getattribute__") != object_getattribute() and + not this.hasAttribute("__getattr__") and + not this.selfSetattr() and + /* If class overrides object.__init__, but we can't resolve it to a Python function then give up */ + forall(ClassObject sup | + sup = this.getAnImproperSuperType() and + sup.declaresAttribute("__init__") and + not sup = theObjectType() | + sup.declaredAttribute("__init__") instanceof PyFunctionObject + ) + } + + predicate alwaysDefines(string name) { + auto_name(name) or + 
this.hasAttribute(name) or + this.getAnImproperSuperType().assignedInInit(name) or + this.getMetaClass().assignedInInit(name) + } + + /** Holds if `name` is always defined, or is the name of a `SelfAttributeStore` in a scope nested within the Python class of this class or of one of its (improper) super types. */ predicate sometimesDefines(string name) { + this.alwaysDefines(name) or + exists(SelfAttributeStore sa | + sa.getScope().getScope+() = this.getAnImproperSuperType().getPyClass() | + name = sa.getName() + ) + } + + /** Holds if a function found by attribute lookup on this class assigns to a subscript of `self.__dict__`, either directly or via a temporary variable. */ private predicate selfDictAssigns() { + exists(Assign a, SelfAttributeRead self_dict, Subscript sub | + self_dict.getName() = "__dict__" and + ( + self_dict = sub.getObject() + or + /* Indirect assignment via temporary variable */ + exists(SsaVariable v | + v.getAUse() = sub.getObject().getAFlowNode() and + v.getDefinition().(DefinitionNode).getValue() = self_dict.getAFlowNode() + ) + ) and + a.getATarget() = sub and + exists(FunctionObject meth | meth = this.lookupAttribute(_) and a.getScope() = meth.getFunction()) + ) + } + + /** Holds if there is a store to attribute `name` on an object that points-to analysis resolves to this class. */ pragma [nomagic] + private predicate monkeyPatched(string name) { + exists(Attribute a | + a.getCtx() instanceof Store and + PointsTo::points_to(a.getObject().getAFlowNode(), _, this, _, _) and a.getName() = name + ) + } + + /** Holds if a method of this class, or of one of its super types, calls `setattr` with a name `self` as its first argument. */ private predicate selfSetattr() { + exists(Call c, Name setattr, Name self, Function method | + ( method.getScope() = this.getPyClass() or + method.getScope() = this.getASuperType().getPyClass() + ) and + c.getScope() = method and + c.getFunc() = setattr and + setattr.getId() = "setattr" and + c.getArg(0) = self and + self.getId() = "self" + ) + } + + /** Holds if `a` is a read in an interesting context and its attribute name is not defined in the basic block containing `a`. */ predicate interestingUndefined(SelfAttributeRead a) { + exists(string name | name = a.getName() | + interestingContext(a, name) and + not this.definedInBlock(a.getAFlowNode().getBasicBlock(), name) + ) + } + + /** Holds if `a` reads attribute `name` in a public scope directly inside this class, this class is of interest, and the read is not excused by a local definition, a `hasattr` guard, monkey patching or an assignment in `setUp`. */ private predicate interestingContext(SelfAttributeRead a, string name) { + name = a.getName() and + this.ofInterest() and + this.getPyClass() = a.getScope().getScope() and + not a.locallyDefined() and + not a.guardedByHasattr() and + a.getScope().isPublic() and + not this.monkeyPatched(name) and + not 
attribute_assigned_in_method(lookupAttribute("setUp"), name) + } + + /** Holds if the name of this class starts with "Abstract", or `isAbstract()` holds for it. */ private predicate probablyAbstract() { + this.getName().matches("Abstract%") + or + this.isAbstract() + } + + /** Holds if block `b` contains a `SelfAttributeStore` named `name` belonging to the Python class of this class, or a call to a function found by attribute lookup on this class that assigns attribute `name`. */ private pragma[nomagic] predicate definitionInBlock(BasicBlock b, string name) { + exists(SelfAttributeStore sa | + sa.getAFlowNode().getBasicBlock() = b and sa.getName() = name and sa.getClass() = this.getPyClass() + ) + or + exists(FunctionObject method | this.lookupAttribute(_) = method | + attribute_assigned_in_method(method, name) and + b = method.getACall().getBasicBlock() + ) + } + + /** Holds if attribute `name` has a definition in `b`, or is defined in a predecessor of `b` (recursively). */ private pragma[nomagic] predicate definedInBlock(BasicBlock b, string name) { + // manual specialisation: this is only called from interestingUndefined, + // so we can push the context in from there, which must apply to a + // SelfAttributeRead in the same scope + exists(SelfAttributeRead a | + a.getScope() = b.getScope() and name = a.getName() | + interestingContext(a, name) + ) + and + this.definitionInBlock(b, name) + or + exists(BasicBlock prev | this.definedInBlock(prev, name) and prev.getASuccessor() = b) + } + +} + +/** Holds if attribute `name` is stored on a use of the `n`th parameter of `method`, or a use of that parameter is passed as argument `m` of a call to a callee for which this predicate holds with `m`. */ private predicate attr_assigned_in_method_arg_n(FunctionObject method, string name, int n) { + exists(SsaVariable param | + method.getFunction().getArg(n).asName() = param.getDefinition().getNode() + | + exists(AttrNode attr | + attr.getObject(name) = param.getAUse() and + attr.isStore() + ) + or + exists(CallNode call, FunctionObject callee, int m | + callee.getArgumentForCall(call, m) = param.getAUse() and + attr_assigned_in_method_arg_n(callee, name, m) + ) + ) +} + +/** Holds if `method` assigns attribute `name` on its parameter at index 0, directly or through a call that passes that parameter on. */ predicate attribute_assigned_in_method(FunctionObject method, string name) { + attr_assigned_in_method_arg_n(method, name, 0) +} + +/** Holds if `name` is `__class__` or `__dict__`. */ private predicate auto_name(string name) { + name = "__class__" or name = "__dict__" +} diff --git a/python/ql/src/semmle/python/Stmts.qll b/python/ql/src/semmle/python/Stmts.qll new file mode 100644 index 00000000000..b6e0a2b7945 --- /dev/null +++ b/python/ql/src/semmle/python/Stmts.qll 
@@ -0,0 +1,546 @@ +import python + +/** A statement */ +class Stmt extends Stmt_, AstNode { + + /** Gets the scope immediately enclosing this statement */ + override Scope getScope() { + py_scopes(this, result) + } + + override string toString() { + result = "Stmt" + } + + /** Gets the module enclosing this statement */ + Module getEnclosingModule() { + result = this.getScope().getEnclosingModule() + } + + override Location getLocation() { + result = Stmt_.super.getLocation() + } + + /** Gets an immediate (non-nested) sub-expression of this statement */ + Expr getASubExpression() { + none() + } + + /** Gets an immediate (non-nested) sub-statement of this statement */ + Stmt getASubStatement() { + none() + } + + override AstNode getAChildNode() { + result = this.getASubExpression() + or + result = this.getASubStatement() + } + + private ControlFlowNode possibleEntryNode() { + result.getNode() = this or + this.containsInScope(result.getNode()) + } + + + /** Gets a control flow node for an entry into this statement. + */ + ControlFlowNode getAnEntryNode() { + result = this.possibleEntryNode() and + exists(ControlFlowNode pred | + pred.getASuccessor() = result and + not pred = this.possibleEntryNode() + ) + } + + /** Holds if this statement cannot be reached */ + predicate isUnreachable() { + not exists(this.getAnEntryNode()) + or + exists(If ifstmt | + ifstmt.getTest().(ImmutableLiteral).booleanValue() = false and ifstmt.getBody().contains(this) + or + ifstmt.getTest().(ImmutableLiteral).booleanValue() = true and ifstmt.getOrelse().contains(this) + ) + or + exists(While whilestmt | + whilestmt.getTest().(ImmutableLiteral).booleanValue() = false and whilestmt.getBody().contains(this) + ) + } + + /** Gets the final statement in this statement, ordered by location. + * Will be this statement if not a compound statement. 
+ */ + Stmt getLastStatement() { + result = this + } + +} + +/** A statement that includes a binding (except imports) */ +class Assign extends Assign_ { + + /** Use ControlFlowNodes and SsaVariables for data-flow analysis. */ + predicate defines(Variable v) { + this.getATarget().defines(v) + } + + override Expr getASubExpression() { + result = this.getATarget() or + result = this.getValue() + } + + override Stmt getASubStatement() { + none() + } +} + +/** An augmented assignment statement, such as `x += y` */ +class AugAssign extends AugAssign_ { + + override Expr getASubExpression() { + result = this.getOperation() + } + + Expr getTarget() { + result = ((BinaryExpr)this.getOperation()).getLeft() + } + + Expr getValue() { + result = ((BinaryExpr)this.getOperation()).getRight() + } + + override Stmt getASubStatement() { + none() + } +} + +/** An annotated assignment statement, such as `x: int = 0` */ +class AnnAssign extends AnnAssign_ { + + override Expr getASubExpression() { + result = this.getAnnotation() or + result = this.getTarget() or + result = this.getValue() + } + + override Stmt getASubStatement() { + none() + } + + /** Holds if the value of the annotation of this assignment is stored at runtime. 
*/ + predicate isStored() { + not this.getScope() instanceof Function + and + exists(Name n | + n = this.getTarget() + and + not n.isParenthesized() + ) + } + +} + +/** An exec statement */ +class Exec extends Exec_ { + + override Expr getASubExpression() { + result = this.getBody() or + result = this.getGlobals() or + result = this.getLocals() + } + + override Stmt getASubStatement() { + none() + } + +} + +/** An except statement (part of a `try` statement), such as `except IOError as err:` */ +class ExceptStmt extends ExceptStmt_ { + + /** Gets the immediately enclosing try statement */ + Try getTry() { + result.getAHandler() = this + } + + override Expr getASubExpression() { + result = this.getName() + or + result = this.getType() + } + + override Stmt getASubStatement() { + result = this.getAStmt() + } + + override Stmt getLastStatement() { + result = this.getBody().getLastItem().getLastStatement() + } + +} + +/** An assert statement, such as `assert a == b, "A is not equal to b"` */ +class Assert extends Assert_ { + + override Expr getASubExpression() { + result = this.getMsg() or result = this.getTest() + } + + override Stmt getASubStatement() { + none() + } + +} + +/** A break statement */ +class Break extends Break_ { + + override Expr getASubExpression() { + none() + } + + override Stmt getASubStatement() { + none() + } + +} + +/** A continue statement */ +class Continue extends Continue_ { + + override Expr getASubExpression() { + none() + } + + override Stmt getASubStatement() { + none() + } + +} + +/** A delete statement, such as `del x[-1]` */ +class Delete extends Delete_ { + + override Expr getASubExpression() { + result = this.getATarget() + } + + override Stmt getASubStatement() { + none() + } + +} + +/** An expression statement, such as `len(x)` or `yield y` */ +class ExprStmt extends ExprStmt_ { + + override Expr getASubExpression() { + result = this.getValue() + } + + override Stmt getASubStatement() { + none() + } + +} + +/** A for statement, 
such as `for x in y: print(x)` */ +class For extends For_ { + + override Stmt getASubStatement() { + result = this.getAStmt() or + result = this.getAnOrelse() + } + + override Expr getASubExpression() { + result = this.getTarget() or + result = this.getIter() + } + + override Stmt getLastStatement() { + result = this.getBody().getLastItem().getLastStatement() + } + +} + +/** A global statement, such as `global var` */ +class Global extends Global_ { + + override Expr getASubExpression() { + none() + } + + override Stmt getASubStatement() { + none() + } +} + +/** An if statement, such as `if eggs: print("spam")` */ +class If extends If_ { + + override Stmt getASubStatement() { + result = this.getAStmt() or + result = this.getAnOrelse() + } + + override Expr getASubExpression() { + result = this.getTest() + } + + /** Whether this if statement takes the form `if __name__ == "__main__":` */ + predicate isNameEqMain() { + exists(StrConst m, Name n, Compare c | + this.getTest() = c and + c.getOp(0) instanceof Eq and + ( + c.getLeft() = n and c.getComparator(0) = m + or + c.getLeft() = m and c.getComparator(0) = n + ) and + n.getId() = "__name__" and + m.getText() = "__main__" + ) + } + + /** Whether this if statement starts with the keyword `elif` */ + predicate isElif() { + /* The Python parser turns all elif chains into nested if-else statements. + * An `elif` can be identified as it is the first statement in an `else` block + * and it is not indented relative to its parent `if`. 
+ */ + exists(If i | + i.getOrelse(0) = this and + this.getLocation().getStartColumn() = i.getLocation().getStartColumn() + ) + } + + /** Gets the `elif` branch of this `if`-statement, if present */ + If getElif() { + result = this.getOrelse(0) and + result.isElif() + } + + override Stmt getLastStatement() { + result = this.getOrelse().getLastItem().getLastStatement() + or + not exists(this.getOrelse()) and + result = this.getBody().getLastItem().getLastStatement() + } + +} + +/** A nonlocal statement, such as `nonlocal var` */ +class Nonlocal extends Nonlocal_ { + + override Stmt getASubStatement() { + none() + } + + override Expr getASubExpression() { + none() + } + + Variable getAVariable() { + result.getScope() = this.getScope() and + result.getId() = this.getAName() + } + +} + +/** A pass statement */ +class Pass extends Pass_ { + + override Stmt getASubStatement() { + none() + } + + override Expr getASubExpression() { + none() + } + +} + +/** A print statement (Python 2 only), such as `print 0` */ +class Print extends Print_ { + + override Stmt getASubStatement() { + none() + } + + override Expr getASubExpression() { + result = this.getAValue() or + result = this.getDest() + } + +} + +/** A raise statement, such as `raise CompletelyDifferentException()` */ +class Raise extends Raise_ { + + override Stmt getASubStatement() { + none() + } + + override Expr getASubExpression() { + py_exprs(result, _, this, _) + } + + /** The expression immediately following the `raise`, this is the + * exception raised, but not accounting for tuples in Python 2. + */ + Expr getException() { + result = this.getType() + or + result = this.getExc() + } + + /** The exception raised, accounting for tuples in Python 2. */ + Expr getRaised() + { + exists(Expr raw | + raw = this.getException() | + if (not major_version() = 2 or not exists(raw.(Tuple).getAnElt())) then + result = raw + else + /* In Python 2 raising a tuple will result in the first element of the tuple being raised. 
*/ + result = raw.(Tuple).getElt(0) + ) + } +} + +/** A return statement, such as `return None` */ +class Return extends Return_ { + + override Stmt getASubStatement() { + none() + } + + override Expr getASubExpression() { + result = this.getValue() + } + +} + +/** A try statement */ +class Try extends Try_ { + + override Expr getASubExpression() { + none() + } + + override Stmt getASubStatement() { + result = this.getAHandler() or + result = this.getAStmt() or + result = this.getAFinalstmt() or + result = this.getAnOrelse() + } + + override ExceptStmt getHandler(int i) { + result = Try_.super.getHandler(i) + } + + /** Gets an exception handler of this try statement. */ + override ExceptStmt getAHandler() { + result = Try_.super.getAHandler() + } + + override Stmt getLastStatement() { + result = this.getFinalbody().getLastItem().getLastStatement() + or + not exists(this.getFinalbody()) and + result = this.getOrelse().getLastItem().getLastStatement() + or + not exists(this.getFinalbody()) and not exists(this.getOrelse()) and + result = this.getHandlers().getLastItem().getLastStatement() + or + not exists(this.getFinalbody()) and not exists(this.getOrelse()) and not exists(this.getHandlers()) and + result = this.getBody().getLastItem().getLastStatement() + } + +} + +/** A while statement, such as `while parrot_resting():` */ +class While extends While_ { + + override Expr getASubExpression() { + result = this.getTest() + } + + override Stmt getASubStatement() { + result = this.getAStmt() or + result = this.getAnOrelse() + } + + override Stmt getLastStatement() { + result = this.getOrelse().getLastItem().getLastStatement() + or + not exists(this.getOrelse()) and + result = this.getBody().getLastItem().getLastStatement() + } + +} + +/** A with statement, such as `with open("file") as f: text = f.read()` */ +class With extends With_ { + + override Expr getASubExpression() { + result = this.getContextExpr() or + result = this.getOptionalVars() + } + + override Stmt
getASubStatement() { + result = this.getAStmt() + } + + override Stmt getLastStatement() { + result = this.getBody().getLastItem().getLastStatement() + } + +} + +/** Plain text used in a template, wrapped in a `TemplateWrite` statement */ +class TemplateWrite extends TemplateWrite_ { + + override Expr getASubExpression() { + result = this.getValue() + } + + override Stmt getASubStatement() { + none() + } + +} + +class AsyncFor extends For { + + AsyncFor() { + this.isAsync() + } + +} + +class AsyncWith extends With { + + AsyncWith() { + this.isAsync() + } + +} + +/** A list of statements */ +class StmtList extends StmtList_ { + + /** Whether this list of statements contains `a`, either as an item or nested within an item */ + predicate contains(AstNode a) { + exists(Stmt item | + item = this.getAnItem() | + item = a or item.contains(a) + ) + } + + Stmt getLastItem() { result = this.getItem(max(int i | exists(this.getItem(i)))) } + +} + + diff --git a/python/ql/src/semmle/python/TestUtils.qll b/python/ql/src/semmle/python/TestUtils.qll new file mode 100644 index 00000000000..6697dbed00d --- /dev/null +++ b/python/ql/src/semmle/python/TestUtils.qll @@ -0,0 +1,24 @@ +/* This file contains test-related utility functions */ + +import python + + +/** Removes everything before an occurrence of `sub` in the string `str` (keeping `sub` itself); the result is `str` unchanged if `sub` does not occur */ + +bindingset[str,sub] +string remove_prefix_before_substring(string str, string sub) { + exists(int index | + index = str.indexOf(sub) and + result = str.suffix(index) + ) + or + not exists(str.indexOf(sub)) and + result = str +} + +/** Removes the part of the `resources/lib` Python library path that may vary + * from machine to machine.
*/ + +string remove_library_prefix(Location loc) { + result = remove_prefix_before_substring(loc.toString(), "resources/lib") +} diff --git a/python/ql/src/semmle/python/Variables.qll b/python/ql/src/semmle/python/Variables.qll new file mode 100644 index 00000000000..21ccc43b545 --- /dev/null +++ b/python/ql/src/semmle/python/Variables.qll @@ -0,0 +1,123 @@ + +import python + +/** A variable, either a global or local variable (including parameters) */ +class Variable extends @py_variable { + + /** Gets the identifier (name) of this variable */ + string getId() { + variable(this, _, result) + } + + string toString() { + result = "Variable " + this.getId() + } + + /** Gets an access (load or store) of this variable */ + Name getAnAccess() { + result = this.getALoad() + or + result = this.getAStore() + } + + /** Gets a load of this variable */ + Name getALoad() { + result.uses(this) + } + + /** Gets a store of this variable */ + Name getAStore() { + result.defines(this) + } + + /** Gets a use of this variable */ + NameNode getAUse() { + result.uses(this) + } + + /** Gets the scope of this variable */ + Scope getScope() { + variable(this, result, _) + } + + /** Whether there is an access to this variable outside + * of its own scope. Usually occurs in nested functions + * or for global variables. 
+ */ + predicate escapes() { + exists(Name n | n = this.getAnAccess() | n.getScope() != this.getScope()) + } + + /** Whether this variable is a parameter */ + predicate isParameter() { + none() + } + + predicate isSelf() { + none() + } + +} + +/** A local (function or class) variable */ +class LocalVariable extends Variable { + + LocalVariable() { + exists(Scope s | s = this.getScope() | s instanceof Function or s instanceof Class) + } + + override string toString() { + result = "Local Variable " + this.getId() + } + + /** Whether this variable is a parameter */ + override predicate isParameter() { + exists(Parameter p | this.getAnAccess() = p) + } + + /** Holds if this variable is the first parameter of a method. It is not necessarily called "self" */ + override predicate isSelf() { + exists(Function f, Parameter self | + this.getAnAccess() = self and + f.isMethod() and f.getArg(0) = self + ) + } + +} + +/** A local variable that uses "load fast" semantics, for lookup: + * If the variable is undefined, then raise an exception. + */ +class FastLocalVariable extends LocalVariable { + + FastLocalVariable() { + this.getScope() instanceof FastLocalsFunction + } + +} + +/** A local variable that uses "load name" semantics, for lookup: + * If the variable is undefined, then lookup the value in globals(). 
+ */ +class NameLocalVariable extends LocalVariable { + + NameLocalVariable() { + not this instanceof FastLocalVariable + } + +} + +/** A global (module-level) variable */ +class GlobalVariable extends Variable { + + GlobalVariable() { + exists(Module m | m = this.getScope()) + } + + override string toString() { + result = "Global Variable " + this.getId() + } + +} + + diff --git a/python/ql/src/semmle/python/dataflow/SsaDefinitions.qll b/python/ql/src/semmle/python/dataflow/SsaDefinitions.qll new file mode 100644 index 00000000000..d6d49949053 --- /dev/null +++ b/python/ql/src/semmle/python/dataflow/SsaDefinitions.qll @@ -0,0 +1,463 @@ +/** Provides classes and predicates for determining the uses and definitions of + * variables for ESSA form. + */ + +import python +private import semmle.python.pointsto.Base + + +/* Classification of variables. These should be non-overlapping and complete. + * + * Function local variables - Non escaping variables in a function, except 'self' + * Self variables - The 'self' variable for a method. + * Class local variables - Local variables declared in a class + * Non-local variables - Escaping variables in a function + * Built-in variables - Global variables with no definition + * Non-escaping globals -- Global variables that have definitions and all of those definitions are in the module scope + * Escaping globals -- Global variables that have definitions and at least one of those definitions is in another scope. + */ + +/** Python specific version of `SsaSourceVariable`. 
*/ +abstract class PythonSsaSourceVariable extends SsaSourceVariable { + + PythonSsaSourceVariable() { + /* Exclude `True`, `False` and `None` */ + not this.(Variable).getALoad() instanceof NameConstant + } + + override string getName() { + result = this.(Variable).getId() + } + + abstract ControlFlowNode getAnImplicitUse(); + + abstract ControlFlowNode getScopeEntryDefinition(); + + override ControlFlowNode getAUse() { + result = this.getASourceUse() + or + result = this.getAnImplicitUse() + or + /* `import *` is a definition of *all* variables, so must be a use as well, for pass-through + * once we have established that a variable is not redefined. + */ + SsaSource::import_star_refinement(this, result, _) + or + /* Add a use at the end of scope for all variables to keep them live + * This is necessary for taint-tracking. + */ + result = this.(Variable).getScope().getANormalExit() + } + + override predicate hasDefiningNode(ControlFlowNode def) { + def = this.getScopeEntryDefinition() + or + SsaSource::assignment_definition(this, def, _) + or + SsaSource::multi_assignment_definition(this, def) + or + SsaSource::deletion_definition(this, def) + or + SsaSource::iteration_defined_variable(this, def, _) + or + SsaSource::init_module_submodule_defn(this, def) + or + SsaSource::parameter_definition(this, def) + or + SsaSource::exception_capture(this, def) + or + SsaSource::with_definition(this, def) + } + + override predicate hasDefiningEdge(BasicBlock pred, BasicBlock succ) { + none() + } + + override predicate hasRefinement(ControlFlowNode use, ControlFlowNode def) { + this.hasDefiningNode(_) and /* Can't have a refinement unless there is a definition */ + refinement(this, use, def) + } + + override predicate hasRefinementEdge(ControlFlowNode use, BasicBlock pred, BasicBlock succ) { + use.(NameNode).uses(this) and + exists(ControlFlowNode test | + test.getAChild*() = use and + test.isBranch() and + test = pred.getLastNode() + ) and + (pred.getAFalseSuccessor() = succ 
or pred.getATrueSuccessor() = succ) + and + /* There is a store to this variable -- We don't want to refine builtins */ + exists(this.(Variable).getAStore()) and + /* There is at least one use or definition of the variable that is reachable by the test */ + exists(ControlFlowNode n | + n = this.getAUse() or + this.hasDefiningNode(n) | + pred.(ConditionBlock).strictlyReaches(n.getBasicBlock()) + ) + } + + override ControlFlowNode getASourceUse() { + result.(NameNode).uses(this) + or + result.(NameNode).deletes(this) + } + + abstract CallNode redefinedAtCallSite(); + +} + + +class FunctionLocalVariable extends PythonSsaSourceVariable { + + FunctionLocalVariable() { + this.(LocalVariable).getScope() instanceof Function and not this.(LocalVariable).escapes() + } + + override ControlFlowNode getAnImplicitUse() { + this.(Variable).isSelf() and this.(Variable).getScope().getANormalExit() = result + } + + override ControlFlowNode getScopeEntryDefinition() { + not this.(LocalVariable).getId() = "*" and + not this.(LocalVariable).isParameter() and + this.(LocalVariable).getScope().getEntryNode() = result + } + + override CallNode redefinedAtCallSite() { none() } + +} + +class NonLocalVariable extends PythonSsaSourceVariable { + + NonLocalVariable() { + this.(LocalVariable).getScope() instanceof Function and this.(LocalVariable).escapes() + } + + override ControlFlowNode getAnImplicitUse() { + result.(CallNode).getScope().getScope*() = this.(LocalVariable).getScope() + } + + override ControlFlowNode getScopeEntryDefinition() { + exists(Function f | + f.getScope+() = this.(LocalVariable).getScope() and + f.getEntryNode() = result + ) + or + not this.(LocalVariable).isParameter() and + this.(LocalVariable).getScope().getEntryNode() = result + } + + override CallNode redefinedAtCallSite() { + not this.(LocalVariable).getId() = "*" and + result.getScope().getScope*() = this.(LocalVariable).getScope() + } + +} + +class ClassLocalVariable extends PythonSsaSourceVariable { + + 
ClassLocalVariable() { + this.(LocalVariable).getScope() instanceof Class + } + + override ControlFlowNode getAnImplicitUse() { + none() + } + + override ControlFlowNode getScopeEntryDefinition() { + not this.(LocalVariable).getId() = "*" and + result = this.(LocalVariable).getScope().getEntryNode() + } + + override CallNode redefinedAtCallSite() { none() } + +} + +class BuiltinVariable extends PythonSsaSourceVariable { + + BuiltinVariable() { + this instanceof GlobalVariable and + not exists(this.(Variable).getAStore()) and + not this.(Variable).getId() = "__name__" and + not this.(Variable).getId() = "__package__" and + not exists(ImportStar is | is.getScope() = this.(Variable).getScope()) + } + + override ControlFlowNode getAnImplicitUse() { + none() + } + + override ControlFlowNode getScopeEntryDefinition() { + none() + } + + override CallNode redefinedAtCallSite() { none() } + +} + +class ModuleVariable extends PythonSsaSourceVariable { + + ModuleVariable() { + this instanceof GlobalVariable and + ( + exists(this.(Variable).getAStore()) + or + this.(Variable).getId() = "__name__" + or + this.(Variable).getId() = "__package__" + or + exists(ImportStar is | is.getScope() = this.(Variable).getScope()) + ) + } + + override ControlFlowNode getAnImplicitUse() { + result.getScope() = this.(GlobalVariable).getScope() and + ( + result instanceof CallNode + or + import_from_dot_in_init(result.(ImportMemberNode).getModule(this.getName())) + ) + or + exists(ImportTimeScope scope | + scope.entryEdge(result, _) | + this = scope.getOuterVariable(_) or + this.(Variable).getAUse().getScope() = scope + ) + or + /* For implicit use of __metaclass__ when constructing class */ + exists(Class c | + class_with_global_metaclass(c, this) and + c.(ImportTimeScope).entryEdge(result, _) + ) + or + exists(ImportTimeScope s | + result = s.getANormalExit() and this.(Variable).getScope() = s and + implicit_definition(this) + ) + } + + override ControlFlowNode getScopeEntryDefinition() { + 
not this.(GlobalVariable).getId() = "*" and + exists(Scope s | + s.getEntryNode() = result | + /* Module entry point */ + this.(GlobalVariable).getScope() = s + or + /* For implicit use of __metaclass__ when constructing class */ + class_with_global_metaclass(s, this) + or + /* Variable is used in scope */ + this.(GlobalVariable).getAUse().getScope() = s + ) + or + exists(ImportTimeScope scope | + scope.entryEdge(_, result) | + this = scope.getOuterVariable(_) or + this.(Variable).getAUse().getScope() = scope + ) + or + this.(GlobalVariable).getId() = "*" and + exists(Scope s | + s.getEntryNode() = result and + this.(Variable).getScope() = s and + exists(ImportStar is | is.getScope() = s) + ) + } + + override CallNode redefinedAtCallSite() { none() } + +} + +class NonEscapingGlobalVariable extends ModuleVariable { + + NonEscapingGlobalVariable() { + this instanceof GlobalVariable and + exists(this.(Variable).getAStore()) and + not variable_or_attribute_defined_out_of_scope(this) + } + +} + +class EscapingGlobalVariable extends ModuleVariable { + + EscapingGlobalVariable() { + this instanceof GlobalVariable and exists(this.(Variable).getAStore()) and variable_or_attribute_defined_out_of_scope(this) + } + + override ControlFlowNode getAnImplicitUse() { + result = ModuleVariable.super.getAnImplicitUse() + or + result.(CallNode).getScope().getScope+() = this.(GlobalVariable).getScope() + or + result = this.innerScope().getANormalExit() + } + + private Scope innerScope() { + result.getScope+() = this.(GlobalVariable).getScope() and + not result instanceof ImportTimeScope + } + + override ControlFlowNode getScopeEntryDefinition() { + result = ModuleVariable.super.getScopeEntryDefinition() + or + result = this.innerScope().getEntryNode() + } + + override CallNode redefinedAtCallSite() { + result.(CallNode).getScope().getScope*() = this.(GlobalVariable).getScope() + } + +} + +private predicate variable_or_attribute_defined_out_of_scope(Variable v) { + exists(NameNode n | 
n.defines(v) and not n.getScope() = v.getScope()) + or + exists(AttrNode a | a.isStore() and a.getObject() = v.getAUse() and not a.getScope() = v.getScope()) +} + +private predicate class_with_global_metaclass(Class cls, GlobalVariable metaclass) { + metaclass.getId() = "__metaclass__" and major_version() = 2 and + cls.getEnclosingModule() = metaclass.getScope() +} + + +/** Holds if this variable is implicitly defined */ +private predicate implicit_definition(Variable v) { + v.getId() = "*" + or + exists(ImportStar is | is.getScope() = v.getScope()) +} + +cached module SsaSource { + + /** Holds if `v` is used as the receiver in a method call. */ + cached predicate method_call_refinement(Variable v, ControlFlowNode use, CallNode call) { + use = v.getAUse() and + call.getFunction().(AttrNode).getObject() = use + } + + /** Holds if `v` is defined by assignment at `defn` and given `value`. */ + cached predicate assignment_definition(Variable v, ControlFlowNode defn, ControlFlowNode value) { + defn.(NameNode).defines(v) and defn.(DefinitionNode).getValue() = value + } + + /** Holds if `v` is defined by assignment of the captured exception. */ + cached predicate exception_capture(Variable v, NameNode defn) { + defn.defines(v) and + exists(ExceptFlowNode ex | ex.getName() = defn) + } + + /** Holds if `v` is defined by a with statement. */ + cached predicate with_definition(Variable v, ControlFlowNode defn) { + exists(With with, Name var | + with.getOptionalVars() = var and + var.getAFlowNode() = defn | + var = v.getAStore() + ) + } + + /** Holds if `v` is defined by multiple assignment at `defn`. 
*/ + cached predicate multi_assignment_definition(Variable v, ControlFlowNode defn) { + defn.(NameNode).defines(v) and + not exists(defn.(DefinitionNode).getValue()) and + exists(SequenceNode s | s.getAnElement() = defn) + } + + /** Holds if `v` is defined by a `for` statement, the definition being `defn` */ + cached predicate iteration_defined_variable(Variable v, ControlFlowNode defn, ControlFlowNode sequence) { + exists(ForNode for | for.iterates(defn, sequence)) and + defn.(NameNode).defines(v) + } + + /** Holds if `v` is a parameter variable and `defn` is the CFG node for that parameter. */ + cached predicate parameter_definition(Variable v, ControlFlowNode defn) { + exists(Function f, Name param | + f.getAnArg() = param or + f.getVararg() = param or + f.getKwarg() = param or + f.getKeywordOnlyArg(_) = param | + defn.getNode() = param and + param.getVariable() = v + ) + } + + /** Holds if `v` is deleted at `del`. */ + cached predicate deletion_definition(Variable v, DeletionNode del) { + del.getTarget().(NameNode).deletes(v) + } + + /** Holds if the name of `var` refers to a submodule of a package and `f` is the entry point + * to the __init__ module of that package. + */ + cached predicate init_module_submodule_defn(Variable var, ControlFlowNode f) { + exists(Module init | + init.isPackageInit() and exists(init.getPackage().getSubModule(var.getId())) and + var instanceof GlobalVariable and init.getEntryNode() = f and + var.getScope() = init + ) + } + + /** Holds if the `v` is in scope at a `from import ... 
*` and may thus be redefined by that statement */ + cached predicate import_star_refinement(Variable v, ControlFlowNode use, ControlFlowNode def) { + use = def and def instanceof ImportStarNode + and + ( + v.getScope() = def.getScope() + or + exists(NameNode other | + other.uses(v) and + def.getScope() = other.getScope() + ) + ) + } + + /** Holds if an attribute is assigned at `def` and `use` is the use of `v` for that assignment */ + cached predicate attribute_assignment_refinement(Variable v, ControlFlowNode use, ControlFlowNode def) { + use.(NameNode).uses(v) and + def.isStore() and def.(AttrNode).getObject() = use + } + + /** Holds if a `v` is used as an argument to `call`, which *may* modify the object referred to by `v` */ + cached predicate argument_refinement(Variable v, ControlFlowNode use, CallNode call) { + use.(NameNode).uses(v) and + call.getArg(0) = use and + not method_call_refinement(v, _, call) and + not test_refinement(v, _, call) + } + + /** Holds if an attribute is deleted at `def` and `use` is the use of `v` for that deletion */ + cached predicate attribute_deletion_refinement(Variable v, NameNode use, DeletionNode def) { + use.uses(v) and + def.getTarget().(AttrNode).getObject() = use + } + + /** Holds if the set of possible values for `v` is refined by `test` and `use` is the use of `v` in that test. 
*/ + cached predicate test_refinement(Variable v, ControlFlowNode use, ControlFlowNode test) { + use.(NameNode).uses(v) and + test.getAChild*() = use and + test.isBranch() and + exists(BasicBlock block | + block = use.getBasicBlock() and + block = test.getBasicBlock() and + not block.getLastNode() = test + ) + } + +} + +private predicate refinement(Variable v, ControlFlowNode use, ControlFlowNode def) { + SsaSource::import_star_refinement(v, use, def) + or + SsaSource::attribute_assignment_refinement(v, use, def) + or + SsaSource::argument_refinement(v, use, def) + or + SsaSource::attribute_deletion_refinement(v, use, def) + or + SsaSource::test_refinement(v, use, def) + or + SsaSource::method_call_refinement(v, use, def) + or + def = v.(PythonSsaSourceVariable).redefinedAtCallSite() and def = use +} diff --git a/python/ql/src/semmle/python/dataflow/StateTracking.qll b/python/ql/src/semmle/python/dataflow/StateTracking.qll new file mode 100644 index 00000000000..39f240f6388 --- /dev/null +++ b/python/ql/src/semmle/python/dataflow/StateTracking.qll @@ -0,0 +1,174 @@ +/** Provides classes and predicates for tracking global state across the control flow and call graphs. + * + * NOTE: State tracking tracks both whether a state may apply to a given node in a given context *and* + * whether it may not apply. + * That `state.appliesTo(f, ctx)` holds implies nothing about whether `state.mayNotApplyTo(f, ctx)` holds. + * Neither may hold which merely means that `f` with context `ctx` is not reached during the analysis. + * Conversely, both may hold, which means that `state` may or may not apply depending on how `f` was reached. + */ + +import python +private import semmle.python.pointsto.Base +private import semmle.python.pointsto.PointsTo +private import semmle.python.pointsto.PointsToContext + +/** A state that should be tracked. 
*/ +abstract class TrackableState extends string { + + bindingset[this] + TrackableState() { this = this } + + /** Holds if this state may apply to the control flow node `f`, regardless of the context. */ + final predicate appliesTo(ControlFlowNode f) { + this.appliesTo(f, _) + } + + /** Holds if this state may apply to the control flow node `f`, given the context `ctx`. */ + final predicate appliesTo(ControlFlowNode f, Context ctx) { + StateTracking::appliesToNode(this, f, ctx, true) + } + + /** Holds if this state may not apply to the control flow node `f`, given the context `ctx`. */ + final predicate mayNotApplyTo(ControlFlowNode f, Context ctx) { + StateTracking::appliesToNode(this, f, ctx, false) + } + + /** Holds if this state may not apply to the control flow node `f`, regardless of the context. */ + final predicate mayNotApplyTo(ControlFlowNode f) { + this.mayNotApplyTo(f, _) + } + + /** Holds if `test` establishes that this state applies (`sense` = true) or does not apply (`sense` = false) on the edge refined by `test`, given the context `ctx`. */ + predicate testsFor(PyEdgeRefinement test, Context ctx, boolean sense) { + ctx.appliesToScope(test.getScope()) and this.testsFor(test, sense) + } + + /** Holds if `test` establishes that this state applies (`sense` = true) or does not apply (`sense` = false) on the edge refined by `test`. Holds for nothing by default. */ + predicate testsFor(PyEdgeRefinement test, boolean sense) { none() } + + /** Holds if state starts at `f`. + * Either this predicate or `startsAt(ControlFlowNode f, Context ctx)` + * should be overridden by sub-classes. + */ + predicate startsAt(ControlFlowNode f) { none() } + + /** Holds if state starts at `f` given context `ctx`. + * Either this predicate or `startsAt(ControlFlowNode f)` + * should be overridden by sub-classes. + */ + pragma [noinline] + predicate startsAt(ControlFlowNode f, Context ctx) { + ctx.appliesTo(f) and this.startsAt(f) + } + + /** Holds if state ends at `f`. + * Either this predicate or `endsAt(ControlFlowNode f, Context ctx)` + * may be overridden by sub-classes.
+ */ + predicate endsAt(ControlFlowNode f) { none() } + + /** Holds if state ends at `f` given context `ctx`. + * Either this predicate or `endsAt(ControlFlowNode f)` + * may be overridden by sub-classes. + */ + pragma [noinline] + predicate endsAt(ControlFlowNode f, Context ctx) { + ctx.appliesTo(f) and this.endsAt(f) + } + +} + + +module StateTracking { + + private predicate not_allowed(TrackableState state, ControlFlowNode f, Context ctx, boolean sense) { + state.endsAt(f, ctx) and sense = true + or + state.startsAt(f, ctx) and sense = false + } + + /** Holds if `state` may apply (with `sense` = true) or may not apply (with `sense` = false) to + * control flow node `f` given the context `ctx`. + */ + predicate appliesToNode(TrackableState state, ControlFlowNode f, Context ctx, boolean sense) { + state.endsAt(f, ctx) and sense = false + or + state.startsAt(f, ctx) and sense = true + or + not not_allowed(state, f, ctx, sense) + and + ( + exists(BasicBlock b | + /* First node in a block */ + f = b.getNode(0) and appliesAtBlockStart(state, b, ctx, sense) + or + /* Other nodes in block, except trackable calls */ + exists(int n | + f = b.getNode(n) and + appliesToNode(state, b.getNode(n-1), ctx, sense) and + not exists(PyFunctionObject func, Context callee | + callee.fromCall(f, func, ctx) + ) + ) + ) + or + /* Function entry via call */ + exists(FunctionObject func, CallNode call, Context caller | + ctx.fromCall(call, func, caller) and + func.getFunction().getEntryNode() = f and + appliesToNode(state, call.getAPredecessor(), caller, sense) + ) + or + /* Function return */ + exists(PyFunctionObject func, Context callee | + callee.fromCall(f, func, ctx) and + appliesToNode(state, func.getFunction().getANormalExit(), callee, sense) + ) + or + /* Other scope entries */ + exists(Scope s | + s.getEntryNode() = f and + ctx.appliesToScope(s) + | + not exists(Scope pred | pred.precedes(s)) and + (ctx.isImport() or ctx.isRuntime()) and sense = false + or + exists(Scope pred,
Context pred_ctx | + appliesToNode(state, pred.getANormalExit(), pred_ctx, sense) and + pred.precedes(s) and + ctx.isRuntime() | + pred_ctx.isRuntime() or pred_ctx.isImport() + ) + ) + ) + } + + /** Holds if `state` may apply (with `sense` = true) or may not apply (with `sense` = false) at the + * start of basic block `block` given the context `ctx`. + */ + private predicate appliesAtBlockStart(TrackableState state, BasicBlock block, Context ctx, boolean sense) { + exists(PyEdgeRefinement test | + test.getSuccessor() = block and + state.testsFor(test, ctx, sense) + ) + or + exists(BasicBlock pred | + pred.getASuccessor() = block and + appliesAtBlockEnd(state, pred, ctx, sense) and + not exists(PyEdgeRefinement test | + test.getPredecessor() = pred and + test.getSuccessor() = block and + state.testsFor(test, sense.booleanNot()) + ) + ) + } + + /** Holds if `state` may apply (with `sense` = true) or may not apply (with `sense` = false) at the + * end of basic block `block` given the context `ctx`. 
+ */ + private predicate appliesAtBlockEnd(TrackableState state, BasicBlock block, Context ctx, boolean sense) { + appliesToNode(state, block.getLastNode(), ctx, sense) + } + +} + diff --git a/python/ql/src/semmle/python/dependencies/Dependencies.qll b/python/ql/src/semmle/python/dependencies/Dependencies.qll new file mode 100644 index 00000000000..f328c43c314 --- /dev/null +++ b/python/ql/src/semmle/python/dependencies/Dependencies.qll @@ -0,0 +1,199 @@ + +import python +import semmle.python.dependencies.DependencyKind + +private predicate importDependency(Object target, AstNode source) { + source.getScope() != target.getOrigin() and /* Imports of own module are ignored */ + ( + exists(ModuleObject importee, ImportingStmt imp_stmt | + source = imp_stmt and + importee = target | + exists(ImportMember im | imp_stmt.contains(im) | + importee.importedAs(im.getImportedModuleName()) + ) + or + exists(ImportExpr im | imp_stmt.contains(im) | + importee.importedAs(im.getImportedModuleName()) + ) + or + exists(ModuleObject mod | + importDependency(mod, source) and + target = mod.getPackage+() + ) + ) + or + /* from m import name, where m.name is not a submodule */ + exists(PythonModuleObject importee, ImportingStmt imp_stmt | + source = imp_stmt | + exists(ImportMember im | imp_stmt.contains(im) | + importee.importedAs(im.getModule().(ImportExpr).getImportedModuleName()) + and + defn_of_module_attribute(target, importee.getModule(), im.getName()) + ) + ) + ) +} + +class PythonImport extends DependencyKind { + + PythonImport() { + this = "import" + } + + override predicate isADependency(AstNode source, Object target) { + this = this and + importDependency(target, source) + } + +} + +private predicate interesting(Object target) { + target.(ControlFlowNode).getNode() instanceof Scope + or + target instanceof FunctionObject + or + target instanceof ClassObject + or + target instanceof ModuleObject +} + +class PythonUse extends DependencyKind { + + PythonUse() { + this = "use" + 
} + + override predicate isADependency(AstNode source, Object target) { + interesting(target) and + this = this and + source != target.(ControlFlowNode).getNode() and + exists(ControlFlowNode use, Object obj | + use.getNode() = source and + use.refersTo(obj) and + use.isLoad() + | + interesting(obj) and target = obj + ) + and + not has_more_specific_dependency_source(source) + } + +} + +/** Whether there is a more specific dependency source than this one. + * E.g. if the expression pack.mod.func is a dependency on the function 'func' in 'pack.mod' + * don't make pack.mod depend on the module 'pack.mod' + */ +private predicate has_more_specific_dependency_source(Expr e) { + exists(Attribute member | + member.getObject() = e | + attribute_access_dependency(_, member) + or + has_more_specific_dependency_source(member) + ) +} + +class PythonInheritance extends DependencyKind { + + PythonInheritance() { + this = "inheritance" + } + + override predicate isADependency(AstNode source, Object target) { + this = this and + exists(ClassObject cls | + source = cls.getOrigin() + | + target = cls.getASuperType() + or + target = cls.getAnInferredType() + ) + } + +} + +class PythonAttribute extends DependencyKind { + + PythonAttribute() { + this = "attribute" + } + + override predicate isADependency(AstNode source, Object target) { + this = this and + attribute_access_dependency(target, source) + } + +} + +private predicate attribute_access_dependency(Object target, AstNode source) { + exists(Scope s, string name | + use_of_attribute(source, s, name) and + defn_of_attribute(target, s, name) + ) +} + +private predicate use_of_attribute(Attribute attr, Scope s, string name) { + exists(AttrNode cfg | + cfg.isLoad() and cfg.getNode() = attr + | + exists(Object obj | + cfg.getObject(name).refersTo(obj) | + s = obj.(PythonModuleObject).getModule() or + s = obj.(ClassObject).getPyClass() + ) or + exists(ClassObject cls | + cfg.getObject(name).refersTo(_, cls, _) | + s = cls.getPyClass() 
+ ) + ) + or + exists(SelfAttributeRead sar | + sar = attr | + sar.getClass() = s and + sar.getName() = name + ) +} + +private predicate defn_of_attribute(Object target, Scope s, string name) { + exists(Assign asgn | + target.(ControlFlowNode).getNode() = asgn | + defn_of_instance_attribute(asgn, s, name) + or + defn_of_class_attribute(asgn, s, name) + ) + or + defn_of_module_attribute(target, s, name) +} + +/* Whether asgn defines an instance attribute, that is, whether + * asgn takes the form self.name = ... where self is an instance + * of class c and asgn is not a redefinition. + */ +private predicate defn_of_instance_attribute(Assign asgn, Class c, string name) { + exists(SelfAttributeStore sas | + asgn.getATarget() = sas | + sas.getClass() = c and + sas.getName() = name and + not exists(SelfAttributeStore in_init | + not sas.getScope().(Function).isInitMethod() and + not sas = in_init and + in_init.getClass() = c and + in_init.getName() = name and + in_init.getScope().(Function).isInitMethod() + ) + ) +} + +/* Whether asgn defines an attribute of a class */ +private predicate defn_of_class_attribute(Assign asgn, Class c, string name) { + asgn.getScope() = c and + asgn.getATarget().(Name).getId() = name +} + +/* Holds if `value` is a value assigned to the `name`d attribute of module `m`, i.e. the defining name node is in the scope of `m` itself. */ +private predicate defn_of_module_attribute(ControlFlowNode value, Module m, string name) { + exists(DefinitionNode def | + def.getScope() = m and def.getValue() = value and + def.(NameNode).getId() = name + ) +} diff --git a/python/ql/src/semmle/python/dependencies/DependencyKind.qll b/python/ql/src/semmle/python/dependencies/DependencyKind.qll new file mode 100644 index 00000000000..791723042ac --- /dev/null +++ b/python/ql/src/semmle/python/dependencies/DependencyKind.qll @@ -0,0 +1,31 @@ +import semmle.python.dependencies.Dependencies + +/** + * A library describing an abstract mechanism for representing dependency categories. 
+ */ + +/* + * A DependencyCategory is a unique string key used by Architect to identify different categories + * of dependencies that might be viewed independently. + *

    + * The string key defining the category must adhere to the isValid(), otherwise it will not be + * accepted by Architect. + *

    + */ +abstract class DependencyKind extends string { + + bindingset[this] + DependencyKind() { + this = this + } + + /* Tech inventory interface */ + /** + * Identify dependencies associated with this category. + *

    + * The source element is the source of the dependency. + *

    + */ + abstract predicate isADependency(AstNode source, Object target); + +} \ No newline at end of file diff --git a/python/ql/src/semmle/python/dependencies/TechInventory.qll b/python/ql/src/semmle/python/dependencies/TechInventory.qll new file mode 100644 index 00000000000..957ed9fe481 --- /dev/null +++ b/python/ql/src/semmle/python/dependencies/TechInventory.qll @@ -0,0 +1,116 @@ +import python +import semmle.python.dependencies.Dependencies +import semmle.python.dependencies.DependencyKind + +/** Combine the source-file and package into a single string: + * /path/to/file.py<|>package-name-and-version + */ +string munge(File sourceFile, ExternalPackage package) { + result = "/" + sourceFile.getRelativePath() + "<|>" + package.getName() + "<|>" + package.getVersion() or + not exists(package.getVersion()) and result = "/" + sourceFile.getRelativePath() + "<|>" + package.getName() + "<|>unknown" +} + +abstract class ExternalPackage extends Object { + + ExternalPackage() { + this instanceof ModuleObject + } + + abstract string getName(); + + abstract string getVersion(); + + Object getAttribute(string name) { + result = this.(ModuleObject).getAttribute(name) + } + + PackageObject getPackage() { + result = this.(ModuleObject).getPackage() + } + +} + +bindingset[text] +private predicate is_version(string text) { + text.regexpMatch("\\d+\\.\\d+(\\.\\d+)?([ab]\\d+)?") +} + +bindingset[v] +private string version_format(float v) { + exists(int i, int f | + i = (v+0.05).floor() and f = ((v+0.05-i)*10).floor() | + result = i + "." 
+ f + ) +} + +class DistPackage extends ExternalPackage { + + DistPackage() { + exists(Folder parent | + parent = this.(ModuleObject).getPath().getParent() and + parent.isImportRoot() and + /* Not in standard library */ + not parent.isStdLibRoot(_, _) and + /* Not in the source */ + not exists(parent.getRelativePath()) + ) + } + + /* We don't extract the meta-data for dependencies (yet), so make a best guess from the source + * https://www.python.org/dev/peps/pep-0396/ + */ + private predicate possibleVersion(string version, int priority) { + exists(Object v | + v = this.getAttribute("__version__") and priority = 3 | + version = v.(StringObject).getText() and is_version(version) + or + version = version_format(v.(NumericObject).floatValue()) + or + version = version_format(v.(NumericObject).intValue()) + ) + or + exists(SequenceObject tuple, NumericObject major, NumericObject minor, string base_version | + this.getAttribute("version_info") = tuple and + major = tuple.getInferredElement(0) and minor = tuple.getInferredElement(1) and + base_version = major.intValue() + "." + minor.intValue() | + version = base_version + "." 
+ tuple.getBuiltinElement(2).(NumericObject).intValue() + or + not exists(tuple.getBuiltinElement(2)) and version = base_version + ) and priority = 2 + or + exists(string v | + v.toLowerCase() = "version" | + is_version(version) and + version = this.getAttribute(v).(StringObject).getText() + ) and priority = 1 + } + + override string getVersion() { + this.possibleVersion(result, max(int priority | this.possibleVersion(_, priority))) + } + + override string getName() { + result = this.(ModuleObject).getShortName() + } + + predicate fromSource(Object src) { + exists(ModuleObject m | + m.getModule() = src.(ControlFlowNode).getEnclosingModule() or + src = m | + m = this or + m.getPackage+() = this and + not exists(DistPackage inter | + m.getPackage*() = inter and + inter.getPackage+() = this + ) + ) + } + +} + +predicate dependency(AstNode src, DistPackage package) { + exists(DependencyKind cat, Object target | + cat.isADependency(src, target) | + package.fromSource(target) + ) +} diff --git a/python/ql/src/semmle/python/filters/GeneratedCode.qll b/python/ql/src/semmle/python/filters/GeneratedCode.qll new file mode 100644 index 00000000000..8b6f780ce55 --- /dev/null +++ b/python/ql/src/semmle/python/filters/GeneratedCode.qll @@ -0,0 +1,185 @@ +import python +import semmle.python.templates.Templates + +/** + * A file that is detected as being generated. + */ +abstract class GeneratedFile extends File { + + abstract string getTool(); + +} + +/* We distinguish between a "lax" match which just includes "generated by" or similar versus a "strict" match which includes "this file is generated by" or similar + * "lax" matches are taken to indicate generated file if they occur at the top of a file. "strict" matches can occur anywhere. + * There is no formal reason for the above, it just seems to work well in practice. 
+ */ + +library class GenericGeneratedFile extends GeneratedFile { + + GenericGeneratedFile() { + not this instanceof SpecificGeneratedFile + and + ( + (lax_generated_by(this, _) or lax_generated_from(this, _)) and dont_modify(this) + or + strict_generated_by(this, _) or strict_generated_from(this, _) + ) + } + + override string getTool() { + lax_generated_by(this, result) or strict_generated_by(this, result) + } + +} + +private string comment_or_docstring(File f, boolean before_code) { + exists(Comment c | + c.getLocation().getFile() = f and + result = c.getText() | + if exists(Stmt s | s.getEnclosingModule().getFile() = f and s.getLocation().getStartLine() < c.getLocation().getStartLine()) then + before_code = false + else + before_code = true + ) + or + exists(Module m | m.getFile() = f | + result = m.getDocString().getText() and + before_code = true + ) + +} + +private predicate lax_generated_by(File f, string tool) { + exists(string comment | comment = comment_or_docstring(f, _) | + tool = comment.regexpCapture("(?is).*\\b(?:(?:auto[ -]?)?generated|created automatically) by (?:the )?([-/\\w.]+[-/\\w]).*", 1) + ) +} + +/* Holds if a comment or docstring of `f` says it was generated from `src`; the leading alternation is non-capturing so that group 1 is the source name. */ private predicate lax_generated_from(File f, string src) { + exists(string comment | comment = comment_or_docstring(f, _) | + src = comment.regexpCapture("(?is).*\\b(?:(?:auto[ -]?)?generated|created automatically) from ([-/\\w.]+[-/\\w]).*", 1) + ) +} + +private predicate strict_generated_by(File f, string tool) { + exists(string comment | comment = comment_or_docstring(f, true) | + tool = comment.regexpCapture("(?is)# *(?:this +)?(?:(?:code|file) +)?(?:is +)?(?:(?:auto(?:matically)?[ -]?)?generated|created automatically) by (?:the )?([-/\\w.]+[-/\\w]).*", 1) + ) +} + +private predicate strict_generated_from(File f, string src) { + exists(string comment | comment = comment_or_docstring(f, true) | + src = comment.regexpCapture("(?is)# *(?:this +)?(?:(?:code|file) +)?(?:is +)?(?:(?:auto(?:matically)?[ -]?)?generated|created automatically) from 
([-/\\w.]+[-/\\w]).*", 1) + ) +} + +private predicate dont_modify(File f) { + comment_or_docstring(f, _).regexpMatch("(?is).*\\b(Do not|Don't) (edit|modify|make changes)\\b.*") +} + + +/** + * A file generated by a template engine + */ +abstract library class SpecificGeneratedFile extends GeneratedFile { + /* Currently cover Spitfire, Pyxl and Mako. + * Django templates are not compiled to Python. + * Jinja2 templates are compiled direct to bytecode via the ast. + */ +} + +/** File generated by the spitfire templating engine */ +class SpitfireGeneratedFile extends SpecificGeneratedFile { + + SpitfireGeneratedFile() { + exists(Module m | + m.getFile() = this and not m instanceof SpitfireTemplate | + exists(ImportMember template_method, ImportExpr spitfire_runtime_template | + spitfire_runtime_template.getName() = "spitfire.runtime.template" and + template_method.getModule() = spitfire_runtime_template and + template_method.getName() = "template_method" + ) + ) + } + + override string getTool() { + result = "spitfire" + } + +} + +/** File generated by the pyxl templating engine */ +class PyxlGeneratedFile extends SpecificGeneratedFile { + + PyxlGeneratedFile() { + this.getSpecifiedEncoding() = "pyxl" + } + + override string getTool() { + result = "pyxl" + } + +} + +/** File generated by the mako templating engine */ +class MakoGeneratedFile extends SpecificGeneratedFile { + + MakoGeneratedFile() { + exists(Module m | m.getFile() = this | + from_mako_import(m) = "runtime" and + from_mako_import(m) = "filters" and + from_mako_import(m) = "cache" and + exists(Assign a, Name n | + a.getScope() = m and a.getATarget() = n and n.getId() = "__M_dict_builtin" + ) and + exists(Assign a, Name n | + a.getScope() = m and a.getATarget() = n and n.getId() = "__M_locals_builtin" + ) and + exists(Assign a, Name n | + a.getScope() = m and a.getATarget() = n and n.getId() = "_magic_number" + ) + ) + } + + override string getTool() { + result = "mako" + } + +} + +string 
from_mako_import(Module m) { + exists(ImportMember member, ImportExpr mako | + member.getScope() = m and + member.getModule() = mako and + mako.getName() = "mako" | + result = member.getName() + ) +} + +/** File generated by Google's protobuf tool: its name ends in `_pb.py` or `_pb2.py`, and its module imports `google.net.proto2.python.public` and assigns `DESCRIPTOR`. */ +class ProtobufGeneratedFile extends SpecificGeneratedFile { + + ProtobufGeneratedFile() { + this.getName().regexpMatch(".*_pb2?\\.py") + and + exists(Module m | + m.getFile() = this | + exists(ImportExpr imp | + imp.getEnclosingModule() = m | + imp.getImportedModuleName() = "google.net.proto2.python.public" + ) + and + exists(AssignStmt a, Name n | + a.getEnclosingModule() = m and + a.getATarget() = n and + n.getId() = "DESCRIPTOR" + ) + ) + } + + override string getTool() { + result = "protobuf" + } + +} diff --git a/python/ql/src/semmle/python/filters/Tests.qll b/python/ql/src/semmle/python/filters/Tests.qll new file mode 100644 index 00000000000..e9cfedc499f --- /dev/null +++ b/python/ql/src/semmle/python/filters/Tests.qll @@ -0,0 +1,48 @@ +import python + +abstract class TestScope extends Scope {} + +// don't extend Class directly to avoid ambiguous method warnings +class UnitTestClass extends TestScope { + UnitTestClass() { + exists(ClassObject c | + this = c.getPyClass() | + c.getASuperType() = theUnitTestPackage().getAttribute(_) + or + c.getASuperType().getName().toLowerCase() = "testcase" + ) + } +} + +PackageObject theUnitTestPackage() { + result.getName() = "unittest" +} + +abstract class Test extends TestScope {} + +class UnitTestFunction extends Test { + + UnitTestFunction() { + this.getScope+() instanceof UnitTestClass + and + this.(Function).getName().matches("test%") + } +} + +class PyTestFunction extends Test { + + PyTestFunction() { + exists(Module pytest | pytest.getName() = "pytest") and + this.(Function).getName().matches("test%") + } + +} + +class NoseTestFunction extends Test { + + NoseTestFunction() { + exists(Module nose | nose.getName() = "nose") and + 
this.(Function).getName().matches("test%") + } + +} diff --git a/python/ql/src/semmle/python/flow/NameNode.qll b/python/ql/src/semmle/python/flow/NameNode.qll new file mode 100644 index 00000000000..0dab82fe865 --- /dev/null +++ b/python/ql/src/semmle/python/flow/NameNode.qll @@ -0,0 +1,143 @@ +import python +private import semmle.python.pointsto.Base + +/** A control flow node corresponding to a (plain variable) name expression, such as `var`. + * `None`, `True` and `False` are excluded. + */ +class NameNode extends ControlFlowNode { + + NameNode() { + exists(Name n | py_flow_bb_node(this, n, _, _)) + or + exists(PlaceHolder p | py_flow_bb_node(this, p, _, _)) + } + + /** Whether this flow node defines the variable `v`. */ + predicate defines(Variable v) { + exists(Name d | this.getNode() = d and d.defines(v)) + and not this.isLoad() + } + + /** Whether this flow node deletes the variable `v`. */ + predicate deletes(Variable v) { + exists(Name d | this.getNode() = d and d.deletes(v)) + } + + /** Whether this flow node uses the variable `v`. */ + predicate uses(Variable v) { + this.isLoad() and exists(Name u | this.getNode() = u and u.uses(v)) + or + exists(PlaceHolder u | this.getNode() = u and u.getVariable() = v and u.getCtx() instanceof Load) + or + use_of_global_variable(this, v.getScope(), v.getId()) + } + + string getId() { + result = this.getNode().(Name).getId() + or + result = this.getNode().(PlaceHolder).getId() + } + + /** Whether this is a use of a local variable. */ + predicate isLocal() { + local(this) + } + + /** Whether this is a use of a non-local variable. */ + predicate isNonLocal() { + non_local(this) + } + + /** Whether this is a use of a global (including builtin) variable. 
*/ + predicate isGlobal() { + use_of_global_variable(this, _, _) + } + + predicate isSelf() { + exists(SsaVariable selfvar | + selfvar.isSelf() and selfvar.getAUse() = this + ) + } + +} + +private predicate fast_local(NameNode n) { + exists(FastLocalVariable v | + n.uses(v) and + v.getScope() = n.getScope() + ) +} + +private predicate local(NameNode n) { + fast_local(n) + or + exists(SsaVariable var | + var.getAUse() = n and + n.getScope() instanceof Class and + exists(var.getDefinition()) + ) +} + +private predicate non_local(NameNode n) { + exists(FastLocalVariable flv | + flv.getALoad() = n.getNode() and + not flv.getScope() = n.getScope() + ) +} + +// magic is fine, but we get questionable join-ordering of it +pragma [nomagic] +private predicate use_of_global_variable(NameNode n, Module scope, string name) { + n.isLoad() and + not non_local(n) + and + not exists(SsaVariable var | + var.getAUse() = n | + var.getVariable() instanceof FastLocalVariable + or + n.getScope() instanceof Class and + not maybe_undefined(var) + ) + and name = n.getId() + and scope = n.getEnclosingModule() +} + +private predicate maybe_defined(SsaVariable var) { + exists(var.getDefinition()) and not py_ssa_phi(var, _) and not var.getDefinition().isDelete() + or + exists(SsaVariable input | + input = var.getAPhiInput() | + maybe_defined(input) + ) +} + +private predicate maybe_undefined(SsaVariable var) { + not exists(var.getDefinition()) and not py_ssa_phi(var, _) + or + var.getDefinition().isDelete() + or + maybe_undefined(var.getAPhiInput()) + or + exists(BasicBlock incoming | + exists(var.getAPhiInput()) and + incoming.getASuccessor() = var.getDefinition().getBasicBlock() and + not var.getAPhiInput().getDefinition().getBasicBlock().dominates(incoming) + ) +} + +/** A control flow node corresponding to a named constant, one of `None`, `True` or `False`. 
*/ +class NameConstantNode extends NameNode { + + NameConstantNode() { + exists(NameConstant n | py_flow_bb_node(this, n, _, _)) + } + + override deprecated predicate defines(Variable v) { none() } + + override deprecated predicate deletes(Variable v) { none() } + + /* We ought to override uses as well, but that has + * a serious performance impact. + deprecated predicate uses(Variable v) { none() } + */ +} diff --git a/python/ql/src/semmle/python/libraries/Zope.qll b/python/ql/src/semmle/python/libraries/Zope.qll new file mode 100644 index 00000000000..f355982dfc7 --- /dev/null +++ b/python/ql/src/semmle/python/libraries/Zope.qll @@ -0,0 +1,29 @@ +/** Utilities for handling the zope libraries */ + +import python + +/** A method that belongs to a sub-class of `zope.interface.Interface` */ +class ZopeInterfaceMethod extends PyFunctionObject { + + /** Holds if this method belongs to a class that sub-classes `zope.interface.Interface` */ + ZopeInterfaceMethod() { + exists(ModuleObject zope, Object interface, ClassObject owner | + zope.getAttribute("Interface") = interface and + zope.getName() = "zope.interface" and + owner.declaredAttribute(_) = this and + owner.getAnImproperSuperType().getABaseType() = interface + ) + } + + override int minParameters() { + result = super.minParameters() + 1 + } + + override int maxParameters() { + if exists(this.getFunction().getVararg()) then + result = super.maxParameters() + else + result = super.maxParameters() + 1 + } + +} diff --git a/python/ql/src/semmle/python/pointsto/Base.qll b/python/ql/src/semmle/python/pointsto/Base.qll new file mode 100644 index 00000000000..cc57a6f5d69 --- /dev/null +++ b/python/ql/src/semmle/python/pointsto/Base.qll @@ -0,0 +1,654 @@ +/** + * Combined points-to and type-inference for "run-time" (as opposed to "import-time" values) + * The main relation `runtime_points_to(node, object, cls, origin)` relates a control flow node + * to the possible objects it points-to, the inferred types of those objects and the 
'origin' + * of those objects. The 'origin' is the point in source code that the object can be traced + * back to. + * + * This file contains non-layered parts of the points-to analysis. + */ +import python +import semmle.python.dataflow.SsaDefinitions + +module BasePointsTo { + /** INTERNAL -- Use n.refersTo(value, _, origin) instead */ + pragma [noinline] + predicate points_to(ControlFlowNode f, Object value, ControlFlowNode origin) { + ( + f.isLiteral() and value = f and not f.getNode() instanceof ImmutableLiteral + or + f.isFunction() and value = f + ) and origin = f + } +} + +/** The kwargs parameter (**kwargs) in a function definition is always a dict */ +predicate kwargs_points_to(ControlFlowNode f, ClassObject cls) { + exists(Function func | func.getKwarg() = f.getNode()) and + cls = theDictType() +} + +/** The varargs (*varargs) in a function definition is always a tuple */ +predicate varargs_points_to(ControlFlowNode f, ClassObject cls) { + exists(Function func | func.getVararg() = f.getNode()) and + cls = theTupleType() +} + +/** Gets the class of the object for simple cases, namely constants, functions, + * comprehensions and built-in objects. + * + * This exists primarily for internal use. Use getAnInferredType() instead. 
+ */ +pragma [noinline] +ClassObject simple_types(Object obj) { + result = comprehension(obj.getOrigin()) + or + result = collection_literal(obj.getOrigin()) + or + obj.getOrigin() instanceof CallableExpr and result = thePyFunctionType() + or + obj.getOrigin() instanceof Module and result = theModuleType() + or + result = builtin_object_type(obj) +} + +private ClassObject comprehension(Expr e) { + e instanceof ListComp and result = theListType() + or + e instanceof SetComp and result = theSetType() + or + e instanceof DictComp and result = theDictType() + or + e instanceof GeneratorExp and result = theGeneratorType() +} + +private ClassObject collection_literal(Expr e) { + e instanceof List and result = theListType() + or + e instanceof Set and result = theSetType() + or + e instanceof Dict and result = theDictType() + or + e instanceof Tuple and result = theTupleType() +} + +private int tuple_index_value(Object t, int i) { + result = t.(TupleNode).getElement(i).getNode().(Num).getN().toInt() + or + exists(Object item | + py_citems(t, i, item) and + result = item.(NumericObject).intValue() + ) +} + +pragma [noinline] +int version_tuple_value(Object t) { + not exists(tuple_index_value(t, 1)) and result = tuple_index_value(t, 0)*10 + or + not exists(tuple_index_value(t, 2)) and result = tuple_index_value(t, 0)*10 + tuple_index_value(t, 1) + or + tuple_index_value(t, 2) = 0 and result = tuple_index_value(t, 0)*10 + tuple_index_value(t, 1) + or + tuple_index_value(t, 2) > 0 and result = tuple_index_value(t, 0)*10 + tuple_index_value(t, 1) + 1 +} + +/** Choose a version numbers that represent the extreme of supported versions. 
*/ +private int major_minor() { + if major_version() = 3 then + (result = 33 or result = 37) // 3.3 to 3.7 + else + (result = 25 or result = 27) // 2.5 to 2.7 +} + +/** Compares the given tuple object to both the maximum and minimum possible sys.version_info values */ +int version_tuple_compare(Object t) { + version_tuple_value(t) < major_minor() and result = -1 + or + version_tuple_value(t) = major_minor() and result = 0 + or + version_tuple_value(t) > major_minor() and result = 1 +} + +/* Holds if `cls` is a new-style class if it were to have no explicit base classes */ +predicate baseless_is_new_style(ClassObject cls) { + cls.isBuiltin() + or + major_version() = 3 + or + exists(cls.declaredMetaClass()) +} + +/* The following predicates exist in order to provide + * more precise type information than the underlying + * database relations. This help to optimise the points-to + * analysis. + */ + +/** Gets the base class of built-in class `cls` */ +pragma [noinline] +ClassObject builtin_base_type(ClassObject cls) { + /* The extractor uses the special name ".super." to indicate the super class of a builtin class */ + py_cmembers_versioned(cls, ".super.", result, _) +} + +/** Gets the `name`d attribute of built-in class `cls` */ +pragma [noinline] +Object builtin_class_attribute(ClassObject cls, string name) { + not name = ".super." 
and + py_cmembers_versioned(cls, name, result, _) +} + +/** Holds if the `name`d attribute of built-in module `m` is `value` of `cls` */ +pragma [noinline] +predicate builtin_module_attribute(ModuleObject m, string name, Object value, ClassObject cls) { + py_cmembers_versioned(m, name, value, _) and cls = builtin_object_type(value) +} + +/** Gets the (built-in) class of the built-in object `obj` */ +pragma [noinline] +ClassObject builtin_object_type(Object obj) { + py_cobjecttypes(obj, result) and not obj = unknownValue() + or + obj = unknownValue() and result = theUnknownType() +} + +/** Holds if this class (not on a super-class) declares name */ +pragma [noinline] +predicate class_declares_attribute(ClassObject cls, string name) { + exists(Class defn | + defn = cls.getPyClass() and + class_defines_name(defn, name) + ) + or + exists(Object o | + o = builtin_class_attribute(cls, name) and + not exists(ClassObject sup | + sup = builtin_base_type(cls) and + o = builtin_class_attribute(sup, name) + ) + ) +} + +/** Holds if the class defines name */ +private predicate class_defines_name(Class cls, string name) { + exists(SsaVariable var | name = var.getId() and var.getAUse() = cls.getANormalExit()) +} + +/** Gets a return value CFG node, provided that is safe to track across returns */ +ControlFlowNode safe_return_node(PyFunctionObject func) { + result = func.getAReturnedNode() + // Not a parameter + and not exists(Parameter p, SsaVariable pvar | + p.asName().getAFlowNode() = pvar.getDefinition() and + result = pvar.getAUse() + ) and + // No alternatives + not exists(ControlFlowNode branch | branch.isBranch() and branch.getScope() = func.getFunction()) +} + +/** Holds if it can be determined from the control flow graph alone that this function can never return */ +predicate function_can_never_return(FunctionObject func) { + /* A Python function never returns if it has no normal exits that are not dominated by a + * call to a function which itself never returns. 
+ */ + exists(Function f | + f = func.getFunction() and + not exists(f.getAnExitNode()) + ) + or + func = theExitFunctionObject() +} + +/** Python specific sub-class of generic EssaNodeDefinition */ +class PyNodeDefinition extends EssaNodeDefinition { + + PyNodeDefinition() { + this.getSourceVariable().hasDefiningNode(this.getDefiningNode()) + } + + override string getRepresentation() { + result = this.getAQlClass() + } + +} + +/** Python specific sub-class of generic EssaNodeRefinement */ +class PyNodeRefinement extends EssaNodeRefinement { + + override string getRepresentation() { + result = this.getAQlClass() + "(" + this.getInput().getRepresentation() + ")" + or + not exists(this.getInput()) and + result = this.getAQlClass() + "(" + this.getSourceVariable().getName() + "??)" + } +} + +/** An assignment to a variable `v = val` */ +class AssignmentDefinition extends PyNodeDefinition { + + AssignmentDefinition() { + SsaSource::assignment_definition(this.getSourceVariable(), this.getDefiningNode(), _) + } + + ControlFlowNode getValue() { + SsaSource::assignment_definition(this.getSourceVariable(), this.getDefiningNode(), result) + } + + override string getRepresentation() { + result = this.getValue().getNode().toString() + } + +} + +/** Capture of a raised exception `except ExceptionType ex:` */ +class ExceptionCapture extends PyNodeDefinition { + + ExceptionCapture() { + SsaSource::exception_capture(this.getSourceVariable(), this.getDefiningNode()) + } + + ControlFlowNode getType() { + exists(ExceptFlowNode ex | + ex.getName() = this.getDefiningNode() and + result = ex.getType() + ) + } + + override string getRepresentation() { + result = "except " + this.getSourceVariable().getName() + } + +} +/** An assignment to a variable as part of a multiple assignment `..., v, ... 
= val` */ +class MultiAssignmentDefinition extends PyNodeDefinition { + + MultiAssignmentDefinition() { + SsaSource::multi_assignment_definition(this.getSourceVariable(), this.getDefiningNode()) + } + + override string getRepresentation() { + result = "..." + } + +} + +/** A definition of a variable in a `with` statement, as identified by `SsaSource::with_definition` */ +class WithDefinition extends PyNodeDefinition { + + WithDefinition () { + SsaSource::with_definition(this.getSourceVariable(), this.getDefiningNode()) + } + + override string getRepresentation() { + result = "with" + } + +} + +/** A definition of a variable by declaring it as a parameter */ +class ParameterDefinition extends PyNodeDefinition { + + ParameterDefinition() { + SsaSource::parameter_definition(this.getSourceVariable(), this.getDefiningNode()) + } + /** Holds if the `Parameter` at the defining node is `self` (per `Parameter.isSelf()`) */ + predicate isSelf() { + this.getDefiningNode().getNode().(Parameter).isSelf() + } + /** Gets a control flow node for the default value of this parameter */ + ControlFlowNode getDefault() { + result.getNode() = this.getParameter().getDefault() + } + /** Gets the `Parameter` that is the AST node of the defining node */ + Parameter getParameter() { + result = this.getDefiningNode().getNode() + } + +} + +/** A definition of a variable in a for loop `for v in ...:` */ +class IterationDefinition extends PyNodeDefinition { + + ControlFlowNode sequence; + + IterationDefinition() { + SsaSource::iteration_defined_variable(this.getSourceVariable(), this.getDefiningNode(), sequence) + } + + ControlFlowNode getSequence() { + result = sequence + } + +} + +/** A deletion of a variable `del v` */ +class DeletionDefinition extends PyNodeDefinition { + + DeletionDefinition() { + SsaSource::deletion_definition(this.getSourceVariable(), this.getDefiningNode()) + } + +} + +/** Definition of variable at the entry of a scope. Usually this represents the transfer of + * a global or non-local variable from one scope to another. 
+ */ +class ScopeEntryDefinition extends PyNodeDefinition { + + ScopeEntryDefinition() { + this.getDefiningNode() = this.getSourceVariable().(PythonSsaSourceVariable).getScopeEntryDefinition() and + not this instanceof ImplicitSubModuleDefinition + } + /** Gets the scope whose entry node is the defining node of this definition */ + override Scope getScope() { + result.getEntryNode() = this.getDefiningNode() + } + +} + +/** Possible redefinition of a variable via `from ... import *` */ +class ImportStarRefinement extends PyNodeRefinement { + + ImportStarRefinement() { + SsaSource::import_star_refinement(this.getSourceVariable(), _, this.getDefiningNode()) + } + +} + +/** Assignment of an attribute `obj.attr = val` */ +class AttributeAssignment extends PyNodeRefinement { + + AttributeAssignment() { + SsaSource::attribute_assignment_refinement(this.getSourceVariable(), _, this.getDefiningNode()) + } + /** Gets the name of the attribute being assigned (the name of the defining `AttrNode`) */ + string getName() { + result = this.getDefiningNode().(AttrNode).getName() + } + /** Gets the control flow node for the assigned value, as given by the defining `DefinitionNode` */ + ControlFlowNode getValue() { + result = this.getDefiningNode().(DefinitionNode).getValue() + } + + override string getRepresentation() { + result = this.getAQlClass() + " '" + this.getName() + "'(" + this.getInput().getRepresentation() + ")" + or + not exists(this.getInput()) and + result = this.getAQlClass() + " '" + this.getName() + "'(" + this.getSourceVariable().getName() + "??)" + } + +} + +/** A use of a variable as an argument, `foo(v)`, which might modify the object referred to. */ +class ArgumentRefinement extends PyNodeRefinement { + + ControlFlowNode argument; + + ArgumentRefinement() { + SsaSource::argument_refinement(this.getSourceVariable(), argument, this.getDefiningNode()) + } + + ControlFlowNode getArgument() { result = argument } + +} + +/** Deletion of an attribute `del obj.attr`. 
*/ +class EssaAttributeDeletion extends PyNodeRefinement { + + EssaAttributeDeletion() { + SsaSource::attribute_deletion_refinement(this.getSourceVariable(), _, this.getDefiningNode()) + } + /** Gets the name of the attribute being deleted (the name of the defining `AttrNode`) */ + string getName() { + result = this.getDefiningNode().(AttrNode).getName() + } + +} + +/** A pi-node (guard) with only one successor. */ +class SingleSuccessorGuard extends PyNodeRefinement { + + SingleSuccessorGuard() { + SsaSource::test_refinement(this.getSourceVariable(), _, this.getDefiningNode()) + } + /** Gets `false` if the defining node has a false successor and `true` if it has a true successor */ + boolean getSense() { + exists(this.getDefiningNode().getAFalseSuccessor()) and result = false + or + exists(this.getDefiningNode().getATrueSuccessor()) and result = true + } + + override string getRepresentation() { + result = PyNodeRefinement.super.getRepresentation() + " [" + this.getSense().toString() + "]" + or + not exists(this.getSense()) and + result = PyNodeRefinement.super.getRepresentation() + " [??]" + } +} + +/** Implicit definition of the names of sub-modules in a package. + * Although the interpreter does not pre-define these names, merely populating them + * as they are imported, this is a good approximation for static analysis. 
+ */ +class ImplicitSubModuleDefinition extends PyNodeDefinition { + + ImplicitSubModuleDefinition() { + SsaSource::init_module_submodule_defn(this.getSourceVariable(), this.getDefiningNode()) + } + +} + +/** An implicit (possible) definition of an escaping variable at a call-site, other than those already covered by `ArgumentRefinement`, `MethodCallsiteRefinement` or `SingleSuccessorGuard` */ +class CallsiteRefinement extends PyNodeRefinement { + + override string toString() { + result = "CallsiteRefinement" + } + + CallsiteRefinement() { + exists(PythonSsaSourceVariable var, ControlFlowNode defn | + defn = var.redefinedAtCallSite() and + this.definedBy(var, defn) and + not this instanceof ArgumentRefinement and + not this instanceof MethodCallsiteRefinement and + not this instanceof SingleSuccessorGuard + ) + } + /** Gets the call node that is the defining node of this refinement */ + CallNode getCall() { + this.getDefiningNode() = result + } + +} + +/** An implicit (possible) modification of the object referred to at a method call */ +class MethodCallsiteRefinement extends PyNodeRefinement { + + MethodCallsiteRefinement() { + SsaSource::method_call_refinement(this.getSourceVariable(), _, this.getDefiningNode()) + and not this instanceof SingleSuccessorGuard + } + /** Gets the call node that is the defining node of this refinement */ + CallNode getCall() { + this.getDefiningNode() = result + } + +} + +/** An implicit (possible) modification of `self` at a method call */ +class SelfCallsiteRefinement extends MethodCallsiteRefinement { + + SelfCallsiteRefinement() { + this.getSourceVariable().(Variable).isSelf() + } + +} + +/** Python specific sub-class of generic EssaEdgeRefinement */ +class PyEdgeRefinement extends EssaEdgeRefinement { + + override string getRepresentation() { + /* This is for testing so use capital 'P' to make it sort before 'phi' and + * be more visually distinctive. */ + result = "Pi(" + this.getInput().getRepresentation() + ") [" + this.getSense() + "]" + or + not exists(this.getInput()) and + result = "Pi(" + this.getSourceVariable().getName() + "??) 
[" + this.getSense() + "]" + } + /** Gets the last node of the predecessor of this edge refinement */ + ControlFlowNode getTest() { + result = this.getPredecessor().getLastNode() + } + +} + +/** Holds if `outer` contains `inner`, both are contained within a test and `inner` is a plain use or an attribute lookup */ +pragma[noinline] +predicate contains_interesting_expression_within_test(ControlFlowNode outer, ControlFlowNode inner) { + inner.isLoad() and + exists(ControlFlowNode test | + outer.getAChild*() = inner and + test_contains(test, outer) and + test_contains(test, inner) | + inner instanceof NameNode or + inner instanceof AttrNode + ) +} + +/** Holds if `expr` is a test (a branch) and `use` is within that test */ +predicate test_contains(ControlFlowNode expr, ControlFlowNode use) { + expr.getNode() instanceof Expr and + expr.isBranch() and + expr.getAChild*() = use +} + +/** Holds if `test` is a test (a branch), `use` is within that test and `def` is an edge from that test with `sense` */ +predicate refinement_test(ControlFlowNode test, ControlFlowNode use, boolean sense, PyEdgeRefinement def) { + /* Because calls such as `len` may create a new variable, we need to go via the source variable. + * That is perfectly safe as we are only dealing with calls that do not mutate their arguments. + */ + use = def.getInput().getSourceVariable().(Variable).getAUse() and + test = def.getPredecessor().getLastNode() and + test_contains(test, use) and + sense = def.getSense() +} + +/** Holds if `f` is an import of the form `from .[...] import name`, the enclosing scope is an __init__ module, and `var` is a variable called `name` with a use at `f` */ +pragma [noinline] +predicate live_import_from_dot_in_init(ImportMemberNode f, EssaVariable var) { + exists(string name | + import_from_dot_in_init(f.getModule(name)) and + var.getSourceVariable().getName() = name and var.getAUse() = f + ) +} + +/** Holds if `f` is an import of the form `from .[...] 
import ...` and the enclosing scope is an __init__ module */ +predicate import_from_dot_in_init(ImportExprNode f) { + f.getScope() = any(Module m).getInitModule() and + ( + f.getNode().getLevel() = 1 and + not exists(f.getNode().getName()) + or + f.getNode().getImportedModuleName() = f.getEnclosingModule().getPackage().getName() + ) +} + +/** Gets the pseudo-object representing the value referred to by an undefined variable */ +Object undefinedVariable() { + py_special_objects(result, "_semmle_undefined_value") +} + +/** Gets the pseudo-object representing an unknown value */ +Object unknownValue() { + py_special_objects(result, "_1") +} + +BuiltinCallable theTypeNewMethod() { + py_cmembers_versioned(theTypeType(), "__new__", result, major_version().toString()) +} + +/** Gets the `value, cls, origin` that `f` would refer to if it has not been assigned some other value */ +pragma [noinline] +predicate potential_builtin_points_to(NameNode f, Object value, ClassObject cls, ControlFlowNode origin) { + f.isGlobal() and f.isLoad() and origin = f and + ( + builtin_name_points_to(f.getId(), value, cls) + or + not exists(builtin_object(f.getId())) and value = unknownValue() and cls = theUnknownType() + ) +} + +pragma [noinline] +predicate builtin_name_points_to(string name, Object value, ClassObject cls) { + value = builtin_object(name) and py_cobjecttypes(value, cls) +} + +module BaseFlow { + + predicate reaches_exit(EssaVariable var) { + var.getAUse() = var.getScope().getANormalExit() + } + + /* Helper for this_scope_entry_value_transfer(...). 
Transfer of values from earlier scope to later on */ + pragma [noinline] + predicate scope_entry_value_transfer_from_earlier(EssaVariable pred_var, Scope pred_scope, ScopeEntryDefinition succ_def, Scope succ_scope) { + exists(SsaSourceVariable var | + reaches_exit(pred_var) and + pred_var.getScope() = pred_scope and + var = pred_var.getSourceVariable() and + var = succ_def.getSourceVariable() and + succ_def.getScope() = succ_scope + | + pred_scope.precedes(succ_scope) + or + /* If an `__init__` method does not modify the global variable, then + * we can skip it and take the value directly from the module. + */ + exists(Scope init | + init.getName() = "__init__" and init.precedes(succ_scope) and pred_scope.precedes(init) and + not var.(Variable).getAStore().getScope() = init and var instanceof GlobalVariable + ) + ) + } +} + +/** Points-to for syntactic elements where context is not relevant */ +predicate simple_points_to(ControlFlowNode f, Object value, ClassObject cls, ControlFlowNode origin) { + kwargs_points_to(f, cls) and value = f and origin = f + or + varargs_points_to(f, cls) and value = f and origin = f + or + BasePointsTo::points_to(f, value, origin) and cls = simple_types(value) + or + value = f.getNode().(ImmutableLiteral).getLiteralObject() and cls = simple_types(value) and origin = f +} + +/** Holds if `bit` is a binary expression node with a bitwise operator. + * Helper for `this_binary_expr_points_to`. 
+ */ +predicate bitwise_expression_node(BinaryExprNode bit, ControlFlowNode left, ControlFlowNode right) { + exists(Operator op | + op = bit.getNode().getOp() | + op instanceof BitAnd or + op instanceof BitOr or + op instanceof BitXor + ) and + left = bit.getLeft() and + right = bit.getRight() +} + + +private +Module theCollectionsAbcModule() { + result.getName() = "_abcoll" + or + result.getName() = "_collections_abc" +} + +ClassObject collectionsAbcClass(string name) { + exists(Class cls | + result.getPyClass() = cls and + cls.getName() = name and + cls.getScope() = theCollectionsAbcModule() + ) +} diff --git a/python/ql/src/semmle/python/pointsto/CallGraph.qll b/python/ql/src/semmle/python/pointsto/CallGraph.qll new file mode 100644 index 00000000000..cc618690c32 --- /dev/null +++ b/python/ql/src/semmle/python/pointsto/CallGraph.qll @@ -0,0 +1,73 @@ +/** + * Context-sensitive call-graph. + * + * NOTE: Since an "invocation" contains callsite information + * and a path back to its ancestor calls, the "invocation" call-graph must be a tree. + * This has two important consequences: + * 1. The graph is incomplete; it has quite limited depth in order to keep the graph to a sensible size. + * 2. The graph is precise. Since different invocations are distinct, there can be no "cross-talk" between + * different calls to the same function. + */ +import python +private import semmle.python.pointsto.PointsToContext + +private newtype TTInvocation = TInvocation(FunctionObject f, Context c) { + exists(Context outer, CallNode call | + call = f.getACall(outer) and + c.fromCall(call, outer) + ) + or + c.appliesToScope(f.getFunction()) +} + +/** This class represents a static approximation to the + * dynamic call-graph. A `FunctionInvocation` represents + * all calls made to a function for a given context. 
+ */ +class FunctionInvocation extends TTInvocation { + + string toString() { result = "Invocation" } + + FunctionObject getFunction() { this = TInvocation(result, _) } + + Context getContext() { this = TInvocation(_, result) } + + /** Gets the callee invocation for the given callsite. + * The callsite must be within the function of this invocation. + */ + FunctionInvocation getCallee(CallNode call) { + exists(FunctionObject callee, Context callee_context, FunctionObject caller, Context caller_context | + this = TInvocation(caller, caller_context) and + result = TInvocation(callee, callee_context) and + call = callee.getACall(caller_context) and + callee_context.fromCall(call, caller_context) and + call.getScope() = caller.getFunction() + ) + } + + /** Gets a callee invocation. + * That is any invocation made from within this invocation. + */ + FunctionInvocation getACallee() { + result = this.getCallee(_) + } + + /** Holds if this is an invocation `f` in the "runtime" context. */ + predicate runtime(FunctionObject f) { + exists(Context c | + c.isRuntime() and + this = TInvocation(f, c) + ) + } + + /** Gets the call from which this invocation was made. */ + CallNode getCall() { + this.getContext().fromCall(result, _) + } + + /** Gets the caller invocation of this invocation, if any. 
*/ + FunctionInvocation getCaller() { + this = result.getCallee(_) + } + +} diff --git a/python/ql/src/semmle/python/pointsto/Context.qll b/python/ql/src/semmle/python/pointsto/Context.qll new file mode 100644 index 00000000000..156e8eb43b4 --- /dev/null +++ b/python/ql/src/semmle/python/pointsto/Context.qll @@ -0,0 +1,4 @@ +import python +private import semmle.python.pointsto.PointsToContext + +class Context = PointsToContext; \ No newline at end of file diff --git a/python/ql/src/semmle/python/pointsto/Filters.qll b/python/ql/src/semmle/python/pointsto/Filters.qll new file mode 100644 index 00000000000..6ac515b217a --- /dev/null +++ b/python/ql/src/semmle/python/pointsto/Filters.qll @@ -0,0 +1,47 @@ +/** Helper predicates for standard tests in Python commonly + * used to filter objects by value or by type. + */ + + +import python +import semmle.dataflow.SSA + +/** Holds if `c` is a call to `hasattr(obj, attr)`. */ +predicate hasattr(CallNode c, ControlFlowNode obj, string attr) { + c.getFunction().getNode().(Name).getId() = "hasattr" and + c.getArg(0) = obj and + c.getArg(1).getNode().(StrConst).getText() = attr +} + +/** Holds if `c` is a call to `callable(obj)`. */ +predicate is_callable(CallNode c, ControlFlowNode obj) { + c.getFunction().(NameNode).getId() = "callable" and + obj = c.getArg(0) +} + +/** Holds if `c` is a call to `isinstance(use, cls)`. */ +predicate isinstance(CallNode fc, ControlFlowNode cls, ControlFlowNode use) { + fc.getFunction().(NameNode).getId() = "isinstance" and + cls = fc.getArg(1) and fc.getArg(0) = use +} + +/** Holds if `c` is a test comparing `x` and `y`. `is` is true if the operator is `is` or `==`, it is false if the operator is `is not` or `!=`. 
*/ +predicate equality_test(CompareNode c, ControlFlowNode x, boolean is, ControlFlowNode y) { + exists(Cmpop op | + c.operands(x, op, y) or + c.operands(y, op, x) + | + (is = true and op instanceof Is or + is = false and op instanceof IsNot or + is = true and op instanceof Eq or + is = false and op instanceof NotEq + ) + ) +} + +/** Holds if `c` is a call to `issubclass(use, cls)`. */ +predicate issubclass(CallNode fc, ControlFlowNode cls, ControlFlowNode use) { + fc.getFunction().(NameNode).getId() = "issubclass" and + fc.getArg(0) = use and cls = fc.getArg(1) +} + diff --git a/python/ql/src/semmle/python/pointsto/Final.qll b/python/ql/src/semmle/python/pointsto/Final.qll new file mode 100644 index 00000000000..a1f14b0f87c --- /dev/null +++ b/python/ql/src/semmle/python/pointsto/Final.qll @@ -0,0 +1,6 @@ +/* For backwards compatibility */ + +import PointsTo::PointsTo as P + +/** DEPRECATED: Use `PointsTo` instead */ +deprecated module FinalPointsTo = P; \ No newline at end of file diff --git a/python/ql/src/semmle/python/pointsto/MRO.qll b/python/ql/src/semmle/python/pointsto/MRO.qll new file mode 100644 index 00000000000..11ac81b120a --- /dev/null +++ b/python/ql/src/semmle/python/pointsto/MRO.qll @@ -0,0 +1,466 @@ +/** Classes and predicates for computing the Method Resolution Order (MRO) of classes. + * Supports both old-style (diamond) inheritance and new-style (C3 linearization) inheritance. + */ + +/* + * Implementation of the C3 linearization algorithm. + * See https://en.wikipedia.org/wiki/C3_linearization + * + * The key operation is merge, which takes a list of lists and produces a list. + * We implement it as the method `ClassListList.merge()` + * + * To support that we need to determine the best candidate to extract from a list of lists, + * implemented as `ClassListList.bestMergeCandidate()` + * + * The following code is designed to implement those operations + * without negation and as efficiently as possible. 
+ */ + +import python +import semmle.python.pointsto.PointsTo + +cached private newtype TClassList = Empty() + or + Cons(ClassObject head, TClassList tail) { + required_cons(head, tail) + } + +/* Keep ClassList finite and as small as possible */ +private predicate required_cons(ClassObject head, ClassList tail) { + tail = merge_of_linearization_of_bases(head) + or + exists(ClassObject cls, int n | + head = cls.getBaseType(n) and tail = bases(cls, n+1) + ) + or + head = theObjectType() and tail = Empty() + or + reverse_step(_, Cons(head, _), tail) + or + exists(ClassListList list | + merge_step(tail, list, _) and + head = list.bestMergeCandidate() + ) + or + exists(ClassList list, int n | + n = list.firstIndex(head) and + tail = list.deduplicate(n+1) + ) + or + exists(ClassListList list, int n | + head = list.getHead().getItem(n) and + tail = flatten_list(list, n+1) + ) + or + tail = list_old_style_base_mros(head).flatten() +} + +/** A list of classes, used to represent the MRO of a class */ +class ClassList extends TClassList { + + string toString() { + result = "[" + this.contents() + "]" + } + + string contents() { + this = Empty() and result = "" + or + exists(ClassObject head | + head = this.getHead() | + this.getTail() = Empty() and result = head.getName() + or + this.getTail() != Empty() and result = head.getName() + ", " + this.getTail().contents() + ) + } + + int length() { + this = Empty() and result = 0 + or + result = this.getTail().length() + 1 + } + + ClassObject getHead() { + this = Cons(result, _) + } + + ClassList getTail() { + this = Cons(_, result) + } + + ClassObject getItem(int n) { + n = 0 and result = this.getHead() + or + result = this.getTail().getItem(n-1) + } + + pragma [inline] + ClassList removeHead(ClassObject cls) { + this.getHead() = cls and result = this.getTail() + or + this.getHead() != cls and result = this + or + this = Empty() and result = Empty() + } + + predicate legalMergeHead(ClassObject cls) { + 
this.getTail().doesNotContain(cls) + or + this = Empty() + } + + /** Use negative formulation for efficiency */ + predicate contains(ClassObject cls) { + cls = this.getHead() + or + this.getTail().contains(cls) + } + + /** Use negative formulation to avoid negative recursion */ + predicate doesNotContain(ClassObject cls) { + this.relevantForContains(cls) and + cls != this.getHead() and + this.getTail().doesNotContain(cls) + or + this = Empty() + } + + private predicate relevantForContains(ClassObject cls) { + exists(ClassListList list | + list.getItem(_).getHead() = cls and + list.getItem(_) = this + ) + or + exists(ClassList l | + l.relevantForContains(cls) and + this = l.getTail() + ) + } + + ClassObject findDeclaringClass(string name) { + exists(ClassObject head | + head = this.getHead() and + not head = theUnknownType() | + if head.declaresAttribute(name) then + result = head + else + result = this.getTail().findDeclaringClass(name) + ) + } + + Object lookup(string name) { + exists(ClassObject head | + head = this.getHead() and + not head = theUnknownType() | + if head.declaresAttribute(name) then + result = head.declaredAttribute(name) + else + result = this.getTail().lookup(name) + ) + } + + predicate declares(string name) { + this.getHead().declaresAttribute(name) + or + this.getTail().declares(name) + } + + ClassList startingAt(ClassObject cls) { + exists(ClassObject head | + head = this.getHead() | + if head = cls then + result = this + else + result = this.getTail().startingAt(cls) + ) + } + + ClassList deduplicate() { + result = this.deduplicate(0) + } + + /* Helpers for `deduplicate()` */ + + int firstIndex(ClassObject cls) { + result = this.firstIndex(cls, 0) + } + + /* Helper for firstIndex(cls), getting the first index of `cls` where result >= n */ + private int firstIndex(ClassObject cls, int n) { + this.getItem(n) = cls and result = n + or + this.getItem(n) != cls and result = this.firstIndex(cls, n+1) + } + + /** Holds if the class at `n` is a 
duplicate of an earlier position. */ + private predicate duplicate(int n) { + exists(ClassObject cls | + cls = this.getItem(n) and this.firstIndex(cls) < n + ) + } + + /** Gets a class list which is the de-duplicated form of the list containing elements of + * this list from `n` onwards. + */ + ClassList deduplicate(int n) { + n = this.length() and result = Empty() + or + this.duplicate(n) and result = this.deduplicate(n+1) + or + exists(ClassObject cls | + n = this.firstIndex(cls) and + result = Cons(cls, this.deduplicate(n+1)) + ) + } + + predicate isEmpty() { + this = Empty() + } + + ClassList reverse() { + reverse_step(this, Empty(), result) + } +} + +private newtype TClassListList = + EmptyList() or + ConsList(TClassList head, TClassListList tail) { + required_list(head, tail) + } + +/* Keep ClassListList finite and as small as possible */ +private predicate required_list(ClassList head, ClassListList tail) { + any(ClassListList x).removedClassParts(_, head, tail, _) + or + head = bases(_) and tail = EmptyList() + or + exists(ClassObject cls, int n | + head = new_style_mro(cls.getBaseType(n)) and + tail = list_of_linearization_of_bases_plus_bases(cls, n+1) + ) + or + exists(ClassObject cls, int n | + head = old_style_mro(cls.getBaseType(n)) and + tail = list_old_style_base_mros(cls, n+1) + ) +} + +private class ClassListList extends TClassListList { + + string toString() { + result = "[" + this.contents() + "]" + } + + string contents() { + this = EmptyList() and result = "" + or + exists(ClassList head | + head = this.getHead() | + this.getTail() = EmptyList() and result = head.toString() + or + this.getTail() != EmptyList() and result = head.toString() + ", " + this.getTail().contents() + ) + } + + int length() { + this = EmptyList() and result = 0 + or + result = this.getTail().length() + 1 + } + + ClassList getHead() { + this = ConsList(result, _) + } + + ClassListList getTail() { + this = ConsList(_, result) + } + + ClassList getItem(int n) { + n = 0 and 
result = this.getHead() + or + result = this.getTail().getItem(n-1) + } + + private ClassObject getAHead() { + result = this.getHead().getHead() + or + result = this.getTail().getAHead() + } + + pragma [nomagic] + ClassList merge() { + exists(ClassList reversed | + merge_step(reversed, EmptyList(), this) and + result = reversed.reverse() + ) + or + this = EmptyList() and result = Empty() + } + + /* Join ordering helper */ + pragma [noinline] + predicate removedClassParts(ClassObject cls, ClassList removed_head, ClassListList removed_tail, int n) { + cls = this.bestMergeCandidate() and n = this.length()-1 and + removed_head = this.getItem(n).removeHead(cls) and removed_tail = EmptyList() + or + exists(ClassList prev_head, ClassListList prev_tail | + this.removedClassParts(cls, prev_head, prev_tail, n+1) and + removed_head = this.getItem(n).removeHead(cls) and + removed_tail = ConsList(prev_head, prev_tail) + ) + } + + ClassListList remove(ClassObject cls) { + exists(ClassList removed_head, ClassListList removed_tail | + this.removedClassParts(cls, removed_head, removed_tail, 0) and + result = ConsList(removed_head, removed_tail) + ) + or + this = EmptyList() and result = EmptyList() + } + + predicate legalMergeCandidate(ClassObject cls, int n) { + cls = this.getAHead() and n = this.length() + or + this.getItem(n).legalMergeHead(cls) and + this.legalMergeCandidate(cls, n+1) + } + + predicate legalMergeCandidate(ClassObject cls) { + this.legalMergeCandidate(cls, 0) + } + + predicate illegalMergeCandidate(ClassObject cls) { + cls = this.getAHead() and + this.getItem(_).getTail().contains(cls) + } + + ClassObject bestMergeCandidate(int n) { + exists(ClassObject head | + head = this.getItem(n).getHead() + | + legalMergeCandidate(head) and result = head + or + illegalMergeCandidate(head) and result = this.bestMergeCandidate(n+1) + ) + } + + ClassObject bestMergeCandidate() { + result = this.bestMergeCandidate(0) + } + + /** Gets a ClassList representing the this list of 
list flattened into a single list. + * Used for old-style MRO computation. + */ + ClassList flatten() { + this = EmptyList() and result = Empty() + or + result = flatten_list(this, 0) + } + +} + +private ClassList flatten_list(ClassListList list, int n) { + need_flattening(list) and + exists(ClassList head, ClassListList tail | + list = ConsList(head, tail) + | + n = head.length() and result = tail.flatten() + or + result = Cons(head.getItem(n), flatten_list(list, n+1)) + ) +} + +/* Restrict flattening to those lists that need to be flattened */ +private predicate need_flattening(ClassListList list) { + list = list_old_style_base_mros(_) + or + exists(ClassListList toflatten | + need_flattening(toflatten) and + list = toflatten.getTail() + ) +} + +private ClassList bases(ClassObject cls) { + result = bases(cls, 0) +} + +private ClassList bases(ClassObject cls, int n) { + result = Cons(cls.getBaseType(n), bases(cls, n+1)) + or + result = Empty() and n = PointsTo::Types::class_base_count(cls) +} + +private ClassListList list_of_linearization_of_bases_plus_bases(ClassObject cls) { + result = list_of_linearization_of_bases_plus_bases(cls, 0) +} + +private ClassListList list_of_linearization_of_bases_plus_bases(ClassObject cls, int n) { + result = ConsList(bases(cls), EmptyList()) and n = PointsTo::Types::class_base_count(cls) + or + exists(ClassListList partial | + partial = list_of_linearization_of_bases_plus_bases(cls, n+1) and + result = ConsList(new_style_mro(cls.getBaseType(n)), partial) + ) +} + +private ClassList merge_of_linearization_of_bases(ClassObject cls) { + result = list_of_linearization_of_bases_plus_bases(cls).merge() +} + +cached ClassList new_style_mro(ClassObject cls) { + cls = theObjectType() and result = Cons(cls, Empty()) + or + result = Cons(cls, merge_of_linearization_of_bases(cls)) +} + +cached ClassList old_style_mro(ClassObject cls) { + PointsTo::Types::is_new_style_bool(cls) = false and + result = Cons(cls, 
list_old_style_base_mros(cls).flatten()).(ClassList).deduplicate() +} + +private ClassListList list_old_style_base_mros(ClassObject cls) { + result = list_old_style_base_mros(cls, 0) +} + +pragma [nomagic] +private ClassListList list_old_style_base_mros(ClassObject cls, int n) { + n = PointsTo::Types::class_base_count(cls) and result = EmptyList() + or + result = ConsList(old_style_mro(cls.getBaseType(n)), list_old_style_base_mros(cls, n+1)) +} + +/** Holds if the pair `reversed_mro`, `remaining_list` represents a step in the C3 merge operation + * of computing the C3 linearization of `original`. + */ +private predicate merge_step(ClassList reversed_mro, ClassListList remaining_list, ClassListList original) { + remaining_list = list_of_linearization_of_bases_plus_bases(_) and reversed_mro = Empty() and remaining_list = original + or + /* Removes the best merge candidate from `remaining_list` and prepends it to `reversed_mro` */ + exists(ClassObject head, ClassList prev_reverse_mro, ClassListList prev_list | + merge_step(prev_reverse_mro, prev_list, original) and + head = prev_list.bestMergeCandidate() and + reversed_mro = Cons(head, prev_reverse_mro) and + remaining_list = prev_list.remove(head) + ) + or + merge_step(reversed_mro, ConsList(Empty(), remaining_list), original) +} + +/* Helpers for `ClassList.reverse()` */ + +private predicate needs_reversing(ClassList lst) { + merge_step(lst, EmptyList(), _) + or + lst = Empty() +} + +private predicate reverse_step(ClassList lst, ClassList remainder, ClassList reversed) { + needs_reversing(lst) and remainder = lst and reversed = Empty() + or + exists(ClassObject head, ClassList tail | + reversed = Cons(head, tail) and + reverse_step(lst, Cons(head, remainder), tail) + ) +} + diff --git a/python/ql/src/semmle/python/pointsto/Overview.qll b/python/ql/src/semmle/python/pointsto/Overview.qll new file mode 100644 index 00000000000..f46f83dbb3c --- /dev/null +++ b/python/ql/src/semmle/python/pointsto/Overview.qll @@ -0,0 
+1,122 @@ +/** + * + * ## Points-to analysis for Python + * + * + * The purpose of points-to analysis is to determine what values a variable might hold at runtime. + * This allows us to write useful queries to check for the misuse of those values. + * In the academic and technical literature, points-to analysis (AKA pointer analysis) attempts to determine which variables can refer to which heap allocated objects. + * From the point of view of Python we can treat all Python objects as "heap allocated objects". + * + * + * The output of the points-to analysis consists of a large set of relations which provide not only points-to information, but call-graph, pruned flow-graph and exception-raising information. + * + * These relations are computed by a large set of mutually recursive predicates which infer the flow of values through the program. + * Our analysis is inter-procedural, but uses contexts to maintain the precision of an intra-procedural analysis. + * + * ### Precision + * + * In conventional points-to, the computed points-to set should be a super-set of the real points-to set (were it possible to determine such a thing). + * However, for our purposes we want the points-to set to be a sub-set of the real points-to set. + * This is simply because conventional points-to is used to determine compiler optimisations, so the points-to set needs to be a conservative over-estimate of what is possible. + * We have the opposite concern; we want to eliminate false positives where possible. + * + * This should be borne in mind when reading the literature about points-to analysis. In conventional points-to, a precise analysis produces as small a points-to set as possible. + * Our analysis is precise (or very close to it). Instead of seeking to maximise precision, we seek to maximise *recall* and produce as large a points-to set as possible (whilst remaining precise). + * + * When it comes to designing the inference, we always choose precision over recall. 
+ * We want to minimise false positives so it is important to avoid making incorrect inferences, even if it means losing a lot of potential information. + * If a potential new points-to fact would increase the number of values we are able to infer, but decrease precision, then we omit it. + * + * ### Objects + * + * In conventional points-to an 'object' is generally considered to be any static instantiation. E.g. in Java this is simply anything looking like `new X(..)`. + * However, in Python as there is no `new` expression we cannot know what is a class merely from the syntax. + * Consequently, we must start with only the simplest objects and extend to instance creation as we can infer classes. + * + * To perform points-to analysis we start with the set of built-in objects, all literal constants, and class and function definitions. + * From there we can propagate those values. Whenever we see a call `x()` we add a new object if `x` refers to some class. + * + * In the `PointsTo::points_to` relation, the second argument, `Object value` is the "value" referred to by the ControlFlowNode (which will correspond to an rvalue in the source code). + * The set of "values" used will change as the library continues to improve, but currently includes the following: + * + * * Classes (both in the source and builtin) + * * Functions (both in the source and builtin) + * * Literal constants defined in the source (string and numbers) + * * Constant objects defined in compiled libraries and the interpreter (None, boolean, strings and numbers) + * * Some calls (many calls are absent as we can infer what the call returns). Consider a call to represent the set of objects that it could return. + * * Some other constructs that might create a new object. + * + * A number of constructs that might create a new object, such as binary operations, are omitted if there is no useful information that can be attached to them and they would just increase the size of the database. 
+ * + * ### Contexts + * + * In order to better handle value tracking in functions, we introduce context to the points-to relation. + * There is one `default` context, equivalent to having no context, a `main` context for scripts and any number of call-site contexts. + * + * Adding context to a conventional points-to analysis can significantly improve its precision. In contrast, for our points-to analysis, adding context significantly improves the recall of our analysis. + * The consensus in the academic literature is that "object sensitivity" is superior to "call-site sensitivity". + * However, since we are seeking to maximise, not minimise, our points-to set, it is entirely possible that the reverse is true for us. + * We use "call-site sensitivity" at the moment, although the exact set of contexts used will change. + * + * ### Points-to analysis over the ESSA dataflow graph + * + * In order to perform points-to analysis on the dataflow graph, we + * need to understand the many implicit "definitions" that occur within Python code. + * + * These are: + * + * 1. Implicit definition as "undefined" for any local or global variable at the start of its scope. + * Many of these will be dead and will be eliminated during construction of the dataflow graph. + * 2. Implicit definition of `__name__`, `__package__` and `__module__` at the start of the relevant scopes. + * 3. Implicit definition of all submodules as global variables at the start of an `__init__` module. + * + * In addition, there are the "artificial", data-flow definitions: + * + * 1. Phi functions. + * 2. Pi (guard, or filter) functions. + * 3. "Refinements" of a variable. These are not definitions of the variable, but may modify the object referred to by the variable, + * possibly changing some inferred facts about the object. + * 4. Definition of any variable that escapes the scope, at entry, exit and at all call-sites. 
+ * + * As an example, consider: + * ```python + * if a: + * float = "global" + * #float can now be either the class 'float' or the string "global" + * + * class C2: + * if b: + * float = "local" + * float + * + * float #Cannot be "local" + * ``` + * + * Ignoring `__name__` and `__package__`, the data-flow graph looks something like this, noting that there are two variables named "float" + * in the scope `C2`, the local and the global. + * + * ``` + * a_0 = undefined + * b_0 = undefined + * float_0 = undefined + * int_0 = undefined + * float_1 = "global" + * float_2 = phi(float_0, float_1) + * float_3 = float_2 (Definition on entry to C2 for global variable) + * float_4 = undefined (Definition on entry to C2 for local variable) + * float_5 = "local" | + * float_6 = phi(float_4, float_5) | + * float_7 = float_3 (transfer values in global 'float', but not local, back to module scope). + * ``` + * + * ### Implementation + * + * This section is for information purposes only. Any or all details may change without notice. + * + * QL, being based on Datalog, has fixed-point semantics which makes it impossible to make negative statements that are recursive. + * To work around this we need to define many predicates over boolean variables. Suppose we have a predicate which determines whether a test can be true or false at runtime. + * We might naively implement this as `predicate test_is_true(ControlFlowNode test, Context ctx)` but this would lead to negative recursion if we want to know when the test can be false. + * Instead we implement it as `boolean test_result(ControlFlowNode test, Context ctx)` where the absence of a value indicates merely that we do not (yet) know what value the test may have. 
+ * + */ diff --git a/python/ql/src/semmle/python/pointsto/PointsTo.qll b/python/ql/src/semmle/python/pointsto/PointsTo.qll new file mode 100644 index 00000000000..284e5ccb5e7 --- /dev/null +++ b/python/ql/src/semmle/python/pointsto/PointsTo.qll @@ -0,0 +1,2857 @@ +/** + * Part of the combined points-to, call-graph and type-inference library. + * The main relation `points_to(node, context, object, cls, origin)` relates a control flow node + * to the possible objects it points to, the inferred types of those objects and the 'origin' + * of those objects. The 'origin' is the point in source code that the object can be traced + * back to. + * + * The predicates provided are not intended to be used directly (although they are available to the advanced user), but are exposed in the user API as methods on key classes. + * + * For example, the most important predicate in the points-to relation is: + * ```ql + * predicate PointsTo::points_to(ControlFlowNode f, PointsToContext ctx, Object value, ClassObject cls, ControlFlowNode origin) + * ``` + * Where `f` is the control flow node representing something that might hold a value. `ctx` is the context in which `f` "points-to" `value` and may be "general" or from a specific call-site. + * `value` is a static approximation to a value, such as a number, a class, or an object instantiation. + * `cls` is the class of this value if known, or `theUnknownType()` which is an internal `ClassObject` and should not be exposed to the general QL user. + * `origin` is the point in the source from where `value` originates and is useful when presenting query results. 
+ * + * The `PointsTo::points_to` relation is exposed at the user API level as + * ```ql + * ControlFlowNode.refersTo(Context context, Object value, ClassObject cls, ControlFlowNode origin) + * ``` + * + */ + +import python +private import PointsToContext +private import Base +private import semmle.python.types.Extensions +private import Filters as BaseFilters +import semmle.dataflow.SSA +private import MRO + +/** Get a `ControlFlowNode` from an object or `here`. + * If the object is a ControlFlowNode then use that, otherwise fall back on `here` + */ +pragma[inline] +private ControlFlowNode origin_from_object_or_here(ObjectOrCfg object, ControlFlowNode here) { + result = object + or + not object instanceof ControlFlowNode and result = here +} + +module PointsTo { + + cached module API { + + /** INTERNAL -- Use `FunctionObject.getACall()`. + * + * Gets a call to `func` with the given context. */ + cached CallNode get_a_call(FunctionObject func, PointsToContext context) { + function_call(func, context, result) + or + method_call(func, context, result) + } + + /** INTERNAL -- Use `FunctionObject.getAFunctionCall()`. + * + * Holds if `call` is a function call to `func` with the given context. */ + cached predicate function_call(FunctionObject func, PointsToContext context, CallNode call) { + points_to(call.getFunction(), context, func, _, _) + } + + /** INTERNAL -- Use `FunctionObject.getAMethodCall()`. + * + * Holds if `call` is a method call to `func` with the given context. 
*/ + cached predicate method_call(FunctionObject func, PointsToContext context, CallNode call) { + Calls::plain_method_call(func, context, call) + or + Calls::super_method_call(context, call, _, func) + or + class_method_call(_, _, func, context, call) + } + + /** INTERNAL -- Use `ClassMethod.getACall()` instead */ + cached predicate class_method_call(Object cls_method, ControlFlowNode attr, FunctionObject func, PointsToContext context, CallNode call) { + exists(ClassObject cls, string name | + attr = call.getFunction() and + Types::class_attribute_lookup(cls, name, cls_method, _, _) | + Calls::receiver_type_for(cls, name, attr, context) + or + points_to(attr.(AttrNode).getObject(name), context, cls, _, _) + ) and + class_method(cls_method, func) + } + + /** INTERNAL -- Use `ClassMethod` instead */ + cached predicate class_method(Object cls_method, FunctionObject method) { + decorator_call(cls_method, theClassMethodType(), method) + } + + pragma [nomagic] + private predicate decorator_call(Object method, ClassObject decorator, FunctionObject func) { + exists(CallNode f, PointsToContext imp | + method = f and imp.isImport() and + points_to(f.getFunction(), imp, decorator, _, _) and + points_to(f.getArg(0), imp, func, _, _) + ) + } + + /** INTERNAL -- Use `f.refersTo(value, cls, origin)` instead. */ + cached predicate points_to(ControlFlowNode f, PointsToContext context, Object value, ClassObject cls, ControlFlowNode origin) { + points_to_candidate(f, context, value, cls, origin) and + Layer::reachableBlock(f.getBasicBlock(), context) + } + + /** Gets the value that `expr` evaluates to (when converted to a boolean) when `use` refers to `(val, cls, origin)` + * and `expr` is a test (a branch) and contains `use`. 
*/ + cached boolean test_evaluates_boolean(ControlFlowNode expr, ControlFlowNode use, PointsToContext context, Object val, ClassObject cls, ControlFlowNode origin) { + test_contains(expr, use) and + result = Filters::evaluates(expr, use, context, val, cls, origin) + } + + /** INTERNAL -- Do not use. + * + * Holds if `package.name` points to `(value, cls, origin)`, where `package` is a package object. */ + cached predicate package_attribute_points_to(PackageObject package, string name, Object value, ClassObject cls, ControlFlowNode origin) { + py_module_attributes(package.getInitModule().getModule(), name, value, cls, origin) + or + exists(Module init | + init = package.getInitModule().getModule() | + not exists(Variable v | v.getScope() = init | v.getId() = name or v.getId() = "*") + or + exists(EssaVariable v, PointsToContext imp | + v.getScope() = init and v.getName() = "*" and v.getAUse() = init.getANormalExit() | + SSA::ssa_variable_named_attribute_points_to(v, imp, name, undefinedVariable(), _, _) and + imp.isImport() + ) + ) and explicitly_imported(value) and + value = package.submodule(name) and cls = theModuleType() and origin = value + } + + /** INTERNAL -- Use `m.attributeRefersTo(name, obj, origin)` instead. + * + * Holds if `m.name` points to `(obj, cls, origin)`, where `m` is a (source) module.
*/ + cached predicate py_module_attributes(Module m, string name, Object obj, ClassObject cls, ControlFlowNode origin) { + exists(EssaVariable var, ControlFlowNode exit, ObjectOrCfg orig, PointsToContext imp | + exit = m.getANormalExit() and var.getAUse() = exit and + var.getSourceVariable().getName() = name and + ssa_variable_points_to(var, imp, obj, cls, orig) and + imp.isImport() and + not obj = undefinedVariable() | + origin = origin_from_object_or_here(orig, exit) + ) + or + not exists(EssaVariable var | var.getAUse() = m.getANormalExit() and var.getSourceVariable().getName() = name) and + exists(EssaVariable var, PointsToContext imp | + var.getAUse() = m.getANormalExit() and var.getSourceVariable().getName() = "*" | + SSA::ssa_variable_named_attribute_points_to(var, imp, name, obj, cls, origin) and + imp.isImport() and not obj = undefinedVariable() + ) + } + + /** INTERNAL -- Use `ModuleObject.hasAttribute(name)` instead. + * + * Holds if the module `mod` defines `name`. */ + cached predicate module_defines_name(Module mod, string name) { + module_defines_name_boolean(mod, name) = true + } + + /** INTERNAL -- Use `Version.isTrue()` instead. + * + * Holds if `cmp` points to a test on version that is `value`. + * For example, if `cmp` is `sys.version[0] < "3"` then, for Python 2, `value` would be `true`. */ + cached predicate version_const(CompareNode cmp, PointsToContext context, boolean value) { + exists(ControlFlowNode fv, ControlFlowNode fc, Object val | + comparison(cmp, fv, fc, _) and + points_to(cmp, context, val, _, _) and + value = val.booleanValue() + | + sys_version_info_slice(fv, context, _) + or + sys_version_info_index(fv, context, _, _) + or + sys_version_string_char0(fv, context, _, _) + or + points_to(fv, context, theSysHexVersionNumber(), _, _) + ) + or + value = version_tuple_compare(cmp, context).booleanValue() + } + + /** INTERNAL -- Use `FunctionObject.getArgumentForCall(call, position)` instead.
*/ + cached ControlFlowNode get_positional_argument_for_call(FunctionObject func, PointsToContext context, CallNode call, int position) { + result = Calls::get_argument_for_call_by_position(func, context, call, position) + or + exists(string name | + result = Calls::get_argument_for_call_by_name(func, context, call, name) and + func.getFunction().getArg(position).asName().getId() = name + ) + } + + /** INTERNAL -- Use `FunctionObject.getNamedArgumentForCall(call, name)` instead. */ + cached ControlFlowNode get_named_argument_for_call(FunctionObject func, PointsToContext context, CallNode call, string name) { + ( + result = Calls::get_argument_for_call_by_name(func, context, call, name) + or + exists(int position | + result = Calls::get_argument_for_call_by_position(func, context, call, position) and + func.getFunction().getArg(position).asName().getId() = name + ) + ) + } + + /** INTERNAL -- Use `FunctionObject.neverReturns()` instead. + * Whether function `func` never returns. Slightly conservative approximation, this predicate may be false + * for a function that can never return. */ + cached predicate function_never_returns(FunctionObject func) { + /* A Python function never returns if it has no normal exits that are not dominated by a + * call to a function which itself never returns. + */ + function_can_never_return(func) + or + exists(Function f | + f = func.getFunction() + | + forall(BasicBlock exit | + exit = f.getANormalExit().getBasicBlock() | + exists(FunctionObject callee, BasicBlock call | + get_a_call(callee, _).getBasicBlock() = call and + function_never_returns(callee) and + call.dominates(exit) + ) + ) + ) + } + + /** INTERNAL -- Use `m.importedAs(name)` instead. + * + * Holds if `import name` will import the module `m`. 
*/ + cached predicate module_imported_as(ModuleObject m, string name) { + /* Normal imports */ + m.getName() = name + or + /* sys.modules['name'] = m */ + exists(ControlFlowNode sys_modules_flow, ControlFlowNode n, ControlFlowNode mod | + /* Use previous points-to here to avoid slowing down the recursion too much */ + exists(SubscriptNode sub, Object sys_modules | + sub.getValue() = sys_modules_flow and + points_to(sys_modules_flow, _, sys_modules, _, _) and + builtin_module_attribute(theSysModuleObject(), "modules", sys_modules, _) and + sub.getIndex() = n and + n.getNode().(StrConst).getText() = name and + sub.(DefinitionNode).getValue() = mod and + points_to(mod, _, m, _, _) + ) + ) + } + + /** Holds if `call` is of the form `getattr(arg, "name")`. */ + cached predicate getattr(CallNode call, ControlFlowNode arg, string name) { + points_to(call.getFunction(), _, builtin_object("getattr"), _, _) and + call.getArg(1).getNode().(StrConst).getText() = name and + arg = call.getArg(0) + } + + /** Holds if `f` is the instantiation of an object, `cls(...)`. */ + cached predicate instantiation(CallNode f, PointsToContext context, ClassObject cls) { + points_to(f.getFunction(), context, cls, _, _) and + not cls = theTypeType() and + Types::callToClassWillReturnInstance(cls) + } + + /** Holds if `var` refers to `(value, cls, origin)` given the context `context`. 
*/ + cached predicate ssa_variable_points_to(EssaVariable var, PointsToContext context, Object value, ClassObject cls, ObjectOrCfg origin) { + SSA::ssa_definition_points_to(var.getDefinition(), context, value, cls, origin) + } + + + } + + predicate name_maybe_imported_from(ModuleObject mod, string name) { + exists(Module m, ImportStar s | + has_import_star(m, s, mod) | + exists(Variable var | name = var.getId() and var.getScope() = s.getScope()) + or + exists(ModuleObject other | + name_maybe_imported_from(other, name) and other.getModule() = m + ) + ) + or + exists(ImportMemberNode imp | + points_to(imp.getModule(name), _, mod, _, _) + ) + or + exists(PackageObject pack | + pack.getInitModule() = mod | + name_maybe_imported_from(pack, name) + ) + or + exists(mod.(PackageObject).submodule(name)) + or + exists(PackageObject package | + package.getInitModule() = mod and + exists(package.submodule(name)) + ) + or + module_exports(mod, name) + or + name = "__name__" + } + + private boolean module_defines_name_boolean(Module m, string name) { + exists(ModuleObject mod | + m = mod.getModule() | + exists(SsaVariable var | name = var.getId() and var.getAUse() = m.getANormalExit()) and result = true + or + name_maybe_imported_from(mod, name) and not any(ImportStar i).getScope() = m and result = false and + not exists(SsaVariable var | name = var.getId() and var.getAUse() = m.getANormalExit()) and + not exists(PackageObject pack | + pack.getInitModule() = mod and + exists(pack.submodule(name)) + ) + or + exists(Object obj | + not obj = undefinedVariable() and + py_module_attributes(mod.getModule(), name, obj, _, _) + ) and result = true + or + exists(ImportStarNode isn, ModuleObject imported | + isn.getScope() = m and + points_to(isn.getModule(), _, imported, _, _) and + module_exports(imported, name) + ) and result = true + ) + or + name = "__name__" and result = true + } + + private boolean py_module_exports_boolean(ModuleObject mod, string name) { + exists(Module m | + m 
= mod.getModule() | + /* Explicitly declared in __all__ */ + m.declaredInAll(name) and result = true + or + /* No __all__ and name is defined and public */ + not m.declaredInAll(_) and name.charAt(0) != "_" and + result = module_defines_name_boolean(m, name) + or + /* May be imported from this module, but not declared in __all__ */ + name_maybe_imported_from(mod, name) and m.declaredInAll(_) and not m.declaredInAll(name) and + result = false + ) + } + + private boolean package_exports_boolean(PackageObject pack, string name) { + explicitly_imported(pack.submodule(name)) and + ( + not exists(pack.getInitModule()) + or + exists(ModuleObject init | + pack.getInitModule() = init | + not init.getModule().declaredInAll(_) and name.charAt(0) != "_" + ) + ) and result = true + or + result = module_exports_boolean(pack.getInitModule(), name) + } + + /** INTERNAL -- Use `m.exports(name)` instead. */ + cached predicate module_exports(ModuleObject mod, string name) { + module_exports_boolean(mod, name) = true + } + + private boolean module_exports_boolean(ModuleObject mod, string name) { + py_cmembers_versioned(mod, name, _, major_version().toString()) and + name.charAt(0) != "_" and result = true + or + result = package_exports_boolean(mod, name) + or + result = py_module_exports_boolean(mod, name) + } + + /** Predicates in this layer need to be visible to the next layer, but not otherwise */ + private module Layer { + + /** Holds if BasicBlock `b` is reachable, given the context `context`.
*/ + predicate reachableBlock(BasicBlock b, PointsToContext context) { + context.appliesToScope(b.getScope()) and + forall(ConditionBlock guard | + guard.controls(b, _) | + exists(Object value | + points_to(guard.getLastNode(), context, value, _, _) + | + guard.controls(b, true) and not value.booleanValue() = false + or + guard.controls(b, false) and not value.booleanValue() = true + ) + or + /* Assume the true edge of an assert is reachable (except for assert 0/False) */ + guard.controls(b, true) and + exists(Assert a, Expr test | + a.getTest() = test and + guard.getLastNode().getNode() = test and + not test instanceof ImmutableLiteral + ) + ) + } + + /** Holds if the edge `pred` -> `succ` is reachable, given the context `context`. + */ + predicate controlledReachableEdge(BasicBlock pred, BasicBlock succ, PointsToContext context) { + exists(ConditionBlock guard, Object value | + points_to(guard.getLastNode(), context, value, _, _) + | + guard.controlsEdge(pred, succ, true) and not value.booleanValue() = false + or + guard.controlsEdge(pred, succ, false) and not value.booleanValue() = true + ) + } + + /** Holds if `mod.name` points to `(value, cls, origin)`, where `mod` is a module object. */ + predicate module_attribute_points_to(ModuleObject mod, string name, Object value, ClassObject cls, ObjectOrCfg origin) { + py_module_attributes(mod.getModule(), name, value, cls, origin) + or + package_attribute_points_to(mod, name, value, cls, origin) + or + builtin_module_attribute(mod, name, value, cls) and origin = value + } + + } + + import API + + /** Holds if `f` points to a test on the OS that is `value`. + * For example, if `f` is `sys.platform == "win32"` then, for Windows, `value` would be `true`.
+ */ + private predicate os_const(ControlFlowNode f, PointsToContext context, boolean value) { + exists(string os | + os_test(f, os, context) | + value = true and py_flags_versioned("sys.platform", os, major_version().toString()) + or + value = false and not py_flags_versioned("sys.platform", os, major_version().toString()) + ) + } + + /** Points-to before pruning. */ + pragma [nomagic] + private predicate points_to_candidate(ControlFlowNode f, PointsToContext context, Object value, ClassObject cls, ControlFlowNode origin) { + simple_points_to(f, value, cls, origin) and context.appliesToScope(f.getScope()) + or + f.isClass() and value = f and origin = f and context.appliesToScope(f.getScope()) and + cls = Types::class_get_meta_class(value) + or + exists(boolean b | + os_const(f, context, b) + | + value = theTrueObject() and b = true + or + value = theFalseObject() and b = false + ) and cls = theBoolType() and origin = f + or + import_points_to(f, value, origin) and cls = theModuleType() and context.appliesToScope(f.getScope()) + or + attribute_load_points_to(f, context, value, cls, origin) + or + getattr_points_to(f, context, value, cls, origin) + or + if_exp_points_to(f, context, value, cls, origin) + or + from_import_points_to(f, context, value, cls, origin) + or + use_points_to(f, context, value, cls, origin) + or + def_points_to(f, context, value, cls, origin) + or + Calls::call_points_to(f, context, value, cls, origin) + or + subscript_points_to(f, context, value, cls, origin) + or + sys_version_info_slice(f, context, cls) and value = theSysVersionInfoTuple() and origin = f + or + sys_version_info_index(f, context, value, cls) and origin = f + or + sys_version_string_char0(f, context, value, cls) and origin = f + or + six_metaclass_points_to(f, context, value, cls, origin) + or + binary_expr_points_to(f, context, value, cls, origin) + or + compare_expr_points_to(f, context, value, cls, origin) + or + not_points_to(f, context, value, cls, origin) + or + 
value.(SuperCall).instantiation(context, f) and f = origin and cls = theSuperType() + or + value.(SuperBoundMethod).instantiation(context, f) and f = origin and cls = theBoundMethodType() + or + exists(boolean b | + b = Filters::evaluates_boolean(f, _, context, _, _, _) + | + value = theTrueObject() and b = true + or + value = theFalseObject() and b = false + ) and cls = theBoolType() and origin = f + or + f.(CustomPointsToFact).pointsTo(context, value, cls, origin) + } + + /** The ESSA variable with fast-local lookup (LOAD_FAST bytecode). */ + private EssaVariable fast_local_variable(NameNode n) { + n.isLoad() and + result.getASourceUse() = n and + result.getSourceVariable() instanceof FastLocalVariable + } + + /** The ESSA variable with name-local lookup (LOAD_NAME bytecode). */ + private EssaVariable name_local_variable(NameNode n) { + n.isLoad() and + result.getASourceUse() = n and + result.getSourceVariable() instanceof NameLocalVariable + } + + /** The ESSA variable for the global variable lookup. 
*/ + private EssaVariable global_variable(NameNode n) { + n.isLoad() and + result.getASourceUse() = n and + result.getSourceVariable() instanceof GlobalVariable + } + + private predicate use_points_to_maybe_origin(NameNode f, PointsToContext context, Object value, ClassObject cls, ObjectOrCfg origin_or_obj) { + ssa_variable_points_to(fast_local_variable(f), context, value, cls, origin_or_obj) + or + name_lookup_points_to_maybe_origin(f, context, value, cls, origin_or_obj) + or + not exists(fast_local_variable(f)) and not exists(name_local_variable(f)) and + global_lookup_points_to_maybe_origin(f, context, value, cls, origin_or_obj) + } + + pragma [noinline] + private predicate local_variable_undefined(NameNode f, PointsToContext context) { + ssa_variable_points_to(name_local_variable(f), context, undefinedVariable(), _, _) + } + + private predicate name_lookup_points_to_maybe_origin(NameNode f, PointsToContext context, Object value, ClassObject cls, ObjectOrCfg origin_or_obj) { + exists(EssaVariable var | var = name_local_variable(f) | + ssa_variable_points_to(var, context, value, cls, origin_or_obj) + ) + or + local_variable_undefined(f, context) and + global_lookup_points_to_maybe_origin(f, context, value, cls, origin_or_obj) + } + + private predicate global_lookup_points_to_maybe_origin(NameNode f, PointsToContext context, Object value, ClassObject cls, ObjectOrCfg origin_or_obj) { + ssa_variable_points_to(global_variable(f), context, value, cls, origin_or_obj) + or + ssa_variable_points_to(global_variable(f), context, undefinedVariable(), _, _) and + potential_builtin_points_to(f, value, cls, origin_or_obj) + or + not exists(global_variable(f)) and context.appliesToScope(f.getScope()) and + potential_builtin_points_to(f, value, cls, origin_or_obj) + } + + /** Holds if `f` is a use (of a variable) that points to `(value, cls, origin)`.
*/ + private predicate use_points_to(NameNode f, PointsToContext context, Object value, ClassObject cls, ControlFlowNode origin) { + exists(ObjectOrCfg origin_or_obj | + not value = undefinedVariable() and + use_points_to_maybe_origin(f, context, value, cls, origin_or_obj) | + origin = origin_from_object_or_here(origin_or_obj, f) + ) + } + + /** Holds if the definition node `f` points to `(value, cls, origin)`, that is, if the value assigned by `f` does. */ + private predicate def_points_to(DefinitionNode f, PointsToContext context, Object value, ClassObject cls, ControlFlowNode origin) { + points_to(f.getValue(), context, value, cls, origin) + } + + /** Holds if `f` points to `@six.add_metaclass(cls)\nclass ...`. */ + private predicate six_metaclass_points_to(ControlFlowNode f, PointsToContext context, Object value, ClassObject cls, ControlFlowNode origin) { + exists(ControlFlowNode meta | + Types::six_add_metaclass(f, value, meta) and + points_to(meta, context, cls, _, _) + ) and + origin = value + } + + /** Holds if `obj.name` points to `(value, cls, orig)`. */ + pragma [noinline] + private predicate class_or_module_attribute(Object obj, string name, Object value, ClassObject cls, ObjectOrCfg orig) { + /* Normal class attributes */ + Types::class_attribute_lookup(obj, name, value, cls, orig) and not cls = theStaticMethodType() and not cls = theClassMethodType() + or + /* Static methods of the class */ + exists(CallNode sm | Types::class_attribute_lookup(obj, name, sm, theStaticMethodType(), _) and sm.getArg(0) = value and cls = thePyFunctionType() and orig = value) + or + /* Module attributes */ + Layer::module_attribute_points_to(obj, name, value, cls, orig) + } + + /** Holds if `f` points to `(value, cls, origin)` where `f` is an instance attribute, `x.attr`.
*/ + pragma [nomagic] + private predicate instance_attribute_load_points_to(AttrNode f, PointsToContext context, Object value, ClassObject cls, ControlFlowNode origin) { + f.isLoad() and + exists(string name | + named_attribute_points_to(f.getObject(name), context, name, value, cls, origin) + or + /* Static methods on the class of the instance */ + exists(CallNode sm, ClassObject icls | + points_to(f.getObject(name), context, _, icls, _) and + Types::class_attribute_lookup(icls, name, sm, theStaticMethodType(), _) and sm.getArg(0) = value and cls = thePyFunctionType() and origin = value + ) + or + /* Unknown instance attributes */ + exists(Object x, ClassObject icls, ControlFlowNode obj_node | + obj_node = f.getObject(name) and + not obj_node.(NameNode).isSelf() and + points_to(obj_node, context, x, icls, _) and + (not x instanceof ModuleObject and not x instanceof ClassObject) and + not icls.isBuiltin() and + Types::class_has_attribute_bool(icls, name) = false and + value = unknownValue() and cls = theUnknownType() and origin = f + ) + ) + } + + pragma[noinline] + private predicate receiver_object(AttrNode f, PointsToContext context, Object cls_or_mod, string name) { + f.isLoad() and + exists(ControlFlowNode fval| + fval = f.getObject(name) and + points_to(fval, context, cls_or_mod, _, _) | + cls_or_mod instanceof ClassObject or + cls_or_mod instanceof ModuleObject + ) + } + + /** Holds if `f` is an attribute `x.attr` and points to `(value, cls, origin)`. 
*/ + private predicate attribute_load_points_to(AttrNode f, PointsToContext context, Object value, ClassObject cls, ControlFlowNode origin) { + instance_attribute_load_points_to(f, context, value, cls, origin) + or + exists(Object cls_or_mod, string name, ObjectOrCfg orig | + receiver_object(f, context, cls_or_mod, name) and + class_or_module_attribute(cls_or_mod, name, value, cls, orig) and + origin = origin_from_object_or_here(orig, f) + ) + or + points_to(f.getObject(), context, unknownValue(), theUnknownType(), origin) and value = unknownValue() and cls = theUnknownType() + } + + /** Holds if `f` is an expression node `tval if cond else fval` and points to `(value, cls, origin)`. */ + private predicate if_exp_points_to(IfExprNode f, PointsToContext context, Object value, ClassObject cls, ControlFlowNode origin) { + points_to(f.getAnOperand(), context, value, cls, origin) + } + + /** Holds if `f` is an import expression, `import mod`, and points to the module `value` with origin `origin`. */ + private predicate import_points_to(ControlFlowNode f, ModuleObject value, ControlFlowNode origin) { + exists(string name, ImportExpr i | + i.getAFlowNode() = f and i.getImportedModuleName() = name and + module_imported_as(value, name) and + origin = f + ) + } + + /** Holds if `f` is a "from import" expression, `from mod import x` and points to `(value, cls, origin)`.
*/ + pragma [nomagic] + private predicate from_import_points_to(ImportMemberNode f, PointsToContext context, Object value, ClassObject cls, ControlFlowNode origin) { + exists(EssaVariable var, ObjectOrCfg orig | + live_import_from_dot_in_init(f, var) and + ssa_variable_points_to(var, context, value, cls, orig) and + origin = origin_from_object_or_here(orig, f) + ) + or + not live_import_from_dot_in_init(f, _) and + exists(string name, ModuleObject mod | + points_to(f.getModule(name), context, mod, _, _) | + exists(ObjectOrCfg orig | + Layer::module_attribute_points_to(mod, name, value, cls, orig) and + origin = origin_from_object_or_here(orig, f) + ) + ) + } + + /** Holds if `f` is of the form `getattr(x, "name")` and x.name points to `(value, cls, origin)`. */ + private predicate getattr_points_to(CallNode f, PointsToContext context, Object value, ClassObject cls, ControlFlowNode origin) { + exists(ControlFlowNode arg, string name | + named_attribute_points_to(arg, context, name, value, cls, origin) and + getattr(f, arg, name) + ) + } + + /** Whether the module is explicitly imported somewhere. */ + private predicate explicitly_imported(ModuleObject mod) { + exists(ImportExpr ie | module_imported_as(mod, ie.getAnImportedModuleName())) + or + exists(ImportMember im | module_imported_as(mod, im.getImportedModuleName())) + } + + /** Holds if an import star exists in the module m that imports the module `imported_module`, such that the flow from the import reaches the module exit. */ + private predicate has_import_star(Module m, ImportStar im, ModuleObject imported_module) { + exists(string name | + module_imported_as(imported_module, name) and + name = im.getImportedModuleName() and + im.getScope() = m and + im.getAFlowNode().getBasicBlock().reachesExit() + ) + } + + /** Track bitwise expressions so we can handle integer flags and enums. + * Tracking too many binary expressions is likely to kill performance. 
+ */ + private predicate binary_expr_points_to(BinaryExprNode b, PointsToContext context, Object value, ClassObject cls, ControlFlowNode origin) { + cls = theIntType() and + exists(ControlFlowNode left, ControlFlowNode right | + bitwise_expression_node(b, left, right) and + points_to(left, context, _, cls, _) and + points_to(right, context, _, cls, _) + ) and + value = origin and origin = b + } + + pragma [noinline] + private predicate incomparable_values(CompareNode cmp, PointsToContext context) { + exists(ControlFlowNode left, ControlFlowNode right | + cmp.operands(left, _, right) and + exists(Object lobj, Object robj | + points_to(left, context, lobj, _, _) and + points_to(right, context, robj, _, _) | + not Filters::comparable_value(lobj) + or + not Filters::comparable_value(robj) + ) + ) + } + + pragma [noinline] + private Object in_tuple(CompareNode cmp, PointsToContext context) { + exists(ControlFlowNode left, ControlFlowNode right | + cmp.operands(left, any(In i), right) and + exists(Object lobj, TupleObject tuple | + points_to(left, context, lobj, _, _) and + points_to(right, context, tuple, _, _) + | + lobj = tuple.getBuiltinElement(_) and result = theTrueObject() + or + not lobj = tuple.getBuiltinElement(_) and result = theFalseObject() + ) + ) + } + + pragma [noinline] + private predicate const_compare(CompareNode cmp, PointsToContext context, int comp, boolean strict) { + exists(ControlFlowNode left, ControlFlowNode right | + inequality(cmp, left, right, strict) and + ( + exists(NumericObject n1, NumericObject n2 | + points_to(left, context, n1, _, _) and + points_to(right, context, n2, _, _) and + comp = int_compare(n1, n2) + ) + or + exists(StringObject s1, StringObject s2| + points_to(left, context, s1, _, _) and + points_to(right, context, s2, _, _) and + comp = string_compare(s1, s2) + ) + ) + ) + } + + pragma [noinline] + private Object version_tuple_compare(CompareNode cmp, PointsToContext context) { + exists(ControlFlowNode lesser, 
ControlFlowNode greater, boolean strict | + inequality(cmp, lesser, greater, strict) and + exists(TupleObject tuple, int comp | + points_to(lesser, context, tuple, _, _) and + points_to(greater, context, theSysVersionInfoTuple(), _, _) and + comp = version_tuple_compare(tuple) + or + points_to(lesser, context, theSysVersionInfoTuple(), _, _) and + points_to(greater, context, tuple, _, _) and + comp = version_tuple_compare(tuple)*-1 + | + comp = -1 and result = theTrueObject() + or + comp = 0 and strict = false and result = theTrueObject() + or + comp = 0 and strict = true and result = theFalseObject() + or + comp = 1 and result = theFalseObject() + ) + ) + } +/* + pragma [noinline] + private Object version_hex_compare(CompareNode cmp, PointsToContext context) { + exists(ControlFlowNode lesser, ControlFlowNode greater, boolean strict | + inequality(cmp, lesser, greater, strict) and + exists(TupleObject tuple, int comp | + points_to(lesser, context, tuple, _, _) and + points_to(greater, context, theSysHexVersionNumber(), _, _) and + comp = version_tuple_compare(tuple) + or + points_to(lesser, context, theSysHexVersionNumber(), _, _) and + points_to(greater, context, tuple, _, _) and + comp = version_hex_compare(tuple)*-1 + | + comp = -1 and result = theTrueObject() + or + comp = 0 and strict = false and result = theTrueObject() + or + comp = 0 and strict = true and result = theFalseObject() + or + comp = 1 and result = theFalseObject() + ) + ) + } +*/ + private predicate compare_expr_points_to(CompareNode cmp, PointsToContext context, Object value, ClassObject cls, ControlFlowNode origin) { + equality_expr_points_to(cmp, context, value, cls, origin) + or + cls = theBoolType() and origin = cmp and + ( + incomparable_values(cmp, context) and + (value = theFalseObject() or value = theTrueObject()) + or + value = in_tuple(cmp, context) + or + exists(int comp, boolean strict | + const_compare(cmp, context, comp, strict) + | + comp = -1 and value = theTrueObject() + or + 
comp = 0 and strict = false and value = theTrueObject() + or + comp = 0 and strict = true and value = theFalseObject() + or + comp = 1 and value = theFalseObject() + ) + or + value = version_tuple_compare(cmp, context) + ) + } + + pragma[inline] + private int int_compare(NumericObject n1, NumericObject n2) { + exists(int i1, int i2 | + i1 = n1.intValue() and i2 = n2.intValue() | + i1 = i2 and result = 0 + or + i1 < i2 and result = -1 + or + i1 > i2 and result = 1 + ) + } + + pragma[inline] + private int string_compare(StringObject s1, StringObject s2) { + exists(string a, string b | + a = s1.getText() and b = s2.getText() | + a = b and result = 0 + or + a < b and result = -1 + or + a > b and result = 1 + ) + } + + private predicate not_points_to(UnaryExprNode f, PointsToContext context, Object value, ClassObject cls, ControlFlowNode origin) { + f.getNode().getOp() instanceof Not and + cls = theBoolType() and origin = f and + exists(Object operand | + points_to(f.getOperand(), context, operand, _, _) + | + not operand.booleanValue() = true and value = theTrueObject() + or + not operand.booleanValue() = false and value = theFalseObject() + ) + } + + private predicate equality_expr_points_to(CompareNode cmp, PointsToContext context, Object value, ClassObject cls, ControlFlowNode origin) { + cls = theBoolType() and origin = cmp and + exists(ControlFlowNode x, ControlFlowNode y, Object xobj, Object yobj, boolean is | + BaseFilters::equality_test(cmp, x, is, y) and + points_to(x, context, xobj, _, _) and + points_to(y, context, yobj, _, _) and + Filters::equatable_value(xobj) and Filters::equatable_value(yobj) + | + xobj = yobj and is = true and value = theTrueObject() + or + xobj != yobj and is = true and value = theFalseObject() + or + xobj = yobj and is = false and value = theFalseObject() + or + xobj != yobj and is = false and value = theTrueObject() + ) + } + + private predicate subscript_points_to(SubscriptNode sub, PointsToContext context, Object value, 
ClassObject cls, ControlFlowNode origin) { + exists(Object unknownCollection | + varargs_points_to(unknownCollection, _) or + kwargs_points_to(unknownCollection, _) + | + sub.isLoad() and + points_to(sub.getValue(), context, unknownCollection, _, _) and + value = unknownValue() and cls = theUnknownType() and origin = sub + ) + or + points_to(sub.getValue(), context, unknownValue(), _, _) and + value = unknownValue() and cls = theUnknownType() and origin = sub + } + + /* ************** + * VERSION INFO * + ****************/ + + /** Holds if `s` points to `sys.version_info[0]`. */ + private predicate sys_version_info_index(SubscriptNode s, PointsToContext context, NumericObject value, ClassObject cls) { + points_to(s.getValue(), context, theSysVersionInfoTuple(), _, _) and + exists(NumericObject zero | + zero.intValue() = 0 | + points_to(s.getIndex(), context, zero, _, _) + ) and + value.intValue() = major_version() and + cls = theIntType() + } + + /** Holds if `s` points to `sys.version_info[:x]` or `sys.version_info[:]`. */ + private predicate sys_version_info_slice(SubscriptNode s, PointsToContext context, ClassObject cls) { + points_to(s.getValue(), context, theSysVersionInfoTuple(), cls, _) and + exists(Slice index | index = s.getIndex().getNode() | + not exists(index.getStart()) + ) + } + + /** Holds if `s` points to `sys.version[0]`. */ + private predicate sys_version_string_char0(SubscriptNode s, PointsToContext context, Object value, ClassObject cls) { + points_to(s.getValue(), context, theSysVersionString(), cls, _) and + exists(NumericObject zero | + zero.intValue() = 0 | + points_to(s.getIndex(), context, zero, _, _) + ) + and + value = object_for_string(major_version().toString()) + } + + /* Version tests. Ignore micro and release parts. Treat major, minor as a single version major*10+minor + * Currently cover versions 0.9 to 4.0 + */ + + /** Helper for `version_const`. 
*/ + private predicate comparison(CompareNode cmp, ControlFlowNode fv, ControlFlowNode fc, string opname) { + /* Normalise so that `fv` reads as the left operand: when the operands appear the other way round, the reversed operator name is used. */ exists(Cmpop op | + cmp.operands(fv, op, fc) and opname = op.getSymbol() + or + cmp.operands(fc, op, fv) and opname = reversed(op) + ) + } + + /** Helper for `version_const`. Holds if `cmp` relates `lesser` and `greater` using `<`, `<=`, `>` or `>=` (operand order is swapped for the latter two); `strict` is true for `<` and `>`. */ + private predicate inequality(CompareNode cmp, ControlFlowNode lesser, ControlFlowNode greater, boolean strict) { + exists(Cmpop op | + cmp.operands(lesser, op, greater) and op.getSymbol() = "<" and strict = true + or + cmp.operands(lesser, op, greater) and op.getSymbol() = "<=" and strict = false + or + cmp.operands(greater, op, lesser) and op.getSymbol() = ">" and strict = true + or + cmp.operands(greater, op, lesser) and op.getSymbol() = ">=" and strict = false + ) + } + + /** Holds if `f` is a test for the O/S, that is, `f` points to something whose origin `c` satisfies `os_compare(c, os)`. */ + private predicate os_test(ControlFlowNode f, string os, PointsToContext context) { + exists(ControlFlowNode c | + os_compare(c, os) and + points_to(f, context, _, _, c) + ) + } + + /** Holds if the attribute `name` of the object at `f` refers to `(value, cls, origin)`. This is derived either from an ESSA variable that is used at `f`, or, when `f` is an instantiation of class `c`, from an ESSA variable (presumably `self`) used at the normal exit of the init method of `c`. */ predicate named_attribute_points_to(ControlFlowNode f, PointsToContext context, string name, Object value, ClassObject cls, ControlFlowNode origin) { + exists(EssaVariable var | + var.getAUse() = f | + SSA::ssa_variable_named_attribute_points_to(var, context, name, value, cls, origin) + ) + or + exists(ClassObject c, EssaVariable self, Function init | + instantiation(f, context, c) and + init = c.getPyClass().getInitMethod() and + self.getAUse() = init.getANormalExit() and + SSA::ssa_variable_named_attribute_points_to(self, context, name, value, cls, origin) + ) + } + + private module Calls { + + /** Holds if `f` is a call to `type()` with a single argument `arg`, that is, the callee points to the `type` type and there is no second positional argument. */ + private predicate call_to_type(CallNode f, ControlFlowNode arg, PointsToContext context) { + points_to(f.getFunction(), context, theTypeType(), _, _) and not exists(f.getArg(1)) and arg = f.getArg(0) + } + + pragma [noinline] + predicate call_to_type_known_python_class_points_to(CallNode f, PointsToContext context, Object 
value, ClassObject cls, ControlFlowNode origin) { + /* `value` is the class that `arg` points to; this case requires that class to have an origin, which becomes `origin`. */ exists(ControlFlowNode arg | + call_to_type(f, arg, context) and + points_to(arg, context, _, value, _) + ) and + origin.getNode() = value.getOrigin() and + cls = theTypeType() + } + + /** Holds if `f` is a call `type(arg)` and `value` is the class that `arg` points to, where `value` has no origin; `origin` is then the call itself and `cls` is the `type` type. */ pragma [noinline] + predicate call_to_type_known_builtin_class_points_to(CallNode f, PointsToContext context, Object value, ClassObject cls, ControlFlowNode origin) { + exists(ControlFlowNode arg | + call_to_type(f, arg, context) | + points_to(arg, context, _, value, _) + ) and + not exists(value.getOrigin()) and + origin = f and cls = theTypeType() + } + + /** Holds if `f` is a call to a builtin callable other than `isinstance`, `issubclass` and `callable`. `cls` is a return type of that callable; `value` is the `None` object when `cls` is the None type and the call node itself otherwise. `origin` is `f`. */ pragma [noinline] + predicate call_points_to_builtin_function(CallNode f, PointsToContext context, Object value, ClassObject cls, ControlFlowNode origin) { + exists(BuiltinCallable b | + not b = builtin_object("isinstance") and + not b = builtin_object("issubclass") and + not b = builtin_object("callable") and + f = get_a_call(b, context) and + cls = b.getAReturnType() + ) and + f = origin and + if cls = theNoneType() then + value = theNoneObject() + else + value = f + } + + /** Holds if call is to an object that always returns its first argument. + * Typically, this is for known decorators and the like. + * The current implementation only accounts for instances of `zope.interface.declarations.implementer` and + * calls to `functools.wraps(fn)`. 
+ */ + private predicate annotation_call(CallNode f, PointsToContext context, Object value, ClassObject cls, ControlFlowNode origin) { + /* The result of the call is whatever its first argument points to. */ points_to(f.getArg(0), context, value, cls, origin) and + ( + points_to(f.getFunction(), context, _, zopeInterfaceImplementer(), _) + or + points_to(f.getFunction().(CallNode).getFunction(), context, functoolsWraps(), _, _) + ) + } + + /** Gets a class named `implementer` whose Python class is declared in the module `zope.interface.declarations`. */ private ClassObject zopeInterfaceImplementer() { + result.getName() = "implementer" and + result.getPyClass().getEnclosingModule().getName() = "zope.interface.declarations" + } + + /** Gets a Python function named `wraps` that is declared in the module `functools`. */ private PyFunctionObject functoolsWraps() { + result.getName() = "wraps" and + result.getFunction().getEnclosingModule().getName() = "functools" + } + + /** Holds if `f` is a call to a Python function that implicitly returns `None` (see `implicitly_returns`); `origin` is the node at the origin of the function. */ pragma [noinline] + predicate call_to_procedure_points_to(CallNode f, PointsToContext context, Object value, ClassObject cls, ControlFlowNode origin) { + exists(PyFunctionObject func | + f = get_a_call(func, context) and + implicitly_returns(func, value, cls) and origin.getNode() = func.getOrigin() + ) + } + + /** Holds if the callee of `f`, or the object on which the called attribute is accessed, points to the unknown value; the result of the call is then unknown as well. */ predicate call_to_unknown(CallNode f, PointsToContext context, Object value, ClassObject cls, ControlFlowNode origin) { + value = unknownValue() and cls = theUnknownType() and origin = f + and + exists(ControlFlowNode callable | + callable = f.getFunction() or + callable = f.getFunction().(AttrNode).getObject() + | + points_to(callable, context, unknownValue(), _, _) + ) + } + + /** Holds if `f` is a call whose callee points to `theTypeNewMethod()`; the result is treated as unknown. NOTE(review): `value` is bound to `theUnknownType()` here, whereas `call_to_unknown` binds it to `unknownValue()` -- confirm that this is intended. */ predicate call_to_type_new(CallNode f, PointsToContext context, Object value, ClassObject cls, ControlFlowNode origin) { + points_to(f.getFunction(), context, theTypeNewMethod(), _, _) and + value = theUnknownType() and cls = theUnknownType() and origin = f + } + + /** Holds if `f` is a call to a Python generator function; the call node itself is the value and the origin, and `cls` is the generator type. */ predicate call_to_generator_points_to(CallNode f, PointsToContext context, Object value, ClassObject cls, ControlFlowNode origin) { + exists(PyFunctionObject func | + f = get_a_call(func, context) | + func.getFunction().isGenerator() and origin = f and value = f and cls = theGeneratorType() + ) + } + + /* Helper for 
call_points_to_python_function */ + predicate return_val_points_to(PyFunctionObject func, PointsToContext context, Object value, ClassObject cls, ControlFlowNode origin) { + /* Holds if some returned node of `func` points to `(value, cls, origin)` in `context`. */ exists(ControlFlowNode rval | + rval = func.getAReturnedNode() and + points_to(rval, context, value, cls, origin) + ) + } + + /** Holds if `f` is a call, in `context`, to a Python function and `(value, cls, origin)` is what a returned node of that function points to in the callee context created by the call. */ pragma [noinline] + predicate call_points_to_python_function(CallNode f, PointsToContext context, Object value, ClassObject cls, ControlFlowNode origin) { + exists(PyFunctionObject func, PointsToContext callee | + return_val_points_to(func, callee, value, cls, origin) and + callee.fromCall(f, func, context) + ) + } + + /** A call, including calls to `type(arg)`, functions and classes. + * + * Call analysis logic + * =================== + * There are five possibilities (that we currently account for) here. + * 1. `type(known_type)` where we know the class of `known_type` and we know its origin + * 2. `type(known_type)` where we know the class of `known_type`, + * but we don't know its origin (because it is a builtin type) + * 3. `Class(...)` where Class is any class except type (with one argument) and calls to that class return instances of that class + * 4. `func(...)` where we know the return type of func (because it is a builtin function) + * 5. 
`func(...)` where we know the returned object and origin of func (because it is a Python function) + */ + predicate call_points_to(CallNode f, PointsToContext context, Object value, ClassObject cls, ControlFlowNode origin) { + /* Case 1 */ + call_to_type_known_python_class_points_to(f, context, value, cls, origin) + or + /* Case 2 */ + call_to_type_known_builtin_class_points_to(f, context, value, cls, origin) + or + /* Case 3 */ + instantiation(f, context, cls) and value = f and f = origin + or + /* Case 4 */ + call_points_to_builtin_function(f, context, value, cls, origin) + or + /* Case 5a: the value of a returned node of a Python function */ + call_points_to_python_function(f, context, value, cls, origin) + or + /* Case 5b: a call to a generator function */ + call_to_generator_points_to(f, context, value, cls, origin) + or + /* Case 5c: a Python function that implicitly returns `None` */ + call_to_procedure_points_to(f, context, value, cls, origin) + or + /* Not numbered above: the callee (or its receiver) is unknown */ call_to_unknown(f, context, value, cls, origin) + or + /* Not numbered above: the callee is `theTypeNewMethod()` */ call_to_type_new(f, context, value, cls, origin) + or + /* Not numbered above: a call that returns its first argument */ annotation_call(f, context, value, cls, origin) + } + + /** INTERNAL -- Public for testing only. + * Holds if `call` is a call to `method` of the form `super(...).method(...)`; `self` is the self variable of the super-bound method. + */ + predicate super_method_call(PointsToContext context, CallNode call, EssaVariable self, FunctionObject method) { + exists(ControlFlowNode func, SuperBoundMethod bound_method | + call.getFunction() = func and + points_to(func, context, bound_method, _, _) and + method = bound_method.getFunction(context) and + self = bound_method.getSelf() + ) + } + + /** INTERNAL -- Use `FunctionObject.getAMethodCall()`. Holds if the callee of `call` is an attribute access whose receiver type (see `receiver_type_for`) resolves the attribute name to `func` by class attribute lookup. */ + pragma [nomagic] + predicate plain_method_call(FunctionObject func, PointsToContext context, CallNode call) { + exists(ControlFlowNode attr, ClassObject cls, string name | + attr = call.getFunction() and + receiver_type_for(cls, name, attr, context) and + Types::class_attribute_lookup(cls, name, func, _, _) + ) + } + + /** INTERNAL -- Do not use; part of the internal API. + * + * Holds if class `cls` is the receiver type of an attribute access `n`. 
+ * Also bind the name of the attribute. + */ + predicate receiver_type_for(ClassObject cls, string name, ControlFlowNode n, PointsToContext context) { + /* `super().meth()` is not a method on `super` */ + cls != theSuperType() and + exists(Object o | + /* list.__init__() is not a call to type.__init__() */ + not o instanceof ClassObject | + points_to(n.(AttrNode).getObject(name), context, o, cls, _) + ) + or + /* Alternatively `n` is a `PlaceHolder` name node that uses a variable called `name`, and `cls` is the class enclosing the scope of the placeholder's scope. */ exists(PlaceHolder p, Variable v | + n.getNode() = p and n.(NameNode).uses(v) and name = v.getId() and + p.getScope().getScope() = cls.getPyClass() and context.appliesTo(n) + ) + } + + /** Gets the argument for the parameter at `position` where `call` is a call to `func`. + * Handles method calls, such that for a call `x.foo()` with `position` equal to 0, the result is `x`, + * and the explicit arguments of a method call are shifted by one (`position - 1`). + */ + pragma [nomagic] + ControlFlowNode get_argument_for_call_by_position(FunctionObject func, PointsToContext context, CallNode call, int position) { + method_call(func, context, call) and + ( + result = call.getArg(position-1) + or + position = 0 and result = call.getFunction().(AttrNode).getObject() + ) + or + function_call(func, context, call) and + result = call.getArg(position) + } + + /** Holds if `value` is the value attached to the keyword argument `name` in `call`. Only values whose basic block dominates the basic block of `call` are considered. */ + predicate keyword_value_for_call(CallNode call, string name, ControlFlowNode value) { + exists(Keyword kw | + call.getNode().getAKeyword() = kw | + kw.getArg() = name and kw.getValue() = value.getNode() and + value.getBasicBlock().dominates(call.getBasicBlock()) + ) + } + + /** Gets the value for the keyword argument `name` in `call`, where `call` calls `func` in context. 
*/ + ControlFlowNode get_argument_for_call_by_name(FunctionObject func, PointsToContext context, CallNode call, string name) { + call = get_a_call(func, context) and + keyword_value_for_call(call, name, result) + } + + /** Holds if `func` implicitly returns the `None` object: it is not a generator and either has no returned node but has a normal exit, or contains a `return` statement without a value. `none_` is the `None` object and `noneType` is its type. */ + predicate implicitly_returns(PyFunctionObject func, Object none_, ClassObject noneType) { + noneType = theNoneType() and not func.getFunction().isGenerator() and none_ = theNoneObject() and + ( + not exists(func.getAReturnedNode()) and exists(func.getFunction().getANormalExit()) + or + exists(Return ret | ret.getScope() = func.getFunction() and not exists(ret.getValue())) + ) + } + + } + + cached module Flow { + + /** Model the transfer of values at scope-entry points. Transfer from `(pred_var, pred_context)` to `(succ_def, succ_context)`. + * Three sources are combined: an earlier scope, the callsite of the callee, and (within a single import context) the entry to a class body. */ + cached predicate scope_entry_value_transfer(EssaVariable pred_var, PointsToContext pred_context, ScopeEntryDefinition succ_def, PointsToContext succ_context) { + scope_entry_value_transfer_from_earlier(pred_var, pred_context, succ_def, succ_context) + or + callsite_entry_value_transfer(pred_var, pred_context, succ_def, succ_context) + or + pred_context.isImport() and pred_context = succ_context and + class_entry_value_transfer(pred_var, succ_def) + } + + /** Helper for `scope_entry_value_transfer`. Transfer of values from a temporally earlier scope to later scope. + * Earlier and later scopes are, for example, a module and functions in that module, or an __init__ method and another method. 
*/ + pragma [noinline] + private predicate scope_entry_value_transfer_from_earlier(EssaVariable pred_var, PointsToContext pred_context, ScopeEntryDefinition succ_def, PointsToContext succ_context) { + exists(Scope pred_scope, Scope succ_scope | + BaseFlow::scope_entry_value_transfer_from_earlier(pred_var, pred_scope, succ_def, succ_scope) and + succ_context.appliesToScope(succ_scope) + | + /* Both scopes are seen in the same runtime context. */ succ_context.isRuntime() and succ_context = pred_context + or + /* The earlier scope ran at import time; the later one runs from the runtime context or from a call (see below). */ pred_context.isImport() and pred_scope instanceof ImportTimeScope and + (succ_context.fromRuntime() or + /* A call made at import time, but from another module. Assume this module has been fully imported. */ + succ_context.isCall() and exists(CallNode call | succ_context.fromCall(call, _) and call.getEnclosingModule() != pred_scope)) + or + /* If predecessor scope is main, then we assume that any global defined at most once + * (the check below rejects variables with more than one store) + * is available to all functions. Although not strictly true, this gives less surprising + * results in practice. */ + pred_context.isMain() and pred_scope instanceof Module and succ_context.fromRuntime() and + not strictcount(pred_var.getSourceVariable().(Variable).getAStore()) > 1 + ) + or + /* A non-escaping global variable: its import-time value at the root call of `succ_context` is carried into that context. */ exists(NonEscapingGlobalVariable var | + var = pred_var.getSourceVariable() and var = succ_def.getSourceVariable() and + pred_var.getAUse() = succ_context.getRootCall() and pred_context.isImport() and + succ_context.appliesToScope(succ_def.getScope()) + ) + } + + /** Helper for `scope_entry_value_transfer`. + * Transfer of values from the callsite to the callee, for enclosing variables, but not arguments/parameters. 
*/ + pragma [noinline] + private predicate callsite_entry_value_transfer(EssaVariable caller_var, PointsToContext caller_context, ScopeEntryDefinition entry_def, PointsToContext callee_context) { + /* `caller_var` is the ESSA variable for `var` that is used at the call site; `entry_def` is the entry definition of the same variable in the called function. */ exists(CallNode callsite, FunctionObject f, Variable var | + scope_entry_function_and_variable(entry_def, f, var) and + callee_context.fromCall(callsite, f, caller_context) and + caller_var.getSourceVariable() = var and + caller_var.getAUse() = callsite + ) + } + + /** Helper for callsite_entry_value_transfer to improve join-order. Holds if `entry_def` is the definition of `var` at the entry node of function `f`, where `var` is not scoped to `f` itself. */ + private predicate scope_entry_function_and_variable(ScopeEntryDefinition entry_def, FunctionObject f, Variable var) { + exists(Function func | + func = f.getFunction() | + entry_def.getDefiningNode() = func.getEntryNode() and + not var.getScope() = func and + entry_def.getSourceVariable() = var + ) + } + + /** Helper for `scope_entry_value_transfer`. Holds if `pred_var` is used at a node that has an entry edge, in an import-time scope, to the defining node of `succ_def`, and both refer to the same source variable. */ + private predicate class_entry_value_transfer(EssaVariable pred_var, ScopeEntryDefinition succ_def) { + exists(ImportTimeScope scope, ControlFlowNode class_def | + class_def = pred_var.getAUse() and + scope.entryEdge(class_def, succ_def.getDefiningNode()) and + pred_var.getSourceVariable() = succ_def.getSourceVariable() + ) + } + + /** Gets the ESSA variable from which `def` acquires its value, when a call occurs. + * Helper for `callsite_points_to`. 
*/ + cached predicate callsite_exit_value_transfer(EssaVariable callee_var, PointsToContext callee_context, CallsiteRefinement def, PointsToContext callsite_context) { + /* `callee_var` is the variable, in the function called at `def`, that holds the source variable of `def` at function exit. */ exists(FunctionObject func, Variable var | + callee_context.fromCall(def.getCall(), func, callsite_context) and + def.getSourceVariable() = var and + var_at_exit(var, func, callee_var) + ) + } + + /* Helper for callsite_exit_value_transfer: `evar` is an ESSA variable for `var` (which is not a `LocalVariable`) in the scope of `func` that satisfies `BaseFlow::reaches_exit`. */ + private predicate var_at_exit(Variable var, FunctionObject func, EssaVariable evar) { + not var instanceof LocalVariable and + evar.getSourceVariable() = var and + evar.getScope() = func.getFunction() and + BaseFlow::reaches_exit(evar) + } + + /** Holds if the `(argument, caller)` pair matches up with `(param, callee)` pair across call. Positional argument `n` is matched with parameter `n + offset`, where `offset` comes from `callsite_calls_function`. */ + cached predicate callsite_argument_transfer(ControlFlowNode argument, PointsToContext caller, ParameterDefinition param, PointsToContext callee) { + exists(CallNode call, PyFunctionObject func, int n, int offset | + callsite_calls_function(call, caller, func, callee, offset) and + argument = call.getArg(n) and + param = func.getParameter(n+offset) + ) + } + + /** Holds if `call` in context `caller` invokes the Python function `func` in context `callee`. `parameter_offset` is 0 for function calls, and 1 for method calls and for class instantiations (where `func` is the `__init__` found by class attribute lookup). */ cached predicate callsite_calls_function(CallNode call, PointsToContext caller, PyFunctionObject func, PointsToContext callee, int parameter_offset) { + /* Functions */ + callee.fromCall(call, func, caller) and + function_call(func, caller, call) and + parameter_offset = 0 + or + /* Methods */ + callee.fromCall(call, func, caller) and + method_call(func, caller, call) and + parameter_offset = 1 + or + /* Classes */ + exists(ClassObject cls | + instantiation(call, caller, cls) and + Types::class_attribute_lookup(cls, "__init__", func, _, _) and + parameter_offset = 1 and + callee.fromCall(call, caller) + ) + } + + /** Helper for `import_star_points_to`. 
*/ + cached predicate module_and_name_for_import_star(ModuleObject mod, string name, ImportStarRefinement def, PointsToContext context) { + /* `mod` is the module that the `import *` node of `def` points to; `name` is the name of the variable that `def` refines. */ points_to(def.getDefiningNode().(ImportStarNode).getModule(), context, mod, _, _) and + name = def.getSourceVariable().getName() + } + + /** Holds if `def` is technically a definition of `var`, but the `from ... import *` does not in fact define `var`. + * This is the case when `module_exports_boolean` is false for the name, or when the module does not declare anything in `__all__` and the name starts with an underscore. */ + cached predicate variable_not_redefined_by_import_star(EssaVariable var, PointsToContext context, ImportStarRefinement def) { + var = def.getInput() and + exists(ModuleObject mod | + points_to(def.getDefiningNode().(ImportStarNode).getModule(), context, mod, _, _) | + module_exports_boolean(mod, var.getSourceVariable().getName()) = false + or + exists(Module m, string name | + m = mod.getModule() and name = var.getSourceVariable().getName() | + not m.declaredInAll(_) and name.charAt(0) = "_" + ) + ) + } + + } + + private module SSA { + + + /** Holds if the phi-function `phi` refers to `(value, cls, origin)` given the context `context`. + * An input contributes if the edge from its predecessor block is a controlled reachable edge in `context`, or if no condition block controls that edge at all. The short-circuit input of `phi` always contributes. */ + pragma [noinline] + private predicate ssa_phi_points_to(PhiFunction phi, PointsToContext context, Object value, ClassObject cls, ObjectOrCfg origin) { + exists(EssaVariable input, BasicBlock pred | + input = phi.getInput(pred) and + ssa_variable_points_to(input, context, value, cls, origin) + | + Layer::controlledReachableEdge(pred, phi.getBasicBlock(), context) + or + not exists(ConditionBlock guard | guard.controlsEdge(pred, phi.getBasicBlock(), _)) + ) + or + ssa_variable_points_to(phi.getShortCircuitInput(), context, value, cls, origin) + } + + /** Holds if the ESSA definition `def` refers to `(value, cls, origin)` given the context `context`. 
*/ + predicate ssa_definition_points_to(EssaDefinition def, PointsToContext context, Object value, ClassObject cls, ObjectOrCfg origin) { + /* Dispatch on the kind of definition: phi-function, node definition, filter definition or node refinement. */ ssa_phi_points_to(def, context, value, cls, origin) + or + ssa_node_definition_points_to(def, context, value, cls, origin) + or + Filters::ssa_filter_definition_points_to(def, context, value, cls, origin) + or + ssa_node_refinement_points_to(def, context, value, cls, origin) + } + + /** Holds if the node definition `def` refers to `(value, cls, origin)`, without checking that the defining block is reachable. This is the union of the per-kind predicates for assignments, parameters, `self` parameters, deletions, scope entries, implicit submodules, `__name__` and iteration variables. */ pragma [nomagic] + private predicate ssa_node_definition_points_to_unpruned(EssaNodeDefinition def, PointsToContext context, Object value, ClassObject cls, ObjectOrCfg origin) { + assignment_points_to(def, context, value, cls, origin) + or + parameter_points_to(def, context, value, cls, origin) + or + self_parameter_points_to(def, context, value, cls, origin) + or + delete_points_to(def, context, value, cls, origin) + or + scope_entry_points_to(def, context, value, cls, origin) + or + implicit_submodule_points_to(def, context, value, cls, origin) + or + module_name_points_to(def, context, value, cls, origin) + or + iteration_definition_points_to(def, context, value, cls, origin) + /* + * No points-to for non-local function entry definitions yet. 
+ */ + } + + /** Holds if the basic block containing the defining node of `def` is reachable, in some context, according to `Layer::reachableBlock`. */ pragma [noinline] + private predicate reachable_definitions(EssaNodeDefinition def) { + Layer::reachableBlock(def.getDefiningNode().getBasicBlock(), _) + } + + /** Holds if the node definition `def` refers to `(value, cls, origin)` given the context `context`, restricted to definitions that satisfy `reachable_definitions`. */ pragma [noinline] + private predicate ssa_node_definition_points_to(EssaNodeDefinition def, PointsToContext context, Object value, ClassObject cls, ObjectOrCfg origin) { + reachable_definitions(def) and + ssa_node_definition_points_to_unpruned(def, context, value, cls, origin) + } + + /** Holds if the node refinement `def` refers to `(value, cls, origin)` given the context `context`. This is the union of the per-kind predicates for method callsites, `import *`, attribute assignments, callsites, arguments, attribute deletions and uni-edged phi nodes. */ pragma [noinline] + private predicate ssa_node_refinement_points_to(EssaNodeRefinement def, PointsToContext context, Object value, ClassObject cls, ObjectOrCfg origin) { + method_callsite_points_to(def, context, value, cls, origin) + or + import_star_points_to(def, context, value, cls, origin) + or + attribute_assignment_points_to(def, context, value, cls, origin) + or + callsite_points_to(def, context, value, cls, origin) + or + argument_points_to(def, context, value, cls, origin) + or + attribute_delete_points_to(def, context, value, cls, origin) + or + Filters::uni_edged_phi_points_to(def, context, value, cls, origin) + } + + /** Points-to for normal assignments `def = ...`. 
*/ + pragma [noinline] + private predicate assignment_points_to(AssignmentDefinition def, PointsToContext context, Object value, ClassObject cls, ControlFlowNode origin) { + /* The variable takes whatever the assigned value points to in the same context. */ points_to(def.getValue(), context, value, cls, origin) + } + + /** Helper for `parameter_points_to`. Holds if `def` receives `(value, cls, origin)` from a positional argument at a call site, or, in the runtime context, is an ordinary parameter (not `self`, varargs or kwargs) and is therefore unknown. */ + pragma [noinline] + private predicate positional_parameter_points_to(ParameterDefinition def, PointsToContext context, Object value, ClassObject cls, ControlFlowNode origin) { + exists(PointsToContext caller, ControlFlowNode arg | + points_to(arg, caller, value, cls, origin) and + Flow::callsite_argument_transfer(arg, caller, def, context) + ) + or + not def.isSelf() and not def.getParameter().isVarargs() and not def.getParameter().isKwargs() and + context.isRuntime() and value = unknownValue() and cls = theUnknownType() and origin = def.getDefiningNode() + } + + /** Helper for `parameter_points_to`. Holds if `def` receives `(value, cls, origin)` from the keyword argument of the same name at the call that created `context`. */ + pragma [noinline] + private predicate named_parameter_points_to(ParameterDefinition def, PointsToContext context, Object value, ClassObject cls, ControlFlowNode origin) { + exists(CallNode call, PointsToContext caller, FunctionObject func, string name | + context.fromCall(call, func, caller) and + def.getParameter() = func.getFunction().getArgByName(name) and + points_to(call.getArgByName(name), caller, value, cls, origin) + ) + } + + /** Points-to for parameter. `def foo(param): ...`. 
*/ + pragma [noinline] + private predicate parameter_points_to(ParameterDefinition def, PointsToContext context, Object value, ClassObject cls, ControlFlowNode origin) { + /* A parameter can get its value from a positional argument, a keyword argument, its default value, or the special handling of varargs, kwargs and unmatched parameters. */ positional_parameter_points_to(def, context, value, cls, origin) + or + named_parameter_points_to(def, context, value, cls, origin) + or + default_parameter_points_to(def, context, value, cls, origin) + or + special_parameter_points_to(def, context, value, cls, origin) + } + + /** Helper for parameter_points_to. Holds if `(value, cls, origin)` is what the default value of `def` points to, and `context` is one in which the default may be used. */ + private predicate default_parameter_points_to(ParameterDefinition def, PointsToContext context, Object value, ClassObject cls, ControlFlowNode origin) { + default_value_points_to(def, value, cls, origin) and + context_for_default_value(def, context) + } + + /** Helper for default_parameter_points_to. Holds if the default value of `def` points to `(value, cls, origin)` in an import context. */ + pragma [noinline] + private predicate default_value_points_to(ParameterDefinition def, Object value, ClassObject cls, ControlFlowNode origin) { + exists(PointsToContext imp | imp.isImport() | points_to(def.getDefault(), imp, value, cls, origin)) + } + + /** Helper for default_parameter_points_to. Holds if `context` is the runtime context, or a call context whose call supplies neither a positional nor a keyword argument for the parameter and has no `**kwargs` or `*args` argument. */ + pragma [noinline] + private predicate context_for_default_value(ParameterDefinition def, PointsToContext context) { + context.isRuntime() + or + exists(PointsToContext caller, CallNode call, FunctionObject func, int n | + context.fromCall(call, func, caller) and + func.getFunction().getArg(n) = def.getParameter() and + not exists(call.getArg(n)) and + not exists(call.getArgByName(def.getParameter().asName().getId())) and + not exists(call.getNode().getKwargs()) and + not exists(call.getNode().getStarargs()) + ) + } + + /** Helper for parameter_points_to */ + pragma [noinline] + private predicate special_parameter_points_to(ParameterDefinition def, PointsToContext context, Object value, ClassObject cls, ControlFlowNode origin) { + context.isRuntime() and + exists(ControlFlowNode param | + param = def.getDefiningNode() | + varargs_points_to(param, cls) and value = theEmptyTupleObject() and 
origin = param + or + varargs_points_to(param, cls) and value = param and origin = param + or + kwargs_points_to(param, cls) and value = param and origin = param + ) + or + /* In a call context: a non-self parameter with no matching positional or keyword argument is unknown when the call passes `**kwargs` or `*args`. */ exists(PointsToContext caller, CallNode call, FunctionObject func, Parameter p | + context.fromCall(call, func, caller) and + func.getFunction().getAnArg() = p and p = def.getParameter() and + not p.isSelf() and + not exists(call.getArg(p.getPosition())) and + not exists(call.getArgByName(p.getName())) and + (exists(call.getNode().getKwargs()) or exists(call.getNode().getStarargs())) and + value = unknownValue() and cls = theUnknownType() and origin = def.getDefiningNode() + ) + } + + /** Holds if the `(obj, caller)` pair matches up with `(self, callee)` pair across call. + * For a plain method call `obj` is the variable with a source use as the receiver of the attribute access; for a `super()` call it is the self variable reported by `Calls::super_method_call`. */ + pragma [noinline] + private predicate callsite_self_argument_transfer(EssaVariable obj, PointsToContext caller, ParameterDefinition self, PointsToContext callee) { + self.isSelf() and + exists(CallNode call, PyFunctionObject meth | + meth.getParameter(0) = self and + callee.fromCall(call, caller) | + Calls::plain_method_call(meth, caller, call) and + obj.getASourceUse() = call.getFunction().(AttrNode).getObject() + or + Calls::super_method_call(caller, call, obj, meth) + ) + } + + /** Points-to for self parameter: `def meth(self, ...): ...`. */ + pragma [noinline] + private predicate self_parameter_points_to(ParameterDefinition def, PointsToContext context, Object value, ClassObject cls, ControlFlowNode origin) { + def.isSelf() and + exists(FunctionObject meth, Function scope | + meth.getFunction() = scope | + def.getDefiningNode().getScope() = scope and + context.isRuntime() and context.appliesToScope(scope) and + scope.getScope() = cls.getPyClass() and + Types::concrete_class(cls) and + value = def.getDefiningNode() and origin = value and + /* We want to allow decorated functions, otherwise we lose a lot of useful information. + * However, we want to exclude any function whose arguments are permuted by the decorator. 
+ * In general we can't do that, but we can special case the most common ones. + */ + neither_class_nor_static_method(scope) + ) + or + /* `self` takes the value of the receiver variable at a call site. */ exists(EssaVariable obj, PointsToContext caller | + ssa_variable_points_to(obj, caller, value, cls, origin) and + callsite_self_argument_transfer(obj, caller, def, context) + ) + or + cls_parameter_points_to(def, context, value, cls, origin) + } + + /** Holds if `f` has no decorator, or has a decorator node that either points to something other than the `staticmethod` and `classmethod` types or is not a simple name. */ private predicate neither_class_nor_static_method(Function f) { + not exists(f.getADecorator()) + or + exists(ControlFlowNode deco | + deco = f.getADecorator().getAFlowNode() | + exists(Object o | + points_to(deco, _, o, _, _) | + not o = theStaticMethodType() and + not o = theClassMethodType() + ) + or not deco instanceof NameNode + ) + } + + + /** Points-to for the first parameter of a method reached through a class-method call (see `cls_method_object_points_to`). If the receiver is a class, the parameter is that class; otherwise it is the class of the receiver, whose own class is its metaclass. */ pragma [noinline] + private predicate cls_parameter_points_to(ParameterDefinition def, PointsToContext context, Object value, ClassObject cls, ControlFlowNode origin) { + def.isSelf() and + exists(CallNode call, PyFunctionObject meth, Object obj, ClassObject objcls, PointsToContext caller | + context.fromCall(call, caller) and + cls_method_object_points_to(call, caller, meth, obj, objcls, origin) and + def.getScope() = meth.getFunction() + | + obj instanceof ClassObject and value = obj and cls = objcls + or + not obj instanceof ClassObject and value = objcls and cls = Types::class_get_meta_class(objcls) + ) + } + + /* Factor out part of `cls_parameter_points_to` to prevent bad join-order. Holds if `call` is a class-method call to `meth` through an attribute whose receiver points to `(value, cls, origin)`. */ + pragma [noinline] + private predicate cls_method_object_points_to(CallNode call, PointsToContext context, PyFunctionObject meth, Object value, ClassObject cls, ControlFlowNode origin) { + exists(AttrNode attr | + class_method_call(_, attr, meth, context, call) and + points_to(attr.getObject(), context, value, cls, origin) + ) + } + + /** Points-to for deletion: `del name`. 
*/ + pragma [noinline] + private predicate delete_points_to(DeletionDefinition def, PointsToContext context, Object value, ClassObject cls, ControlFlowNode origin) { + /* After deletion the variable is undefined, in every context that applies to the scope of the definition. */ value = undefinedVariable() and cls = theUnknownType() and origin = def.getDefiningNode() and context.appliesToScope(def.getScope()) + } + + /** Implicit "definition" of the names of submodules at the start of an `__init__.py` file. + * + * PointsTo isn't exactly how the interpreter works, but is the best approximation we can manage statically. + * + * `value` is the submodule, of the package whose init module contains `def`, that has the name of the defined variable; + * it is also its own origin. This holds in import contexts only. + */ + pragma [noinline] + private predicate implicit_submodule_points_to(ImplicitSubModuleDefinition def, PointsToContext context, Object value, ClassObject cls, ObjectOrCfg origin) { + exists(PackageObject package | + package.getInitModule().getModule() = def.getDefiningNode().getScope() | + value = package.submodule(def.getSourceVariable().getName()) and + cls = theModuleType() and + origin = value and + context.isImport() + ) + } + + /** Implicit "definition" of `__name__` at the start of a module. 
*/ + pragma [noinline] + private predicate module_name_points_to(ScopeEntryDefinition def, PointsToContext context, Object value, ClassObject cls, ObjectOrCfg origin) { + def.getVariable().getName() = "__name__" and + exists(Module m | + m = def.getScope() + | + /* In an import context `__name__` is the module's own name ... */ value = module_dunder_name(m) and context.isImport() + or + /* ... and in the main context it is the string "__main__". */ value = object_for_string("__main__") and context.isMain() and context.appliesToScope(m) + ) and + cls = theStrType() and origin = def.getDefiningNode() + } + + /** Gets the string object for the name of module `m`: the package name if `m` is a package `__init__` module, and the module name otherwise. */ private Object module_dunder_name(Module m) { + exists(string name | + result = object_for_string(name) | + if m.isPackageInit() then + name = m.getPackage().getName() + else + name = m.getName() + ) + } + + /** Definition of iteration variable in loop. Only the case where the iterated sequence points to the unknown value is modelled here, in which case the variable is unknown too. */ + pragma [noinline] + private predicate iteration_definition_points_to(IterationDefinition def, PointsToContext context, Object value, ClassObject cls, ObjectOrCfg origin) { + points_to(def.getSequence(), context, unknownValue(), _, _) and + value = unknownValue() and cls = theUnknownType() and origin = def.getDefiningNode() + } + + /** Points-to for implicit variable declarations at scope-entry. 
*/ + pragma [noinline] + private predicate scope_entry_points_to(ScopeEntryDefinition def, PointsToContext context, Object value, ClassObject cls, ObjectOrCfg origin) { + /* Transfer from another scope */ + exists(EssaVariable var, PointsToContext outer | + Flow::scope_entry_value_transfer(var, outer, def, context) and + ssa_variable_points_to(var, outer, value, cls, origin) + ) + or + /* Undefined variable: a global at module entry, or a local in an import, runtime or main context. The names `__name__` and `*` are excluded. */ + exists(Scope scope | + not def.getVariable().getName() = "__name__" and + not def.getVariable().getName() = "*" and + def.getScope() = scope and context.appliesToScope(scope) | + def.getSourceVariable() instanceof GlobalVariable and scope instanceof Module + or + def.getSourceVariable() instanceof LocalVariable and (context.isImport() or context.isRuntime() or context.isMain()) + ) and + value = undefinedVariable() and cls = theUnknownType() and origin = def.getDefiningNode() + or + /* Builtin not defined in outer scope: the enclosing module has no ESSA variable for this global, so the name resolves to the builtin, which is its own origin. */ + exists(Module mod, GlobalVariable var | + var = def.getSourceVariable() and + mod = def.getScope().getEnclosingModule() and + context.appliesToScope(def.getScope()) and + not exists(EssaVariable v | v.getSourceVariable() = var and v.getScope() = mod) and + builtin_name_points_to(var.getId(), value, cls) and origin = value + ) + } + + /** Points-to for a variable (possibly) redefined by a call: + * `var = ...; foo(); use(var)` + * Where var may be redefined in call to `foo` if `var` escapes (is global or non-local). 
+ */ + pragma [noinline] + private predicate callsite_points_to(CallsiteRefinement def, PointsToContext context, Object value, ClassObject cls, ObjectOrCfg origin) { + /* The value held by the variable at the exit of the callee. */ exists(EssaVariable var, PointsToContext callee | + Flow::callsite_exit_value_transfer(var, callee, def, context) and + ssa_variable_points_to(var, callee, value, cls, origin) + ) + or + callsite_points_to_python(def, context, value, cls, origin) + or + callsite_points_to_builtin(def, context, value, cls, origin) + } + + /** Holds if the input value of `def` passes through the call unchanged: the call is untrackable in `context`, and it is a call to a Python function for which `modifies_escaping_variable` does not hold with respect to the variable. */ pragma [noinline] + private predicate callsite_points_to_python(CallsiteRefinement def, PointsToContext context, Object value, ClassObject cls, ObjectOrCfg origin) { + ssa_variable_points_to(def.getInput(), context, value, cls, origin) and + exists(CallNode call, PythonSsaSourceVariable var | + call = def.getCall() and + var = def.getSourceVariable() and + context.untrackableCall(call) and + exists(PyFunctionObject modifier | + call = get_a_call(modifier, context) and + not modifies_escaping_variable(modifier, var) + ) + ) + } + + /** Holds if `var` is redefined at some call site and the body of `modifier` contains a store to `var`. */ private predicate modifies_escaping_variable(FunctionObject modifier, PythonSsaSourceVariable var) { + exists(var.redefinedAtCallSite()) and + modifier.getFunction().getBody().contains(var.(Variable).getAStore()) + } + + /** Holds if the input value of `def` passes through the call unchanged because the call is a call to a builtin callable in some context. */ pragma [noinline] + private predicate callsite_points_to_builtin(CallsiteRefinement def, PointsToContext context, Object value, ClassObject cls, ObjectOrCfg origin) { + ssa_variable_points_to(def.getInput(), context, value, cls, origin) and + exists(CallNode call | + call = def.getCall() | + // An identifiable callee is a builtin + exists(BuiltinCallable opaque | get_a_call(opaque, _) = call) + ) + } + + /** Pass through for `self` for the implicit re-definition of `self` in `self.foo()`. 
*/ + private predicate method_callsite_points_to(MethodCallsiteRefinement def, PointsToContext context, Object value, ClassObject cls, ObjectOrCfg origin) { + /* The value of self remains the same, only the attributes may change */ + ssa_variable_points_to(def.getInput(), context, value, cls, origin) + } + + /** Points-to for `from ... import *`: either the attribute exported by the imported module, or the value the variable held before the import when the import does not redefine it. */ + private predicate import_star_points_to(ImportStarRefinement def, PointsToContext context, Object value, ClassObject cls, ObjectOrCfg origin) { + exists(ModuleObject mod, string name | + Flow::module_and_name_for_import_star(mod, name, def, context) | + /* Attribute from imported module */ + module_exports(mod, name) and + Layer::module_attribute_points_to(mod, name, value, cls, origin) + ) + or + exists(EssaVariable var | + /* Retain value held before import */ + Flow::variable_not_redefined_by_import_star(var, context, def) and + ssa_variable_points_to(var, context, value, cls, origin) + ) + } + + /** Attribute assignments have no effect as far as value tracking is concerned, except for `__class__`. For an assignment to `__class__` the value of the input variable is kept, the class becomes the object that the assigned expression points to (in any context) and the origin becomes the assignment itself. */ + pragma [noinline] + private predicate attribute_assignment_points_to(AttributeAssignment def, PointsToContext context, Object value, ClassObject cls, ObjectOrCfg origin) { + if def.getName() = "__class__" then + ssa_variable_points_to(def.getInput(), context, value, _, _) and points_to(def.getValue(), _, cls, _,_) and + origin = def.getDefiningNode() + else + ssa_variable_points_to(def.getInput(), context, value, cls, origin) + } + + /** Ignore the effects of calls on their arguments: the refined variable refers to whatever its input refers to. PointsTo is an approximation, but attempting to improve accuracy would be very expensive for very little gain. */ + pragma [noinline] + private predicate argument_points_to(ArgumentRefinement def, PointsToContext context, Object value, ClassObject cls, ObjectOrCfg origin) { + ssa_variable_points_to(def.getInput(), context, value, cls, origin) + } + + /** Attribute deletions have no effect as far as value tracking is concerned. 
*/ + pragma [noinline] + private predicate attribute_delete_points_to(EssaAttributeDeletion def, PointsToContext context, Object value, ClassObject cls, ObjectOrCfg origin) { + /* The variable itself is unchanged by `del var.attr` */ ssa_variable_points_to(def.getInput(), context, value, cls, origin) + } + + /* Data flow for attributes. These mirror the "normal" points-to predicates. + * For each points-to predicate `xxx_points_to(XXX def, PointsToContext context, Object value, ClassObject cls, ObjectOrCfg origin)` + * there is an equivalent predicate that tracks the values in attributes: + * `xxx_named_attribute_points_to(XXX def, PointsToContext context, string name, Object value, ClassObject cls, ControlFlowNode origin)` + * */ + + /** INTERNAL -- Public for testing only. + * + * Holds if the attribute `name` of the ssa variable `var` refers to `(value, cls, origin)`. + */ + predicate ssa_variable_named_attribute_points_to(EssaVariable var, PointsToContext context, string name, Object value, ClassObject cls, ControlFlowNode origin) { + ssa_definition_named_attribute_points_to(var.getDefinition(), context, name, value, cls, origin) + } + + /** Helper for `ssa_variable_named_attribute_points_to`. Holds if the attribute `name` of the variable defined by `def` refers to `(value, cls, origin)`; delegates to the predicates for phi-functions, node definitions, node refinements and (in module `Filters`) edge refinements. */ + private predicate ssa_definition_named_attribute_points_to(EssaDefinition def, PointsToContext context, string name, Object value, ClassObject cls, ControlFlowNode origin) { + ssa_phi_named_attribute_points_to(def, context, name, value, cls, origin) + or + ssa_node_definition_named_attribute_points_to(def, context, name, value, cls, origin) + or + ssa_node_refinement_named_attribute_points_to(def, context, name, value, cls, origin) + or + Filters::ssa_filter_definition_named_attribute_points_to(def, context, name, value, cls, origin) + } + + /** Holds if the attribute `name` of the ssa phi-function definition `phi` refers to `(value, cls, origin)`, that is, if the attribute of any input to `phi` does. 
*/ + pragma[noinline] + private predicate ssa_phi_named_attribute_points_to(PhiFunction phi, PointsToContext context, string name, Object value, ClassObject cls, ControlFlowNode origin) { + ssa_variable_named_attribute_points_to(phi.getAnInput(), context, name, value, cls, origin) + } + + /** Helper for `ssa_definition_named_attribute_points_to`. Covers node definitions: assignments, deletions, `self` parameters and scope entries. */ + pragma[noinline] + private predicate ssa_node_definition_named_attribute_points_to(EssaNodeDefinition def, PointsToContext context, string name, Object value, ClassObject cls, ControlFlowNode origin) { + assignment_named_attribute_points_to(def, context, name, value, cls, origin) + or + delete_named_attribute_points_to(def, context, name, value, cls, origin) + or + self_parameter_named_attribute_points_to(def, context, name, value, cls, origin) + or + scope_entry_named_attribute_points_to(def, context, name, value, cls, origin) + } + + /** Helper for `ssa_definition_named_attribute_points_to`. Covers node refinements: attribute assignments, attribute deletions, `import *`, calls on `self` and argument passing. */ + pragma[noinline] + private predicate ssa_node_refinement_named_attribute_points_to(EssaNodeRefinement def, PointsToContext context, string name, Object value, ClassObject cls, ControlFlowNode origin) { + attribute_assignment_named_attribute_points_to(def, context, name, value, cls, origin) + or + attribute_delete_named_attribute_points_to(def, context, name, value, cls, origin) + or + import_star_named_attribute_points_to(def, context, name, value, cls, origin) + or + self_callsite_named_attribute_points_to(def, context, name, value, cls, origin) + or + argument_named_attribute_points_to(def, context, name, value, cls, origin) + } + + /** Attribute points-to for scope-entry definitions. Either the attribute is transferred from the corresponding variable in the outer scope, or `def` is the `*` variable of a package's init module in an import context and `name` is an explicitly imported submodule of that package, in which case the attribute is recorded as `undefinedVariable()` of unknown type. */ pragma[noinline] + private predicate scope_entry_named_attribute_points_to(ScopeEntryDefinition def, PointsToContext context, string name, Object value, ClassObject cls, ControlFlowNode origin) { + exists(EssaVariable var, PointsToContext outer | + Flow::scope_entry_value_transfer(var, outer, def, context) and + ssa_variable_named_attribute_points_to(var, outer, name, value, cls, origin) + ) + 
or + origin = def.getDefiningNode() and + def.getSourceVariable().getName() = "*" and + context.isImport() and + exists(PackageObject package | + package.getInitModule().getModule() = def.getScope() | + explicitly_imported(package.submodule(name)) and + value = undefinedVariable() and + cls = theUnknownType() + ) + } + + /** Attribute points-to for a plain assignment: the attributes of the defined variable are the attributes of the assigned value. */ pragma[noinline] + private predicate assignment_named_attribute_points_to(AssignmentDefinition def, PointsToContext context, string name, Object value, ClassObject cls, ControlFlowNode origin) { + named_attribute_points_to(def.getValue(), context, name, value, cls, origin) + } + + /** Attribute points-to for an attribute assignment: the assigned attribute refers to whatever the assigned expression points to, and every other attribute is retained from the input variable. */ pragma[noinline] + private predicate attribute_assignment_named_attribute_points_to(AttributeAssignment def, PointsToContext context, string name, Object value, ClassObject cls, ControlFlowNode origin) { + points_to(def.getValue(), context, value, cls, origin) and name = def.getName() + or + ssa_variable_named_attribute_points_to(def.getInput(), context, name, value, cls, origin) and not name = def.getName() + } + + /** Gets `true` if `def` defines the attribute `name`, or `false` if `def` has the same shape but the function called is something other than `setattr`. + * + * `def` takes the form `setattr(use, "name", ...)` where `use` is the input to the definition. 
+ */ + private boolean sets_attribute(ArgumentRefinement def, string name) { + exists(ControlFlowNode func, Object obj | + two_args_first_arg_string(def, func, name) and + points_to(func, _, obj, _, _) | + obj = builtin_object("setattr") and result = true + or + obj != builtin_object("setattr") and result = false + ) + } + + /** Holds if `def` is defined by a call to `func` whose first argument (index 0) is a use of the input to `def` and whose second argument (index 1) is the string constant `name`. Note that, despite the name of this predicate, it is the second argument that must be a string. */ private predicate two_args_first_arg_string(ArgumentRefinement def, ControlFlowNode func, string name) { + exists(CallNode call | + call = def.getDefiningNode() and + call.getFunction() = func and + def.getInput().getAUse() = call.getArg(0) and + call.getArg(1).getNode().(StrConst).getText() = name + ) + } + + /** Attribute points-to for a variable passed as an argument to a call. The attribute `name` is retained from the input unless the call is `setattr(var, "name", value)`, in which case it refers to whatever the third argument (index 2) points to. */ pragma[noinline] + private predicate argument_named_attribute_points_to(ArgumentRefinement def, PointsToContext context, string name, Object value, ClassObject cls, ObjectOrCfg origin) { + /* The call does not have the `f(var, "name", ...)` shape for this `name` */ not two_args_first_arg_string(def, _, name) and ssa_variable_named_attribute_points_to(def.getInput(), context, name, value, cls, origin) + or + sets_attribute(def, name) = true and points_to(def.getDefiningNode().(CallNode).getArg(2), context, value, cls, origin) + or + sets_attribute(def, name) = false and ssa_variable_named_attribute_points_to(def.getInput(), context, name, value, cls, origin) + } + + /** Holds if the self variable in the callee (`(var, callee)`) refers to the same object as `def` immediately after the call, (`(def, caller)`). 
*/ + pragma[noinline] + private predicate callee_self_variable(EssaVariable var, PointsToContext callee, SelfCallsiteRefinement def, PointsToContext caller) { + exists(FunctionObject func, LocalVariable self | + callee.fromCall(def.getCall(), func, caller) and + BaseFlow::reaches_exit(var) and + var.getSourceVariable() = self and + self.isSelf() and + self.getScope() = func.getFunction() + ) + } + + /** Attribute points-to for `self` after a call on `self`: the attributes are those of the callee's `self` variable that reaches the exit of the callee (see `callee_self_variable`). */ pragma[noinline] + private predicate self_callsite_named_attribute_points_to(SelfCallsiteRefinement def, PointsToContext context, string name, Object value, ClassObject cls, ObjectOrCfg origin) { + exists(EssaVariable var, PointsToContext callee | + ssa_variable_named_attribute_points_to(var, callee, name, value, cls, origin) and + callee_self_variable(var, callee, def, context) + ) + } + + /** Gets the (temporally) preceding variable for `self`, e.g. `def` is in method `foo()` and `result` is in `__init__()`. The result is a `self` variable that reaches the exit of a method which `precedes` the method containing `def`. */ + private EssaVariable preceding_self_variable(ParameterDefinition def) { + def.isSelf() and + exists(Function preceding, Function method | + method = def.getScope() and + // Only methods + preceding.isMethod() and preceding.precedes(method) and + BaseFlow::reaches_exit(result) and result.getSourceVariable().(Variable).isSelf() and + result.getScope() = preceding + ) + } + + /** Attribute points-to for a `self` parameter. Either (in a runtime context, for a scope that executes in the runtime context) the attributes of `self` at the exit of a preceding method, or the attributes of the receiver object `obj` at a method call `obj.meth(...)` from which `context` is entered. */ pragma [noinline] + private predicate self_parameter_named_attribute_points_to(ParameterDefinition def, PointsToContext context, string name, Object value, ClassObject vcls, ControlFlowNode origin) { + context.isRuntime() and executes_in_runtime_context(def.getScope()) and + ssa_variable_named_attribute_points_to(preceding_self_variable(def), context, name, value, vcls, origin) + or + exists(FunctionObject meth, CallNode call, PointsToContext caller_context, ControlFlowNode obj | + meth.getFunction() = def.getScope() and + method_call(meth, caller_context, call) and + call.getFunction().(AttrNode).getObject() = obj and + context.fromCall(call, meth, caller_context) and + 
named_attribute_points_to(obj, caller_context, name, value, vcls, origin) + ) + } + + /** Never holds: no attribute values are tracked for a variable defined by a deletion. */ private predicate delete_named_attribute_points_to(DeletionDefinition def, PointsToContext context, string name, Object value, ClassObject cls, ControlFlowNode origin) { + none() + } + + /** Never holds: no attribute values are propagated through an attribute deletion. */ private predicate attribute_delete_named_attribute_points_to(EssaAttributeDeletion def, PointsToContext context, string name, Object value, ClassObject cls, ControlFlowNode origin) { + none() + } + + /* Helper for import_star_named_attribute_points_to. Holds if `def` refines the `*` variable at the import-star node `imp`, and the module expression of `imp` points to `mod` in `context`. */ + pragma [noinline] + private predicate star_variable_import_star_module(ImportStarRefinement def, ImportStarNode imp, PointsToContext context, ModuleObject mod) { + def.getSourceVariable().getName() = "*" and + exists(ControlFlowNode fmod | + fmod = imp.getModule() and + imp = def.getDefiningNode() and + points_to(fmod, context, mod, _, _) + ) + } + + /* Helper for import_star_named_attribute_points_to. Holds if `def` refines the `*` variable and the attribute `name` of its input refers to `(value, cls, origin)`. */ + pragma [noinline, nomagic] + private predicate ssa_star_variable_input_points_to(ImportStarRefinement def, PointsToContext context, string name, Object value, ClassObject cls, ControlFlowNode origin) { + exists(EssaVariable var | + ssa_star_import_star_input(def, var) and + ssa_variable_named_attribute_points_to(var, context, name, value, cls, origin) + ) + } + + /* Helper for ssa_star_variable_input_points_to. Holds if `def` refines the `*` variable and `var` is its input. */ + pragma [noinline] + private predicate ssa_star_import_star_input(ImportStarRefinement def, EssaVariable var) { + def.getSourceVariable().getName() = "*" and var = def.getInput() + } + + /** Attribute points-to for the `*` variable across `from ... import *`. If the imported module exports `name`, the attribute is the module's attribute (provided no variable called `name` exists in the importing scope); if the module is known not to export `name`, the attribute held before the import is retained. */ pragma [noinline] + private predicate import_star_named_attribute_points_to(ImportStarRefinement def, PointsToContext context, string name, Object value, ClassObject cls, ControlFlowNode origin) { + exists(ImportStarNode imp, ModuleObject mod | + star_variable_import_star_module(def, imp, context, mod) | + /* Attribute from imported module */ + module_exports_boolean(mod, name) = true and + exists(ObjectOrCfg obj | + 
Layer::module_attribute_points_to(mod, name, value, cls, obj) and + not exists(Variable v | v.getId() = name and v.getScope() = imp.getScope()) and + origin = origin_from_object_or_here(obj, imp) + ) + or + /* Retain value held before import */ + module_exports_boolean(mod, name) = false and + ssa_star_variable_input_points_to(def, context, name, value, cls, origin) + ) + } + + } + + private module Filters { + + /** Gets the operand of `expr`, where `expr` is a unary `not` expression. */ + private ControlFlowNode not_operand(ControlFlowNode expr) { + expr.(UnaryExprNode).getNode().getOp() instanceof Not and + result = expr.(UnaryExprNode).getOperand() + } + + /** Gets the value that `expr` evaluates to (when converted to a boolean) when `use` refers to `(val, cls, origin)` + * and `expr` contains `use` and both are contained within a test. + * Covers `isinstance`, `issubclass`, equality, callable and `hasattr` tests, plus the negation of anything that `evaluates` understands. */ + pragma [nomagic] + boolean evaluates_boolean(ControlFlowNode expr, ControlFlowNode use, PointsToContext context, Object val, ClassObject cls, ControlFlowNode origin) { + result = isinstance_test_evaluates_boolean(expr, use, context, val, cls, origin) + or + result = issubclass_test_evaluates_boolean(expr, use, context, val, cls, origin) + or + result = equality_test_evaluates_boolean(expr, use, context, val, cls, origin) + or + result = callable_test_evaluates_boolean(expr, use, context, val, cls, origin) + or + result = hasattr_test_evaluates_boolean(expr, use, context, val, cls, origin) + or + result = evaluates(not_operand(expr), use, context, val, cls, origin).booleanNot() + } + + /** Gets the value that `expr` evaluates to (when converted to a boolean) when `use` refers to `(val, cls, origin)` + * and `expr` contains `use` and both are contained within a test. 
*/ + pragma [nomagic] + boolean evaluates(ControlFlowNode expr, ControlFlowNode use, PointsToContext context, Object val, ClassObject cls, ControlFlowNode origin) { + result = evaluates_boolean(expr, use, context, val, cls, origin) + or + /* Integer-valued expressions: non-zero is true, zero is false */ result = true and evaluates_int(expr, use, context, val, cls, origin) != 0 + or + result = false and evaluates_int(expr, use, context, val, cls, origin) = 0 + or + result = truth_test_evaluates_boolean(expr, use, context, val, cls, origin) + } + + /** Gets both `true` and `false`; used as the result of a test whose outcome cannot be determined. */ private boolean maybe() { + result = true or result = false + } + + /** Gets the result of the `issubclass` test `expr` on `use`, when `use` refers to `(val, cls, origin)`. When the class argument points to a tuple, only a `true` result (for some element of the tuple) is produced. If `val` is the unknown value or the unknown type, both results are possible. */ pragma [nomagic] + private boolean issubclass_test_evaluates_boolean(ControlFlowNode expr, ControlFlowNode use, PointsToContext context, Object val, ClassObject cls, ControlFlowNode origin) { + points_to(use, context, val, cls, origin) and + exists(ControlFlowNode clsNode | + BaseFilters::issubclass(expr, clsNode, use) | + exists(ClassObject scls | + points_to(clsNode, context, scls, _, _) | + result = Types::is_improper_subclass_bool(val, scls) + ) + or exists(TupleObject t, ClassObject scls | + points_to(clsNode, context, t, _, _) and + result = Types::is_improper_subclass_bool(val, scls) and result = true + | + scls = t.getBuiltinElement(_) + or + points_to(t.getSourceElement(_), _, scls, _, _) + ) + or + val = unknownValue() and result = maybe() + or + val = theUnknownType() and result = maybe() + ) + } + + /** Gets the result of the `isinstance` test `expr` on `use`, when `use` refers to `(val, cls, origin)`; the test is decided on the class `cls` of the value. When the class argument points to a tuple, only a `true` result (for some element of the tuple) is produced. If `val` is the unknown value, both results are possible. */ pragma [nomagic] + private boolean isinstance_test_evaluates_boolean(ControlFlowNode expr, ControlFlowNode use, PointsToContext context, Object val, ClassObject cls, ControlFlowNode origin) { + points_to(use, context, val, cls, origin) and + exists(ControlFlowNode clsNode | + BaseFilters::isinstance(expr, clsNode, use) | + exists(ClassObject scls | + points_to(clsNode, context, scls, _, _) | + result = Types::is_improper_subclass_bool(cls, scls) + ) + or exists(TupleObject t, ClassObject scls | + points_to(clsNode, context, t, _, _) and + result = Types::is_improper_subclass_bool(cls, scls) and result = true + | + 
scls = t.getBuiltinElement(_) + or + points_to(t.getSourceElement(_), _, scls, _, _) + ) + or + val = unknownValue() and result = maybe() + ) + } + + /** Gets the result of the equality test `expr`, when `use` refers to `(val, cls, origin)`. Either the left operand has a known integer value (see `evaluates_int`) that is compared with the simple integer value of the right operand, or `use` is the left operand and both sides point to equatable values that are compared for identity. `sense` is taken from `BaseFilters::equality_test` and the result is flipped when the comparison does not hold. */ pragma [noinline] + private boolean equality_test_evaluates_boolean(ControlFlowNode expr, ControlFlowNode use, PointsToContext context, Object val, ClassObject cls, ControlFlowNode origin) { + exists(ControlFlowNode l, ControlFlowNode r, boolean sense | + contains_interesting_expression_within_test(expr, use) and + BaseFilters::equality_test(expr, l, sense, r) | + exists(int il, int ir | + il = evaluates_int(l, use, context, val, cls, origin) and ir = simple_int_value(r) + | + result = sense and il = ir + or + result = sense.booleanNot() and il != ir + ) + or + use = l and + exists(Object other | + /* Must be discrete values, not just types of things */ + equatable_value(val) and equatable_value(other) and + points_to(use, context, val, cls, origin) and + points_to(r, context, other, _, _) | + other != val and result = sense.booleanNot() + or + other = val and result = sense + ) + ) + } + + /** Gets the truth value of `use` when it is itself the test expression: the boolean value of `val` if it has one, or `true` if instances of `cls` are always true. */ pragma [noinline] + private boolean truth_test_evaluates_boolean(ControlFlowNode expr, ControlFlowNode use, PointsToContext context, Object val, ClassObject cls, ControlFlowNode origin) { + contains_interesting_expression_within_test(expr, use) and + points_to(use, context, val, cls, origin) and + ( + expr = use and val.booleanValue() = result + or + expr = use and Types::instances_always_true(cls) and result = true + ) + } + + /** Gets the result of the callable test `expr` on `use` (as recognised by `BaseFilters::is_callable`): whether `cls` has a `__call__` attribute, or either result if `cls` is the unknown type. */ pragma [noinline] + private boolean callable_test_evaluates_boolean(ControlFlowNode expr, ControlFlowNode use, PointsToContext context, Object val, ClassObject cls, ControlFlowNode origin) { + contains_interesting_expression_within_test(expr, use) and + points_to(use, context, val, cls, origin) and + BaseFilters::is_callable(expr, use) and + ( + result = Types::class_has_attribute_bool(cls, "__call__") + or + cls = theUnknownType() and result = maybe() + ) + } + + /** Gets the result of the `hasattr` test `expr` on `use`: whether the class `cls` of the value is known to have, or known not to have, the tested attribute. */ pragma [noinline] + private boolean 
hasattr_test_evaluates_boolean(ControlFlowNode expr, ControlFlowNode use, PointsToContext context, Object val, ClassObject cls, ControlFlowNode origin) { + contains_interesting_expression_within_test(expr, use) and + points_to(use, context, val, cls, origin) and + exists(string name | + BaseFilters::hasattr(expr, use, name) | + result = Types::class_has_attribute_bool(cls, name) + ) + } + + /** Holds if meaningful equality tests can be made with `o`. + * True for basic objects like 3 or None, but it is also true for sentinel objects. + * A sentinel object is recognised here as a call, at module scope, whose function points to a builtin class. + */ + predicate equatable_value(Object o) { + comparable_value(o) + or + o.(ControlFlowNode).getScope() instanceof Module and + exists(ClassObject c | + c.isBuiltin() and + points_to(o.(CallNode).getFunction(), _, c, _, _) + ) + } + + /** Holds if meaningful comparisons can be made with `o`. + * True for basic objects like 3 or None: builtin objects other than the unknown value and the undefined variable, and any object with a boolean value. + */ + predicate comparable_value(Object o) { + o.isBuiltin() and not o = unknownValue() and not o = undefinedVariable() + or + exists(o.booleanValue()) + } + + + /** Holds if the test on `use` is a test that we can potentially understand: one of the test forms handled by `evaluates`, possibly nested inside `not`. */ + private predicate comprehensible_test(ControlFlowNode test, ControlFlowNode use) { + BaseFilters::issubclass(test, _, use) + or + BaseFilters::isinstance(test, _, use) + or + BaseFilters::equality_test(test, use, _, _) + or + exists(ControlFlowNode l | + BaseFilters::equality_test(test, l, _, _) | + literal_or_len(l) + ) + or + BaseFilters::is_callable(test, use) + or + BaseFilters::hasattr(test, use, _) + or + test = use + or + literal_or_len(test) + or + comprehensible_test(not_operand(test), use) + } + + + /** Gets the integer value of a `NumericObject` that `f` points to (in any context), such as a numeric literal. */ + private int simple_int_value(ControlFlowNode f) { + exists(NumericObject num | + points_to(f, _, num, _, _) and + result = num.intValue() + ) + } + + /** Gets the integer value that `expr` evaluates to given that `use` refers to `val` and `use` is a part of `expr`. 
+ * Only applies to numeric values and `len()` of sequences. */ + pragma [noinline] + private int evaluates_int(ControlFlowNode expr, ControlFlowNode use, PointsToContext context, Object val, ClassObject cls, ControlFlowNode origin) { + contains_interesting_expression_within_test(expr, use) and + points_to(use, context, val, cls, origin) and + ( + /* `len(use)` where `use` refers to a sequence of known length */ exists(CallNode call | + call = expr and + points_to(call.getFunction(), context, theLenFunction(), _, _) and + use = call.getArg(0) and + val.(SequenceObject).getLength() = result + ) + or + /* `use` itself, where it refers to a number */ expr = use and result = val.(NumericObject).intValue() + ) + } + + /** Holds if `expr` is a numeric literal, or a call to a function referred to by the name `len`. This is a purely syntactic check. */ private predicate literal_or_len(ControlFlowNode expr) { + expr.getNode() instanceof Num + or + expr.(CallNode).getFunction().(NameNode).getId() = "len" + } + + /** Holds if ESSA edge refinement, `def`, refers to `(value, cls, origin)`. */ + predicate ssa_filter_definition_points_to(PyEdgeRefinement def, PointsToContext context, Object value, ClassObject cls, ObjectOrCfg origin) { + exists(ControlFlowNode test, ControlFlowNode use | + refinement_test(test, use, test_evaluates_boolean(test, use, context, value, cls, origin), def) + ) + or + /* If we can't understand the test, assume that value passes through. + * Or, if the value is `unknownValue()` then let it pass through as well. */ + exists(ControlFlowNode test, ControlFlowNode use | + refinement_test(test, use, _, def) and + ssa_variable_points_to(def.getInput(), context, value, cls, origin) | + not comprehensible_test(test, use) or + value = unknownValue() + ) + } + + /** Holds if ESSA definition, `uniphi`, refers to `(value, cls, origin)`. 
*/ + pragma [noinline] + predicate uni_edged_phi_points_to(SingleSuccessorGuard uniphi, PointsToContext context, Object value, ClassObject cls, ObjectOrCfg origin) { + exists(ControlFlowNode test, ControlFlowNode use | + /* Because calls such as `len` may create a new variable, we need to go via the source variable. + * That is perfectly safe as we are only dealing with calls that do not mutate their arguments. + */ + use = uniphi.getInput().getSourceVariable().(Variable).getAUse() and + test = uniphi.getDefiningNode() and + uniphi.getSense() = test_evaluates_boolean(test, use, context, value, cls, origin) + ) + } + + /** Holds if the named attribute of ESSA edge refinement, `def`, refers to `(value, cls, origin)`. + * Either the test of `def` is on the attribute access `var.name` and evaluates to the sense of `def` for that value, or the test does not concern attribute `name` and the attribute is retained from the input variable. */ + pragma[noinline] + predicate ssa_filter_definition_named_attribute_points_to(PyEdgeRefinement def, PointsToContext context, string name, Object value, ClassObject cls, ObjectOrCfg origin) { + exists(ControlFlowNode test, AttrNode use, boolean sense | + edge_refinement_attr_use_sense(def, test, use, name, sense) and + sense = test_evaluates_boolean(test, use, context, value, cls, origin) + ) + or + exists(EssaVariable input | + input = def.getInput() and + not edge_refinement_test(def, input, name) and + SSA::ssa_variable_named_attribute_points_to(input, context, name, value, cls, origin) + ) + } + + /* Helper for ssa_filter_definition_named_attribute_points_to. + * Holds if `use` is of the form `var.name` in the test of `def`, and `var` is the source variable of `def`, and `def` has sense `sense`. 
+ */ + pragma[noinline] + private predicate edge_refinement_attr_use_sense(PyEdgeRefinement def, ControlFlowNode test, AttrNode use, string name, boolean sense) { + def.getSense() = sense and + exists(EssaVariable input | + input = def.getInput() and + test = def.getTest() and + use.getObject(name) = def.getInput().getSourceVariable().(Variable).getAUse() and + test_contains(test, use) + ) + } + + /* Helper for ssa_filter_definition_named_attribute_points_to. Holds if `input` is the input to `def` and `refinement_test` relates the test of `def` to the object of some attribute access with attribute `name`. */ + pragma[noinline] + private predicate edge_refinement_test(PyEdgeRefinement def, EssaVariable input, string name) { + exists(ControlFlowNode test | + input = def.getInput() and + test = def.getTest() | + exists(AttrNode use | + refinement_test(test, use.getObject(name), _, def) + ) + ) + } + + } + + cached module Types { + + /** INTERNAL -- Use `ClassObject.getBaseType(n)` instead. + * + * Gets the nth base class of the class. For a class defined in source this is whatever the nth base expression points to (never the unknown value), or `theObjectType()` at position 0 for a new-style class with no explicit bases. Builtin classes have their builtin base at position 0, and the unknown type has `theObjectType()` at position 0. */ + cached Object class_base_type(ClassObject cls, int n) { + not result = unknownValue() and + exists(ClassExpr cls_expr | cls.getOrigin() = cls_expr | + points_to(cls_expr.getBase(n).getAFlowNode(), _, result, _, _) + or + is_new_style(cls) and not exists(cls_expr.getBase(0)) and result = theObjectType() and n = 0 + ) + or + result = builtin_base_type(cls) and n = 0 + or + cls = theUnknownType() and result = theObjectType() and n = 0 + } + + /** Gets the object that the nth base expression of the source class `cls` points to, excluding the unknown value. Unlike `class_base_type`, no implicit base is added and builtin classes are not covered. */ private Object py_base_type(ClassObject cls, int n) { + not result = unknownValue() and + exists(ClassExpr cls_expr | cls.getOrigin() = cls_expr | + points_to(cls_expr.getBase(n).getAFlowNode(), _, result, _, _) + ) + } + + /** Gets the number of bases of `cls`. For a class defined in source this is the number of base expressions, or, when there are none, 1 for a new-style class and 0 for an old-style class. It is 0 for `theObjectType()`, and 1 for any other class with a builtin base type and for the unknown type. */ cached int class_base_count(ClassObject cls) { + exists(ClassExpr cls_expr | + cls.getOrigin() = cls_expr | + result = strictcount(cls_expr.getABase()) + or + is_new_style_bool(cls) = true and not exists(cls_expr.getBase(0)) and result = 1 + or + is_new_style_bool(cls) = false and not exists(cls_expr.getBase(0)) and result = 0 + ) + or + cls = theObjectType() and result = 0 + or + exists(builtin_base_type(cls)) and not cls = 
theObjectType() and result = 1 + or + cls = theUnknownType() and result = 1 + } + + /** INTERNAL -- Do not use. + * + * Holds if a call to this class will return an instance of this class. + */ + cached predicate callToClassWillReturnInstance(ClassObject cls) { + callToClassWillReturnInstance(cls, 0) and + not callToPythonClassMayNotReturnInstance(cls.getPyClass()) + } + + /** Helper for `callToClassWillReturnInstance/1`. Holds if the bases of `cls` at positions `n` and above are all acceptable; the recursion starts at `n = class_base_count(cls)` and works down towards 0. + * NOTE(review): the non-builtin branch tests `cls.getPyClass()` rather than the Python class of `base` -- confirm that this is intended. */ private predicate callToClassWillReturnInstance(ClassObject cls, int n) { + n = class_base_count(cls) + or + callToClassWillReturnInstance(cls, n+1) and + exists(ClassObject base | + base = class_base_type(cls, n) | + /* Most builtin types "declare" `__new__`, such as `int`, yet are well behaved. */ + base.isBuiltin() + or + exists(Class c | + c = cls.getPyClass() and + not callToPythonClassMayNotReturnInstance(c) + ) + ) + } + + /** Holds if `cls` has an `__init__` method containing a store to `self.__class__`, or if `cls` defines a `__new__` method. */ private predicate callToPythonClassMayNotReturnInstance(Class cls) { + /* Django does this, so we need to account for it */ + exists(Function init, LocalVariable self | + /* `self.__class__ = ...` in the `__init__` method */ + cls.getInitMethod() = init and + self.isSelf() and self.getScope() = init and + exists(AttrNode a | a.isStore() and a.getObject("__class__") = self.getAUse()) + ) + or + exists(Function new | new.getName() = "__new__" and new.getScope() = cls) + } + + /** Gets `true` if `cls` is determined to be a new-style class (the major version is 3, `cls` is builtin, or its metaclass is or derives from `theTypeType()`), or `false` if its metaclass is `theClassType()`. */ cached boolean is_new_style_bool(ClassObject cls) { + major_version() = 3 and result = true + or + cls.isBuiltin() and result = true + or + get_an_improper_super_type(class_get_meta_class(cls)) = theTypeType() and result = true + or + class_get_meta_class(cls) = theClassType() and result = false + } + + /** INTERNAL -- Use `ClassObject.isNewStyle()` instead. Holds if `cls`, or any of its super types, is new-style according to `is_new_style_bool`. */ + cached predicate is_new_style(ClassObject cls) { + is_new_style_bool(cls) = true + or + is_new_style(get_a_super_type(cls)) + } + + /** INTERNAL -- Use `ClassObject.getASuperType()` instead. 
*/ + cached ClassObject get_a_super_type(ClassObject cls) { + result = class_base_type(cls, _) + or + result = class_base_type(get_a_super_type(cls), _) + } + + /** INTERNAL -- Use `ClassObject.getAnImproperSuperType()` instead. Gets `cls` itself or any of its super types. */ + cached ClassObject get_an_improper_super_type(ClassObject cls) { + result = cls + or + result = get_a_super_type(cls) + } + + /** Gets `true` if `cls` is known to be a proper subclass of `sup`, or `false` if it is known not to be. `false` results come only from `is_subclass_bool/3`, which is restricted to pairs in `relevant_subclass_relation`. */ cached boolean is_subclass_bool(ClassObject cls, ClassObject sup) { + if abcSubclass(cls, sup) then ( + /* Hard-code some abc subclass pairs -- In future we may change this to use stubs. */ + result = true + ) else ( + sup = class_base_type(cls, _) and result = true + or + is_subclass_bool(class_base_type(cls, _), sup) = true and result = true + or + result = is_subclass_bool(cls, sup, 0) + ) + } + + /** Holds for the hard-coded pairs listed below, each relating the list, set or dict type to a collections ABC (`Iterable`, `Set`, `Sequence` or `Mapping`). */ private predicate abcSubclass(ClassObject cls, ClassObject sup) { + cls = theListType() and sup = collectionsAbcClass("Iterable") + or + cls = theSetType() and sup = collectionsAbcClass("Iterable") + or + cls = theDictType() and sup = collectionsAbcClass("Iterable") + or + cls = theSetType() and sup = collectionsAbcClass("Set") + or + cls = theListType() and sup = collectionsAbcClass("Sequence") + or + cls = theDictType() and sup = collectionsAbcClass("Mapping") + } + + /** Gets the result of `is_subclass_bool(cls, sup)`, and additionally `true` if `cls` and `sup` are the same class. */ cached boolean is_improper_subclass_bool(ClassObject cls, ClassObject sup) { + result = is_subclass_bool(cls, sup) + or + result = true and cls = sup + } + + /** Helper for `is_subclass_bool/2`. Gets whether any base of `cls` at position `n` or above is, or is a subclass of, `sup`; `false` is reached once `n` runs past the last base. Only computed for relevant `(cls, sup)` pairs. */ private boolean is_subclass_bool(ClassObject cls, ClassObject sup, int n) { + relevant_subclass_relation(cls, sup) and + ( + n = class_base_count(cls) and result = false and not cls = sup + or + exists(ClassObject basetype | + basetype = class_base_type(cls, n) | + not basetype = sup and + result = is_subclass_bool(cls, sup, n+1).booleanOr(is_subclass_bool(basetype, sup)) + or + basetype = sup and result = true + ) + ) + } + + /** Holds if the subclass relation between `cls` and `sup` is worth computing: `sup` is (an element of a tuple that is) the class argument of an `issubclass` or `isinstance` test whose tested use refers to `cls` (as value, for `issubclass`) or to an instance of `cls` (for `isinstance`), or `cls` is a base of a class that is itself relevant for `sup`. */ private predicate relevant_subclass_relation(ClassObject cls, ClassObject sup) { + exists(ControlFlowNode supnode, ControlFlowNode use | + points_to(supnode, _, sup, _, _) 
+ or exists(TupleObject t | + points_to(supnode, _, t, _, _) | + sup = t.getBuiltinElement(_) + or + points_to(t.getSourceElement(_), _, sup, _, _) + ) + | + BaseFilters::issubclass(_, supnode, use) and points_to(use, _, cls, _, _) + or + BaseFilters::isinstance(_, supnode, use) and points_to(use, _, _, cls, _) + ) + or + exists(ClassObject sub | + relevant_subclass_relation(sub, sup) and + class_base_type(sub, _) = cls + ) + } + + /** Gets the method resolution order of `cls`, taken from `new_style_mro` or `old_style_mro` according to `is_new_style_bool(cls)`. */ cached ClassList get_mro(ClassObject cls) { + result = new_style_mro(cls) and is_new_style_bool(cls) = true + or + result = old_style_mro(cls) and is_new_style_bool(cls) = false + } + + /** INTERNAL -- Use `ClassObject.declaredAttribute(name)` instead. + * Holds if the class `owner` itself declares attribute `name` referring to `(value, vcls, origin)`: either the value of the local variable `name` at a normal exit of the class scope (excluding the undefined variable), or a declared attribute of a builtin class. */ + cached predicate class_declared_attribute(ClassObject owner, string name, Object value, ClassObject vcls, ObjectOrCfg origin) { + /* Note that src_var must be a local variable, we aren't interested in the value that any global variable may hold */ + not value = undefinedVariable() and + exists(EssaVariable var, LocalVariable src_var | + var.getSourceVariable() = src_var and + src_var.getId() = name and + var.getAUse() = owner.getImportTimeScope().getANormalExit() | + ssa_variable_points_to(var, _, value, vcls, origin) + ) + or + value = builtin_class_attribute(owner, name) and class_declares_attribute(owner, name) and + origin = value and vcls = builtin_object_type(value) + } + + /** Holds if it is worth computing whether the classes in `mro` lack attribute `name`: `name` is tested by a `hasattr` on a use that refers to a class with this MRO or to an instance of one, or `mro` is the tail of such a list, or `name` is `__call__`. */ private predicate interesting_class_attribute(ClassList mro, string name) { + exists(ControlFlowNode use, ClassObject cls | + mro = cls.getMro() and + BaseFilters::hasattr(_, use, name) | + points_to(use, _, cls, _, _) or + points_to(use, _, _, cls, _) + ) + or + exists(ClassList sublist | + sublist.getTail() = mro and + interesting_class_attribute(sublist, name) + ) + or + name = "__call__" + } + + /** Holds if no class in `mro` declares attribute `name`; only computed for interesting `(mro, name)` pairs. */ private predicate does_not_have_attribute(ClassList mro, string name) { + interesting_class_attribute(mro, name) and + ( + mro.isEmpty() + or + exists(ClassObject head, ClassList tail | + head = mro.getHead() and 
tail = mro.getTail() | + does_not_have_attribute(tail, name) and + not class_declares_attribute(head, name) + ) + ) + } + + /** Holds if the class `cls` has an attribute called `name`, that is, if `cls` or one of its super types declares it. */ + cached predicate class_has_attribute(ClassObject cls, string name) { + class_declares_attribute(get_an_improper_super_type(cls), name) + } + + /** Gets `true` if the class `cls` is known to have attribute `name`, + * or `false` if the class `cls` is known to not have attribute `name`. + */ + cached boolean class_has_attribute_bool(ClassObject cls, string name) { + exists(ClassList mro | + mro = cls.getMro() | + mro.declares(name) and result = true + or + does_not_have_attribute(mro, name) and result = false + ) + } + + /** INTERNAL -- Use `ClassObject.attributeRefersTo(name, value, vcls, origin)` instead. + * Holds if looking up `name` on `cls` finds `(value, vcls, origin)`, as declared by the class that the MRO of `cls` reports as declaring `name`. + */ + cached predicate class_attribute_lookup(ClassObject cls, string name, Object value, ClassObject vcls, ObjectOrCfg origin) { + exists(ClassObject defn | + defn = get_mro(cls).findDeclaringClass(name) and + class_declared_attribute(defn, name, value, vcls, origin) + ) + } + + /** INTERNAL -- Use `ClassObject.failedInference(reason)` instead. + * + * Holds if type inference failed to compute the full class hierarchy for this class for the reason given. 
*/
+        cached predicate failed_inference(ClassObject cls, string reason) {
+            strictcount(cls.getPyClass().getADecorator()) > 1 and reason = "Multiple decorators"
+            or
+            exists(cls.getPyClass().getADecorator()) and not six_add_metaclass(_, cls, _) and reason = "Decorator not understood"
+            or
+            exists(int i |
+                exists(((ClassExpr)cls.getOrigin()).getBase(i)) and reason = "Missing base " + i
+                |
+                not exists(class_base_type(cls, i))
+            )
+            or
+            exists(cls.getPyClass().getMetaClass()) and not exists(class_get_meta_class(cls)) and reason = "Failed to infer metaclass"
+            or
+            /* Failure is propagated from any base class to its subclasses */
+            exists(int i | failed_inference(class_base_type(cls, i), _) and reason = "Failed inference for base class at position " + i)
+            or
+            exists(int i | strictcount(class_base_type(cls, i)) > 1 and reason = "Multiple bases at position " + i)
+            or
+            /* NOTE(review): the message below reads "Duplicate bases classes" (sic); it is a runtime string and is left unchanged here */
+            exists(int i, int j | class_base_type(cls, i) = class_base_type(cls, j) and i != j and reason = "Duplicate bases classes")
+            or
+            cls = theUnknownType() and reason = "Unknown Type"
+        }
+
+        /** INTERNAL -- Use `ClassObject.getMetaClass()` instead.
+         *
+         * Gets the metaclass for this class */
+        cached ClassObject class_get_meta_class(ClassObject cls) {
+            result = declared_meta_class(cls)
+            or
+            has_declared_metaclass(cls) = false and result = get_inherited_metaclass(cls)
+            or
+            cls = theUnknownType() and result = theUnknownType()
+        }
+        /** Gets the metaclass declared for `cls` through its metaclass variable, through `py_cobjecttypes` (restricted to C metaclasses) or through a `six.add_metaclass` decorator. */
+        private ClassObject declared_meta_class(ClassObject cls) {
+            exists(Object obj |
+                ssa_variable_points_to(metaclass_var(cls), _, obj, _, _) |
+                result = obj
+                or
+                obj = unknownValue() and result = theUnknownType()
+            )
+            or
+            py_cobjecttypes(cls, result) and is_c_metaclass(result)
+            or
+            exists(ControlFlowNode meta |
+                Types::six_add_metaclass(_, cls, meta) and
+                points_to(meta, _, result, _, _)
+            )
+        }
+        /** Gets `true` if the metaclass variable of `cls` points to a defined value, or `false` if it is undefined or there is no such variable. */
+        private boolean has_metaclass_var_metaclass(ClassObject cls) {
+            exists(Object obj |
+                ssa_variable_points_to(metaclass_var(cls), _, obj, _, _) |
+                obj = undefinedVariable() and result = false
+                or
+                obj != undefinedVariable() and result = true
+            )
+            or
+            not exists(metaclass_var(cls)) and result = false
+        }
+        /** Gets `true` for classes known to `py_cobjecttypes`; for non-builtin classes, gets whether a metaclass is declared via `six.add_metaclass` or a metaclass variable. */
+        private boolean has_declared_metaclass(ClassObject cls) {
+            py_cobjecttypes(cls, _) and result = true
+            or
+            not cls.isBuiltin() and
+            result = has_six_add_metaclass(cls).booleanOr(has_metaclass_var_metaclass(cls))
+        }
+        /** Gets the SSA variable used as the metaclass of `cls`: one used at the class's metaclass node or, on Python 2 without such a node, `__metaclass__` at an entry edge of the class scope. */
+        private EssaVariable metaclass_var(ClassObject cls) {
+            result.getASourceUse() = cls.getPyClass().getMetaClass().getAFlowNode()
+            or
+            major_version() = 2 and not exists(cls.getPyClass().getMetaClass()) and
+            result.getName() = "__metaclass__" and
+            cls.getPyClass().(ImportTimeScope).entryEdge(result.getAUse(), _)
+        }
+        /** Gets the metaclass that `cls` inherits from its bases, or the unknown type if some base is not a known class. */
+        private ClassObject get_inherited_metaclass(ClassObject cls) {
+            result = get_inherited_metaclass(cls, 0)
+            or
+            // Best guess if base is not a known class
+            exists(Object base |
+                base = class_base_type(cls, _) and
+                result = theUnknownType() |
+                not base instanceof ClassObject
+                or
+                base = theUnknownType()
+            )
+        }
+        /** Gets the metaclass inherited from the bases of `cls` at index `n` and above; when `n` is the number of bases this is `theTypeType()` on Python 3 and `theClassType()` on Python 2. */
+        private ClassObject get_inherited_metaclass(ClassObject cls, int n) {
+            exists(Class c |
+                c = cls.getPyClass() and
+                n = count(c.getABase())
+                |
+                major_version() = 3 and result = theTypeType()
+                or
+                major_version() = 2 and result = theClassType()
+            )
+            or
+            exists(ClassObject meta1, ClassObject meta2 |
+                meta1 = class_get_meta_class(py_base_type(cls, n)) and
+                meta2 = get_inherited_metaclass(cls, n+1)
+                |
+                /* Choose sub-class */
+                get_an_improper_super_type(meta1) = meta2 and result = meta1
+                or
+                get_an_improper_super_type(meta2) = meta1 and result = meta2
+                or
+                /* Choose new-style meta-class over old-style */
+                meta2 = theClassType() and result = meta1
+                or
+                /* Make sure we have a metaclass, even if base is unknown */
+                meta1 = theUnknownType() and result = theTypeType()
+                or
+                meta2 = theUnknownType() and result = meta1
+            )
+        }
+        /** Gets a function named `add_metaclass` whose scope is a module. NOTE(review): the module `six` is not constrained by name here -- confirm that is intended. */
+        private Object six_add_metaclass_function() {
+            exists(Module six, FunctionExpr add_metaclass |
+                add_metaclass.getInnerScope().getName() = "add_metaclass" and
+                add_metaclass.getScope() = six and
+                result.getOrigin() = add_metaclass
+            )
+        }
+        /** Gets the callee `f` of a call of the form `f(...)(cls)`, i.e. of a decorator-with-arguments applied to `cls`. */
+        private ControlFlowNode decorator_call_callee(ClassObject cls) {
+            exists(CallNode decorator_call, CallNode decorator |
+                decorator_call.getArg(0) = cls and
+                decorator = decorator_call.getFunction() and
+                result = decorator.getFunction()
+            )
+        }
+
+        /** INTERNAL -- Do not use */
+        cached boolean has_six_add_metaclass(ClassObject cls) {
+            exists(ControlFlowNode callee, Object func |
+                callee = decorator_call_callee(cls) and
+                points_to(callee, _, func, _, _) |
+                func = six_add_metaclass_function() and result = true
+                or
+                not func = six_add_metaclass_function() and result = false
+            )
+            or
+            not exists(six_add_metaclass_function()) and result = false
+            or
+            not exists(decorator_call_callee(cls)) and result = false
+        }
+
+        /** INTERNAL -- Do not use */
+        cached predicate six_add_metaclass(CallNode decorator_call, ClassObject decorated, ControlFlowNode metaclass) {
+            exists(CallNode decorator |
+                decorator_call.getArg(0) = decorated and
+                decorator = decorator_call.getFunction() and
+                decorator.getArg(0) = metaclass |
+                points_to(decorator.getFunction(), _, six_add_metaclass_function(), _, _)
+                or
+                exists(ModuleObject six |
+                    six.getName() = "six" and
+                    points_to(decorator.getFunction().(AttrNode).getObject("add_metaclass"), _, six, _, _)
+                )
+            )
+        }
+
+        /** INTERNAL -- Use `not cls.isAbstract()` instead. */
+        cached predicate concrete_class(ClassObject cls) {
+            Types::class_get_meta_class(cls) != theAbcMetaClassObject()
+            or
+            exists(Class c |
+                c = cls.getPyClass() and
+                not exists(c.getMetaClass())
+                |
+                /* No function in the class raises `NotImplementedError` or `NotImplemented` */
+                forall(Function f |
+                    f.getScope() = c |
+                    not exists(Raise r, Name ex |
+                        r.getScope() = f and
+                        (r.getException() = ex or r.getException().(Call).getFunc() = ex) and
+                        (ex.getId() = "NotImplementedError" or ex.getId() = "NotImplemented")
+                    )
+                )
+            )
+        }
+
+        /** Holds if instances of class `cls` are always truthy. */
+        cached predicate instances_always_true(ClassObject cls) {
+            cls = theObjectType()
+            or
+            instances_always_true(cls, 0) and
+            not exists(string meth |
+                class_declares_attribute(cls, meth) |
+                meth = "__bool__" or meth = "__len__" or
+                meth = "__nonzero__" and major_version() = 2
+            )
+        }
+
+        /** Helper for `instances_always_true(cls)`: holds if `cls` is not `NoneType` and instances of each base of `cls` at index `n` or above are always truthy. */
+        cached predicate instances_always_true(ClassObject cls, int n) {
+            not cls = theNoneType() and
+            n = class_base_count(cls)
+            or
+            instances_always_true(cls, n+1) and
+            instances_always_true(class_base_type(cls, n))
+        }
+
+    }
+
+    /** INTERNAL -- Public for testing only */
+    module Test {
+
+        import Calls
+        import SSA
+        import Layer
+
+    }
+
+}
+
+/* Helper classes for `super` dispatching.
*/
+/** A call to `super`, either with explicit arguments or (Python 3 only) without a first argument, in which case the class and `self` are taken from the enclosing function. */
+class SuperCall extends Object {
+
+    EssaVariable self;
+    ClassObject start;
+
+    override string toString() {
+        result = "super()"
+    }
+
+    SuperCall() {
+        exists(CallNode call, PointsToContext context |
+            call = this and
+            PointsTo::points_to(call.getFunction(), _, theSuperType(), _, _) |
+            /* Explicit form: argument 0 is the class, argument 1 is a use of `self` */
+            PointsTo::points_to(call.getArg(0), context, start, _, _) and
+            self.getASourceUse() = call.getArg(1)
+            or
+            major_version() = 3 and
+            not exists(call.getArg(0)) and
+            exists(Function func |
+                call.getScope() = func and
+                context.appliesToScope(func) and
+                /* Implicit class argument is lexically enclosing scope */
+                func.getScope() = start.getPyClass() and
+                /* Implicit 'self' is the 0th parameter */
+                self.getDefinition().(ParameterDefinition).getDefiningNode() = func.getArg(0).asName().getAFlowNode()
+            )
+        )
+    }
+    /** Gets the class passed (explicitly or implicitly) as the first argument of `super`. */
+    ClassObject startType() {
+        result = start
+    }
+    /** Gets the class of the value that the `self` variable points to in context `ctx`. */
+    ClassObject selfType(PointsToContext ctx) {
+        PointsTo::ssa_variable_points_to(self, ctx, _, result, _)
+    }
+    /** Holds if `f` is this call and its first argument points to the start class in context `ctx`. */
+    predicate instantiation(PointsToContext ctx, ControlFlowNode f) {
+        PointsTo::points_to(this.(CallNode).getArg(0), ctx, start, _, _) and f = this
+    }
+    /** Gets the SSA variable for `self`. */
+    EssaVariable getSelf() {
+        result = self
+    }
+}
+/** An attribute access `name` on an object that points to a `SuperCall`. */
+class SuperBoundMethod extends Object {
+
+    override string toString() {
+        result = "super()." + name
+    }
+
+    SuperCall superObject;
+    string name;
+
+    cached
+    SuperBoundMethod() {
+        exists(ControlFlowNode object |
+            this.(AttrNode).getObject(name) = object |
+            PointsTo::points_to(object, _, superObject, _, _)
+        )
+    }
+    /** Gets the function found by looking up `name` in the MRO of the self type, starting after the start class of the `super` call. */
+    FunctionObject getFunction(PointsToContext ctx) {
+        exists(ClassList mro |
+            mro = PointsTo::Types::get_mro(superObject.selfType(ctx)) |
+            result = mro.startingAt(superObject.startType()).getTail().lookup(name)
+        )
+    }
+    /** Holds if `f` is this attribute access and its object points to the `super` object in context `ctx`. */
+    predicate instantiation(PointsToContext ctx, ControlFlowNode f) {
+        PointsTo::points_to(this.(AttrNode).getObject(name), ctx, superObject, _, _) and f = this
+    }
+    /** Gets the SSA variable for `self` of the underlying `super` call. */
+    EssaVariable getSelf() {
+        result = superObject.getSelf()
+    }
+
+}
+
diff --git a/python/ql/src/semmle/python/pointsto/PointsToContext.qll b/python/ql/src/semmle/python/pointsto/PointsToContext.qll
new file mode 100755
index 00000000000..abbf5117e66
--- /dev/null
+++ b/python/ql/src/semmle/python/pointsto/PointsToContext.qll
@@ -0,0 +1,276 @@
+import python
+private import semmle.python.pointsto.PointsTo
+
+/*
+ * A note on 'cost'. Cost doesn't represent the cost to compute,
+ * but (a vague estimate of) the cost to compute per value gained.
+ * This is constantly evolving, so see the various cost functions below for more details.
+ */
+/** Gets a context cost limit supplied through a "context.cost" flag, converted to an integer. */
+private int given_cost() {
+    exists(string depth |
+        py_flags_versioned("context.cost", depth, _) and
+        result = depth.toInt()
+    )
+}
+/** Gets the maximum cost of a context: 7 when no "context.cost" flag exists, otherwise the largest given cost. */
+private int max_context_cost() {
+    not py_flags_versioned("context.cost", _, _) and result = 7
+    or
+    result = max(int cost | cost = given_cost() | cost)
+}
+/** Gets the number of calls whose callee name or attribute name equals the name of function `s`; 1 for scopes named `__init__`; 0 for scopes that are not functions. */
+private int syntactic_call_count(Scope s) {
+    exists(Function f |
+        f = s and f.getName() != "__init__" |
+        result = count(CallNode call |
+            call.getFunction().(NameNode).getId() = f.getName()
+            or
+            call.getFunction().(AttrNode).getName() = f.getName()
+        )
+    )
+    or
+    s.getName() = "__init__" and result = 1
+    or
+    not s instanceof Function and result = 0
+}
+/** Gets the cost attributed to scope `s` on account of the number of (syntactic) calls to it. */
+private int incoming_call_cost(Scope s) {
+    /* Syntactic call count will often be a considerable overestimate
+     * of the actual number of calls, so we use the square root.
+     * Cost = log(sqrt(call-count))
+     */
+    result = ((syntactic_call_count(s)+1).log(2)*0.5).floor()
+}
+/** Gets the cost of context `ctx`: 0 for the main, runtime and import contexts, or the cost recorded in a call context. */
+private int context_cost(TPointsToContext ctx) {
+    ctx = TMainContext() and result = 0
+    or
+    ctx = TRuntimeContext() and result = 0
+    or
+    ctx = TImportContext() and result = 0
+    or
+    ctx = TCallContext(_, _, result)
+}
+/** Gets the base cost of `call`: 2 if its scope is in source, otherwise 3. */
+private int call_cost(CallNode call) {
+    if call.getScope().inSource() then
+        result = 2
+    else
+        result = 3
+}
+/** Gets the number of calls made within scope `s`; uses `strictcount`, so there is no result for a scope without calls. */
+private int outgoing_calls(Scope s) {
+    result = strictcount(CallNode call | call.getScope() = s)
+}
+/** Holds if `call` has the syntactic form `super(...).name(...)`. */
+predicate super_method_call(CallNode call) {
+    call.getFunction().(AttrNode).getObject().(CallNode).getFunction().(NameNode).getId() = "super"
+}
+/** Gets the cost attributed to call `c` on account of the number of calls made in its enclosing scope. */
+private int outgoing_call_cost(CallNode c) {
+    /* Cost = log(outgoing-call-count) */
+    result = outgoing_calls(c.getScope()).log(2).floor()
+}
+
+/** Cost of contexts for a call, the more callers the
+ * callee of call has the more expensive it is to add contexts for it.
+ * This seems to be an effective heuristic for preventing an explosion
+ * in the number of contexts while retaining good results.
+ */
+private int splay_cost(CallNode c) {
+    if super_method_call(c) then
+        result = 0
+    else
+        result = outgoing_call_cost(c) + incoming_call_cost(c.getScope())
+}
+/** Holds if `call` calls an attribute named `__init__` or `__del__`. */
+private predicate call_to_init_or_del(CallNode call) {
+    exists(string mname |
+        mname = "__init__" or mname = "__del__" |
+        mname = call.getFunction().(AttrNode).getName()
+    )
+}
+
+/** Total cost estimate */
+private int total_call_cost(CallNode call) {
+    /* We want to always follow __init__ and __del__ calls as they tell us about object construction,
+     * but we need to be aware of cycles, so they must have a non-zero cost.
+     */
+    if call_to_init_or_del(call) then
+        result = 1
+    else
+        result = call_cost(call) + splay_cost(call)
+}
+/** Gets the cost of entering a new context from `call` in context `ctx`: the cost of the call plus the cost of `ctx`. */
+private int total_cost(CallNode call, PointsToContext ctx) {
+    ctx.appliesTo(call) and
+    result = total_call_cost(call) + context_cost(ctx)
+}
+/** The underlying representation of points-to contexts; call contexts only exist while their cost is within `max_context_cost()`. */
+private cached newtype TPointsToContext =
+    TMainContext()
+    or
+    TRuntimeContext()
+    or
+    TImportContext()
+    or
+    TCallContext(ControlFlowNode call, PointsToContext outerContext, int cost) {
+        total_cost(call, outerContext) = cost and
+        cost <= max_context_cost()
+    }
+
+/** Points-to context. Context can be one of:
+ *    * "main": Used for scripts.
+ *    * "import": Used for non-script modules.
+ *    * "runtime": Used for functions and methods without caller context (the default context).
+ *    * All other contexts are call contexts and consist of a pair of call-site and caller context.
+ */
+class PointsToContext extends TPointsToContext {
+
+    cached string toString() {
+        this = TMainContext() and result = "main"
+        or
+        this = TRuntimeContext() and result = "runtime"
+        or
+        this = TImportContext() and result = "import"
+        or
+        exists(CallNode callsite, PointsToContext outerContext |
+            this = TCallContext(callsite, outerContext, _) and
+            result = callsite.getLocation() + " from " + outerContext.toString()
+        )
+    }
+
+    /** Holds if `call` is the call-site from which this context was entered and `caller` is the caller's context. */
+    predicate fromCall(CallNode call, PointsToContext caller) {
+        caller.appliesTo(call) and
+        this = TCallContext(call, caller, _)
+    }
+
+    /** Holds if `call` is the call-site (a call to `callee`) from which this context was entered and `caller` is the caller's context. */
+    predicate fromCall(CallNode call, FunctionObject callee, PointsToContext caller) {
+        call = PointsTo::get_a_call(callee, caller) and
+        this = TCallContext(call, caller, _)
+    }
+
+    /** Gets the caller context for this callee context. */
+    PointsToContext getOuter() {
+        this = TCallContext(_, result, _)
+    }
+
+    /** Holds if this context is relevant to the given scope. */
+    predicate appliesToScope(Scope s) {
+        /* Scripts */
+        this = TMainContext() and maybe_main(s)
+        or
+        /* Modules and classes evaluated at import */
+        s instanceof ImportTimeScope and this = TImportContext()
+        or
+        this = TRuntimeContext() and executes_in_runtime_context(s)
+        or
+        /* Called functions, regardless of their name */
+        exists(FunctionObject func, ControlFlowNode call, TPointsToContext outerContext |
+            call = PointsTo::get_a_call(func, outerContext) and
+            this = TCallContext(call, outerContext, _) and
+            s = func.getFunction()
+        )
+        or
+        exists(FunctionObject func |
+            PointsTo::Flow::callsite_calls_function(_, _, func, this, _) and
+            s = func.getFunction()
+        )
+    }
+
+    /** Holds if this context can apply to the CFG node `n`. */
+    pragma [inline]
+    predicate appliesTo(ControlFlowNode n) {
+        this.appliesToScope(n.getScope())
+    }
+
+    /** Holds if this context is a call context. */
+    predicate isCall() {
+        this = TCallContext(_, _, _)
+    }
+
+    /** Holds if this is the "main" context. */
+    predicate isMain() {
+        this = TMainContext()
+    }
+
+    /** Holds if this is the "import" context. */
+    predicate isImport() {
+        this = TImportContext()
+    }
+
+    /** Holds if this is the "runtime" (default) context. */
+    predicate isRuntime() {
+        this = TRuntimeContext()
+    }
+
+    /** Holds if this context or one of its caller contexts is the "runtime" (default) context. */
+    predicate fromRuntime() {
+        this.isRuntime()
+        or
+        this.getOuter().fromRuntime()
+    }
+
+    /** Gets the depth (number of calls) for this context. */
+    int getDepth() {
+        not exists(this.getOuter()) and result = 0
+        or
+        result = this.getOuter().getDepth() + 1
+    }
+    /** Gets the cost of this context, as computed by `context_cost`. */
+    int getCost() {
+        result = context_cost(this)
+    }
+
+    /** Holds if a call would be too expensive to create a new context for */
+    predicate untrackableCall(CallNode call) {
+        total_cost(call, this) > max_context_cost()
+    }
+    /** Gets the outermost call of this chain of call contexts, that is, the call whose caller context is the import context. */
+    CallNode getRootCall() {
+        this = TCallContext(result, TImportContext(), _)
+        or
+        result = this.getOuter().getRootCall()
+    }
+
+    /** Gets a version of Python that this context includes */
+    pragma [inline]
+    Version getAVersion() {
+        /* Currently contexts do not include any version information, but may do in the future */
+        result = major_version()
+    }
+
+}
+/** Holds if the file of the module enclosing scope `s` has a relative path. */
+private predicate in_source(Scope s) {
+    exists(s.getEnclosingModule().getFile().getRelativePath())
+}
+
+/** Holds if the function `f` can be executed in the runtime (default) context.
+ * That is the case for functions in source that are "public", special methods
+ * or init methods; the latter two cover methods invoked by the VM.
+ */
+predicate executes_in_runtime_context(Function f) {
+    /* "Public" scope, i.e. functions whose name does not start with an underscore, or special methods */
+    (f.getName().charAt(0) != "_" or f.isSpecialMethod() or f.isInitMethod())
+    and
+    in_source(f)
+}
+/** Holds if module `m` has an `if` statement whose test compares `__name__` for equality with "__main__". */
+private predicate maybe_main(Module m) {
+    exists(If i, Compare cmp, Name name, StrConst main |
+        m.getAStmt() = i and i.getTest() = cmp |
+        cmp.compares(name, any(Eq eq), main) and
+        name.getId() = "__name__" and
+        main.getText() = "__main__"
+    )
+}
+
+
+/* For backwards compatibility */
+/** DEPRECATED: Use `PointsToContext` instead */
+deprecated class FinalContext = PointsToContext;
+
diff --git a/python/ql/src/semmle/python/protocols.qll b/python/ql/src/semmle/python/protocols.qll
new file mode 100644
index 00000000000..31808ff3c53
--- /dev/null
+++ b/python/ql/src/semmle/python/protocols.qll
@@ -0,0 +1,31 @@
+import python
+
+/** Retained for backwards compatibility; use `ClassObject.isIterator()` instead. */
+predicate is_iterator(ClassObject c) {
+    c.isIterator()
+}
+
+/** Retained for backwards compatibility; use `ClassObject.isIterable()` instead. */
+predicate is_iterable(ClassObject c) {
+    c.isIterable()
+}
+
+/** Retained for backwards compatibility; use `ClassObject.isCollection()` instead. */
+predicate is_collection(ClassObject c) {
+    c.isCollection()
+}
+
+/** Retained for backwards compatibility; use `ClassObject.isMapping()` instead. */
+predicate is_mapping(ClassObject c) {
+    c.isMapping()
+}
+
+/** Retained for backwards compatibility; use `ClassObject.isSequence()` instead. */
+predicate is_sequence(ClassObject c) {
+    c.isSequence()
+}
+
+/** Retained for backwards compatibility; use `ClassObject.isContextManager()` instead.
*/
+predicate is_context_manager(ClassObject c) {
+    c.isContextManager()
+}
diff --git a/python/ql/src/semmle/python/regex.qll b/python/ql/src/semmle/python/regex.qll
new file mode 100644
index 00000000000..0636c485f06
--- /dev/null
+++ b/python/ql/src/semmle/python/regex.qll
@@ -0,0 +1,709 @@
+import python
+/** Holds if `name` is a function of the `re` module and `flags` is the index of the positional argument carrying its flags. */
+private predicate re_module_function(string name, int flags) {
+    name = "compile" and flags = 1 or
+    name = "search" and flags = 2 or
+    name = "match" and flags = 2 or
+    name = "split" and flags = 3 or
+    name = "findall" and flags = 2 or
+    name = "finditer" and flags = 2 or
+    name = "sub" and flags = 4 or
+    name = "subn" and flags = 4
+}
+/** Holds if the string `s` is the first argument of a call to an attribute of the `re` module, and `mode` is "None" or a mode named by the flags argument of that call. */
+predicate used_as_regex(Expr s, string mode) {
+    (s instanceof Bytes or s instanceof Unicode)
+    and
+    exists(ModuleObject re | re.getName() = "re" |
+        /* Call to re.xxx(regex, ... [mode]) */
+        exists(CallNode call, string name |
+            call.getArg(0).refersTo(_, _, s.getAFlowNode()) and
+            call.getFunction().refersTo(re.getAttribute(name)) |
+            mode = "None"
+            or
+            exists(Object obj |
+                mode = mode_from_mode_object(obj) |
+                exists(int flags_arg |
+                    re_module_function(name, flags_arg) and
+                    call.getArg(flags_arg).refersTo(obj)
+                )
+                or
+                call.getArgByName("flags").refersTo(obj)
+            )
+        )
+    )
+}
+/** Gets the name of a `re` flag (for example "IGNORECASE") that `obj` denotes, either directly or as an operand of a `|` expression. */
+string mode_from_mode_object(Object obj) {
+    (
+        result = "DEBUG" or result = "IGNORECASE" or result = "LOCALE" or
+        result = "MULTILINE" or result = "DOTALL" or result = "UNICODE" or
+        result = "VERBOSE"
+    ) and
+    exists(ModuleObject re | re.getName() = "re" and re.getAttribute(result) = obj)
+    or
+    exists(BinaryExpr be, Object sub | obj.getOrigin() = be |
+        be.getOp() instanceof BitOr and
+        be.getASubExpression().refersTo(sub) and
+        result = mode_from_mode_object(sub)
+    )
+}
+
+/** A StrConst used as a regular expression */
+abstract class RegexString extends Expr {
+
+    RegexString() {
+        (this instanceof Bytes or this instanceof Unicode)
+    }
+    /** Holds if an unescaped `[` is at `start` and the contents of the character set begin at `end` (after an optional `^`). */
+    predicate char_set_start(int start, int end) {
+        this.nonEscapedCharAt(start) = "[" and
+        (
+            this.getChar(start+1) = "^" and end = start + 2
+            or
+            not this.getChar(start+1) = "^" and end = start + 1
+        )
+    }
+
+    /** Whether there is a character class, between start (inclusive) and end (exclusive) */
+    predicate charSet(int start, int end) {
+        exists(int inner_start, int inner_end |
+            this.char_set_start(start, inner_start) |
+            end = inner_end + 1 and inner_end > inner_start and
+            this.nonEscapedCharAt(inner_end) = "]" and
+            /* `inner_end` is the first unescaped `]` strictly after `inner_start` */
+            not exists(int mid | this.nonEscapedCharAt(mid) = "]" |
+                mid > inner_start and mid < inner_end
+            )
+        )
+    }
+    /** Holds if the character at `pos` is a backslash that escapes the character following it. */
+    predicate escapingChar(int pos) {
+        this.escaping(pos) = true
+    }
+    /** Gets whether the character at `pos` is an escaping backslash: a backslash escapes unless the backslash before it does. */
+    private boolean escaping(int pos) {
+        pos = -1 and result = false
+        or
+        this.getChar(pos) = "\\" and result = this.escaping(pos-1).booleanNot()
+        or
+        this.getChar(pos) != "\\" and result = false
+    }
+
+    /** Gets the text of this regex */
+    string getText() {
+        result = ((Unicode)this).getS()
+        or
+        result = ((Bytes)this).getS()
+    }
+    /** Gets the character at index `i` of the text of this regex. */
+    string getChar(int i) {
+        result = this.getText().charAt(i)
+    }
+    /** Gets the character at index `i`, provided the character before it is not an escaping backslash. */
+    string nonEscapedCharAt(int i) {
+        result = this.getText().charAt(i) and
+        not this.escapingChar(i-1)
+    }
+
+    private predicate isOptionDivider(int i) {
+        this.nonEscapedCharAt(i) = "|"
+    }
+
+    private predicate isGroupEnd(int i) {
+        this.nonEscapedCharAt(i) = ")"
+    }
+
+    private predicate isGroupStart(int i) {
+        this.nonEscapedCharAt(i) = "("
+    }
+    /** Holds if there is a character at index `i` that lies in no range accepted by `top_level`. */
+    predicate failedToParse(int i) {
+        exists(this.getChar(i))
+        and
+        not exists(int start, int end |
+            this.top_level(start, end) and
+            start <= i and
+            end > i
+        )
+    }
+    /** Holds if an escape sequence starts with the escaping backslash at `start` and ends just before `end`. */
+    private predicate escapedCharacter(int start, int end) {
+        this.escapingChar(start) and not exists(this.getText().substring(start+1, end+1).toInt()) and
+        (
+            this.getChar(start+1) = "x" and end = start + 4
+            or
+            end in [start+2..start+4] and
+            exists(this.getText().substring(start+1, end).toInt())
+            or
+            this.getChar(start+1) != "x" and end = start + 2
+        )
+    }
+    /** Holds if `index` lies strictly between the opening `[` and the closing `]` of a character set. */
+    private predicate inCharSet(int index) {
+        exists(int x, int y | this.charSet(x, y) and index in [x+1 .. y-2])
+    }
+
+    /* 'simple' characters are any that don't alter the parsing of the regex.
+     */
+    private predicate simpleCharacter(int start, int end) {
+        end = start+1 and
+        not this.charSet(start, _) and
+        not this.charSet(_, start+1) and
+        exists(string c |
+            c = this.getChar(start) |
+            /* Inside a character set: the first and last positions are always simple, others unless they are `-` */
+            exists(int x, int y, int z |
+                this.charSet(x, z) and
+                this.char_set_start(x, y) |
+                start = y
+                or
+                start = z-2
+                or
+                start > y and start < z-2 and not c = "-"
+            )
+            or
+            not this.inCharSet(start) and
+            not c = "(" and not c = "[" and
+            not c = ")" and not c = "|" and
+            not this.qualifier(start, _, _)
+        )
+    }
+    /** Holds if the range `start,end` is a single character: a simple character outside any escape sequence, or an escape sequence; in either case not within the opening of a group. */
+    predicate character(int start, int end) {
+        (
+            this.simpleCharacter(start, end) and
+            not exists(int x, int y | this.escapedCharacter(x, y) and x <= start and y >= end)
+            or
+            this.escapedCharacter(start, end)
+        )
+        and
+        not exists(int x, int y |
+            this.group_start(x, y) and x <= start and y >= end
+        )
+    }
+    /** Holds if the range `start,end` is a character that is not a special character. */
+    predicate normalCharacter(int start, int end) {
+        this.character(start, end)
+        and
+        not this.specialCharacter(start, end, _)
+    }
+    /** Holds if the range `start,end` is the character `char`, which is one of `$`, `^` or `.` and is outside any character set. */
+    predicate specialCharacter(int start, int end, string char) {
+        this.character(start, end)
+        and
+        end = start+1
+        and
+        char = this.getChar(start)
+        and
+        (char = "$" or char = "^" or char = ".")
+        and
+        not this.inCharSet(start)
+    }
+
+    /** Whether the text in the range start,end is a group */
+    predicate group(int start, int end) {
+        this.groupContents(start, end, _, _)
+        or
+        this.emptyGroup(start, end)
+    }
+
+    /** Gets the number of the group in start,end */
+    int getGroupNumber(int start, int end) {
+        this.group(start, end) and
+        result = count(int i | this.group(i, _) and i < start and not this.non_capturing_group_start(i, _)) + 1
+    }
+
+    /** Gets the name, if it has one, of the group in start,end */
+    string getGroupName(int start, int end) {
+        this.group(start, end)
+        and
+        exists(int name_end |
+            this.named_group_start(start, name_end) and
+            result = this.getText().substring(start+4, name_end-1)
+        )
+    }
+
+    /** Whether the text in the range start, end is a group and can match the empty string. */
+    predicate zeroWidthMatch(int start, int end) {
+        this.emptyGroup(start, end)
+        or
+        this.negativeAssertionGroup(start, end)
+        or
+        positiveLookaheadAssertionGroup(start, end)
+        or
+        this.positiveLookbehindAssertionGroup(start, end)
+    }
+    /** Holds if the range `start,end` is a group whose opening is immediately followed by `)`. */
+    private predicate emptyGroup(int start, int end) {
+        exists(int endm1 |
+            end = endm1+1 |
+            this.group_start(start, endm1) and
+            this.isGroupEnd(endm1)
+        )
+    }
+    /** Holds if the range `start,end` is an empty group, a negative assertion or a positive lookahead assertion. */
+    private predicate emptyMatchAtStartGroup(int start, int end) {
+        this.emptyGroup(start, end)
+        or
+        this.negativeAssertionGroup(start, end)
+        or
+        this.positiveLookaheadAssertionGroup(start, end)
+    }
+    /** Holds if the range `start,end` is an empty group, a negative assertion or a positive lookbehind assertion. */
+    private predicate emptyMatchAtEndGroup(int start, int end) {
+        this.emptyGroup(start, end)
+        or
+        this.negativeAssertionGroup(start, end)
+        or
+        this.positiveLookbehindAssertionGroup(start, end)
+    }
+
+    private predicate negativeAssertionGroup(int start, int end) {
+        exists(int in_start |
+            this.negative_lookahead_assertion_start(start, in_start)
+            or
+            this.negative_lookbehind_assertion_start(start, in_start) |
+            this.groupContents(start, end, in_start, _)
+        )
+    }
+
+    private predicate positiveLookaheadAssertionGroup(int start, int end) {
+        exists(int in_start |
+            this.lookahead_assertion_start(start, in_start) |
+            this.groupContents(start, end, in_start, _)
+        )
+    }
+
+    private predicate positiveLookbehindAssertionGroup(int start, int end) {
+        exists(int in_start |
+            this.lookbehind_assertion_start(start, in_start) |
+            this.groupContents(start, end, in_start, _)
+        )
+    }
+    /** Holds if the opening of a group of any kind spans `start,end`. */
+    private predicate group_start(int start, int end) {
+        this.non_capturing_group_start(start, end)
+        or
+        this.flag_group_start(start, end, _)
+        or
+        this.named_group_start(start, end)
+        or
+        this.named_backreference_start(start, end)
+        or
+        this.lookahead_assertion_start(start, end)
+        or
+        this.negative_lookahead_assertion_start(start, end)
+        or
+        this.lookbehind_assertion_start(start, end)
+        or
+        this.negative_lookbehind_assertion_start(start, end)
+        or
+        this.comment_group_start(start, end)
+        or
+        this.simple_group_start(start, end)
+    }
+    /** Holds if `(?:` spans `start,end`. */
+    private predicate non_capturing_group_start(int start, int end) {
+        this.isGroupStart(start) and
+        this.getChar(start+1) = "?" and
+        this.getChar(start+2) = ":" and
+        end = start+3
+    }
+    /** Holds if `(` not followed by `?` spans `start,end`. */
+    private predicate simple_group_start(int start, int end) {
+        this.isGroupStart(start) and
+        this.getChar(start+1) != "?" and end = start+1
+    }
+    /** Holds if `(?P<name>` spans `start,end`. */
+    private predicate named_group_start(int start, int end) {
+        this.isGroupStart(start) and
+        this.getChar(start+1) = "?" and
+        this.getChar(start+2) = "P" and
+        this.getChar(start+3) = "<" and
+        not this.getChar(start+4) = "=" and
+        not this.getChar(start+4) = "!" and
+        exists(int name_end |
+            name_end = min(int i | i > start+4 and this.getChar(i) = ">") and
+            end = name_end + 1
+        )
+    }
+    /** Holds if `(?P=` starts at `start`; `end` is the index of the next `?` after `start+4`. */
+    private predicate named_backreference_start(int start, int end) {
+        this.isGroupStart(start) and
+        this.getChar(start+1) = "?" and
+        this.getChar(start+2) = "P" and
+        this.getChar(start+3) = "=" and
+        end = min(int i | i > start+4 and this.getChar(i) = "?") /* NOTE(review): searching for "?" rather than ")" looks unintended -- confirm */
+    }
+    /** Holds if `(?c` spans `start,end`, where `c` is one of the flag letters `i`, `L`, `m`, `s`, `u` or `x`. */
+    private predicate flag_group_start(int start, int end, string c) {
+        this.isGroupStart(start) and
+        this.getChar(start+1) = "?" and
+        end = start+3 and
+        c = this.getChar(start+2) and
+        (
+            c = "i" or
+            c = "L" or
+            c = "m" or
+            c = "s" or
+            c = "u" or
+            c = "x"
+        )
+    }
+
+    /** Gets the mode of this regular expression string if
+     * it is defined by a prefix.
+     */
+    string getModeFromPrefix() {
+        exists(string c |
+            this.flag_group_start(_, _, c) |
+            c = "i" and result = "IGNORECASE"
+            or
+            c = "L" and result = "LOCALE"
+            or
+            c = "m" and result = "MULTILINE"
+            or
+            c = "s" and result = "DOTALL"
+            or
+            c = "u" and result = "UNICODE"
+            or
+            c = "x" and result = "VERBOSE"
+        )
+    }
+    /** Holds if `(?=` spans `start,end`. */
+    private predicate lookahead_assertion_start(int start, int end) {
+        this.isGroupStart(start) and
+        this.getChar(start+1) = "?" and
+        this.getChar(start+2) = "=" and
+        end = start+3
+    }
+    /** Holds if `(?!` spans `start,end`. */
+    private predicate negative_lookahead_assertion_start(int start, int end) {
+        this.isGroupStart(start) and
+        this.getChar(start+1) = "?" and
+        this.getChar(start+2) = "!" and
+        end = start+3
+    }
+    /** Holds if `(?<=` spans `start,end`. */
+    private predicate lookbehind_assertion_start(int start, int end) {
+        this.isGroupStart(start) and
+        this.getChar(start+1) = "?" and
+        this.getChar(start+2) = "<" and
+        this.getChar(start+3) = "=" and
+        end = start+4
+    }
+    /** Holds if `(?<!` spans `start,end`. */
+    private predicate negative_lookbehind_assertion_start(int start, int end) {
+        this.isGroupStart(start) and
+        this.getChar(start+1) = "?" and
+        this.getChar(start+2) = "<" and
+        this.getChar(start+3) = "!" and
+        end = start+4
+    }
+    /** Holds if `(?#` spans `start,end`. */
+    private predicate comment_group_start(int start, int end) {
+        this.isGroupStart(start) and
+        this.getChar(start+1) = "?" and
+        this.getChar(start+2) = "#" and
+        end = start+3
+    }
+    /** Holds if a group spans `start,end` and its contents, a top-level range followed by `)`, span `in_start,in_end`. */
+    predicate groupContents(int start, int end, int in_start, int in_end) {
+        this.group_start(start, in_start) and
+        end = in_end + 1 and
+        this.top_level(in_start, in_end) and
+        this.isGroupEnd(in_end)
+    }
+    /** Holds if `(?P=name)` spans `start,end`. NOTE(review): `named_backreference_start` only yields an end greater than `start+4`, so the call below with `start+4` looks unsatisfiable -- confirm. */
+    private predicate named_backreference(int start, int end, string name) {
+        this.named_backreference_start(start, start+4) and
+        end = min(int i | i > start+4 and this.getChar(i) = ")") + 1 and
+        name = this.getText().substring(start+4, end-2)
+    }
+    /** Holds if a backslash followed by a non-zero number of one or two digits spans `start,end`, and `value` is that number. */
+    private predicate numbered_backreference(int start, int end, int value) {
+        this.escapingChar(start)
+        and
+        exists(string text, string svalue, int len |
+            end = start + len and
+            text = this.getText() and len in [2..3] |
+            svalue = text.substring(start+1, start+len) and
+            value = svalue.toInt() and
+            /* Take the longest run of digits: extending by one more character must not give a number */
+            not exists(text.substring(start+1, start+len+1).toInt()) and
+            value != 0
+        )
+    }
+
+    /** Whether the text in the range start,end is a back reference */
+    predicate backreference(int start, int end) {
+        this.numbered_backreference(start, end, _)
+        or
+        this.named_backreference(start, end, _)
+    }
+
+    /** Gets the number of the back reference in start,end */
+    int getBackrefNumber(int start, int end) {
+        this.numbered_backreference(start, end, result)
+    }
+
+    /** Gets the name, if it has one, of the back reference in start,end */
+    string getBackrefName(int start, int end) {
+        this.named_backreference(start, end, result)
+    }
+    /** Holds if the range `start,end` is a character outside any character set, a group, or a character set. */
+    private predicate baseItem(int start, int end) {
+        this.character(start, end) and not exists(int x, int y | this.charSet(x, y) and x <= start and y >= end)
+        or
+        this.group(start, end)
+        or
+        this.charSet(start, end)
+    }
+    /** Holds if a qualifier, optionally followed by `?`, spans `start,end`; `maybe_empty` is as for `short_qualifier`. */
+    private predicate qualifier(int start, int end, boolean maybe_empty) {
+        this.short_qualifier(start, end, maybe_empty) and not this.getChar(end) = "?"
+        or
+        exists(int short_end |
+            this.short_qualifier(start, short_end, maybe_empty) |
+            if this.getChar(short_end) = "?" then
+                end = short_end+1
+            else
+                end = short_end
+        )
+    }
+    /** Holds if `+`, `*`, `?` or a `{...}` range spans `start,end`; `maybe_empty` is `true` if the qualifier permits zero repetitions. */
+    private predicate short_qualifier(int start, int end, boolean maybe_empty) {
+        (
+            this.getChar(start) = "+" and maybe_empty = false
+            or
+            this.getChar(start) = "*" and maybe_empty = true
+            or
+            this.getChar(start) = "?" and maybe_empty = true
+        ) and end = start + 1
+        or
+        exists(int endin | end = endin + 1 |
+            this.getChar(start) = "{" and this.getChar(endin) = "}" and
+            end > start and
+            /* NOTE(review): both patterns below require a comma, so an exact count such as `{3}` is not matched -- confirm whether that is intended */
+            exists(string multiples |
+                multiples = this.getText().substring(start+1, endin) |
+                multiples.regexpMatch("0*,[0-9]*") and maybe_empty = true
+                or
+                multiples.regexpMatch("0*[1-9][0-9]*,[0-9]*") and maybe_empty = false
+            )
+            and
+            not exists(int mid |
+                this.getChar(mid) = "}" and
+                mid > start and mid < endin
+            )
+        )
+    }
+
+    /** Whether the text in the range start,end is a qualified item, where item is a character,
+     * a character set or a group.
+     */
+    predicate qualifiedItem(int start, int end, boolean maybe_empty) {
+        this.qualifiedPart(start, _, end, maybe_empty)
+    }
+    /** Holds if a base item spans `start,part_end` and is followed by a qualifier spanning `part_end,end`. */
+    private predicate qualifiedPart(int start, int part_end, int end, boolean maybe_empty) {
+        this.baseItem(start, part_end) and
+        this.qualifier(part_end, end, maybe_empty)
+    }
+    /** Holds if the range `start,end` is a qualified item, or a base item that is not followed by a qualifier. */
+    private predicate item(int start, int end) {
+        this.qualifiedItem(start, end, _)
+        or
+        this.baseItem(start, end) and not this.qualifier(end, _, _)
+    }
+    /** Holds if the range `start,end` is one or more consecutive items beginning at index 0, just after a group opening, or just after `|`. */
+    private predicate subsequence(int start, int end) {
+        (
+            start = 0 or
+            this.group_start(_, start) or
+            this.isOptionDivider(start-1)
+        )
+        and
+        this.item(start, end) or
+        (
+            exists(int mid |
+                this.subsequence(start, mid) and
+                this.item(mid, end)
+            )
+        )
+    }
+
+    /** Whether the text in the range start,end is a sequence of 1 or more items, where an item is a character,
+     * a character set or a group.
+ */ + predicate sequence(int start, int end) { + this.sequenceOrQualified(start, end) and + not this.qualifiedItem(start, end, _) + } + + private predicate sequenceOrQualified(int start, int end) { + this.subsequence(start, end) and + not this.item_start(end) + } + + private predicate item_start(int start) { + this.character(start, _) or + this.isGroupStart(start) or + this.charSet(start, _) + } + + private predicate item_end(int end) { + this.character(_, end) or + exists(int endm1 | this.isGroupEnd(endm1) and end = endm1 + 1) or + this.charSet(_, end) or + this.qualifier(_, end, _) + } + + private predicate top_level(int start, int end) { + this.subalternation(start, end, _) and + not this.isOptionDivider(end) + } + + private predicate subalternation(int start, int end, int item_start) { + this.sequenceOrQualified(start, end) and not this.isOptionDivider(start-1) and + item_start = start + or + exists(int endp1 | end = endp1-1 | + start = end and not this.item_end(start) and this.isOptionDivider(endp1) and + item_start = start + ) + or + exists(int mid | + this.subalternation(start, mid, _) and + this.isOptionDivider(mid) and + item_start = mid+1 | + this.sequenceOrQualified(item_start, end) + or + not this.item_start(end) and end = item_start + ) + } + + /** Whether the text in the range start,end is an alternation + */ + predicate alternation(int start, int end) { + this.top_level(start, end) and + exists(int less | this.subalternation(start, less, _) and less < end) + } + + /** Whether the text in the range start,end is an alternation and the text in part_start, part_end is one of the + * options in that alternation. + */ + predicate alternationOption(int start, int end, int part_start, int part_end) { + this.alternation(start, end) and + this.subalternation(start, part_end, part_start) + } + + /** A part of the regex that may match the start of the string. 
*/ + private predicate firstPart(int start, int end) { + start = 0 and end = this.getText().length() + or + exists(int x | + this.firstPart(x, end) | + this.emptyMatchAtStartGroup(x, start) or + this.qualifiedItem(x, start, true) or + this.specialCharacter(x, start, "^") + ) + or + exists(int y | + this.firstPart(start, y) | + this.item(start, end) + or + this.qualifiedPart(start, end, y, _) + ) + or + exists(int x, int y | + this.firstPart(x, y) | + this.groupContents(x, y, start, end) + or + this.alternationOption(x, y, start, end) + ) + } + + /** A part of the regex that may match the end of the string. */ + private predicate lastPart(int start, int end) { + start = 0 and end = this.getText().length() + or + exists(int y | + this.lastPart(start, y) | + this.emptyMatchAtEndGroup(end, y) or + this.qualifiedItem(end, y, true) or + this.specialCharacter(end, y, "$") + ) + or + exists(int x | + this.lastPart(x, end) and + this.item(start, end) + ) + or + exists(int y | + this.lastPart(start, y) | + this.qualifiedPart(start, end, y, _) + ) + or + exists(int x, int y | + this.lastPart(x, y) | + this.groupContents(x, y, start, end) + or + this.alternationOption(x, y, start, end) + ) + } + + /** Whether the item at [start, end) is one of the first items + * to be matched. + */ + predicate firstItem(int start, int end) { + ( + this.character(start, end) + or + this.qualifiedItem(start, end, _) + or + this.charSet(start, end) + ) + and + this.firstPart(start, end) + } + + /** Whether the item at [start, end) is one of the last items + * to be matched. + */ + predicate lastItem(int start, int end) { + ( + this.character(start, end) + or + this.qualifiedItem(start, end, _) + or + this.charSet(start, end) + ) + and + this.lastPart(start, end) + } + +} + + +/** A StrConst used as a regular expression */ +class Regex extends RegexString { + + Regex() { + used_as_regex(this, _) + } + + /** Gets a mode (if any) of this regular expression. 
Can be any of: + * DEBUG + * IGNORECASE + * LOCALE + * MULTILINE + * DOTALL + * UNICODE + * VERBOSE + */ + string getAMode() { + result != "None" and + used_as_regex(this, result) + or + result = this.getModeFromPrefix() + } + +} \ No newline at end of file diff --git a/python/ql/src/semmle/python/security/Crypto.qll b/python/ql/src/semmle/python/security/Crypto.qll new file mode 100644 index 00000000000..38ce0fa3541 --- /dev/null +++ b/python/ql/src/semmle/python/security/Crypto.qll @@ -0,0 +1,195 @@ +import python +import semmle.python.security.TaintTracking + +private import semmle.python.security.SensitiveData +private import semmle.crypto.Crypto as CryptoLib + + +abstract class WeakCryptoSink extends TaintSink { + + override predicate sinks(TaintKind taint) { + taint instanceof SensitiveData + } +} + +module Pycrypto { + + ModuleObject cipher(string name) { + exists(PackageObject crypto | + crypto.getName() = "Crypto.Cipher" | + crypto.submodule(name) = result + ) + } + + class CipherInstance extends TaintKind { + + string name; + + CipherInstance() { + this = "Crypto.Cipher." 
+ name and + exists(cipher(name)) + } + + string getName() { + result = name + } + + CryptoLib::CryptographicAlgorithm getAlgorithm() { + result.getName() = name + } + + predicate isWeak() { + this.getAlgorithm().isWeak() + } + + } + + class CipherInstanceSource extends TaintSource { + + CipherInstance instance; + + CipherInstanceSource() { + exists(AttrNode attr | + this.(CallNode).getFunction() = attr and + attr.getObject("new").refersTo(cipher(instance.getName())) + ) + } + + override string toString() { + result = "Source of " + instance + } + + override predicate isSourceOf(TaintKind kind) { + kind = instance + } + + } + + class PycryptoWeakCryptoSink extends WeakCryptoSink { + + string name; + + PycryptoWeakCryptoSink() { + exists(CallNode call, AttrNode method, CipherInstance Cipher | + call.getAnArg() = this and + call.getFunction() = method and + Cipher.taints(method.getObject("encrypt")) and + Cipher.isWeak() and + Cipher.getName() = name + ) + } + + override string toString() { + result = "Use of weak crypto algorithm " + name + } + + } + +} + +module Cryptography { + + PackageObject ciphers() { + result.getName() = "cryptography.hazmat.primitives.ciphers" + } + + class CipherClass extends ClassObject { + CipherClass() { + ciphers().getAttribute("Cipher") = this + } + + } + + class AlgorithmClass extends ClassObject { + + AlgorithmClass() { + ciphers().submodule("algorithms").getAttribute(_) = this + } + + string getAlgorithmName() { + result = this.declaredAttribute("name").(StringObject).getText() + } + + predicate isWeak() { + exists(CryptoLib::CryptographicAlgorithm algo | + algo.getName() = this.getAlgorithmName() and + algo.isWeak() + ) + } + } + + class CipherInstance extends TaintKind { + + AlgorithmClass cls; + + CipherInstance() { + this = "cryptography.Cipher." 
+ cls.getAlgorithmName() + } + + AlgorithmClass getAlgorithm() { + result = cls + } + + predicate isWeak() { + cls.isWeak() + } + + override TaintKind getTaintOfMethodResult(string name) { + name = "encryptor" and + result.(Encryptor).getAlgorithm() = this.getAlgorithm() + } + + } + + class CipherSource extends TaintSource { + + CipherSource() { + this.(CallNode).getFunction().refersTo(any(CipherClass cls)) + } + + override predicate isSourceOf(TaintKind kind) { + this.(CallNode).getArg(0).refersTo(_, kind.(CipherInstance).getAlgorithm(), _) + } + + override string toString() { + result = "cryptography.Cipher.source" + } + + } + + class Encryptor extends TaintKind { + + AlgorithmClass cls; + + Encryptor() { + this = "cryptography.encryptor." + cls.getAlgorithmName() + + } + + AlgorithmClass getAlgorithm() { + result = cls + } + + } + + class CryptographyWeakCryptoSink extends WeakCryptoSink { + + CryptographyWeakCryptoSink() { + exists(CallNode call, AttrNode method, Encryptor encryptor | + call.getAnArg() = this and + call.getFunction() = method and + encryptor.taints(method.getObject("update")) and + encryptor.getAlgorithm().isWeak() + ) + } + + override string toString() { + result = "Use of weak crypto algorithm" + } + + } + + +} + + diff --git a/python/ql/src/semmle/python/security/Exceptions.qll b/python/ql/src/semmle/python/security/Exceptions.qll new file mode 100644 index 00000000000..a321c9df839 --- /dev/null +++ b/python/ql/src/semmle/python/security/Exceptions.qll @@ -0,0 +1,127 @@ +/** + * Provides classes and predicates for tracking exceptions and information + * associated with exceptions. 
+ */ + +import python +import semmle.python.security.TaintTracking +import semmle.python.security.strings.Basic + +private ModuleObject theTracebackModule() { + result.getName() = "traceback" +} + +private FunctionObject traceback_function(string name) { + result = theTracebackModule().getAttribute(name) +} + +/** + * This represents information relating to an exception, for instance the + * message, arguments or parts of the exception traceback. + */ +class ExceptionInfo extends StringKind { + + ExceptionInfo() { + this = "exception.info" + } +} + + +/** + * This kind represents exceptions themselves. + */ +class ExceptionKind extends TaintKind { + + ExceptionKind() { + this = "exception.kind" + } + + override TaintKind getTaintOfAttribute(string name) { + name = "args" and result instanceof ExceptionInfoSequence + or + name = "message" and result instanceof ExceptionInfo + } +} + +/** + * A source of exception objects, either explicitly created, or captured by an + * `except` statement. + */ +class ExceptionSource extends TaintSource { + + ExceptionSource() { + exists(ClassObject cls | + cls.isSubclassOf(theExceptionType()) and + this.(ControlFlowNode).refersTo(_, cls, _) + ) + or + this = any(ExceptStmt s).getName().getAFlowNode() + } + + override string toString() { + result = "exception.source" + } + + override predicate isSourceOf(TaintKind kind) { + kind instanceof ExceptionKind + } +} + +/** + * Represents a sequence of pieces of information relating to an exception, + * for instance the contents of the `args` attribute, or the stack trace. + */ +class ExceptionInfoSequence extends SequenceKind { + ExceptionInfoSequence() { + this.getItem() instanceof ExceptionInfo + } +} + + +/** + * Represents calls to functions in the `traceback` module that return + * sequences of exception information. 
+ */ +class CallToTracebackFunction extends TaintSource { + + CallToTracebackFunction() { + exists(string name | + name = "extract_tb" or + name = "extract_stack" or + name = "format_list" or + name = "format_exception_only" or + name = "format_exception" or + name = "format_tb" or + name = "format_stack" + | + this = traceback_function(name).getACall() + ) + } + + override string toString() { + result = "exception.info.sequence.source" + } + + override predicate isSourceOf(TaintKind kind) { + kind instanceof ExceptionInfoSequence + } +} + +/** + * Represents calls to functions in the `traceback` module that return a single + * string of information about an exception. + */ +class FormattedTracebackSource extends TaintSource { + + FormattedTracebackSource() { + this = traceback_function("format_exc").getACall() + } + + override string toString() { + result = "exception.info.source" + } + + override predicate isSourceOf(TaintKind kind) { + kind instanceof ExceptionInfo + } +} diff --git a/python/ql/src/semmle/python/security/README.md b/python/ql/src/semmle/python/security/README.md new file mode 100644 index 00000000000..1b8af176c2f --- /dev/null +++ b/python/ql/src/semmle/python/security/README.md @@ -0,0 +1,95 @@ +# Python Taint Tracking Library + +The taint tracking library can be broken down into three parts. + +1. Specification of sources, sinks and flows. +2. The high level query API +3. The implementation. + + +## Specification + +There are five parts to the specification of a taint tracking query. +These are: + +1. Kinds + + The Python taint tracking library supports arbitrary kinds of taint. This is useful where you want to track something related to "taint", but that is in itself not dangerous. +For example, we might want to track the flow of requests objects. Request objects are not in themselves tainted, but they do contain tainted data. For example, the length or timestamp of a request may not pose a risk, but the GET or POST string probably do. 
+So, we would want to track request objects distinctly from the request data in the GET or POST field. + +2. Sources + + Sources of taint can be added by importing a predefined sub-type of `TaintSource`, or defining new ones. + +3. Sinks (or vulnerabilities) + + Sinks can be added by importing a predefined sub-type of `TaintSink` or defining new ones. + +4. Data flow extensions + + Additional dataflow edges (node->node, node->var, var->var or var->node) can be added by importing predefined extensions or by adding new ones. Additional edges can be specified by overriding `DataFlowExtension::DataFlowNode` or `DataFlowExtension::DataFlowVariable`. + +5. Taint tracking extensions + + Taint tracking extensions, where only a particular kind of taint flows, can be added by overriding any or all of the following methods on `TaintKind`: + + The two general purpose extensions: + + `TaintKind getTaintForFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode)` + + `predicate additionalFlowStepVar(EssaVariable fromvar, EssaVariable tovar)` + + And the two special purpose extensions for tainted methods or attributes. These allow simple taint-tracking extensions, without worrying about the underlying flow graph. + + `TaintKind getTaintOfAttribute(string name)` + + `TaintKind getTaintOfMethodResult(string name)` + + +## The high-level query API + +The `TaintedNode` fully describes the taint flow graph. 
+The full graph can be expressed as: + +```ql +from TaintedNode n, TaintedNode s +where s = n.getASuccessor() +select n, s +``` + +The source -> sink relation can be expressed either using `TaintedNode`: +```ql +from TaintedNode src, TaintedNode sink +where src.isSource() and sink.isSink() and src.getASuccessor*() = sink +select src, sink +``` +or, using the specification API: +```ql +from TaintSource src, TaintSink sink +where src.flowsToSink(sink) +select src, sink +``` + +## The implementation + +The data-flow graph used by the taint-tracking library is the one created by the points-to analysis, +and consists of the coarse data-flow graph produced by `semmle/python/data-flow/SsaDefinitions.qll` +enhanced with precise variable flows, call graph and type information. +This graph is then enhanced with additional flows specified in part 1 above. +Since the call graph and points-to information are context sensitive, the taint graph must also be context sensitive. + +The taint graph is a simple directed graph where each node consists of a +`(CFG node, context, taint)` triple although it could be thought of more naturally +as a number of distinct graphs, one for each input taint-kind consisting of data flow nodes, +`(CFG node, context)` pairs, labelled with their `taint`. + +The `TrackedValue` used in the implementation is not the taint kind specified by the user, +but describes both the kind of taint and how that taint relates to any object referred to by a data-flow graph node or edge. +Currently, only two types of `taint` are supported: simple taint, where the object is actually tainted; +and attribute taint where a named attribute of the referred object is tainted. + +Support for tainted members (both specific members of tuples and the like, +and generic members for mutable collections) is likely to be added in the near future, and other forms are possible. +The types of taints are hard-wired with no user-visible extension method at the moment. 
+ diff --git a/python/ql/src/semmle/python/security/SensitiveData.qll b/python/ql/src/semmle/python/security/SensitiveData.qll new file mode 100644 index 00000000000..6786c2498f5 --- /dev/null +++ b/python/ql/src/semmle/python/security/SensitiveData.qll @@ -0,0 +1,103 @@ +/** + * Provides classes and predicates for identifying sensitive data and methods for security. + * + * 'Sensitive' data in general is anything that should not be sent around in unencrypted form. This + * library tries to guess where sensitive data may either be stored in a variable or produced by a + * method. + * + * In addition, there are methods that ought not to be executed or not in a fashion that the user + * can control. This includes authorization methods such as logins, and sending of data, etc. + */ + +import python +import semmle.python.security.TaintTracking + + +/** A regular expression that identifies strings that look like they represent secret data that are not passwords. */ +private string suspiciousNonPassword() { + result = "(?is).*(account|accnt|(? sink relation can be expressed either using `TaintedNode`: + * ```ql + * from TaintedNode src, TaintedNode sink + * where src.isSource() and sink.isSink() and src.getASuccessor*() = sink + * select src, sink + * ``` + * or, using the specification API: + * ```ql + * from TaintSource src, TaintSink sink + * where src.flowsToSink(sink) + * select src, sink + * ``` + * + * ## The implementation + * + * The data-flow graph used by the taint-tracking library is the one created by the points-to analysis, + * and consists of the base data-flow graph produced by `semmle/python/data-flow/SsaDefinitions.qll` + * enhanced with precise variable flows, call graph and type information. + * This graph is then enhanced with additional flows as specified above. + * Since the call graph and points-to information is context sensitive, the taint graph must also be context sensitive. 
+ * + * The taint graph is a directed graph where each node consists of a + * `(CFG node, context, taint)` triple although it could be thought of more naturally + * as a number of distinct graphs, one for each input taint-kind consisting of data flow nodes, + * `(CFG node, context)` pairs, labelled with their `taint`. + * + * The `TrackedValue` used in the implementation is not the taint kind specified by the user, + * but describes both the kind of taint and how that taint relates to any object referred to by a data-flow graph node or edge. + * Currently, only two types of `taint` are supported: simple taint, where the object is actually tainted; + * and attribute taint where a named attribute of the referred object is tainted. + * + * Support for tainted members (both specific members of tuples and the like, + * and generic members for mutable collections) are likely to be added in the near future and other forms are possible. + * The types of taints are hard-wired with no user-visible extension method at the moment. + */ + +import python +private import semmle.python.pointsto.Filters as Filters + +/** A 'kind' of taint. This may be almost anything, + * but it is typically something like a "user-defined string". + * Examples include, data from a http request object, + * data from an SMS or other mobile data source, + * or, for a super secure system, environment variables or + * the local file system. + */ +abstract class TaintKind extends string { + + bindingset[this] + TaintKind() { any() } + + /** Gets the kind of taint that the named attribute will have if an object is tainted with this taint. + * In other words, if `x` has this kind of taint then it implies that `x.name` + * has `result` kind of taint. + */ + TaintKind getTaintOfAttribute(string name) { none() } + + /** Gets the kind of taint results from calling the named method if an object is tainted with this taint. 
+ * In other words, if `x` has this kind of taint then it implies that `x.name()` + * has `result` kind of taint. + */ + TaintKind getTaintOfMethodResult(string name) { none() } + + /** Gets the taint resulting from the flow step `fromnode` -> `tonode`. + */ + TaintKind getTaintForFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode) { none() } + + /** DEPRECATED -- Use `TaintFlow.additionalFlowStepVar(EssaVariable fromvar, EssaVariable tovar, TaintKind kind)` instead. + * + * Holds if this kind of taint passes from variable `fromvar` to variable `tovar`. + * This predicate is present for completeness. It is unlikely that any `TaintKind` + * implementation will ever need to override it. + */ + predicate additionalFlowStepVar(EssaVariable fromvar, EssaVariable tovar) { none() } + + /** Holds if this kind of taint can start from `expr`. + * In other words, `expr` is a source of this kind of taint. + */ + final predicate startsFrom(ControlFlowNode expr) { + expr.(TaintSource).isSourceOf(this, _) + } + + /** Holds if this kind of taint "taints" `expr`. + */ + final predicate taints(ControlFlowNode expr) { + exists(TaintedNode n | + n.getTaintKind() = this and n.getNode() = expr + ) + } + + /** Gets the class of this kind of taint. + * For example, if this were a kind of string taint + * the `result` would be `theStrType()`. + */ + ClassObject getClass() { + none() + } + +} + +/** Taint kinds representing collections of other taint kinds. + * We use `{kind}` to represent a mapping of string to `kind` and + * `[kind]` to represent a flat collection of `kind`. + * The use of `{` and `[` is chosen to reflect dict and list literals + * in Python. We choose a single character prefix and suffix for simplicity + * and ease of preventing infinite recursion. 
+ */ +abstract class CollectionKind extends TaintKind { + + bindingset[this] + CollectionKind() { + (this.charAt(0) = "[" or this.charAt(0) = "{") and + /* Prevent any collection kinds more than 2 deep */ + not this.charAt(2) = "[" and not this.charAt(2) = "{" + } +} + +/** A taint kind representing a flat collection of kinds. + * Typically a sequence, but can include sets. + */ +class SequenceKind extends CollectionKind { + + TaintKind itemKind; + + SequenceKind() { + this = "[" + itemKind + "]" + } + + TaintKind getItem() { + result = itemKind + } + + override TaintKind getTaintForFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode) { + sequence_subscript_taint(tonode, fromnode, this, result) + or + result = this and + ( + slice(fromnode, tonode) or + tonode.(BinaryExprNode).getAnOperand() = fromnode + ) + or + result = this and copy_call(fromnode, tonode) + or + exists(BinaryExprNode mod | + mod = tonode and + mod.getOp() instanceof Mod and + mod.getAnOperand() = fromnode and + result = this.getItem() and + result.getClass() = theStrType() + ) + or + result = this and sequence_call(fromnode, tonode) + } + + override TaintKind getTaintOfMethodResult(string name) { + name = "pop" and result = this.getItem() + } + +} + +/* Helper for SequenceKind.getTaintForFlowStep() */ +pragma [noinline] +private predicate sequence_subscript_taint(SubscriptNode sub, ControlFlowNode obj, SequenceKind seq, TaintKind key) { + sub.isLoad() and + sub.getValue() = obj and + if sub.getNode().getIndex() instanceof Slice then + seq = key + else + key = seq.getItem() +} + +/* Holds if tonode = fromnode[:], that is, a slice with neither start nor stop */ +private predicate slice(ControlFlowNode fromnode, SubscriptNode tonode) { + exists(Slice all | + all = tonode.getIndex().getNode() and + not exists(all.getStart()) and not exists(all.getStop()) and + tonode.getValue() = fromnode + ) +} + +/* A call that returns a copy (or similar) of the argument */ +private predicate copy_call(ControlFlowNode fromnode, CallNode tonode) { + 
tonode.getFunction().(AttrNode).getObject("copy") = fromnode + or + exists(ModuleObject copy, string name | + name = "copy" or name = "deepcopy" | + copy.getAttribute(name).(FunctionObject).getACall() = tonode and + tonode.getArg(0) = fromnode + ) + or + tonode.getFunction().refersTo(builtin_object("reversed")) and + tonode.getArg(0) = fromnode +} + +/** A taint kind representing a mapping of objects to kinds. + * Typically a dict, but can include other mappings. + */ +class DictKind extends CollectionKind { + + TaintKind valueKind; + + DictKind() { + this = "{" + valueKind + "}" + } + + TaintKind getValue() { + result = valueKind + } + + override TaintKind getTaintForFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode) { + result = valueKind and + tonode.(SubscriptNode).getValue() = fromnode and tonode.isLoad() + or + result = valueKind and + tonode.(CallNode).getFunction().(AttrNode).getObject("get") = fromnode + or + result = this and copy_call(fromnode, tonode) + or + result = this and + tonode.(CallNode).getFunction().refersTo(theDictType()) and + tonode.(CallNode).getArg(0) = fromnode + } + + override TaintKind getTaintOfMethodResult(string name) { + name = "get" and result = valueKind + or + name = "values" and result.(SequenceKind).getItem() = valueKind + or + name = "itervalues" and result.(SequenceKind).getItem() = valueKind + } + +} + + +/** A type of sanitizer of untrusted data. + * Examples include sanitizers for http responses, for DB access or for shell commands. + * Usually a sanitizer can only sanitize data for one particular use. + * For example, a sanitizer for DB commands would not be safe to use for http responses. + */ +abstract class Sanitizer extends string { + + bindingset[this] + Sanitizer() { any() } + + /** Holds if `taint` cannot flow through `node`. 
*/ + predicate sanitizingNode(TaintKind taint, ControlFlowNode node) { none() } + + /** Holds if a call to `callee` removes the `taint` */ + predicate sanitizingCall(TaintKind taint, FunctionObject callee) { none() } + + /** Holds if `test` shows the value to be untainted with `taint` */ + predicate sanitizingEdge(TaintKind taint, PyEdgeRefinement test) { none() } + + /** Holds if `test` shows the value to be untainted with `taint` */ + predicate sanitizingSingleEdge(TaintKind taint, SingleSuccessorGuard test) { none() } + + /** Holds if `def` shows the value to be untainted with `taint` */ + predicate sanitizingDefinition(TaintKind taint, EssaDefinition def) { none() } + +} + +/** DEPRECATED -- Use DataFlowExtension instead. + * An extension to taint-flow. For adding library or framework specific flows. + * Examples include flow from a request to an untrusted part of that request or + * from a socket to data from that socket. + */ +abstract class TaintFlow extends string { + + bindingset[this] + TaintFlow() { any() } + + /** Holds if `fromnode` being tainted with `fromkind` will result in `tonode` being tainted with `tokind`. + * Extensions to `TaintFlow` should override this to provide additional taint steps. + */ + predicate additionalFlowStep(ControlFlowNode fromnode, TaintKind fromkind, ControlFlowNode tonode, TaintKind tokind) { none() } + + /** Holds if the given `kind` of taint passes from variable `fromvar` to variable `tovar`. + * This predicate is present for completeness. Most `TaintFlow` implementations will not need to override it. + */ + predicate additionalFlowStepVar(EssaVariable fromvar, EssaVariable tovar, TaintKind kind) { none() } + + /** Holds if the given `kind` of taint cannot pass from variable `fromvar` to variable `tovar`. + * This predicate is present for completeness. Most `TaintFlow` implementations will not need to override it. 
+ */ + predicate prunedFlowStepVar(EssaVariable fromvar, EssaVariable tovar, TaintKind kind) { none() } + +} + +/** A source of taintedness. + * Users of the taint tracking library should override this + * class to provide their own sources. + */ +abstract class TaintSource extends @py_flow_node { + + string toString() { result = "Taint source" } + + /** + * Holds if `this` is a source of taint kind `kind` + * + * This must be overridden by subclasses to specify sources of taint. + * + * The smaller this predicate is, the faster `Taint.flowsTo()` will converge. + */ + abstract predicate isSourceOf(TaintKind kind); + + /** + * Holds if `this` is a source of taint kind `kind` for the given context. + * Generally, this should not need to be overridden; overriding `isSourceOf(kind)` should be sufficient. + * + * The smaller this predicate is, the faster `Taint.flowsTo()` will converge. + */ + predicate isSourceOf(TaintKind kind, CallContext context) { + context.appliesTo(this) and this.isSourceOf(kind) + } + + Location getLocation() { + result = this.(ControlFlowNode).getLocation() + } + + predicate hasLocationInfo(string fp, int bl, int bc, int el, int ec) { + this.getLocation().hasLocationInfo(fp, bl, bc, el, ec) + } + + /** Gets a TaintedNode for this taint source */ + TaintedNode getATaintNode() { + exists(TaintFlowImplementation::TrackedTaint taint, CallContext context | + this.isSourceOf(taint.getKind(), context) and + result = TTaintedNode_(taint, context, this) + ) + } + + /** Holds if taint can flow from this source to sink `sink` */ + final predicate flowsToSink(TaintKind srckind, TaintSink sink) { + exists(TaintedNode t | + t = this.getATaintNode() and + t.getTaintKind() = srckind and + t.flowsToSink(sink) + ) + } + + /** Holds if taint can flow from this source to taint sink `sink` */ + final predicate flowsToSink(TaintSink sink) { + this.flowsToSink(_, sink) + or + this instanceof ValidatingTaintSource and + sink instanceof ValidatingTaintSink and + 
exists(error()) + } +} + + +/** Warning: Advanced feature. Users are strongly recommended to use `TaintSource` instead. + * A source of taintedness on the ESSA data-flow graph. + * Users of the taint tracking library can override this + * class to provide their own sources on the ESSA graph. + */ +abstract class TaintedDefinition extends EssaNode { + + /** + * Holds if `this` is a source of taint kind `kind` + * + * This should be overridden by subclasses to specify sources of taint. + * + * The smaller this predicate is, the faster `Taint.flowsTo()` will converge. + */ + abstract predicate isSourceOf(TaintKind kind); + + /** + * Holds if `this` is a source of taint kind `kind` for the given context. + * Generally, this should not need to be overridden; overriding `isSourceOf(kind)` should be sufficient. + * + * The smaller this predicate is, the faster `Taint.flowsTo()` will converge. + */ + predicate isSourceOf(TaintKind kind, CallContext context) { + context.appliesToScope(this.getScope()) and this.isSourceOf(kind) + } + +} + +private class DictUpdate extends DataFlowExtension::DataFlowNode { + + MethodCallsiteRefinement call; + + DictUpdate() { + exists(CallNode c | + c = call.getCall() + | + c.getFunction().(AttrNode).getName() = "update" and + c.getArg(0) = this + ) + } + + override EssaVariable getASuccessorVariable() { + call.getVariable() = result + } + +} + +private class SequenceExtends extends DataFlowExtension::DataFlowNode { + + MethodCallsiteRefinement call; + + SequenceExtends() { + exists(CallNode c | + c = call.getCall() + | + c.getFunction().(AttrNode).getName() = "extend" and + c.getArg(0) = this + ) + } + + override EssaVariable getASuccessorVariable() { + call.getVariable() = result + } + +} + +/** A node that is vulnerable to one or more types of taint. + * These nodes provide the sinks when computing the taint flow graph. 
+ * An example would be an argument to a write to a http response object, + * such an argument would be vulnerable to unsanitized user-input (XSS). + * + * Users of the taint tracking library should extend this + * class to provide their own sink nodes. + */ +abstract class TaintSink extends @py_flow_node { + + string toString() { result = "Taint sink" } + + /** + * Holds if `this` "sinks" taint kind `kind` + * Typically this means that `this` is vulnerable to taint kind `kind`. + * + * This must be overridden by subclasses to specify vulnerabilities or other sinks of taint. + */ + abstract predicate sinks(TaintKind taint); + + Location getLocation() { + result = this.(ControlFlowNode).getLocation() + } + + predicate hasLocationInfo(string fp, int bl, int bc, int el, int ec) { + this.getLocation().hasLocationInfo(fp, bl, bc, el, ec) + } + +} + +/** Extension for data-flow, to help express data-flow paths that are + * library or framework specific and cannot be inferred by the general + * data-flow machinery. + */ +module DataFlowExtension { + + /** A control flow node that modifies the basic data-flow. */ + abstract class DataFlowNode extends @py_flow_node { + + string toString() { + result = "Dataflow extension node" + } + + /** Gets a successor node for data-flow. + * Data (all forms) is assumed to flow from `this` to `result` + */ + ControlFlowNode getASuccessorNode() { none() } + + /** Gets a successor variable for data-flow. + * Data (all forms) is assumed to flow from `this` to `result`. + * Note: This is an unlikely form of flow. See `DataFlowVariable.getASuccessorVariable()` + */ + EssaVariable getASuccessorVariable() { none() } + + /** Holds if data cannot flow from `this` to `succ`, + * even though it would normally do so. + */ + predicate prunedSuccessor(ControlFlowNode succ) { none() } + + /** Gets a successor node, where the successor node will be tainted with `tokind` + * when `this` is tainted with `fromkind`. 
+ * Extensions to `DataFlowNode` should override this to provide additional taint steps. + */ + ControlFlowNode getASuccessorNode(TaintKind fromkind, TaintKind tokind) { none() } + + /** Gets a successor node for data-flow with a change of context from callee to caller + * (going *up* the call-stack) across call-site `call`. + * Data (all forms) is assumed to flow from `this` to `result` + * Extensions to `DataFlowNode` should override this to provide additional taint steps. + */ + ControlFlowNode getAReturnSuccessorNode(CallNode call) { none() } + + /** Gets a successor node for data-flow with a change of context from caller to callee + * (going *down* the call-stack) across call-site `call`. + * Data (all forms) is assumed to flow from `this` to `result` + * Extensions to `DataFlowNode` should override this to provide additional taint steps. + */ + ControlFlowNode getACalleeSuccessorNode(CallNode call) { none() } + + } + + /** Data flow variable that modifies the basic data-flow. */ + class DataFlowVariable extends EssaVariable { + + /** Gets a successor node for data-flow. + * Data (all forms) is assumed to flow from `this` to `result` + * Note: This is an unlikely form of flow. See `DataFlowNode.getASuccessorNode()` + */ + ControlFlowNode getASuccessorNode() { none() } + + /** Gets a successor variable for data-flow. + * Data (all forms) is assumed to flow from `this` to `result`. + */ + EssaVariable getASuccessorVariable() { none() } + + /** Holds if data cannot flow from `this` to `succ`, + * even though it would normally do so. 
+ */ + predicate prunedSuccessor(EssaVariable succ) { none() } + + } +} + +private newtype TTaintedNode = + TTaintedNode_(TaintFlowImplementation::TrackedValue taint, CallContext context, ControlFlowNode n) { + exists(TaintKind kind | + taint = TaintFlowImplementation::TTrackedTaint(kind) | + n.(TaintSource).isSourceOf(kind, context) + ) + or + TaintFlowImplementation::step(_, taint, context, n) and + exists(TaintKind kind | + kind = taint.(TaintFlowImplementation::TrackedTaint).getKind() + or + kind = taint.(TaintFlowImplementation::TrackedAttribute).getKind(_) | + not exists(Sanitizer sanitizer | + sanitizer.sanitizingNode(kind, n) + ) + ) + or + user_tainted_def(_, taint, context, n) + } + +private predicate user_tainted_def(TaintedDefinition def, TaintFlowImplementation::TTrackedTaint taint, CallContext context, ControlFlowNode n) { + exists(TaintKind kind | + taint = TaintFlowImplementation::TTrackedTaint(kind) and + def.isSourceOf(kind, context) and + n = def.getDefiningNode() + ) +} + +/** A tainted data flow graph node. + * This is a triple of `(CFG node, data-flow context, taint)` + */ +class TaintedNode extends TTaintedNode { + + string toString() { result = this.getTrackedValue().toString() + " at " + this.getLocation() } + + TaintedNode getASuccessor() { + exists(TaintFlowImplementation::TrackedValue tokind, CallContext tocontext, ControlFlowNode tonode | + result = TTaintedNode_(tokind, tocontext, tonode) and + TaintFlowImplementation::step(this, tokind, tocontext, tonode) + ) + } + + /** Gets the taint for this node. */ + TaintFlowImplementation::TrackedValue getTrackedValue() { + this = TTaintedNode_(result, _, _) + } + + /** Gets the CFG node for this node. */ + ControlFlowNode getNode() { + this = TTaintedNode_(_, _, result) + } + + /** Gets the data-flow context for this node. 
*/ + CallContext getContext() { + this = TTaintedNode_(_, result, _) + } + + Location getLocation() { + result = this.getNode().getLocation() + } + + /** Holds if this node is a source of taint, that is, its CFG node is a `TaintSource` of the tracked taint kind in the context of this node. */ + predicate isSource() { + exists(TaintFlowImplementation::TrackedTaint taint, CallContext context, TaintSource node | + this = TTaintedNode_(taint, context, node) and + node.isSourceOf(taint.getKind(), context) + ) + } + + /** Gets the kind of taint that this node is tainted with. + * Doesn't apply if an attribute or item is tainted, only if this node is directly tainted. + * */ + TaintKind getTaintKind() { + this.getTrackedValue().(TaintFlowImplementation::TrackedTaint).getKind() = result + } + + /** Holds if taint flows from this node to the sink `sink` and + * reaches it with a taint kind that `sink` is a sink of. + */ + predicate flowsToSink(TaintSink sink) { + exists(TaintedNode node | + this.getASuccessor*() = node and + node.getNode() = sink and + sink.sinks(node.getTaintKind()) + ) + } + + /** Holds if this node is (or is reachable from) a source node, its underlying CFG node is a `TaintSink`, + * and that sink is vulnerable to this node's taint kind. + */ + predicate isVulnerableSink() { + exists(TaintedNode src, TaintSink vuln | + src.isSource() and + src.getASuccessor*() = this and + vuln = this.getNode() and + vuln.sinks(this.getTaintKind()) + ) + } + + TaintFlowImplementation::TrackedTaint fromAttribute(string name) { + result = this.getTrackedValue().(TaintFlowImplementation::TrackedAttribute).fromAttribute(name) + } + +} + +/** This module contains the implementation of taint-flow. + * It is recommended that users use the `TaintedNode` class, rather than using this module directly + * as the interface of this module may change without warning. 
+ */ +library module TaintFlowImplementation { + + import semmle.python.pointsto.PointsTo + import DataFlowExtension + + newtype TTrackedValue = + TTrackedTaint(TaintKind kind) + or + TTrackedAttribute(string name, TaintKind kind) { + exists(AttributeAssignment def, TaintedNode origin | + def.getName() = name and + def.getValue() = origin.getNode() and + origin.getTaintKind() = kind + ) + or + exists(TaintedNode origin | + import_flow(origin, _, _, name) and + origin.getTaintKind() = kind + ) + or + exists(TaintKind src | + kind = src.getTaintOfAttribute(name) + ) + or + exists(TaintedNode origin, AttrNode lhs, ControlFlowNode rhs | + lhs.getName() = name and rhs = lhs.(DefinitionNode).getValue() | + origin.getNode() = rhs and + kind = origin.getTaintKind() + ) + } + + /** The "taint" tracked internal by the TaintFlow module. + * This is not the taint kind specified by the user, but describes both the kind of taint + * and how that taint relates to any object referred to by a data-flow graph node or edge. 
+ */ + class TrackedValue extends TTrackedValue { + + abstract string toString(); + + abstract TrackedValue toKind(TaintKind kind); + + } + + class TrackedTaint extends TrackedValue, TTrackedTaint { + + override string toString() { + result = "Taint " + this.getKind() + } + + TaintKind getKind() { + this = TTrackedTaint(result) + } + + override TrackedValue toKind(TaintKind kind) { + result = TTrackedTaint(kind) + } + + } + + class TrackedAttribute extends TrackedValue, TTrackedAttribute { + + override string toString() { + exists(string name, TaintKind kind | + this = TTrackedAttribute(name, kind) and + result = "Attribute '" + name + "' taint " + kind + ) + } + + TaintKind getKind(string name) { + this = TTrackedAttribute(name, result) + } + + TrackedValue fromAttribute(string name) { + exists(TaintKind kind | + this = TTrackedAttribute(name, kind) and + result = TTrackedTaint(kind) + ) + } + + string getName() { + this = TTrackedAttribute(result, _) + } + + override TrackedValue toKind(TaintKind kind) { + result = TTrackedAttribute(this.getName(), kind) + } + + } + + predicate step(TaintedNode fromnode, TrackedValue totaint, CallContext tocontext, ControlFlowNode tonode) { + unpruned_step(fromnode, totaint, tocontext, tonode) and + tonode.getBasicBlock().likelyReachable() + } + + predicate unpruned_step(TaintedNode fromnode, TrackedValue totaint, CallContext tocontext, ControlFlowNode tonode) { + import_step(fromnode, totaint, tocontext, tonode) + or + from_import_step(fromnode, totaint, tocontext, tonode) + or + attribute_load_step(fromnode, totaint, tocontext, tonode) + or + attribute_store_step(fromnode, totaint, tocontext, tonode) + or + getattr_step(fromnode, totaint, tocontext, tonode) + or + use_step(fromnode, totaint, tocontext, tonode) + or + call_taint_step(fromnode, totaint, tocontext, tonode) + or + fromnode.getNode().(DataFlowNode).getASuccessorNode() = tonode and + fromnode.getContext() = tocontext and + totaint = fromnode.getTrackedValue() + or + 
exists(CallNode call | + fromnode.getNode().(DataFlowNode).getAReturnSuccessorNode(call) = tonode and + fromnode.getContext() = tocontext.getCallee(call) and + totaint = fromnode.getTrackedValue() + ) + or + exists(CallNode call | + fromnode.getNode().(DataFlowNode).getACalleeSuccessorNode(call) = tonode and + fromnode.getContext().getCallee(call) = tocontext and + totaint = fromnode.getTrackedValue() + ) + or + exists(TaintKind tokind | + fromnode.getNode().(DataFlowNode).getASuccessorNode(fromnode.getTaintKind(), tokind) = tonode and + totaint = fromnode.getTrackedValue().toKind(tokind) and + tocontext = fromnode.getContext() + ) + or + exists(TaintKind tokind | + tokind = fromnode.getTaintKind().getTaintForFlowStep(fromnode.getNode(), tonode) and + totaint = fromnode.getTrackedValue().toKind(tokind) and + tocontext = fromnode.getContext() + ) + or + exists(TaintFlow flow, TaintKind tokind | + flow.additionalFlowStep(fromnode.getNode(), fromnode.getTaintKind(), tonode, tokind) and + totaint = fromnode.getTrackedValue().toKind(tokind) and + tocontext = fromnode.getContext() + ) + or + data_flow_step(fromnode.getContext(), fromnode.getNode(), tocontext, tonode) and + totaint = fromnode.getTrackedValue() + or + exists(DataFlowVariable var | + tainted_var(var, tocontext, fromnode) and + var.getASuccessorNode() = tonode and + totaint = fromnode.getTrackedValue() + ) + or + exists(TaintKind tokind | + totaint = fromnode.getTrackedValue().toKind(tokind) and + tocontext = fromnode.getContext() + | + tokind.(DictKind).getValue() = fromnode.getTaintKind() and + dict_construct(fromnode.getNode(), tonode) + or + tokind.(SequenceKind).getItem() = fromnode.getTaintKind() and + sequence_construct(fromnode.getNode(), tonode) + ) + } + + pragma [noinline] + predicate import_step(TaintedNode fromnode, TrackedAttribute totaint, CallContext tocontext, ImportExprNode tonode) { + exists(string name | + import_flow(fromnode, tonode, tocontext, name) and + totaint.fromAttribute(name) = 
fromnode.getTrackedValue() + ) + } + + pragma [noinline] + private predicate import_flow(TaintedNode fromnode, ImportExprNode tonode, CallContext tocontext, string name) { + exists(ModuleObject mod | + tonode.refersTo(mod) and + module_attribute_tainted(mod, name, fromnode) and + tocontext.appliesTo(tonode) + ) + } + + pragma [noinline] + predicate data_flow_step(CallContext fromcontext, ControlFlowNode fromnode, CallContext tocontext, ControlFlowNode tonode) { + if_exp_step(fromcontext, fromnode, tocontext, tonode) + or + call_flow_step(fromcontext, fromnode, tocontext, tonode) + or + parameter_step(fromcontext, fromnode, tocontext, tonode) + } + + pragma [noinline] + predicate from_import_step(TaintedNode fromnode, TrackedValue totaint, CallContext tocontext, ControlFlowNode tonode) { + exists(string name, ImportExprNode fmod, ModuleObject mod | + fmod = tonode.(ImportMemberNode).getModule(name) and + fmod.refersTo(mod) and + tocontext.appliesTo(tonode) and + module_attribute_tainted(mod, name, fromnode) and + totaint = fromnode.getTrackedValue() + ) + } + + pragma [noinline] + predicate getattr_step(TaintedNode fromnode, TrackedValue totaint, CallContext tocontext, CallNode tonode) { + exists(ControlFlowNode arg, string name | + tonode.getFunction().refersTo(builtin_object("getattr")) and + arg = tonode.getArg(0) and + name = tonode.getArg(1).getNode().(StrConst).getText() and + arg = fromnode.getNode() and + totaint = fromnode.fromAttribute(name) and + tocontext = fromnode.getContext() + ) + } + + pragma [noinline] + predicate attribute_load_step(TaintedNode fromnode, TrackedValue totaint, CallContext tocontext, AttrNode tonode) { + tonode.isLoad() and + exists(string name, ControlFlowNode f | + f = tonode.getObject(name) and + tocontext = fromnode.getContext() and + f = fromnode.getNode() and + ( + totaint = TTrackedTaint(fromnode.getTaintKind().getTaintOfAttribute(name)) + or + totaint = fromnode.fromAttribute(name) + ) + ) + } + + pragma [noinline] + 
predicate attribute_store_step(TaintedNode fromnode, TrackedAttribute totaint, CallContext tocontext, ControlFlowNode tonode) { + exists(string name | + attribute_store_flow(fromnode.getNode(), tonode, name) and + totaint.fromAttribute(name) = fromnode.getTrackedValue() + ) and + tocontext = fromnode.getContext() + } + + pragma [noinline] + private predicate attribute_store_flow(ControlFlowNode fromnode, ControlFlowNode tonode, string name) { + exists(AttrNode lhs | + tonode = lhs.getObject(name) and fromnode = lhs.(DefinitionNode).getValue() + ) + } + + predicate module_attribute_tainted(ModuleObject m, string name, TaintedNode origin) { + exists(EssaVariable var, CallContext c | + var.getName() = name and + BaseFlow::reaches_exit(var) and + var.getScope() = m.getModule() and + tainted_var(var, c, origin) and + c = TTop() + ) + } + + predicate use_step(TaintedNode fromnode, TrackedValue totaint, CallContext tocontext, ControlFlowNode tonode) { + exists(EssaVariable var | + var.getASourceUse() = tonode and + tainted_var(var, tocontext, fromnode) and + totaint = fromnode.getTrackedValue() + ) + } + + pragma [noinline] + predicate call_flow_step(CallContext callee, ControlFlowNode fromnode, CallContext caller, ControlFlowNode call) { + exists(PyFunctionObject func | + callee.appliesToScope(func.getFunction()) and + func.getACall() = call and + func.getAReturnedNode() = fromnode | + callee = caller.getCallee(call) + or + caller = callee and caller = TTop() + ) + } + + predicate call_taint_step(TaintedNode fromnode, TrackedValue totaint, CallContext tocontext, CallNode call) { + exists(string name | + call.getFunction().(AttrNode).getObject(name) = fromnode.getNode() and + totaint = TTrackedTaint(fromnode.getTaintKind().getTaintOfMethodResult(name)) and + tocontext = fromnode.getContext() + ) + or + exists(EssaVariable self, CallContext callee | + self_init_end_transfer(self, callee, call, tocontext) and + tainted_var(self, callee, fromnode) and + totaint = 
fromnode.getTrackedValue() + ) + } + + predicate self_init_end_transfer(EssaVariable self, CallContext callee, CallNode call, CallContext caller) { + exists(ClassObject cls, Function init | + PointsTo::instantiation(call, _, cls) and + init = cls.lookupAttribute("__init__").(FunctionObject).getFunction() and + self.getSourceVariable().(Variable).isSelf() and self.getScope() = init + | + callee = caller.getCallee(call) + or + caller = callee and caller = TTop() + ) + } + + predicate tainted_var(EssaVariable var, CallContext context, TaintedNode origin) { + tainted_def(var.getDefinition(), context, origin) + or + exists(EssaVariable prev | + tainted_var(prev, context, origin) and + prev.(DataFlowVariable).getASuccessorVariable() = var + ) + or + origin.getNode().(DataFlowNode).getASuccessorVariable() = var and + context = origin.getContext() + or + exists(TrackedTaint taint, EssaVariable prev | + tainted_var(prev, context, origin) and + origin.getTrackedValue() = taint and + taint.getKind().additionalFlowStepVar(prev, var) + ) + or + exists(TaintFlow flow, TrackedTaint taint, EssaVariable prev | + tainted_var(prev, context, origin) and + origin.getTrackedValue() = taint and + flow.additionalFlowStepVar(prev, var, taint.getKind()) + ) + } + + predicate tainted_def(EssaDefinition def, CallContext context, TaintedNode origin) { + unsanitized_tainted_def(def, context, origin) and + ( + origin.getTrackedValue() instanceof TrackedAttribute + or + exists(TaintKind kind | + kind = origin.getTaintKind() and + not exists(Sanitizer san | + san.sanitizingDefinition(kind, def) + or + san.sanitizingNode(kind, def.(EssaNode).getDefiningNode()) + ) + ) + ) + } + + predicate unsanitized_tainted_def(EssaDefinition def, CallContext context, TaintedNode origin) { + exists(TrackedValue val, ControlFlowNode node | + user_tainted_def(def, val, context, node) and + origin = TTaintedNode_(val, context, node) + ) + or + tainted_phi(def, context, origin) + or + tainted_assignment(def, context, 
origin) + or + tainted_attribute_assignment(def, context, origin) + or + tainted_parameter_def(def, context, origin) + or + tainted_callsite(def, context, origin) + or + tainted_method_callsite(def, context, origin) + or + tainted_edge(def, context, origin) + or + tainted_argument(def, context, origin) + or + tainted_import_star(def, context, origin) + or + tainted_uni_edge(def, context, origin) + or + tainted_scope_entry(def, context, origin) + or + tainted_with(def, context, origin) + or + tainted_exception_capture(def, context, origin) + } + + predicate tainted_scope_entry(ScopeEntryDefinition def, CallContext context, TaintedNode origin) { + exists(EssaVariable var | + BaseFlow::scope_entry_value_transfer_from_earlier(var, _, def, _) and + tainted_var(var, context, origin) + ) + } + + pragma [noinline] + predicate tainted_phi(PhiFunction phi, CallContext context, TaintedNode origin) { + exists(BasicBlock pred, EssaVariable predvar | + predvar = phi.getInput(pred) and + tainted_var(predvar, context, origin) and + not pred.unlikelySuccessor(phi.getBasicBlock()) and + not predvar.(DataFlowExtension::DataFlowVariable).prunedSuccessor(phi.getVariable()) + ) + } + + pragma [noinline] + predicate tainted_assignment(AssignmentDefinition def, CallContext context, TaintedNode origin) { + origin.getNode() = def.getValue() and + context = origin.getContext() + } + + pragma [noinline] + predicate tainted_attribute_assignment(AttributeAssignment def, CallContext context, TaintedNode origin) { + context = origin.getContext() and + origin.getNode() = def.getDefiningNode().(AttrNode).getObject() + } + + pragma [noinline] + predicate tainted_callsite(CallsiteRefinement call, CallContext context, TaintedNode origin) { + /* In the interest of simplicity and performance we assume that tainted escaping variables remain tainted across calls. + * In the cases were this assumption is false, it is easy enough to add an additional sanitizer. 
+ */ + tainted_var(call.getInput(), context, origin) + } + + pragma [noinline] + predicate parameter_step(CallContext caller, ControlFlowNode argument, CallContext callee, NameNode param) { + exists(ParameterDefinition def | + def.getDefiningNode() = param and + exists(FunctionObject func, CallNode call | + exists(int n | argument = func.getArgumentForCall(call, n) and param.getNode() = func.getFunction().getArg(n)) + or + exists(string name | argument = func.getNamedArgumentForCall(call, name) and param.getNode() = func.getFunction().getArgByName(name)) + or + class_initializer_argument(_, _, call, func, argument, param) + | + callee = caller.getCallee(call) + ) + ) + } + + pragma [noinline] + predicate class_initializer_argument(ClassObject cls, int n, CallNode call, FunctionObject func, ControlFlowNode argument, NameNode param) { + PointsTo::instantiation(call, _, cls) and + cls.lookupAttribute("__init__") = func and + call.getArg(n) = argument and + param.getNode() = func.getFunction().getArg(n+1) + } + + pragma [noinline] + predicate tainted_parameter_def(ParameterDefinition def, CallContext context, TaintedNode fromnode) { + fromnode.getNode() = def.getDefiningNode() and + context = fromnode.getContext() + } + + pragma [noinline] + predicate if_exp_step(CallContext fromcontext, ControlFlowNode operand, CallContext tocontext, IfExprNode ifexp) { + fromcontext = tocontext and fromcontext.appliesTo(operand) and + ifexp.getAnOperand() = operand + } + + pragma [noinline] + predicate tainted_method_callsite(MethodCallsiteRefinement call, CallContext context, TaintedNode origin) { + tainted_var(call.getInput(), context, origin) and + exists(TaintKind kind | + kind = origin.getTaintKind() | + not exists(FunctionObject callee, Sanitizer sanitizer | + callee.getACall() = call.getCall() and + sanitizer.sanitizingCall(kind, callee) + ) + ) + } + + pragma [noinline] + predicate tainted_edge(PyEdgeRefinement test, CallContext context, TaintedNode origin) { + 
exists(EssaVariable var, TaintKind kind | + kind = origin.getTaintKind() and + var = test.getInput() and + tainted_var(var, context, origin) and + not exists(Sanitizer sanitizer | + sanitizer.sanitizingEdge(kind, test) + ) + | + not Filters::isinstance(test.getTest(), _, var.getSourceVariable().getAUse()) + or + exists(ControlFlowNode c, ClassObject cls | + Filters::isinstance(test.getTest(), c, var.getSourceVariable().getAUse()) + and c.refersTo(cls) + | + test.getSense() = true and kind.getClass().getAnImproperSuperType() = cls + or + test.getSense() = false and not kind.getClass().getAnImproperSuperType() = cls + ) + ) + } + + pragma [noinline] + predicate tainted_argument(ArgumentRefinement def, CallContext context, TaintedNode origin) { + tainted_var(def.getInput(), context, origin) + } + + pragma [noinline] + predicate tainted_import_star(ImportStarRefinement def, CallContext context, TaintedNode origin) { + exists(ModuleObject mod, string name | + PointsTo::Flow::module_and_name_for_import_star(mod, name, def, _) | + if mod.exports(name) then ( + /* Attribute from imported module */ + module_attribute_tainted(mod, name, origin) and + context.appliesTo(def.getDefiningNode()) + ) else ( + /* Retain value held before import */ + exists(EssaVariable var | + var = def.getInput() and + tainted_var(var, context, origin) + ) + ) + ) + } + + pragma [noinline] + predicate tainted_uni_edge(SingleSuccessorGuard uniphi, CallContext context, TaintedNode origin) { + exists(EssaVariable var, TaintKind kind | + kind = origin.getTaintKind() and + var = uniphi.getInput() and + tainted_var(var, context, origin) and + not exists(Sanitizer sanitizer | + sanitizer.sanitizingSingleEdge(kind, uniphi) + ) + ) + } + + pragma [noinline] + predicate tainted_with(WithDefinition def, CallContext context, TaintedNode origin) { + with_flow(_, origin.getNode(),def.getDefiningNode()) and + context = origin.getContext() + } + + pragma [noinline] + predicate 
tainted_exception_capture(ExceptionCapture def, CallContext context, TaintedNode fromnode) { + fromnode.getNode() = def.getDefiningNode() and + context = fromnode.getContext() + } + +} + +/* Helper predicate for tainted_with */ +private predicate with_flow(With with, ControlFlowNode contextManager, ControlFlowNode var) { + with.getContextExpr() = contextManager.getNode() and + with.getOptionalVars() = var.getNode() and + contextManager.strictlyDominates(var) +} + +/* "Magic" sources and sinks which only have `toString()`s when + * no sources are defined or no sinks are defined or no kinds are present. + * In those cases, these classes make sure that an informative error + * message is presented to the user. + */ + +library class ValidatingTaintSource extends TaintSource { + + override string toString() { + result = error() + } + + ValidatingTaintSource() { + this = uniqueCfgNode() + } + + override predicate isSourceOf(TaintKind kind) { none() } + + override predicate hasLocationInfo(string fp, int bl, int bc, int el, int ec) { + fp = error() and bl = 0 and bc = 0 and el = 0 and ec = 0 + } + + +} + +library class ValidatingTaintSink extends TaintSink { + + override string toString() { + result = error() + } + + ValidatingTaintSink() { + this = uniqueCfgNode() + } + + override predicate sinks(TaintKind kind) { none() } + + override predicate hasLocationInfo(string fp, int bl, int bc, int el, int ec) { + fp = error() and bl = 0 and bc = 0 and el = 0 and ec = 0 + } + +} + + +/* Helpers for Validating classes */ + +private string locatable_module_name() { + exists(Module m | + exists(m.getLocation()) and + result = m.getName() + ) +} + +private ControlFlowNode uniqueCfgNode() { + exists(Module m | + result = m.getEntryNode() and + m.getName() = min(string name | name = locatable_module_name()) + ) +} + +private string error() { + forall(TaintSource s | s instanceof ValidatingTaintSource) and + result = "No sources defined" + or + forall(TaintSink s | s instanceof 
ValidatingTaintSink) and + result = "No sinks defined" +} + + +private newtype TCallContext = + TTop() + or + TCalleeContext(CallNode call, CallContext caller, int depth) { + caller.appliesToScope(call.getScope()) and + depth = caller.getDepth() + 1 and depth < 7 and + exists(TaintedNode n | + n = TTaintedNode_(_, caller, call.getAnArg()) + ) + } + +private import semmle.python.pointsto.PointsTo + +pragma [inline] +private string shortLocation(Location l) { + result = l.getFile().getShortName() + ":" + l.getStartLine() +} + +/** Call context for use in taint-tracking. + * Using call contexts prevents "cross talk" between different calls + * to the same function. For example, if a function f is defined as + * ```python + * def f(arg): + * return arg + * ``` + * Then `f("tainted")` is "tainted", but `f("ok")` is "ok". + */ +class CallContext extends TCallContext { + + string toString() { + this = TTop() and result = "" + or + exists(CallNode callsite, CallContext caller | + this = TCalleeContext(callsite, caller, _) | + result = shortLocation(callsite.getLocation()) + " from " + caller.toString() and caller = TCalleeContext(_, _, _) + or + result = shortLocation(callsite.getLocation()) and caller = TTop() + ) + } + + /** Holds if this context can apply to `n`, that is, to the scope that contains `n`. + */ + pragma[inline] + predicate appliesTo(ControlFlowNode n) { + this.appliesToScope(n.getScope()) + } + + /** Holds if this context can apply to the scope `s`. + */ + predicate appliesToScope(Scope s) { + this = TTop() + or + exists(FunctionObject f, CallNode call | + this = TCalleeContext(call, _, _) and + f.getFunction() = s and f.getACall() = call + ) + or + exists(ClassObject cls,CallNode call | + this = TCalleeContext(call, _, _) and + PointsTo::instantiation(call, _, cls) and + s = cls.lookupAttribute("__init__").(FunctionObject).getFunction() and + call.getFunction().refersTo(cls) + ) + } + + /** Gets the call depth of this context. 
+ */ + int getDepth() { + this = TTop() and result = 0 + or + this = TCalleeContext(_, _, result) + } + + CallContext getCallee(CallNode call) { + result = TCalleeContext(call, this, _) + } + + CallContext getCaller() { + this = TCalleeContext(_, result, _) + } + +} + +pragma [noinline] +private predicate dict_construct(ControlFlowNode itemnode, ControlFlowNode dictnode) { + dictnode.(DictNode).getAValue() = itemnode + or + dictnode.(CallNode).getFunction().refersTo(theDictType()) and + dictnode.(CallNode).getArgByName(_) = itemnode +} + +pragma [noinline] +private predicate sequence_construct(ControlFlowNode itemnode, ControlFlowNode seqnode) { + seqnode.isLoad() and + ( + seqnode.(ListNode).getElement(_) = itemnode + or + seqnode.(TupleNode).getElement(_) = itemnode + or + seqnode.(SetNode).getAnElement() = itemnode + ) +} + + +/* A call to construct a sequence from a sequence or iterator*/ +pragma [noinline] +private predicate sequence_call(ControlFlowNode fromnode, CallNode tonode) { + tonode.getArg(0) = fromnode and + exists(ControlFlowNode cls | + cls = tonode.getFunction() | + cls.refersTo(theListType()) + or + cls.refersTo(theTupleType()) + or + cls.refersTo(theSetType()) + ) +} diff --git a/python/ql/src/semmle/python/security/flow/AnyCall.qll b/python/ql/src/semmle/python/security/flow/AnyCall.qll new file mode 100644 index 00000000000..7a766bf25a5 --- /dev/null +++ b/python/ql/src/semmle/python/security/flow/AnyCall.qll @@ -0,0 +1,15 @@ +import python +import semmle.python.security.strings.Basic + +/** Assume that taint flows from argument to result for *any* call */ +class AnyCallStringFlow extends DataFlowExtension::DataFlowNode { + + AnyCallStringFlow() { + any(CallNode call).getAnArg() = this + } + + override ControlFlowNode getASuccessorNode() { + result.(CallNode).getAnArg() = this + } + +} diff --git a/python/ql/src/semmle/python/security/injection/Command.qll b/python/ql/src/semmle/python/security/injection/Command.qll new file mode 100644 index 
00000000000..11ff52a053a --- /dev/null +++ b/python/ql/src/semmle/python/security/injection/Command.qll @@ -0,0 +1,114 @@ +/** Provides classes and predicates to track external data that + * may represent malicious OS commands. + * + * This module is intended to be imported into a taint-tracking query + * to extend `TaintKind` and `TaintSink`. + * + */ +import python + +import semmle.python.security.TaintTracking +import semmle.python.security.strings.Untrusted + + +private ModuleObject subprocessModule() { + result.getName() = "subprocess" +} + +private ModuleObject osModule() { + result.getName() = "os" +} + +private Object makeOsCall() { + exists(string name | + result = subprocessModule().getAttribute(name) | + name = "Popen" or + name = "call" or + name = "check_call" or + name = "check_output" + ) +} + +/** Special case for the first element of a sequence: the taint kind of a sequence whose first element has an `ExternalStringKind` taint. */ +class FirstElementKind extends TaintKind { + + FirstElementKind() { + this = "sequence[" + any(ExternalStringKind key) + "][0]" + } + + + /** Gets the taint kind of the first item in this sequence. */ + ExternalStringKind getItem() { + this = "sequence[" + result + "][0]" + } + +} + +class FirstElementFlow extends DataFlowExtension::DataFlowNode { + + FirstElementFlow() { + this = any(SequenceNode s).getElement(0) + } + + override + ControlFlowNode getASuccessorNode(TaintKind fromkind, TaintKind tokind) { + result.(SequenceNode).getElement(0) = this and tokind.(FirstElementKind).getItem() = fromkind + } + +} + +/** A taint sink that is potentially vulnerable to malicious shell commands. + * The `vuln` in `subprocess.call(vuln, shell=True)`, `os.system(vuln)` and similar calls. 
+ */ +class ShellCommand extends TaintSink { + + override string toString() { result = "shell command" } + + ShellCommand() { + exists(CallNode call, Object istrue | + call.getFunction().refersTo(makeOsCall()) and + call.getAnArg() = this and + call.getArgByName("shell").refersTo(istrue) and + istrue.booleanValue() = true + ) + or + exists(CallNode call | + call.getFunction().refersTo(osModule().getAttribute("system")) and + call.getAnArg() = this + ) + } + + override predicate sinks(TaintKind kind) { + /* Tainted string command */ + kind instanceof ExternalStringKind + or + /* List (or tuple) containing a tainted string command */ + kind instanceof ExternalStringSequenceKind + } + +} + +/** A taint sink that is potentially vulnerable to malicious OS commands. + * The first argument `vuln` in `subprocess.call(vuln, ...)` and similar calls, when it is not already a `ShellCommand` sink. + */ +class OsCommandFirstArgument extends TaintSink { + + override string toString() { result = "OS command first argument" } + + OsCommandFirstArgument() { + not this instanceof ShellCommand and + exists(CallNode call| + call.getFunction().refersTo(makeOsCall()) and + call.getArg(0) = this + ) + } + + override predicate sinks(TaintKind kind) { + /* Tainted string command */ + kind instanceof ExternalStringKind + or + /* List (or tuple) whose first element is tainted */ + kind instanceof FirstElementKind + } + +} diff --git a/python/ql/src/semmle/python/security/injection/Exec.qll b/python/ql/src/semmle/python/security/injection/Exec.qll new file mode 100644 index 00000000000..f16c3bfd439 --- /dev/null +++ b/python/ql/src/semmle/python/security/injection/Exec.qll @@ -0,0 +1,42 @@ +/** Provides class and predicates to track external data that + * may represent malicious Python code. + * + * This module is intended to be imported into a taint-tracking query + * to extend `TaintKind` and `TaintSink`. 
+ * + */ +import python + +import semmle.python.security.TaintTracking +import semmle.python.security.strings.Untrusted + + +private FunctionObject exec_or_eval() { + result = builtin_object("exec") + or + result = builtin_object("eval") +} + +/** A taint sink that represents an argument to exec or eval that is vulnerable to malicious input. + * The `vuln` in `exec(vuln)` or similar. + */ +class StringEvaluationNode extends TaintSink { + + override string toString() { result = "exec or eval" } + + StringEvaluationNode() { + exists(Exec exec | + exec.getASubExpression().getAFlowNode() = this + ) + or + exists(CallNode call | + exec_or_eval().getACall() = call and + call.getAnArg() = this + ) + } + + override predicate sinks(TaintKind kind) { + kind instanceof ExternalStringKind + } + +} diff --git a/python/ql/src/semmle/python/security/injection/Marshal.qll b/python/ql/src/semmle/python/security/injection/Marshal.qll new file mode 100644 index 00000000000..b9b712c43da --- /dev/null +++ b/python/ql/src/semmle/python/security/injection/Marshal.qll @@ -0,0 +1,36 @@ +/** Provides class and predicates to track external data that + * may represent malicious marshals. + * + * This module is intended to be imported into a taint-tracking query + * to extend `TaintKind` and `TaintSink`. + * + */ +import python + +import semmle.python.security.TaintTracking +import semmle.python.security.strings.Untrusted + + +private FunctionObject marshalLoads() { + result = any(ModuleObject marshal | marshal.getName() = "marshal").getAttribute("loads") +} + + +/** A taint sink that is potentially vulnerable to malicious marshaled objects. + * The `vuln` in `marshal.loads(vuln)`. 
*/ +class UnmarshalingNode extends TaintSink { + + override string toString() { result = "unmarshaling vulnerability" } + + UnmarshalingNode() { + exists(CallNode call | + marshalLoads().getACall() = call and + call.getAnArg() = this + ) + } + + override predicate sinks(TaintKind kind) { + kind instanceof ExternalStringKind + } + +} diff --git a/python/ql/src/semmle/python/security/injection/Path.qll b/python/ql/src/semmle/python/security/injection/Path.qll new file mode 100644 index 00000000000..bd318c7f1e6 --- /dev/null +++ b/python/ql/src/semmle/python/security/injection/Path.qll @@ -0,0 +1,99 @@ +import python + +import semmle.python.security.TaintTracking +import semmle.python.security.strings.Untrusted + +/** Prevents taint flowing through ntpath.normpath() + * NormalizedPath below handles that case. + */ +private class PathSanitizer extends Sanitizer { + + PathSanitizer() { + this = "path.sanitizer" + } + + override predicate sanitizingNode(TaintKind taint, ControlFlowNode node) { + taint instanceof ExternalStringKind and + abspath_call(node, _) + } + +} + +private FunctionObject abspath() { + exists(ModuleObject os, ModuleObject os_path | + os.getName() = "os" and + os.getAttribute("path") = os_path | + os_path.getAttribute("abspath") = result + or + os_path.getAttribute("normpath") = result + ) +} + +/** A path that has been normalized, but not verified to be safe */ +class NormalizedPath extends TaintKind { + + NormalizedPath() { + this = "normalized.path.injection" + } + +} + +private predicate abspath_call(CallNode call, ControlFlowNode arg) { + call.getFunction().refersTo(abspath()) and + arg = call.getArg(0) +} + + +class AbsPath extends DataFlowExtension::DataFlowNode { + + AbsPath() { + abspath_call(_, this) + } + + override + ControlFlowNode getASuccessorNode(TaintKind fromkind, TaintKind tokind) { + abspath_call(result, this) and tokind instanceof NormalizedPath and fromkind instanceof ExternalStringKind + } + +} + +class NormalizedPathSanitizer 
extends Sanitizer { + + NormalizedPathSanitizer() { + this = "normalized.path.sanitizer" + } + + override predicate sanitizingEdge(TaintKind taint, PyEdgeRefinement test) { + taint instanceof NormalizedPath and + test.getTest().(CallNode).getFunction().(AttrNode).getName() = "startswith" and + test.getSense() = true + } + +} + +/** A taint sink that is vulnerable to malicious paths. + * The `vuln` in `open(vuln)` and similar. + */ +class OpenNode extends TaintSink { + + override string toString() { result = "argument to open()" } + + OpenNode() { + exists(CallNode call | + call.getFunction().refersTo(builtin_object("open")) and + call.getAnArg() = this + ) + } + + override predicate sinks(TaintKind kind) { + kind instanceof ExternalStringKind + or + kind instanceof NormalizedPath + } + +} + + + + + diff --git a/python/ql/src/semmle/python/security/injection/Pickle.qll b/python/ql/src/semmle/python/security/injection/Pickle.qll new file mode 100644 index 00000000000..50914650d9f --- /dev/null +++ b/python/ql/src/semmle/python/security/injection/Pickle.qll @@ -0,0 +1,40 @@ +/** Provides class and predicates to track external data that + * may represent malicious pickles. + * + * This module is intended to be imported into a taint-tracking query + * to extend `TaintKind` and `TaintSink`. + * + */ +import python + +import semmle.python.security.TaintTracking +import semmle.python.security.strings.Untrusted + + +private ModuleObject pickleModule() { + result.getName() = "pickle" + or + result.getName() = "cPickle" +} + +private FunctionObject pickleLoads() { + result = pickleModule().getAttribute("loads") +} + +/** `pickle.loads(untrusted)` vulnerability. 
*/ +class UnpicklingNode extends TaintSink { + + override string toString() { result = "unpickling untrusted data" } + + UnpicklingNode() { + exists(CallNode call | + pickleLoads().getACall() = call and + call.getAnArg() = this + ) + } + + override predicate sinks(TaintKind kind) { + kind instanceof ExternalStringKind + } + +} diff --git a/python/ql/src/semmle/python/security/injection/Sql.qll b/python/ql/src/semmle/python/security/injection/Sql.qll new file mode 100644 index 00000000000..26bea04c6a7 --- /dev/null +++ b/python/ql/src/semmle/python/security/injection/Sql.qll @@ -0,0 +1,96 @@ +/** Provides class and predicates to track external data that + * may represent malicious SQL queries or parts of queries. + * + * This module is intended to be imported into a taint-tracking query + * to extend `TaintKind` and `TaintSink`. + * + */ +import python + +import semmle.python.security.TaintTracking +import semmle.python.security.strings.Untrusted + + +private StringObject first_part(ControlFlowNode command) { + command.(BinaryExprNode).getOp() instanceof Add and + command.(BinaryExprNode).getLeft().refersTo(result) + or + exists(CallNode call, SequenceObject seq | + call = command | + call = theStrType().lookupAttribute("join") and + call.getArg(0).refersTo(seq) and + seq.getInferredElement(0) = result + ) + or + command.(BinaryExprNode).getOp() instanceof Mod and + command.getNode().(StrConst).getLiteralObject() = result +} + +/** Holds if `command` appears to be a SQL command string of which `inject` is a part. */ +predicate probable_sql_command(ControlFlowNode command, ControlFlowNode inject) { + exists(string prefix | + inject = command.getAChild*() and + first_part(command).getText().regexpMatch(" *" + prefix + ".*") + | + prefix = "CREATE" or prefix = "SELECT" + ) +} + +/** A taint kind representing a DB cursor. + * This will be overridden to provide specific kinds of DB cursor. 
+ */ +abstract class DbCursor extends TaintKind { + + bindingset[this] + DbCursor() { any() } + + string getExecuteMethodName() { result = "execute" } + +} + + +/** A part of a string that appears to be a SQL command and is thus + * vulnerable to malicious input. + */ +class SimpleSqlStringInjection extends TaintSink { + + override string toString() { result = "simple SQL string injection" } + + SimpleSqlStringInjection() { + probable_sql_command(_, this) + } + + override predicate sinks(TaintKind kind) { + kind instanceof ExternalStringKind + } + +} + +/** A taint source representing sources of DB connections. + * This will be overridden to provide specific kinds of DB connection sources. + */ +abstract class DbConnectionSource extends TaintSource { + +} + +/** A taint sink that is vulnerable to malicious SQL queries. + * The `vuln` in `db.connection.execute(vuln)` and similar. + */ +class DbConnectionExecuteArgument extends TaintSink { + + override string toString() { result = "db.connection.execute" } + + DbConnectionExecuteArgument() { + exists(CallNode call, DbCursor cursor, string name | + cursor.taints(call.getFunction().(AttrNode).getObject(name)) and + cursor.getExecuteMethodName() = name and + call.getArg(0) = this + ) + } + + override predicate sinks(TaintKind kind) { + kind instanceof ExternalStringKind + } +} + + diff --git a/python/ql/src/semmle/python/security/injection/Xml.qll b/python/ql/src/semmle/python/security/injection/Xml.qll new file mode 100644 index 00000000000..0c4a8136bbb --- /dev/null +++ b/python/ql/src/semmle/python/security/injection/Xml.qll @@ -0,0 +1,95 @@ +/** Provides class and predicates to track external data that + * may represent malicious XML objects. + * + * This module is intended to be imported into a taint-tracking query + * to extend `TaintKind` and `TaintSink`. 
+ */ +import python + +import semmle.python.security.TaintTracking +import semmle.python.security.strings.Untrusted + + +private ModuleObject xmlElementTreeModule() { + result.getName() = "xml.etree.ElementTree" +} + +private ModuleObject xmlMiniDomModule() { + result.getName() = "xml.dom.minidom" +} + +private ModuleObject xmlPullDomModule() { + result.getName() = "xml.dom.pulldom" +} + +private ModuleObject xmlSaxModule() { + result.getName() = "xml.sax" +} + +private class ExpatParser extends TaintKind { + + ExpatParser() { this = "expat.parser" } + +} + +private FunctionObject expatCreateParseFunction() { + exists(ModuleObject expat | + expat.getName() = "xml.parsers.expat" and + result = expat.getAttribute("ParserCreate") + ) +} + +private class ExpatCreateParser extends TaintSource { + + ExpatCreateParser() { + expatCreateParseFunction().getACall() = this + } + + override predicate isSourceOf(TaintKind kind) { + kind instanceof ExpatParser + } + + string toString() { + result = "expat.create.parser" + } +} + +private FunctionObject xmlFromString() { + result = xmlElementTreeModule().getAttribute("fromstring") + or + result = xmlMiniDomModule().getAttribute("parseString") + or + result = xmlPullDomModule().getAttribute("parseString") + or + result = xmlSaxModule().getAttribute("parseString") +} + +/** A (potentially) malicious XML string. */ +class ExternalXmlString extends ExternalStringKind { + + ExternalXmlString() { + this = "external xml encoded object" + } + +} + +/** A call to an XML library function that is potentially vulnerable to a + * specially crafted XML string. 
+ */ +class XmlLoadNode extends TaintSink { + + override string toString() { result = "xml.load vulnerability" } + + XmlLoadNode() { + exists(CallNode call | + call.getAnArg() = this | + xmlFromString().getACall() = call or + any(ExpatParser parser).taints(call.getFunction().(AttrNode).getObject("Parse")) + ) + } + + override predicate sinks(TaintKind kind) { + kind instanceof ExternalXmlString + } + +} diff --git a/python/ql/src/semmle/python/security/injection/Yaml.qll b/python/ql/src/semmle/python/security/injection/Yaml.qll new file mode 100644 index 00000000000..e2e1b983ea9 --- /dev/null +++ b/python/ql/src/semmle/python/security/injection/Yaml.qll @@ -0,0 +1,40 @@ +/** Provides class and predicates to track external data that + * may represent malicious yaml-encoded objects. + * + * This module is intended to be imported into a taint-tracking query + * to extend `TaintKind` and `TaintSink`. + * + */ + +import python + +import semmle.python.security.TaintTracking +import semmle.python.security.strings.Untrusted + + +private ModuleObject yamlModule() { + result.getName() = "yaml" +} + + +private FunctionObject yamlLoad() { + result = yamlModule().getAttribute("load") +} + +/** `yaml.load(untrusted)` vulnerability. */ +class YamlLoadNode extends TaintSink { + + override string toString() { result = "yaml.load vulnerability" } + + YamlLoadNode() { + exists(CallNode call | + yamlLoad().getACall() = call and + call.getAnArg() = this + ) + } + + override predicate sinks(TaintKind kind) { + kind instanceof ExternalStringKind + } + +} diff --git a/python/ql/src/semmle/python/security/strings/Basic.qll b/python/ql/src/semmle/python/security/strings/Basic.qll new file mode 100755 index 00000000000..cbb35668c5a --- /dev/null +++ b/python/ql/src/semmle/python/security/strings/Basic.qll @@ -0,0 +1,118 @@ +import python +private import Common + +import semmle.python.security.TaintTracking + +/** An extensible kind of taint representing any kind of string. 
+ */ +abstract class StringKind extends TaintKind { + + bindingset[this] + StringKind() { + this = this + } + + override TaintKind getTaintForFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode) { + result = this and + ( + str_method_call(fromnode, tonode) or + slice(fromnode, tonode) or + tonode.(BinaryExprNode).getAnOperand() = fromnode or + os_path_join(fromnode, tonode) or + str_format(fromnode, tonode) or + encode_decode(fromnode, tonode) or + to_str(fromnode, tonode) + ) + or + result = this and copy_call(fromnode, tonode) + } + + override ClassObject getClass() { + result = theStrType() or result = theUnicodeType() + } + +} + +private class StringEqualitySanitizer extends Sanitizer { + + StringEqualitySanitizer() { this = "string equality sanitizer" } + + /** The test `if untrusted == "KNOWN_VALUE":` sanitizes `untrusted` on its `true` edge. */ + override predicate sanitizingEdge(TaintKind taint, PyEdgeRefinement test) { + taint instanceof StringKind and + exists(ControlFlowNode const, Cmpop op | + const.getNode() instanceof StrConst | + ( + test.getTest().(CompareNode).operands(const, op, _) + or + test.getTest().(CompareNode).operands(_, op, const) + ) and ( + op instanceof Eq and test.getSense() = true + or + op instanceof NotEq and test.getSense() = false + ) + ) + } + +} + +/* tonode = fromnode.xxx() where the call to xxx returns an identical or similar string */ +private predicate str_method_call(ControlFlowNode fromnode, CallNode tonode) { + exists(string method_name | + tonode.getFunction().(AttrNode).getObject(method_name) = fromnode + | + method_name = "strip" or method_name = "format" or + method_name = "lstrip" or method_name = "rstrip" or + method_name = "ljust" or method_name = "rjust" or + method_name = "title" or method_name = "capitalize" + ) +} + +/* tonode = ....format(fromnode) */ +private predicate str_format(ControlFlowNode fromnode, CallNode tonode) { + tonode.getFunction().(AttrNode).getName() = "format" and + ( + 
tonode.getAnArg() = fromnode + or + tonode.getNode().getAKeyword().getValue() = fromnode.getNode() + ) +} + +/* tonode = codec.[en|de]code(fromnode)*/ +private predicate encode_decode(ControlFlowNode fromnode, CallNode tonode) { + exists(FunctionObject func, string name | + func.getACall() = tonode and + tonode.getAnArg() = fromnode and + func.getName() = name | + name = "encode" or name = "decode" or + name = "decodestring" + ) +} + +/* tonode = str(fromnode)*/ +private predicate to_str(ControlFlowNode fromnode, CallNode tonode) { + tonode.getAnArg() = fromnode and + exists(ClassObject str | + tonode.getFunction().refersTo(str) | + str = theUnicodeType() or str = theBytesType() + ) +} + +/* tonode = fromnode[:] */ +private predicate slice(ControlFlowNode fromnode, SubscriptNode tonode) { + exists(Slice all | + all = tonode.getIndex().getNode() and + not exists(all.getStart()) and not exists(all.getStop()) and + tonode.getValue() = fromnode + ) +} + +/* tonode = os.path.join(..., fromnode, ...) 
*/ +private predicate os_path_join(ControlFlowNode fromnode, CallNode tonode) { + exists(FunctionObject path_join | + exists(ModuleObject os | os.getName() = "os" | + os.getAttribute("path").(ModuleObject).getAttribute("join") = path_join + ) | + tonode = path_join.getACall() and tonode.getAnArg() = fromnode + ) +} diff --git a/python/ql/src/semmle/python/security/strings/Common.qll b/python/ql/src/semmle/python/security/strings/Common.qll new file mode 100644 index 00000000000..12b73acad8b --- /dev/null +++ b/python/ql/src/semmle/python/security/strings/Common.qll @@ -0,0 +1,16 @@ +import python + + +/* A call that returns a copy (or similar) of the argument */ +predicate copy_call(ControlFlowNode fromnode, CallNode tonode) { + tonode.getFunction().(AttrNode).getObject("copy") = fromnode + or + exists(ModuleObject copy, string name | + name = "copy" or name = "deepcopy" | + copy.getAttribute(name).(FunctionObject).getACall() = tonode and + tonode.getArg(0) = fromnode + ) + or + tonode.getFunction().refersTo(builtin_object("reversed")) and + tonode.getArg(0) = fromnode +} diff --git a/python/ql/src/semmle/python/security/strings/External.qll b/python/ql/src/semmle/python/security/strings/External.qll new file mode 100644 index 00000000000..e1f0406c691 --- /dev/null +++ b/python/ql/src/semmle/python/security/strings/External.qll @@ -0,0 +1,98 @@ +import python +import Basic +private import Common + +/** An extensible kind of taint representing an externally controlled string. 
+ */ +abstract class ExternalStringKind extends StringKind { + + bindingset[this] + ExternalStringKind() { + this = this + } + + override TaintKind getTaintForFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode) { + result = StringKind.super.getTaintForFlowStep(fromnode, tonode) + or + tonode.(SequenceNode).getElement(_) = fromnode and result.(ExternalStringSequenceKind).getItem() = this + or + json_load(fromnode, tonode) and result.(ExternalJsonKind).getValue() = this + or + tonode.(DictNode).getAValue() = fromnode and result.(ExternalStringDictKind).getValue() = this + } + +} + +/** A kind of "taint", representing a sequence, with a "taint" member */ +class ExternalStringSequenceKind extends SequenceKind { + + ExternalStringSequenceKind() { + this.getItem() instanceof ExternalStringKind + } + +} + +/** A hierarchical dictionary or list where the entire structure is externally controlled. + * This is typically a parsed JSON object. + */ +class ExternalJsonKind extends TaintKind { + + ExternalJsonKind() { + this = "json[" + any(ExternalStringKind key) + "]" + } + + + /** Gets the taint kind of a value held in this structure: either the kind wrapped by this one or this kind itself */ + TaintKind getValue() { + this = "json[" + result + "]" + or + result = this + } + + override TaintKind getTaintForFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode) { + this.taints(fromnode) and + json_subscript_taint(tonode, fromnode, this, result) + or + result = this and copy_call(fromnode, tonode) + } + + override TaintKind getTaintOfMethodResult(string name) { + name = "get" and result = this.getValue() + } + +} + +/** A kind of "taint", representing a dictionary mapping str->"taint" */ +class ExternalStringDictKind extends DictKind { + + ExternalStringDictKind() { + this.getValue() instanceof ExternalStringKind + } + +} + +/** A kind of "taint", representing a dictionary mapping strings to sequences of + * tainted strings */ + +class ExternalStringSequenceDictKind extends DictKind { + ExternalStringSequenceDictKind() { + 
this.getValue() instanceof ExternalStringSequenceKind + } +} + +/* Helper for ExternalJsonKind.getTaintForFlowStep(): holds if `sub` is a load that subscripts `obj` and `key` is a value kind of `seq` */ +pragma [noinline] +private predicate json_subscript_taint(SubscriptNode sub, ControlFlowNode obj, ExternalJsonKind seq, TaintKind key) { + sub.isLoad() and + sub.getValue() = obj and + key = seq.getValue() +} + + +private predicate json_load(ControlFlowNode fromnode, CallNode tonode) { + exists(FunctionObject json_loads | + any(ModuleObject json | json.getName() = "json").getAttribute("loads") = json_loads and + json_loads.getACall() = tonode and tonode.getArg(0) = fromnode + ) +} + diff --git a/python/ql/src/semmle/python/security/strings/Untrusted.qll b/python/ql/src/semmle/python/security/strings/Untrusted.qll new file mode 100644 index 00000000000..65e624aa1b7 --- /dev/null +++ b/python/ql/src/semmle/python/security/strings/Untrusted.qll @@ -0,0 +1,14 @@ +import python +import External + + +/** A kind of taint representing an externally controlled string. + * This class is a simple sub-class of `ExternalStringKind`. + */ +class UntrustedStringKind extends ExternalStringKind { + + UntrustedStringKind() { + this = "externally controlled string" + } + +} diff --git a/python/ql/src/semmle/python/strings.qll b/python/ql/src/semmle/python/strings.qll new file mode 100644 index 00000000000..b35f831c248 --- /dev/null +++ b/python/ql/src/semmle/python/strings.qll @@ -0,0 +1,59 @@ +import python + +predicate format_string(StrConst e) { + exists(BinaryExpr b | b.getOp() instanceof Mod and b.getLeft() = e) +} + +predicate mapping_format(StrConst e) { + conversion_specifier(e, _).regexpMatch("%\\([A-Z_a-z0-9]+\\).*") +} + +/* +MAPPING_KEY = "(\\([^)]+\\))?" +CONVERSION_FLAGS = "[#0\\- +]?" +MINIMUM_FIELD_WIDTH = "(\\*|[0-9]*)" +PRECISION = "(\\.(\\*|[0-9]*))?" +LENGTH_MODIFIER = "[hlL]?" 
+TYPE = "[bdiouxXeEfFgGcrs%]" +*/ + +private +string conversion_specifier_string(StrConst e, int number, int position) { + exists(string s, string REGEX | s = e.getText() | + REGEX = "%(\\([^)]*\\))?[#0\\- +]*(\\*|[0-9]*)(\\.(\\*|[0-9]*))?(h|H|l|L)?[badiouxXeEfFgGcrs%]" and + result = s.regexpFind(REGEX, number, position)) +} + +private +string conversion_specifier(StrConst e, int number) { + result = conversion_specifier_string(e, number, _) and result != "%%" +} + +int illegal_conversion_specifier(StrConst e) { + format_string(e) and + "%" = e.getText().charAt(result) and + // not the start of a conversion specifier or the second % of a %% + not exists(conversion_specifier_string(e, _, result)) and + not exists(conversion_specifier_string(e, _, result - 1)) +} + +/** Gets the number of format items in a format string */ +int format_items(StrConst e) { + result = count(int i | | conversion_specifier(e, i)) + + // a conversion specifier uses an extra item for each * + count(int i, int j | conversion_specifier(e, i).charAt(j) = "*") +} + +private string str(Expr e) { + result = ((Num)e).getN() + or + result = "'" + ((StrConst)e).getText() + "'" +} + +/** Gets a string representation of an expression more suited for embedding in message strings than .toString() */ +string repr(Expr e) { + if exists(str(e)) then + result = str(e) + else + result = e.toString() +} diff --git a/python/ql/src/semmle/python/templates/PyxlTags.qll b/python/ql/src/semmle/python/templates/PyxlTags.qll new file mode 100644 index 00000000000..3ab3f8980bc --- /dev/null +++ b/python/ql/src/semmle/python/templates/PyxlTags.qll @@ -0,0 +1,107 @@ + + +import python + +/** A Tag in Pyxl (which gets converted to a call in Python). 
+ * + */ +class PyxlTag extends Call { + + PyxlTag() { + pyxl_tag(this, _) + } + + string getPyxlTagName() { + pyxl_tag(this, result) + } + + /** Gets the pyxl or Python node that is enclosed by this one in the pyxl source */ + Expr getEnclosedNode() { + none() + } + + /** Gets the Python code (if any) that is contained in this pyxl node */ + Expr getEnclosedPythonCode() { + result = this.getEnclosedNode() and not result instanceof PyxlTag + or + result = ((PyxlTag)this.getEnclosedNode()).getEnclosedPythonCode() + } + +} + +private predicate pyxl_tag(Call c, string name) { + exists(Attribute tag, Name html | + tag = c.getFunc() and + html = tag.getObject() and + name = tag.getName() and + html.getId() = "html" + ) +} + +class PyxlHtmlTag extends PyxlTag { + + PyxlHtmlTag() { + this.getPyxlTagName().prefix(2) = "x_" + } + + string getTagName() { + result = this.getPyxlTagName().suffix(2) + } + + /** Html tags get transformed into a call. This node is the callee function and the enclosed node is an argument. */ + override Expr getEnclosedNode() { + exists(Call c | + c.getFunc() = this and + result = c.getAnArg() + ) + } + +} + +class PyxlIfTag extends PyxlTag { + + PyxlIfTag() { + this.getPyxlTagName() = "_push_condition" + } + + override Expr getEnclosedNode() { + result = this.getAnArg() + } +} + +class PyxlEndIfTag extends PyxlTag { + + PyxlEndIfTag() { + this.getPyxlTagName() = "_leave_if" + } + + override Expr getEnclosedNode() { + result = this.getAnArg() + } + +} + +class PyxlRawHtml extends PyxlTag{ + + PyxlRawHtml() { + this.getPyxlTagName() = "rawhtml" + } + + /** The text for this raw html, if it is simple text. 
*/ + string getText() { + exists(Unicode text | + text = this.getValue() and + result = text.getS() + ) + } + + Expr getValue() { + result = this.getArg(0) + } + + override Expr getEnclosedNode() { + result = this.getAnArg() + } + +} + diff --git a/python/ql/src/semmle/python/templates/Templates.qll b/python/ql/src/semmle/python/templates/Templates.qll new file mode 100644 index 00000000000..2aec12119a2 --- /dev/null +++ b/python/ql/src/semmle/python/templates/Templates.qll @@ -0,0 +1,24 @@ +import python + + +abstract class Template extends Module { + +} + +class SpitfireTemplate extends Template { + + SpitfireTemplate() { + this.getKind() = "Spitfire template" + } + +} + +class PyxlModule extends Template { + + PyxlModule() { + exists(Comment c | c.getLocation().getFile() = this.getFile() | + c.getText().regexpMatch("# *coding.*pyxl.*") + ) + } + +} diff --git a/python/ql/src/semmle/python/types/ClassObject.qll b/python/ql/src/semmle/python/types/ClassObject.qll new file mode 100644 index 00000000000..2aa5e9f4a92 --- /dev/null +++ b/python/ql/src/semmle/python/types/ClassObject.qll @@ -0,0 +1,612 @@ +import python +private import semmle.python.pointsto.PointsTo +private import semmle.python.pointsto.Base +private import semmle.python.pointsto.MRO as MRO + +predicate is_c_metaclass(Object o) { + py_special_objects(o, "type") + or + exists(Object sup | py_cmembers_versioned(o, ".super.", sup, major_version().toString()) and is_c_metaclass(sup)) +} + + +library class ObjectOrCfg extends @py_object { + + string toString() { + /* Not to be displayed */ + none() + } + + ControlFlowNode getOrigin() { + result = this + } + +} + +/** A class whose instances represents Python classes. + * Instances of this class represent either builtin classes + * such as `list` or `str`, or program-defined Python classes + * present in the source code. + * + * Generally there is a one-to-one mapping between classes in + * the Python program and instances of this class in the database. 
+ * However, that is not always the case. For example, dynamically + * generated classes may share a single QL class instance. + * Also the existence of a class definition in the source code + * does not guarantee that such a class will ever exist in the + * running program. + */ +class ClassObject extends Object { + + ClassObject() { + this.getOrigin() instanceof ClassExpr or + py_cobjecttypes(_, this) or + exists(Object meta | py_cobjecttypes(this, meta) and is_c_metaclass(meta)) or + py_special_objects(this, "_semmle_unknown_type") + } + + private predicate isStr() { + py_special_objects(this, "bytes") and major_version() = 2 + or + py_special_objects(this, "unicode") and major_version() = 3 + } + + /** Gets the short (unqualified) name of this class */ + string getName() { + this.isStr() and result = "str" + or + not this.isStr() and py_cobjectnames(this, result) and not this = theUnknownType() + or + result = this.getPyClass().getName() + } + + /** Gets the qualified name for this class. + * Should return the same name as the `__qualname__` attribute on classes in Python 3. + */ + string getQualifiedName() { + py_cobjectnames(this, _) and result = this.getName() + or + result = this.getPyClass().getQualifiedName() + } + + /** Gets the nth base class of this class */ + Object getBaseType(int n) { + result = PointsTo::Types::class_base_type(this, n) + } + + /** Gets a base class of this class */ + Object getABaseType() { + result = this.getBaseType(_) + } + + /** Whether this class has a base class */ + predicate hasABase() { + exists(ClassExpr cls | this.getOrigin() = cls | exists(cls.getABase())) + or + /* The extractor uses the special name ".super." 
to indicate the super class of a builtin class */ + py_cmembers_versioned(this, ".super.", _, major_version().toString()) + } + + /** Gets a super class of this class (includes transitive super classes) */ + ClassObject getASuperType() { + result = PointsTo::Types::get_a_super_type(this) + } + + /** Gets a super class of this class (includes transitive super classes) or this class */ + ClassObject getAnImproperSuperType() { + result = PointsTo::Types::get_an_improper_super_type(this) + } + + /** Whether this class is a new style class. + A new style class is one that implicitly or explicitly inherits from `object`. */ + predicate isNewStyle() { + PointsTo::Types::is_new_style(this) + } + + /** Whether this class is a legal exception class. + * What constitutes a legal exception class differs between major versions */ + predicate isLegalExceptionType() { + not this.isNewStyle() or + this.getAnImproperSuperType() = theBaseExceptionType() + or + major_version() = 2 and this = theTupleType() + } + + /** Gets the scope associated with this class, if it is not a builtin class */ + Class getPyClass() { + result.getClassObject() = this + } + + /** Returns an attribute declared on this class (not on a super-class) */ + Object declaredAttribute(string name) { + PointsTo::Types::class_declared_attribute(this, name, result, _, _) + } + + /** Holds if an attribute named `name` is declared on this class (not on a super-class) */ + predicate declaresAttribute(string name) { + class_declares_attribute(this, name) + } + + /** Returns an attribute as it would be when looked up at runtime on this class. 
+ Will include attributes of super-classes */ + Object lookupAttribute(string name) { + result = this.getMro().lookup(name) + } + + MRO::ClassList getMro() { + PointsTo::Types::is_new_style_bool(this) = true and + result = MRO::new_style_mro(this) + or + result = MRO::old_style_mro(this) + } + + /** Looks up an attribute by searching this class' MRO starting at `start` */ + Object lookupMro(ClassObject start, string name) { + result = this.getMro().startingAt(start).lookup(name) + } + + /** Whether the named attribute refers to the object and origin */ + predicate attributeRefersTo(string name, Object obj, ControlFlowNode origin) { + PointsTo::Types::class_attribute_lookup(this, name, obj, _, origin) + } + + /** Whether the named attribute refers to the object, class and origin */ + predicate attributeRefersTo(string name, Object obj, ClassObject cls, ControlFlowNode origin) { + not obj = unknownValue() and + PointsTo::Types::class_attribute_lookup(this, name, obj, cls, origin) + } + + /** Whether this class has an attribute named `name`, either declared or inherited.*/ + predicate hasAttribute(string name) { + PointsTo::Types::class_has_attribute(this, name) + } + + /** Whether it is impossible to know all the attributes of this class. Usually because it is + impossible to calculate the full class hierarchy or because some attribute is too dynamic. */ + predicate unknowableAttributes() { + /* True for a class with undeterminable superclasses, unanalysable metaclasses, or other confusions */ + this.failedInference() + or + this.getMetaClass().failedInference() + or + exists(Object base | + base = this.getABaseType() | + base.(ClassObject).unknowableAttributes() + or + not base instanceof ClassObject + ) + } + + /** Gets the metaclass for this class */ + ClassObject getMetaClass() { + result = PointsTo::Types::class_get_meta_class(this) + and + not this.failedInference() + } + + /* Whether this class is abstract. 
*/ + predicate isAbstract() { + this.getMetaClass() = theAbcMetaClassObject() + or + exists(FunctionObject f, Raise r, Name ex | + f = this.lookupAttribute(_) and + r.getScope() = f.getFunction() | + (r.getException() = ex or r.getException().(Call).getFunc() = ex) and + (ex.getId() = "NotImplementedError" or ex.getId() = "NotImplemented") + ) + } + + ControlFlowNode declaredMetaClass() { + result = this.getPyClass().getMetaClass().getAFlowNode() + } + + /** Has type inference failed to compute the full class hierarchy for this class for the reason given. */ + predicate failedInference(string reason) { + PointsTo::Types::failed_inference(this, reason) + } + + /** Has type inference failed to compute the full class hierarchy for this class */ + predicate failedInference() { + this.failedInference(_) + } + + /** Gets an object which is the sole instance of this class, if this class is probably a singleton. + * Note the 'probable' in the name; there is no guarantee that this class is in fact a singleton. + * It is guaranteed that getProbableSingletonInstance() returns at most one Object for each ClassObject. */ + Object getProbableSingletonInstance() { + exists(ControlFlowNode use, Expr origin | + use.refersTo(result, this, origin.getAFlowNode()) + | + this.hasStaticallyUniqueInstance() + and + /* Ensure that original expression will be executed only once. 
*/ + origin.getScope() instanceof ImportTimeScope and + not exists(Expr outer | outer.getASubExpression+() = origin) + ) + or + this = theNoneType() and result = theNoneObject() + } + + /** This class is only instantiated at one place in the code */ + private predicate hasStaticallyUniqueInstance() { + strictcount(Object instances | PointsTo::points_to(_, _, instances, this, _)) = 1 + } + + ImportTimeScope getImportTimeScope() { + result = this.getPyClass() + } + + override string toString() { + this.isC() and result = "builtin-class " + this.getName() and not this = theUnknownType() + or + not this.isC() and result = "class " + this.getName() + } + + /* Method Resolution Order */ + + /** Returns the next class in the MRO of 'this' after 'sup' */ + ClassObject nextInMro(ClassObject sup) { + exists(MRO::ClassList mro, int i | + mro = this.getMro() and + sup = mro.getItem(i) and + result = mro.getItem(i+1) + ) and + not this.failedInference() + } + + /** Gets the class that occurs at `index` in the MRO of this class. + * `this` has an index of `1`, the next class in the MRO has an index of `2`, and so on. + */ + ClassObject getMroItem(int index) { + result = this.getMro().getItem(index) + } + + /** Holds if this class has duplicate base classes */ + predicate hasDuplicateBases() { + exists(ClassObject base, int i, int j | i != j and base = this.getBaseType(i) and base = this.getBaseType(j)) + } + + /** Holds if this class is an iterable. */ + predicate isIterable() { + this.hasAttribute("__iter__") or this.hasAttribute("__getitem__") + } + + /** Holds if this class is an iterator. */ + predicate isIterator() { + this.hasAttribute("__iter__") and + (major_version() = 3 and this.hasAttribute("__next__") + or + /* Because 'next' is a common method name we need to check that an __iter__ + * method actually returns this class. This is not needed for Py3 as the + * '__next__' method exists to define a class as an iterator. 
+ */ + major_version() = 2 and this.hasAttribute("next") and + exists(ClassObject other, FunctionObject iter | + other.declaredAttribute("__iter__") = iter | + iter.getAnInferredReturnType() = this + ) + ) + or + /* This will be redundant when we have C class information */ + this = theGeneratorType() + } + + /** Holds if this class is an improper subclass of the other class. + * True if this is a sub-class of other or this is the same class as other. + * + * Equivalent to the Python builtin function issubclass(). + */ + predicate isSubclassOf(ClassObject other) { + this = other or this.getASuperType() = other + } + + /** Synonymous with isContainer(), retained for backwards compatibility. */ + predicate isCollection() { + this.isContainer() + } + + /** Holds if this class is a container(). That is, does it have a __getitem__ method.*/ + predicate isContainer() { + exists(this.lookupAttribute("__getitem__")) + } + + /** Holds if this class is a mapping. */ + predicate isMapping() { + exists(this.lookupAttribute("__getitem__")) + and + not this.isSequence() + } + + /** Holds if this class is probably a sequence. */ + predicate isSequence() { + /* To determine whether something is a sequence or a mapping is not entirely clear, + * so we need to guess a bit. + */ + this.getAnImproperSuperType() = theTupleType() + or + this.getAnImproperSuperType() = theListType() + or + this.getAnImproperSuperType() = theRangeType() + or + this.getAnImproperSuperType() = theBytesType() + or + this.getAnImproperSuperType() = theUnicodeType() + or + /* Does this inherit from abc.Sequence? */ + this.getASuperType().getName() = "Sequence" + or + /* Does it have an index or __reversed__ method? 
*/ + this.isContainer() and + ( + this.hasAttribute("index") or + this.hasAttribute("__reversed__") + ) + } + + predicate isCallable() { + this.hasAttribute("__call__") + } + + predicate isContextManager() { + this.hasAttribute("__enter__") and this.hasAttribute("__exit__") + } + + predicate assignedInInit(string name) { + exists(FunctionObject init | init = this.lookupAttribute("__init__") | + attribute_assigned_in_method(init, name) + ) + } + + /** Holds if this class is unhashable */ + predicate unhashable() { + this.lookupAttribute("__hash__") = theNoneObject() + or + ((FunctionObject)this.lookupAttribute("__hash__")).neverReturns() + } + + /** Holds if this class is a descriptor */ + predicate isDescriptorType() { + this.hasAttribute("__get__") + } + + /** Holds if this class is an overriding descriptor */ + predicate isOverridingDescriptorType() { + this.hasAttribute("__get__") and this.hasAttribute("__set__") + } + + FunctionObject getAMethodCalledFromInit() { + exists(FunctionObject init | + init = this.lookupAttribute("__init__") and + init.getACallee*() = result + ) + } + + override boolean booleanValue() { + result = true + } + + /** Gets a call to this class. Note that the call may not create a new instance of + * this class, as that depends on the `__new__` method of this class. + */ + CallNode getACall() { + result.getFunction().refersTo(this) + } + +} + +/** The 'str' class. 
This is the same as the 'bytes' class for + * Python 2 and the 'unicode' class for Python 3 */ +ClassObject theStrType() { + if major_version() = 2 then + result = theBytesType() + else + result = theUnicodeType() +} + +private +Module theAbcModule() { + result.getName() = "abc" +} + +ClassObject theAbcMetaClassObject() { + /* Avoid using points-to and thus negative recursion */ + exists(Class abcmeta | + result.getPyClass() = abcmeta | + abcmeta.getName() = "ABCMeta" and + abcmeta.getScope() = theAbcModule() + ) +} + +/* Common builtin classes */ + +/** The built-in class NoneType*/ +ClassObject theNoneType() { + py_special_objects(result, "NoneType") +} + +/** The built-in class 'bool' */ +ClassObject theBoolType() { + py_special_objects(result, "bool") +} + +/** The builtin class 'type' */ +ClassObject theTypeType() { + py_special_objects(result, "type") +} + +/** The builtin object ClassType (for old-style classes) */ +ClassObject theClassType() { + py_special_objects(result, "ClassType") +} + +/** The builtin object InstanceType (for old-style classes) */ +ClassObject theInstanceType() { + py_special_objects(result, "InstanceType") +} + +/** The builtin class 'tuple' */ +ClassObject theTupleType() { + py_special_objects(result, "tuple") +} + +/** The builtin class 'int' */ +ClassObject theIntType() { + py_special_objects(result, "int") +} + +/** The builtin class 'long' (Python 2 only) */ +ClassObject theLongType() { + py_special_objects(result, "long") +} + +/** The builtin class 'float' */ +ClassObject theFloatType() { + py_special_objects(result, "float") +} + +/** The builtin class 'complex' */ +ClassObject theComplexType() { + py_special_objects(result, "complex") +} + +/** The builtin class 'object' */ +ClassObject theObjectType() { + py_special_objects(result, "object") +} + +/** The builtin class 'list' */ +ClassObject theListType() { + py_special_objects(result, "list") +} + +/** The builtin class 'dict' */ + +ClassObject theDictType() { + 
py_special_objects(result, "dict") +} + +/** The builtin class 'Exception' */ + +ClassObject theExceptionType() { + py_special_objects(result, "Exception") +} + +/** The builtin class for unicode. unicode in Python2, str in Python3 */ +ClassObject theUnicodeType() { + py_special_objects(result, "unicode") +} + +/** The builtin class '(x)range' */ +ClassObject theRangeType() { + result = builtin_object("xrange") + or + major_version() = 3 and result = builtin_object("range") +} + +/** The builtin class for bytes. str in Python2, bytes in Python3 */ +ClassObject theBytesType() { + py_special_objects(result, "bytes") +} + +/** The builtin class 'set' */ +ClassObject theSetType() { + py_special_objects(result, "set") +} + +/** The builtin class 'property' */ +ClassObject thePropertyType() { + py_special_objects(result, "property") +} + +/** The builtin class 'BaseException' */ +ClassObject theBaseExceptionType() { + py_special_objects(result, "BaseException") +} + +/** The class of builtin-functions */ +ClassObject theBuiltinFunctionType() { + py_special_objects(result, "BuiltinFunctionType") +} + +/** The class of Python functions */ +ClassObject thePyFunctionType() { + py_special_objects(result, "FunctionType") +} + +/** The builtin class 'classmethod' */ +ClassObject theClassMethodType() { + py_special_objects(result, "ClassMethod") +} + +/** The builtin class 'staticmethod' */ +ClassObject theStaticMethodType() { + py_special_objects(result, "StaticMethod") +} + +/** The class of modules */ +ClassObject theModuleType() { + py_special_objects(result, "ModuleType") +} + +/** The class of generators */ +ClassObject theGeneratorType() { + py_special_objects(result, "generator") +} + +/** The builtin class 'TypeError' */ +ClassObject theTypeErrorType() { + py_special_objects(result, "TypeError") +} + +/** The builtin class 'AttributeError' */ +ClassObject theAttributeErrorType() { + py_special_objects(result, "AttributeError") +} + +/** The builtin class 'KeyError' */ 
+ClassObject theKeyErrorType() { + py_special_objects(result, "KeyError") +} + +/** The builtin class of bound methods */ +pragma [noinline] +ClassObject theBoundMethodType() { + py_special_objects(result, "MethodType") +} + +/** The builtin class of builtin properties */ +ClassObject theGetSetDescriptorType() { + py_special_objects(result, "GetSetDescriptorType") +} + +/** The method descriptor class */ +ClassObject theMethodDescriptorType() { + py_special_objects(result, "MethodDescriptorType") +} + +/** The class of builtin properties */ +ClassObject theBuiltinPropertyType() { + /* This is CPython specific */ + result.isC() and + result.getName() = "getset_descriptor" +} + +/** The builtin class 'IOError' */ +ClassObject theIOErrorType() { + result = builtin_object("IOError") +} + +/** The builtin class 'super' */ +ClassObject theSuperType() { + result = builtin_object("super") +} + +/** The builtin class 'StopIteration' */ +ClassObject theStopIterationType() { + result = builtin_object("StopIteration") +} + +/** The builtin class 'NotImplementedError' */ +ClassObject theNotImplementedErrorType() { + result = builtin_object("NotImplementedError") +} diff --git a/python/ql/src/semmle/python/types/Descriptors.qll b/python/ql/src/semmle/python/types/Descriptors.qll new file mode 100644 index 00000000000..168597d0eed --- /dev/null +++ b/python/ql/src/semmle/python/types/Descriptors.qll @@ -0,0 +1,59 @@ +import python +private import semmle.python.pointsto.PointsTo + +/** A bound method object, x.f where type(x) has a method f */ +class BoundMethod extends Object { + + BoundMethod() { + bound_method(this, _) + } + + /* Gets the method 'f' in 'x.f' */ + FunctionObject getMethod() { + bound_method(this, result) + } + +} + +private predicate bound_method(AttrNode binding, FunctionObject method) { + binding = method.getAMethodCall().getFunction() +} + +private predicate decorator_call(Object method, ClassObject decorator, FunctionObject func) { + exists(CallNode f | + 
method = f and + f.getFunction().refersTo(decorator) and + PointsTo::points_to(f.getArg(0), _, func, _, _) + ) +} + +/** A class method object. Either a decorated function or an explicit call to classmethod(f) */ +class ClassMethodObject extends Object { + + ClassMethodObject() { + PointsTo::class_method(this, _) + } + + FunctionObject getFunction() { + PointsTo::class_method(this, result) + } + + CallNode getACall() { + PointsTo::class_method_call(this, _, _, _, result) + } + +} + +/** A static method object. Either a decorated function or an explicit call to staticmethod(f) */ +class StaticMethodObject extends Object { + + StaticMethodObject() { + decorator_call(this, theStaticMethodType(), _) + } + + FunctionObject getFunction() { + decorator_call(this, theStaticMethodType(), result) + } + +} + diff --git a/python/ql/src/semmle/python/types/Exceptions.qll b/python/ql/src/semmle/python/types/Exceptions.qll new file mode 100644 index 00000000000..485c0112116 --- /dev/null +++ b/python/ql/src/semmle/python/types/Exceptions.qll @@ -0,0 +1,331 @@ +/** + * Analysis of exception raising and handling. + * + * In order to make this useful we make a number of assumptions. These are: + * 1. Typing errors (TypeError, NameError, AttributeError) are assumed to occur only if: + * a) Explicitly raised, e.g. raise TypeError() + * or + * b) Explicitly caught, e.g. except TypeError: + * 2. Asynchronous exceptions, MemoryError, KeyboardInterrupt are ignored. + * 3. Calls to unidentified objects can raise anything, unless it is an + * attribute named 'read' or 'write' in which case it can raise IOError. 
+ */ + +import python + +/** Subset of ControlFlowNodes which might raise an exception */ +class RaisingNode extends ControlFlowNode { + + RaisingNode() { + exists(this.getAnExceptionalSuccessor()) + or + this.isExceptionalExit(_) + } + + /** Gets the CFG node for the exception, if and only if this RaisingNode is an explicit raise */ + ControlFlowNode getExceptionNode() { + exists(Raise r | + r = this.getNode() and result.getNode() = r.getRaised() and + result.getBasicBlock().dominates(this.getBasicBlock()) + ) + } + + private predicate quits() { + this.(CallNode).getFunction().refersTo(quitterObject(_)) + } + + /** Gets the type of an exception that may be raised + at this control flow node */ + ClassObject getARaisedType() { + result = this.localRaisedType() + or + exists(FunctionObject func | + this = func.getACall() | + result = func.getARaisedType() + ) + or + result = systemExitRaise() + } + + pragma[noinline] + private ClassObject systemExitRaise() { + this.quits() and result = builtin_object("SystemExit") + } + + pragma [noinline, nomagic] + private ClassObject localRaisedType() { + result.isSubclassOf(theBaseExceptionType()) + and + ( + exists(ControlFlowNode ex | + ex = this.getExceptionNode() and + (ex.refersTo(result) or ex.refersTo(_, result, _)) + ) + or + this.getNode() instanceof ImportExpr and result = builtin_object("ImportError") + or + this.getNode() instanceof Print and result = theIOErrorType() + or + exists(ExceptFlowNode except | + except = this.getAnExceptionalSuccessor() and + except.handles(result) and + result = this.innateException() + ) + or + not exists(ExceptFlowNode except | except = this.getAnExceptionalSuccessor()) + and + sequence_or_mapping(this) and result = theLookupErrorType() + or + this.read_write_call() and result = theIOErrorType() + ) + } + + pragma [noinline] + ClassObject innateException() { + this.getNode() instanceof Attribute and result = theAttributeErrorType() + or + this.getNode() instanceof Name and result = 
theNameErrorType() + or + this.getNode() instanceof Subscript and result = theIndexErrorType() + or + this.getNode() instanceof Subscript and result = theKeyErrorType() + } + + /** Whether this control flow node raises an exception, + * but the type of the exception it raises cannot be inferred. */ + predicate raisesUnknownType() { + /* read/write calls are assumed to raise IOError (OSError for Py3) */ + not this.read_write_call() + and + ( + /* Call to an unknown object */ + this.getNode() instanceof Call and not exists(FunctionObject func | this = func.getACall()) + and not exists(ClassObject known | this.(CallNode).getFunction().refersTo(known)) + or + this.getNode() instanceof Exec + or + /* Call to a function raising an unknown type */ + exists(FunctionObject func | + this = func.getACall() | + func.raisesUnknownType() + ) + ) + } + + private predicate read_write_call() { + exists(string mname | mname = this.(CallNode).getFunction().(AttrNode).getName() | + mname = "read" or mname = "write" + ) + } + + /** Whether (as inferred by type inference) it is highly unlikely (or impossible) for control to flow from this to succ. + */ + predicate unlikelySuccessor(ControlFlowNode succ) { + succ = this.getAnExceptionalSuccessor() and + not this.viableExceptionEdge(succ, _) and + not this.raisesUnknownType() + or + exists(FunctionObject func | + func.getACall() = this and + func.neverReturns() and + succ = this.getASuccessor() and + not succ = this.getAnExceptionalSuccessor() and + // If result is yielded then func is likely to be some form of coroutine. 
+ not succ.getNode() instanceof Yield + ) + or + this.quits() and + succ = this.getASuccessor() and + not succ = this.getAnExceptionalSuccessor() + } + + /** Whether it is considered plausible that 'raised' can be raised across the edge this-succ */ + predicate viableExceptionEdge(ControlFlowNode succ, ClassObject raised) { + raised.isLegalExceptionType() and + raised = this.getARaisedType() and + succ = this.getAnExceptionalSuccessor() and + ( + /* An 'except' that handles raised and there is no more previous handler */ + ((ExceptFlowNode)succ).handles(raised) and + not exists(ExceptFlowNode other, StmtList s, int i, int j | + not other = succ and other.handles(raised) and + s.getItem(i) = succ.getNode() and s.getItem(j) = other.getNode() + | + j < i + ) + or + /* Any successor that is not an 'except', provided that 'raised' is not handled by a different successor. */ + (not ((ExceptFlowNode)this.getAnExceptionalSuccessor()).handles(raised) and + not succ instanceof ExceptFlowNode) + ) + } + + /** Whether this exceptional exit is viable. That is, is it + * plausible that the scope `s` can be exited with exception `raised` + * at this point. + */ + predicate viableExceptionalExit(Scope s, ClassObject raised) { + raised.isLegalExceptionType() and + raised = this.getARaisedType() and + this.isExceptionalExit(s) and + not ((ExceptFlowNode)this.getAnExceptionalSuccessor()).handles(raised) + } + +} + +/** Is this a sequence or mapping subscript x[i]? 
*/ +private predicate sequence_or_mapping(RaisingNode r) { + r.getNode() instanceof Subscript +} + +private predicate current_exception(ClassObject ex, BasicBlock b) { + exists(RaisingNode r | + r.viableExceptionEdge(b.getNode(0), ex) and not b.getNode(0) instanceof ExceptFlowNode + ) + or + exists(BasicBlock prev | + current_exception(ex, prev) and + exists(ControlFlowNode pred, ControlFlowNode succ | + pred = prev.getLastNode() and succ = b.getNode(0) | + pred.getASuccessor() = succ and + (/* Normal control flow */ + not pred.getAnExceptionalSuccessor() = succ or + /* Re-raise the current exception, propagating to the successor */ + pred instanceof ReraisingNode) + ) + ) +} + +private predicate unknown_current_exception(BasicBlock b) { + exists(RaisingNode r | + r.raisesUnknownType() and + r.getAnExceptionalSuccessor() = b.getNode(0) and + not b.getNode(0) instanceof ExceptFlowNode + ) + or + exists(BasicBlock prev | + unknown_current_exception(prev) and + exists(ControlFlowNode pred, ControlFlowNode succ | + pred = prev.getLastNode() and succ = b.getNode(0) | + pred.getASuccessor() = succ and + (not pred.getAnExceptionalSuccessor() = succ or pred instanceof ReraisingNode) + ) + ) +} + +/** INTERNAL -- Use FunctionObject.getARaisedType() instead */ +predicate scope_raises(ClassObject ex, Scope s) { + exists(BasicBlock b | + current_exception(ex, b) and + b.getLastNode().isExceptionalExit(s) | + b.getLastNode() instanceof ReraisingNode + ) + or + exists(RaisingNode r | r.viableExceptionalExit(s, ex)) +} + +/** INTERNAL -- Use FunctionObject.raisesUnknownType() instead */ +predicate scope_raises_unknown(Scope s) { + exists(BasicBlock b | + b.getLastNode() instanceof ReraisingNode + and b.getLastNode().isExceptionalExit(s) | + unknown_current_exception(b) + ) + or + exists(RaisingNode r | + r.raisesUnknownType() and + r.isExceptionalExit(s) + ) +} + + +/** ControlFlowNode for an 'except' statement. 
*/ +class ExceptFlowNode extends ControlFlowNode { + + ExceptFlowNode() { + this.getNode() instanceof ExceptStmt + } + + ControlFlowNode getType() { + exists(ExceptStmt ex | + this.getBasicBlock().dominates(result.getBasicBlock()) and + ex = this.getNode() and result = ex.getType().getAFlowNode() + ) + } + + ControlFlowNode getName() { + exists(ExceptStmt ex | + this.getBasicBlock().dominates(result.getBasicBlock()) and + ex = this.getNode() and result = ex.getName().getAFlowNode() + ) + } + + private predicate handledObject(Object obj, ClassObject cls, ControlFlowNode origin) { + this.getType().refersTo(obj, cls, origin) + or + exists(Object tup | + this.handledObject(tup, theTupleType(), _) | + element_from_tuple(tup).refersTo(obj, cls, origin) + ) + } + + /** Gets the inferred type(s) that are handled by this node, splitting tuples if possible. */ + pragma [noinline] + predicate handledException(Object obj, ClassObject cls, ControlFlowNode origin) { + this.handledObject(obj, cls, origin) and not cls = theTupleType() + or + not exists(this.getNode().(ExceptStmt).getType()) and obj = theBaseExceptionType() and cls = theTypeType() and + origin = this + } + + /** Whether this `except` handles `cls` */ + predicate handles(ClassObject cls) { + exists(ClassObject handled | + this.handledException(handled, _, _) | + cls.getAnImproperSuperType() = handled + ) + } + +} + +private ControlFlowNode element_from_tuple(Object tuple) { + exists(Tuple t | + t = tuple.getOrigin() and result = t.getAnElt().getAFlowNode() + ) +} + +/** A Reraising node is the node at the end of a finally block (on the exceptional branch) + * that reraises the current exception. 
+ */ +class ReraisingNode extends RaisingNode { + + ReraisingNode() { + not this.getNode() instanceof Raise and + in_finally(this) and + forall(ControlFlowNode succ | + succ = this.getASuccessor() | + succ = this.getAnExceptionalSuccessor() + ) + } + + /** Gets a class that may be raised by this node */ + override ClassObject getARaisedType() { + exists(BasicBlock b | + current_exception(result, b) and + b.getNode(_) = this + ) + } + +} + +private predicate in_finally(ControlFlowNode n) { + exists(Stmt f | + exists(Try t | f = t.getAFinalstmt()) | + f = n.getNode() + or + f.containsInScope(n.getNode()) + ) +} + + + diff --git a/python/ql/src/semmle/python/types/Extensions.qll b/python/ql/src/semmle/python/types/Extensions.qll new file mode 100644 index 00000000000..619978ff325 --- /dev/null +++ b/python/ql/src/semmle/python/types/Extensions.qll @@ -0,0 +1,59 @@ +/** This library allows custom extensions to the points-to analysis to incorporate + * custom domain knowledge into the points-to analysis. + * + * This should be considered an advance feature. Modifying the points-to analysis + * can cause queries to give strange and misleading results, if not done with care. + */ + +import python +private import semmle.python.pointsto.PointsTo +private import semmle.python.pointsto.PointsToContext + +/* Custom Facts. This extension mechanism allows you to add custom + * sources of data to the points-to analysis. 
+ */ + +abstract class CustomPointsToFact extends @py_flow_node { + + string toString() { none() } + + abstract predicate pointsTo(Context context, Object value, ClassObject cls, ControlFlowNode origin); + +} + +/* For backwards compatibility */ +class FinalCustomPointsToFact = CustomPointsToFact; + +abstract class CustomPointsToOriginFact extends CustomPointsToFact { + + abstract predicate pointsTo(Object value, ClassObject cls); + + override predicate pointsTo(Context context, Object value, ClassObject cls, ControlFlowNode origin) { + this.pointsTo(value, cls) and origin = this and context.appliesTo(this) + } + +} + +/* An example */ + +/** Any variable iterating over range or xrange must be an integer */ +class RangeIterationVariableFact extends CustomPointsToFact { + + RangeIterationVariableFact() { + exists(For f, ControlFlowNode iterable | + iterable.getBasicBlock().dominates(this.(ControlFlowNode).getBasicBlock()) and + f.getIter().getAFlowNode() = iterable and + f.getTarget().getAFlowNode() = this and + PointsTo::points_to(iterable, _, theRangeType(), _, _) + ) + } + + override predicate pointsTo(Context context, Object value, ClassObject cls, ControlFlowNode origin) { + value = this and + origin = this and + cls = theIntType() and + context.appliesTo(this) + } +} + + diff --git a/python/ql/src/semmle/python/types/FunctionObject.qll b/python/ql/src/semmle/python/types/FunctionObject.qll new file mode 100644 index 00000000000..dffc14a0e27 --- /dev/null +++ b/python/ql/src/semmle/python/types/FunctionObject.qll @@ -0,0 +1,379 @@ +import python +import semmle.python.types.Exceptions +private import semmle.python.pointsto.PointsTo +private import semmle.python.libraries.Zope +private import semmle.python.pointsto.Base + +/** A function object, whether written in Python or builtin */ +abstract class FunctionObject extends Object { + + predicate isOverridingMethod() { + exists(Object f | this.overrides(f)) + } + + predicate isOverriddenMethod() { + exists(Object 
f | f.overrides(this)) + } + + Function getFunction() { + result = ((CallableExpr)this.getOrigin()).getInnerScope() + } + + /** This function always returns None, meaning that its return value should be disregarded */ + abstract predicate isProcedure(); + + /** Gets the name of this function */ + abstract string getName(); + + /** Gets a class that may be raised by this function */ + abstract ClassObject getARaisedType(); + + /** Whether this function raises an exception, the class of which cannot be inferred */ + abstract predicate raisesUnknownType(); + + /** Use descriptiveString() instead. */ + deprecated string prettyString() { + result = this.descriptiveString() + } + + /** Gets a longer, more descriptive version of toString() */ + abstract string descriptiveString(); + + /** Gets a call-site from where this function is called as a function */ + CallNode getAFunctionCall() { + PointsTo::function_call(this, _, result) + } + + /** Gets a call-site from where this function is called as a method */ + CallNode getAMethodCall() { + PointsTo::method_call(this, _, result) + } + + /** Gets a call-site from where this function is called */ + ControlFlowNode getACall() { + result = PointsTo::get_a_call(this, _) + } + + /** Gets a call-site from where this function is called, given the `context` */ + ControlFlowNode getACall(Context caller_context) { + result = PointsTo::get_a_call(this, caller_context) + } + + /** Gets the `ControlFlowNode` that will be passed as the nth argument to `this` when called at `call`. + This predicate will correctly handle `x.y()`, treating `x` as the zeroth argument. + */ + ControlFlowNode getArgumentForCall(CallNode call, int n) { + result = PointsTo::get_positional_argument_for_call(this, _, call, n) + } + + /** Gets the `ControlFlowNode` that will be passed as the named argument to `this` when called at `call`. + This predicate will correctly handle `x.y()`, treating `x` as the self argument. 
+ */ + ControlFlowNode getNamedArgumentForCall(CallNode call, string name) { + result = PointsTo::get_named_argument_for_call(this, _, call, name) + } + + /** Whether this function never returns. This is an approximation. + */ + predicate neverReturns() { + PointsTo::function_never_returns(this) + } + + /** Whether this is a "normal" method, that is, it is exists as a class attribute + * which is not wrapped and not the __new__ method. */ + predicate isNormalMethod() { + exists(ClassObject cls, string name | + cls.declaredAttribute(name) = this and + name != "__new__" and + not this.getOrigin() instanceof Lambda + ) + } + + /** Gets the minimum number of parameters that can be correctly passed to this function */ + abstract int minParameters(); + + /** Gets the maximum number of parameters that can be correctly passed to this function */ + abstract int maxParameters(); + + /** Gets a function that this function (directly) calls */ + FunctionObject getACallee() { + exists(ControlFlowNode node | + node.getScope() = this.getFunction() and + result.getACall() = node + ) + } + + /** Gets the qualified name for this function object. + * Should return the same name as the `__qualname__` attribute on functions in Python 3. 
+ */ + abstract string getQualifiedName(); + + /** Whether `name` is a legal argument name for this function */ + bindingset[name] + predicate isLegalArgumentName(string name) { + this.getFunction().getAnArg().asName().getId() = name + or + this.getFunction().getAKeywordOnlyArg().getId() = name + or + this.getFunction().hasKwArg() + } + + /** Gets a class that this function may return */ + ClassObject getAnInferredReturnType() { + result = this.(BuiltinCallable).getAReturnType() + } + + predicate isAbstract() { + this.getARaisedType() = theNotImplementedErrorType() + } + +} + +class PyFunctionObject extends FunctionObject { + + PyFunctionObject() { + this.getOrigin() instanceof CallableExpr + } + + override string toString() { + result = "Function " + this.getName() + } + + override string getName() { + result = ((FunctionExpr)this.getOrigin()).getName() + or + this.getOrigin() instanceof Lambda and result = "lambda" + } + + /** Whether this function is a procedure, that is, it has no explicit return statement and is not a generator function */ + override predicate isProcedure() { + this.getFunction().isProcedure() + } + + override ClassObject getARaisedType() { + scope_raises(result, this.getFunction()) + } + + override predicate raisesUnknownType() { + scope_raises_unknown(this.getFunction()) + } + + /** Gets a control flow node corresponding to the value of a return statement */ + ControlFlowNode getAReturnedNode() { + exists(Return ret | + ret.getScope() = this.getFunction() and + result.getNode() = ret.getValue() + ) + } + + override string descriptiveString() { + if this.getFunction().isMethod() then ( + exists(Class cls | + this.getFunction().getScope() = cls | + result = "method " + this.getQualifiedName() + ) + ) else ( + result = "function " + this.getQualifiedName() + ) + } + + override int minParameters() { + exists(Function f | + f = this.getFunction() and + result = count(f.getAnArg()) - count(f.getDefinition().getArgs().getADefault()) + ) + } + + 
override int maxParameters() { + exists(Function f | + f = this.getFunction() and + if exists(f.getVararg()) then + result = 2147483647 // INT_MAX + else + result = count(f.getAnArg()) + ) + } + + override string getQualifiedName() { + result = this.getFunction().getQualifiedName() + } + + predicate unconditionallyReturnsParameter(int n) { + exists(SsaVariable pvar | + exists(Parameter p | p = this.getFunction().getArg(n) | + p.asName().getAFlowNode() = pvar.getDefinition() + ) and + exists(NameNode rval | + rval = pvar.getAUse() and + exists(Return r | r.getValue() = rval.getNode()) and + rval.strictlyDominates(rval.getScope().getANormalExit()) + ) + ) + } + + /** Factored out to help join ordering */ + private predicate implicitlyReturns(Object none_, ClassObject noneType) { + noneType = theNoneType() and not this.getFunction().isGenerator() and none_ = theNoneObject() and + ( + not exists(this.getAReturnedNode()) and exists(this.getFunction().getANormalExit()) + or + exists(Return ret | ret.getScope() = this.getFunction() and not exists(ret.getValue())) + ) + } + + /** Gets a class that this function may return */ + override ClassObject getAnInferredReturnType() { + this.getFunction().isGenerator() and result = theGeneratorType() + or + not this.neverReturns() and not this.getFunction().isGenerator() and + ( + this.(PyFunctionObject).getAReturnedNode().refersTo( _, result, _) + or + this.implicitlyReturns(_, result) + ) + } + + ParameterDefinition getParameter(int n) { + result.getDefiningNode().getNode() = this.getFunction().getArg(n) + } + +} + +abstract class BuiltinCallable extends FunctionObject { + + abstract ClassObject getAReturnType(); + + override predicate isProcedure() { + forex(ClassObject rt | + rt = this.getAReturnType() | + rt = theNoneType() + ) + } + + abstract override string getQualifiedName(); + +} + +class BuiltinMethodObject extends BuiltinCallable { + + BuiltinMethodObject() { + py_cobjecttypes(this, theMethodDescriptorType()) + or + 
py_cobjecttypes(this, theBuiltinFunctionType()) and exists(ClassObject c | py_cmembers_versioned(c, _, this, major_version().toString())) + or + exists(ClassObject wrapper_descriptor | + py_cobjecttypes(this, wrapper_descriptor) and + py_cobjectnames(wrapper_descriptor, "wrapper_descriptor") + ) + } + + override string getQualifiedName() { + exists(ClassObject cls | + py_cmembers_versioned(cls, _, this, major_version().toString()) | + result = cls.getName() + "." + this.getName() + ) + or + not exists(ClassObject cls | py_cmembers_versioned(cls, _, this, major_version().toString())) and + result = this.getName() + } + + override string descriptiveString() { + result = "builtin-method " + this.getQualifiedName() + } + + override string getName() { + py_cobjectnames(this, result) + } + + override string toString() { + result = "Builtin-method " + this.getName() + } + + override ClassObject getARaisedType() { + /* Information is unavailable for C code in general */ + none() + } + + override predicate raisesUnknownType() { + /* Information is unavailable for C code in general */ + any() + } + + override int minParameters() { + none() + } + + override int maxParameters() { + none() + } + + override ClassObject getAReturnType() { + ext_rettype(this, result) + } + +} + +class BuiltinFunctionObject extends BuiltinCallable { + + BuiltinFunctionObject() { + py_cobjecttypes(this, theBuiltinFunctionType()) and exists(ModuleObject m | py_cmembers_versioned(m, _, this, major_version().toString())) + } + + override string getName() { + py_cobjectnames(this, result) + } + + override string getQualifiedName() { + result = this.getName() + } + + override string toString() { + result = "Builtin-function " + this.getName() + } + + override string descriptiveString() { + result = "builtin-function " + this.getName() + } + + override ClassObject getARaisedType() { + /* Information is unavailable for C code in general */ + none() + } + + override predicate raisesUnknownType() { + /* 
Information is unavailable for C code in general */ + any() + } + + override ClassObject getAReturnType() { + /* Enumerate the types of a few builtin functions, that the CPython analysis misses. + */ + this = builtin_object("hex") and result = theStrType() + or + this = builtin_object("oct") and result = theStrType() + or + this = builtin_object("intern") and result = theStrType() + or + /* Fix a few minor inaccuracies in the CPython analysis */ + ext_rettype(this, result) and not ( + this = builtin_object("__import__") and result = theNoneType() + or + this = builtin_object("compile") and result = theNoneType() + or + this = builtin_object("sum") + or + this = builtin_object("filter") + ) + } + + override int minParameters() { + none() + } + + override int maxParameters() { + none() + } + +} + + diff --git a/python/ql/src/semmle/python/types/IgnoredAndApproximations.txt b/python/ql/src/semmle/python/types/IgnoredAndApproximations.txt new file mode 100644 index 00000000000..2b2540e35ee --- /dev/null +++ b/python/ql/src/semmle/python/types/IgnoredAndApproximations.txt @@ -0,0 +1,47 @@ +List of approximations used and semantic details that are ignored +================================================================= + +1. Metaclass __getattribute___ + +Attribute lookup on a class, but not its instances, used the __getattribute__() +method of the metaclass. I doubt anyone every overrides this method except for debugging +or testing frameworks, so we ignore the possibility that for a class C +(type(C)).__getattribute__ != type.__getattribute__ + +2. Class __getattribute__ +Many analyses are context-insensitive. For those analyses any instance of a class that +defines __getattribute__ are treated as having unknowable attributes. + +3. Class and Function descriptors +Class and Function descriptors provide a challenge. 
+The resulting entity is the result of calling the descriptor with the function as input: +@dec +def f(): pass +is equivalent to f = dec(f) + +and decorators themselves can be the result of calling a higher-order function +and can also be themselves decorated. + +This clearly requires context sensitive analysis. +@dec(x) +def f(): pass +is equivalent to f = dec(x)(f) +but in a context-insensitive context. +Need a method: +Object decoratored_function(Object decorator, Object undecorated); +But what is the decorator and what object is available as a result? +Need to create an object for each decorator of a class or function. +That should be the actual Object. + +There is an assumption that each Object has a one-to-one mapping with a FlowNode +adding extra Objects for decorators might be a problem, since the decorator 'dec' +will point to another object (it could even point to itself if it were a lambda), +yet we need an Object for each level of decorated function. +To do this all decorated function objects have the (Function|Class)Expr as + origin. This requires that the getOrigin() method will need refinement for those + QL types. + + + + + diff --git a/python/ql/src/semmle/python/types/ImportTime.qll b/python/ql/src/semmle/python/types/ImportTime.qll new file mode 100644 index 00000000000..0a9e7c9d145 --- /dev/null +++ b/python/ql/src/semmle/python/types/ImportTime.qll @@ -0,0 +1,35 @@ +import python + +/** An ImportTimeScope is any scope that is not nested within a function and will thus be executed if its + * enclosing module is imported. + * Note however, that if a scope is not an ImportTimeScope it may still be executed at import time. + * This is an artificial approximation, which is necessary for static analysis. + */ +class ImportTimeScope extends Scope { + + ImportTimeScope() { + not this.getEnclosingScope*() instanceof Function + } + + /** Whether this scope explicitly defines 'name'. 
+ * Does not cover implicit definitions by import * */ + pragma[nomagic] + predicate definesName(string name) { + exists(SsaVariable var | name = var.getId() and var.getAUse() = this.getANormalExit()) + } + + /** Holds if the control flow passes from `outer` to `inner` when this scope starts executing */ + predicate entryEdge(ControlFlowNode outer, ControlFlowNode inner) { + inner = this.getEntryNode() and + outer.getNode().(ClassExpr).getInnerScope() = this + } + + /** Gets the global variable that is used during lookup, should `var` be undefined. */ + GlobalVariable getOuterVariable(LocalVariable var) { + this instanceof Class and + var.getScope() = this and + result.getScope() = this.getEnclosingModule() and + var.getId() = result.getId() + } + +} diff --git a/python/ql/src/semmle/python/types/ModuleKind.qll b/python/ql/src/semmle/python/types/ModuleKind.qll new file mode 100644 index 00000000000..abb39626ff1 --- /dev/null +++ b/python/ql/src/semmle/python/types/ModuleKind.qll @@ -0,0 +1,41 @@ +import python + +private predicate is_normal_module(ModuleObject m) { + m instanceof BuiltinModuleObject + or + m instanceof PackageObject + or + exists(ImportingStmt i | m.importedAs(i.getAnImportedModuleName())) + or + m.getName().matches("%\\_\\_init\\_\\_") +} + +private predicate is_script(ModuleObject m) { + not is_normal_module(m) and ( + m.getModule().getFile().getExtension() != ".py" + or + exists(If i, Name name, StrConst main, Cmpop op | + i.getScope() = m.getModule() and + op instanceof Eq and + i.getTest().(Compare).compares(name, op, main) and + name.getId() = "__name__" and main.getText() = "__main__" + ) + ) +} + +private predicate is_plugin(ModuleObject m) { + // This needs refining but is sufficient for our present needs. 
+ not is_normal_module(m) and + not is_script(m) +} + +/** Gets the kind for module `m`, which will be one of + * "module", "script" or "plugin" + */ +string getKindForModule(ModuleObject m) { + is_normal_module(m) and result = "module" + or + is_script(m) and result = "script" + or + is_plugin(m) and result = "plugin" +} diff --git a/python/ql/src/semmle/python/types/ModuleObject.qll b/python/ql/src/semmle/python/types/ModuleObject.qll new file mode 100644 index 00000000000..6a05cf0afe3 --- /dev/null +++ b/python/ql/src/semmle/python/types/ModuleObject.qll @@ -0,0 +1,259 @@ +import python +private import semmle.python.pointsto.PointsTo +private import semmle.python.pointsto.Base +private import semmle.python.types.ModuleKind + +abstract class ModuleObject extends Object { + + ModuleObject () { + exists(Module m | m.getEntryNode() = this) + or + py_cobjecttypes(this, theModuleType()) + } + + /** Gets the scope corresponding to this module, if this is a Python module */ + Module getModule() { + none() + } + + Container getPath() { + none() + } + + /** Gets the name of this scope */ + abstract string getName(); + + override string toString() { + result = "Module " + this.getName() + } + + /** Gets the named attribute of this module. Using attributeRefersTo() instead + * may provide better results for presentation. */ + pragma [noinline] + abstract Object getAttribute(string name); + + /** Whether the named attribute of this module "refers-to" value, with a known origin. + */ + abstract predicate attributeRefersTo(string name, Object value, ControlFlowNode origin); + + /** Whether the named attribute of this module "refers-to" value, with known class and a known origin. + */ + abstract predicate attributeRefersTo(string name, Object value, ClassObject cls, ControlFlowNode origin); + + /** Gets the package for this module. 
*/ + PackageObject getPackage() { + this.getName().matches("%.%") and + result.getName() = this.getName().regexpReplaceAll("\\.[^.]*$", "") + } + + /** Whether this module "exports" `name`. That is, whether using `import *` on this module + will result in `name` being added to the namespace. */ + predicate exports(string name) { + PointsTo::module_exports(this, name) + } + + /** Whether the complete set of names "exported" by this module can be accurately determined */ + abstract predicate exportsComplete(); + + /** Gets the short name of the module. For example the short name of module x.y.z is 'z' */ + string getShortName() { + result = this.getName().suffix(this.getPackage().getName().length()+1) + or + result = this.getName() and not exists(this.getPackage()) + } + + /** Whether this module is imported by 'import name'. For example on a linux system, + * the module 'posixpath' is imported as 'os.path' or as 'posixpath' */ + predicate importedAs(string name) { + PointsTo::module_imported_as(this, name) + } + + abstract predicate hasAttribute(string name); + + ModuleObject getAnImportedModule() { + result.importedAs(this.getModule().getAnImportedModuleName()) + } + + /** Gets the kind for this module. Will be one of + * "module", "script" or "plugin". 
+ */ + string getKind() { + result = getKindForModule(this) + } + + override boolean booleanValue() { + result = true + } + +} + +class BuiltinModuleObject extends ModuleObject { + + BuiltinModuleObject () { + py_cobjecttypes(this, theModuleType()) + } + + override string getName() { + py_cobjectnames(this, result) + } + + override Object getAttribute(string name) { + py_cmembers_versioned(this, name, result, major_version().toString()) + } + + override predicate hasAttribute(string name) { + py_cmembers_versioned(this, name, _, major_version().toString()) + } + + override predicate attributeRefersTo(string name, Object value, ControlFlowNode origin) { + none() + } + + override predicate attributeRefersTo(string name, Object value, ClassObject cls, ControlFlowNode origin) { + none() + } + + override predicate exportsComplete() { + any() + } + + +} + +class PythonModuleObject extends ModuleObject { + + PythonModuleObject() { + exists(Module m | m.getEntryNode() = this | + not m.isPackage() + ) + } + + override string getName() { + result = this.getModule().getName() + } + + override Module getModule() { + result = this.getOrigin() + } + + override Container getPath() { + result = this.getModule().getFile() + } + + override Object getAttribute(string name) { + this.attributeRefersTo(name, result, _, _) + } + + override predicate exportsComplete() { + exists(Module m | + m = this.getModule() | + not exists(Call modify, Attribute attr, GlobalVariable all | + modify.getScope() = m and modify.getFunc() = attr and + all.getId() = "__all__" | + attr.getObject().(Name).uses(all) + ) + ) + } + + override predicate hasAttribute(string name) { + PointsTo::module_defines_name(this.getModule(), name) + or + this.attributeRefersTo(name, _, _, _) + or + /* The interpreter always adds the __name__ and __package__ attributes */ + name = "__name__" or name = "__package__" + } + + override predicate attributeRefersTo(string name, Object value, ControlFlowNode origin) { + 
PointsTo::py_module_attributes(this.getModule(), name, value, _, origin) + } + + override predicate attributeRefersTo(string name, Object value, ClassObject cls, ControlFlowNode origin) { + PointsTo::py_module_attributes(this.getModule(), name, value, cls, origin) + } + +} + +/** Primarily for internal use. + * + * Gets the object for the string text. + * The extractor will have populated a str object + * for each module name, with the name b'text' or u'text' (including the quotes). + */ +Object object_for_string(string text) { + py_cobjecttypes(result, theStrType()) and + exists(string repr | + py_cobjectnames(result, repr) and + repr.charAt(1) = "'" | + /* Strip quotes off repr */ + text = repr.substring(2, repr.length()-1) + ) +} + +class PackageObject extends ModuleObject { + + PackageObject() { + exists(Module p | p.getEntryNode() = this | + p.isPackage() + ) + } + + override string getName() { + result = this.getModule().getName() + } + + override Module getModule() { + result = this.getOrigin() + } + + override Container getPath() { + exists(ModuleObject m | + m.getPackage() = this | + result = m.getPath().getParent() + ) + } + + ModuleObject submodule(string name) { + result.getPackage() = this and + name = result.getShortName() + } + + override Object getAttribute(string name) { + PointsTo::package_attribute_points_to(this, name, result, _, _) + } + + PythonModuleObject getInitModule() { + result.getModule() = this.getModule().getInitModule() + } + + override predicate exportsComplete() { + not exists(this.getInitModule()) + or + this.getInitModule().exportsComplete() + } + + override predicate hasAttribute(string name) { + exists(this.submodule(name)) + or + this.getInitModule().hasAttribute(name) + } + + override predicate attributeRefersTo(string name, Object value, ControlFlowNode origin) { + PointsTo::package_attribute_points_to(this, name, value, _, origin) + } + + override predicate attributeRefersTo(string name, Object value, ClassObject cls, 
ControlFlowNode origin) { + PointsTo::package_attribute_points_to(this, name, value, cls, origin) + } + + Location getLocation() { + none() + } + + override predicate hasLocationInfo(string path, int bl, int bc, int el, int ec) { + path = this.getPath().getName() and + bl = 0 and bc = 0 and el = 0 and ec = 0 + } + +} + diff --git a/python/ql/src/semmle/python/types/Object.qll b/python/ql/src/semmle/python/types/Object.qll new file mode 100644 index 00000000000..10b569ed076 --- /dev/null +++ b/python/ql/src/semmle/python/types/Object.qll @@ -0,0 +1,518 @@ +import python +private import semmle.python.pointsto.Base + +private cached predicate is_an_object(@py_object obj) { + /* CFG nodes for numeric literals, all of which have a @py_cobject for the value of that literal */ + not obj.(ControlFlowNode).getNode() instanceof ImmutableLiteral + and + not ( + /* @py_cobjects for modules which have a corresponding Python module */ + exists(@py_cobject mod_type | py_special_objects(mod_type, "ModuleType") and py_cobjecttypes(obj, mod_type)) and + exists(Module m | py_cobjectnames(obj, m.getName())) + ) +} + +/** Instances of this class represent objects in the Python program. However, since + * the QL database is static and Python programs are dynamic, there are necessarily a + * number of approximations. + * + * Each point in the control flow graph where a new object can be created is treated as + * an object. Many builtin objects, such as integers, strings and builtin classes, are + * also treated as 'objects'. Hence each 'object', that is an instance of this class, + * represents a set of actual Python objects in the actual program. + * + * Ideally each set would contain only one member, but that is not possible in practice. + * Many instances of this class will represent many actual Python objects, especially + * if the point in the control flow graph to which they refer is in a loop. Others may not + * refer to any objects. 
However, for many important objects such as classes and functions, + * there is a one-to-one relation. + */ +class Object extends @py_object { + + Object() { + is_an_object(this) + } + + /** Gets an inferred type for this object, without using inter-procedural analysis. + * WARNING: The lack of context makes this less accurate than f.refersTo(this, result, _) + * for a control flow node 'f' */ + ClassObject getAnInferredType() { + exists(ControlFlowNode somewhere | somewhere.refersTo(this, result, _)) + or + py_cobjecttypes(this, result) and not this = unknownValue() + or + this = unknownValue() and result = theUnknownType() + } + + /** Whether this a builtin object. A builtin object is one defined by the implementation, + such as the integer 4 or by a native extension, such as a NumPy array class. */ + predicate isBuiltin() { + py_cobjects(this) + } + + /** Retained for backwards compatibility. See Object.isBuiltin() */ + predicate isC() { + this.isBuiltin() + } + + /** Gets the point in the source code from which this object "originates". + * + * WARNING: The lack of context makes this less accurate than f.refersTo(this, _, result) + * for a control flow node 'f'. 
+ */ + AstNode getOrigin() { + py_flow_bb_node(this, result, _, _) + } + + private predicate hasOrigin() { + py_flow_bb_node(this, _, _, _) + } + + predicate hasLocationInfo(string filepath, int bl, int bc, int el, int ec) { + this.hasOrigin() and this.getOrigin().getLocation().hasLocationInfo(filepath, bl, bc, el, ec) + or + not this.hasOrigin() and filepath = ":Compiled Code" and bl = 0 and bc = 0 and el = 0 and ec = 0 + } + + string toString() { + this.isC() and + not this = undefinedVariable() and not this = unknownValue() and + exists(ClassObject type, string typename, string objname | + py_cobjecttypes(this, type) and py_cobjectnames(this, objname) and typename = type.getName() | + result = typename + " " + objname + ) + or + result = this.getOrigin().toString() + } + + /** Gets the class of this object for simple cases, namely constants, functions, + * comprehensions and built-in objects. + * + * This exists primarily for internal use. Use getAnInferredType() instead. + */ + cached ClassObject simpleClass() { + result = comprehension(this.getOrigin()) + or + result = collection_literal(this.getOrigin()) + or + result = string_literal(this.getOrigin()) + or + this.getOrigin() instanceof CallableExpr and result = thePyFunctionType() + or + this.getOrigin() instanceof Module and result = theModuleType() + or + py_cobjecttypes(this, result) + } + + private + ClassObject declaringClass(string name) { + result.declaredAttribute(name) = this + } + + /** Whether this overrides o. In this context, "overrides" means that this object + * is a named attribute of a some class C and `o` is a named attribute of another + * class S, both attributes having the same name, and S is a super class of C. + */ + predicate overrides(Object o) { + exists(string name | + declaringClass(name).getASuperType() = o.declaringClass(name) + ) + } + + /** The Boolean value of this object if it always evaluates to true or false. 
+ * For example: + * false for None, true for 7 and no result for int(x) + */ + boolean booleanValue() { + this = theNoneObject() and result = false + or + this = theTrueObject() and result = true + or + this = theFalseObject() and result = false + or + this = theEmptyTupleObject() and result = false + or + exists(Tuple t | t = this.getOrigin() | + exists(t.getAnElt()) and result = true + or + not exists(t.getAnElt()) and result = false + ) + or + exists(Unicode s | s.getLiteralObject() = this | + s.getS() = "" and result = false + or + s.getS() != "" and result = true + ) + or + exists(Bytes s | s.getLiteralObject() = this | + s.getS() = "" and result = false + or + s.getS() != "" and result = true + ) + } + + /** Holds if this object can be referred to by `longName` + * For example, the modules `dict` in the `sys` module + * has the long name `sys.modules` and the name `os.path.join` + * will refer to the path joining function even though it might + * be declared in the `posix` or `nt` modules. + * Long names can have no more than three dots after the module name. + */ + cached predicate hasLongName(string longName) { + this = findByName0(longName) or + this = findByName1(longName) or + this = findByName2(longName) or + this = findByName3(longName) or + exists(ClassMethodObject cm | + cm.hasLongName(longName) and + cm.getFunction() = this + ) + or + exists(StaticMethodObject cm | + cm.hasLongName(longName) and + cm.getFunction() = this + ) + } + +} + +private Object findByName0(string longName) { + result.(ModuleObject).getName() = longName +} + +private Object findByName1(string longName) { + exists(string owner, string attrname | + longName = owner + "." 
+ attrname + | + result = findByName0(owner).(ModuleObject).getAttribute(attrname) + or + result = findByName0(owner).(ClassObject).lookupAttribute(attrname) + ) + and + not result = findByName0(_) +} + +private Object findByName2(string longName) { + exists(string owner, string attrname | + longName = owner + "." + attrname + | + result = findByName1(owner).(ModuleObject).getAttribute(attrname) + or + result = findByName1(owner).(ClassObject).lookupAttribute(attrname) + ) + and not result = findByName0(_) + and not result = findByName1(_) +} + +private Object findByName3(string longName) { + exists(string owner, string attrname | + longName = owner + "." + attrname + | + result = findByName2(owner).(ModuleObject).getAttribute(attrname) + or + result = findByName2(owner).(ClassObject).lookupAttribute(attrname) + ) + and not result = findByName0(_) + and not result = findByName1(_) + and not result = findByName2(_) +} + + +/** Numeric objects (ints and floats). + * Includes those occurring in the source as a literal + * or in a builtin module as a value. 
+ */ +class NumericObject extends Object { + + NumericObject() { + py_cobjecttypes(this, theIntType()) or + py_cobjecttypes(this, theLongType()) or + py_cobjecttypes(this, theFloatType()) + } + + /** Gets the Boolean value that this object + * would evaluate to in a Boolean context, + * such as `bool(x)` or `if x: ...` + */ + override boolean booleanValue() { + this.intValue() != 0 and result = true + or + this.intValue() = 0 and result = false + or + this.floatValue() != 0 and result = true + or + this.floatValue() = 0 and result = false + } + + /** Gets the value of this object if it is a constant integer and it fits in a QL int */ + int intValue() { + (py_cobjecttypes(this, theIntType()) or py_cobjecttypes(this, theLongType())) + and + exists(string s | py_cobjectnames(this, s) and result = s.toInt()) + } + + /** Gets the value of this object if it is a constant float */ + float floatValue() { + (py_cobjecttypes(this, theFloatType())) + and + exists(string s | py_cobjectnames(this, s) and result = s.toFloat()) + } + + /** Gets the string representation of this object, equivalent to calling repr() in Python */ + string repr() { + exists(string s | + py_cobjectnames(this, s) | + if py_cobjecttypes(this, theLongType()) then + result = s + "L" + else + result = s + ) + } + +} + +/** String objects (unicode or bytes). + * Includes those occurring in the source as a literal + * or in a builtin module as a value. 
+ */ +class StringObject extends Object { + + StringObject() { + py_cobjecttypes(this, theUnicodeType()) or + py_cobjecttypes(this, theBytesType()) + } + + /** Whether this string is composed entirely of ascii encodable characters */ + predicate isAscii() { + this.getText().regexpMatch("^\\p{ASCII}*$") + } + + override boolean booleanValue() { + this.getText() = "" and result = false + or + this.getText() != "" and result = true + } + + /** Gets the text for this string */ + cached string getText() { + exists(string quoted_string | + py_cobjectnames(this, quoted_string) and + result = quoted_string.regexpCapture("[bu]'([\\s\\S]*)'", 1) + ) + } + +} + +/** Sequence objects (lists and tuples) + * Includes those occurring in the source as a literal + * or in a builtin module as a value. + */ +abstract class SequenceObject extends Object { + + /** Gets the length of this sequence */ + int getLength() { + result = strictcount(this.getBuiltinElement(_)) + or + result = strictcount(this.getSourceElement(_)) + } + + /** Gets the nth item of this builtin sequence */ + Object getBuiltinElement(int n) { + py_citems(this, n, result) + } + + /** Gets the nth source element of this sequence */ + ControlFlowNode getSourceElement(int n) { + result = this.(SequenceNode).getElement(n) + } + + Object getInferredElement(int n) { + result = this.getBuiltinElement(n) + or + this.getSourceElement(n).refersTo(result) + } + +} + +class TupleObject extends SequenceObject { + + TupleObject() { + py_cobjecttypes(this, theTupleType()) + or + this instanceof TupleNode + or + exists(Function func | func.getVararg().getAFlowNode() = this) + } + +} + +class NonEmptyTupleObject extends TupleObject { + + NonEmptyTupleObject() { + exists(Function func | func.getVararg().getAFlowNode() = this) + } + + override boolean booleanValue() { + result = true + } + +} + + +class ListObject extends SequenceObject { + + ListObject() { + py_cobjecttypes(this, theListType()) + or + this instanceof ListNode + } + 
+} + +/** The `builtin` module */ +BuiltinModuleObject theBuiltinModuleObject() { + py_special_objects(result, "builtin_module_2") and major_version() = 2 + or + py_special_objects(result, "builtin_module_3") and major_version() = 3 +} + +/** The `sys` module */ +BuiltinModuleObject theSysModuleObject() { + py_special_objects(result, "sys") +} + +Object builtin_object(string name) { + py_cmembers_versioned(theBuiltinModuleObject(), name, result, major_version().toString()) +} + +/** The built-in object None */ + Object theNoneObject() { + py_special_objects(result, "None") +} + +/** The built-in object True */ + Object theTrueObject() { + py_special_objects(result, "True") +} + +/** The built-in object False */ + Object theFalseObject() { + py_special_objects(result, "False") +} + +/** The builtin function apply (Python 2 only) */ + Object theApplyFunction() { + result = builtin_object("apply") +} + +/** The builtin function hasattr */ + Object theHasattrFunction() { + result = builtin_object("hasattr") +} + +/** The builtin function len */ + Object theLenFunction() { + result = builtin_object("len") +} + +/** The builtin function format */ + Object theFormatFunction() { + result = builtin_object("format") +} + +/** The builtin function open */ + Object theOpenFunction() { + result = builtin_object("open") +} + +/** The builtin function print (Python 2.7 upwards) */ + Object thePrintFunction() { + result = builtin_object("print") +} + +/** The builtin function input (Python 2 only) */ + Object theInputFunction() { + result = builtin_object("input") +} + +/** The builtin function locals */ + Object theLocalsFunction() { + py_special_objects(result, "locals") +} + +/** The builtin function globals */ + Object theGlobalsFunction() { + py_special_objects(result, "globals") +} + +/** The builtin function sys.exit */ + Object theExitFunctionObject() { + py_cmembers_versioned(theSysModuleObject(), "exit", result, major_version().toString()) +} + +/** The NameError class 
*/ +Object theNameErrorType() { + result = builtin_object("NameError") +} + +/** The StandardError class */ +Object theStandardErrorType() { + result = builtin_object("StandardError") +} + +/** The IndexError class */ +Object theIndexErrorType() { + result = builtin_object("IndexError") +} + +/** The LookupError class */ +Object theLookupErrorType() { + result = builtin_object("LookupError") +} + +/** The named quitter object (quit or exit) in the builtin namespace */ +Object quitterObject(string name) { + (name = "quit" or name = "exit") and + result = builtin_object(name) +} + +/** The builtin object `NotImplemented`. Not to be confused with `NotImplementedError`. */ +Object theNotImplementedObject() { + result = builtin_object("NotImplemented") +} + +Object theEmptyTupleObject() { + py_cobjecttypes(result, theTupleType()) and not py_citems(result, _, _) +} + + +private ClassObject comprehension(Expr e) { + e instanceof ListComp and result = theListType() + or + e instanceof SetComp and result = theSetType() + or + e instanceof DictComp and result = theDictType() + or + e instanceof GeneratorExp and result = theGeneratorType() +} + +private ClassObject collection_literal(Expr e) { + e instanceof List and result = theListType() + or + e instanceof Set and result = theSetType() + or + e instanceof Dict and result = theDictType() + or + e instanceof Tuple and result = theTupleType() +} + +private ClassObject string_literal(Expr e) { + e instanceof Bytes and result = theBytesType() + or + e instanceof Unicode and result = theUnicodeType() +} + +Object theUnknownType() { + py_special_objects(result, "_semmle_unknown_type") +} + diff --git a/python/ql/src/semmle/python/types/Properties.qll b/python/ql/src/semmle/python/types/Properties.qll new file mode 100644 index 00000000000..01a6e07155d --- /dev/null +++ b/python/ql/src/semmle/python/types/Properties.qll @@ -0,0 +1,144 @@ +import python + +/** A Python property: + * @property + * def f(): + * .... 
+ * + * Also any instances of types.GetSetDescriptorType (which are equivalent, but implemented in C) + */ +abstract class PropertyObject extends Object { + + PropertyObject() { + property_getter(this, _) + or + py_cobjecttypes(this, theBuiltinPropertyType()) + } + + /** Gets the name of this property */ + abstract string getName(); + + /** Gets the getter of this property */ + abstract Object getGetter(); + + /** Gets the setter of this property */ + abstract Object getSetter(); + + /** Gets the deleter of this property */ + abstract Object getDeleter(); + + override string toString() { + result = "Property " + this.getName() + } + + /** Whether this property is read-only. */ + predicate isReadOnly() { + not exists(this.getSetter()) + } + + /** Gets an inferred type of this property. + * That is the type returned by its getter function, + * not the type of the property object which is types.PropertyType. */ + abstract ClassObject getInferredPropertyType(); + +} + + +class PythonPropertyObject extends PropertyObject { + + PythonPropertyObject() { + property_getter(this, _) + } + + override string getName() { + result = this.getGetter().getName() + } + + /** Gets the getter function of this property */ + override FunctionObject getGetter() { + property_getter(this, result) + } + + override ClassObject getInferredPropertyType() { + result = this.getGetter().getAnInferredReturnType() + } + + /** Gets the setter function of this property */ + override FunctionObject getSetter() { + property_setter(this, result) + } + + /** Gets the deleter function of this property */ + override FunctionObject getDeleter() { + property_deleter(this, result) + } + +} + +class BuiltinPropertyObject extends PropertyObject { + + BuiltinPropertyObject() { + py_cobjecttypes(this, theBuiltinPropertyType()) + } + + override string getName() { + py_cobjectnames(this, result) + } + + /** Gets the getter method wrapper of this property */ + override Object getGetter() { + 
py_cmembers_versioned(this, "__get__", result, major_version().toString()) + } + + override ClassObject getInferredPropertyType() { + none() + } + + /** Gets the setter method wrapper of this property */ + override Object getSetter() { + py_cmembers_versioned(this, "__set__", result, major_version().toString()) + } + + /** Gets the deleter method wrapper of this property */ + override Object getDeleter() { + py_cmembers_versioned(this, "__delete__", result, major_version().toString()) + } + +} + +private predicate property_getter(CallNode decorated, FunctionObject getter) { + decorated.getFunction().refersTo(thePropertyType()) + and + decorated.getArg(0).refersTo(getter) +} + +private predicate property_setter(CallNode decorated, FunctionObject setter) { + property_getter(decorated, _) + and + exists(CallNode setter_call, AttrNode prop_setter | + prop_setter.getObject("setter").refersTo((Object)decorated) | + setter_call.getArg(0).refersTo(setter) + and + setter_call.getFunction() = prop_setter + ) + or + decorated.getFunction().refersTo(thePropertyType()) + and + decorated.getArg(1).refersTo(setter) +} + +private predicate property_deleter(CallNode decorated, FunctionObject deleter) { + property_getter(decorated, _) + and + exists(CallNode deleter_call, AttrNode prop_deleter | + prop_deleter.getObject("deleter").refersTo((Object)decorated) | + deleter_call.getArg(0).refersTo(deleter) + and + deleter_call.getFunction() = prop_deleter + ) + or + decorated.getFunction().refersTo(thePropertyType()) + and + decorated.getArg(2).refersTo(deleter) +} + diff --git a/python/ql/src/semmle/python/types/Version.qll b/python/ql/src/semmle/python/types/Version.qll new file mode 100644 index 00000000000..7d722167a5b --- /dev/null +++ b/python/ql/src/semmle/python/types/Version.qll @@ -0,0 +1,169 @@ +import python +import semmle.python.GuardedControlFlow +private import semmle.python.pointsto.PointsTo + +/** A Version of the Python interpreter. 
+ * Currently only 2.7 or 3.x but may include different sets of versions in the future. */ +class Version extends int { + + Version() { + this = 2 or this = 3 + } + + /** Holds if this version (or set of versions) includes the version `major`.`minor` */ + predicate includes(int major, int minor) { + this = 2 and major = 2 and minor = 7 + or + this = 3 and major = 3 and minor in [4..7] + } + +} + +Object theSysVersionInfoTuple() { + py_cmembers_versioned(theSysModuleObject(), "version_info", result, major_version().toString()) +} + +Object theSysHexVersionNumber() { + py_cmembers_versioned(theSysModuleObject(), "hexversion", result, major_version().toString()) +} + +Object theSysVersionString() { + py_cmembers_versioned(theSysModuleObject(), "version", result, major_version().toString()) +} + + +string reversed(Cmpop op) { + op instanceof Lt and result = ">" + or + op instanceof Gt and result = "<" + or + op instanceof GtE and result = "<=" + or + op instanceof LtE and result = ">=" + or + op instanceof Eq and result = "==" + or + op instanceof NotEq and result = "!=" +} + + +/** DEPRECATED: + * A test on the major version of the Python interpreter + * */ +class VersionTest extends @py_flow_node { + + string toString() { + result = "VersionTest" + } + + VersionTest() { + PointsTo::version_const(this, _, _) + } + + predicate isTrue() { + PointsTo::version_const(this, _, true) + } + + AstNode getNode() { + result = this.(ControlFlowNode).getNode() + } + +} + +/** A guard on the major version of the Python interpreter */ +class VersionGuard extends ConditionBlock { + + VersionGuard() { + exists(VersionTest v | + PointsTo::points_to(this.getLastNode(), _, v, _, _) or + PointsTo::points_to(this.getLastNode(), _, _, _, v) + ) + } + + predicate isTrue() { + exists(VersionTest v | + v.isTrue() | + PointsTo::points_to(this.getLastNode(), _, v, _, _) or + PointsTo::points_to(this.getLastNode(), _, _, _, v) + ) + } + +} + +string os_name(StrConst s) { + exists(string t | + t = 
s.getText() | + t = "Darwin" and result = "darwin" + or + t = "Windows" and result = "win32" + or + t = "Linux" and result = "linux" + or + not t = "Darwin" and not t = "Windows" and not t = "Linux" and result = t + ) +} + +predicate get_platform_name(Expr e) { + exists(Attribute a, Name n | a = e and n = a.getObject() | + n.getId() = "sys" and a.getName() = "platform" + ) + or + exists(Call c, Attribute a, Name n | + c = e and a = c.getFunc() and n = a.getObject() | + a.getName() = "system" and n.getId() = "platform" + ) +} + +predicate os_compare(ControlFlowNode f, string name) { + exists(Compare c, Expr l, Expr r, Cmpop op | + c = f.getNode() and + l = c.getLeft() and + r = c.getComparator(0) and + op = c.getOp(0) | + (op instanceof Eq or op instanceof Is) + and + ( get_platform_name(l) and name = os_name(r) + or + get_platform_name(r) and name = os_name(l) + ) + ) +} + +class OsTest extends @py_flow_node { + + OsTest() { + os_compare(this, _) + } + + string getOs() { + os_compare(this, result) + } + + string toString() { + result = "OsTest" + } + + AstNode getNode() { + result = this.(ControlFlowNode).getNode() + } + +} + + +class OsGuard extends ConditionBlock { + + OsGuard() { + exists(OsTest t | + PointsTo::points_to(this.getLastNode(), _, theBoolType(), t, _) + ) + } + + string getOs() { + exists(OsTest t | + PointsTo::points_to(this.getLastNode(), _, theBoolType(), t, _) and result = t.getOs() + ) + } + +} + + diff --git a/python/ql/src/semmle/python/values/StringAttributes.qll b/python/ql/src/semmle/python/values/StringAttributes.qll new file mode 100644 index 00000000000..86cc29dc5b3 --- /dev/null +++ b/python/ql/src/semmle/python/values/StringAttributes.qll @@ -0,0 +1,90 @@ +import python + +predicate string_attribute_all(ControlFlowNode n, string attr) { + (n.getNode() instanceof Unicode or n.getNode() instanceof Bytes) and attr = "const" + or + exists(Object s | + n.refersTo(s, theBytesType(), _) and attr = "bytes" and + // We are only interested in 
bytes if they may cause an exception if + // implicitly converted to unicode. ASCII is safe. + not s.(StringObject).isAscii() + ) +} +/** Holds if `obj` has the string attribute `attr` according to either `tracked_object_all` or `tracked_object_any`. */ +predicate tracked_object(ControlFlowNode obj, string attr) { + tracked_object_all(obj, attr) + or + tracked_object_any(obj, attr) +} +/** Holds if `obj` is a call whose callee refers to `theOpenFunction()`. */ +predicate open_file(Object obj) { + obj.(CallNode).getFunction().refersTo(theOpenFunction()) +} +/** Holds if `n` is a source of the attribute `attr`. "user-input": a call to `raw_input` when `major_version()` is 2, otherwise a call to `theInputFunction()`. "file-input": a call to the `read` attribute of an object for which `open_file` holds. "unicode": any node that refers to an object of unicode type. */ +predicate string_attribute_any(ControlFlowNode n, string attr) { + attr = "user-input" and + exists(Object input | + n.(CallNode).getFunction().refersTo(input) | + if major_version() = 2 then + input = builtin_object("raw_input") + else + input = theInputFunction() + ) + or + attr = "file-input" and + exists(Object fd | n.(CallNode).getFunction().(AttrNode).getObject("read").refersTo(fd) | + open_file(fd) + ) + or + n.refersTo(_, theUnicodeType(), _) and attr = "unicode" +} +/** Holds if `obj` is a `string_attribute_any` source of `attr`, or at least one `tracking_step` predecessor of `obj` has `attr` by this predicate. */ +predicate tracked_object_any(ControlFlowNode obj, string attr) { + string_attribute_any(obj, attr) + or + exists(ControlFlowNode other | + tracking_step(other, obj) | + tracked_object_any(other, attr) + ) +} +/** Holds if `obj` is a `string_attribute_all` source of `attr`, or `obj` has at least one `tracking_step` predecessor and every such predecessor has `attr` by this predicate (`forex`). */ +predicate tracked_object_all(ControlFlowNode obj, string attr) { + string_attribute_all(obj, attr) + or + forex(ControlFlowNode other | + tracking_step(other, obj) | + tracked_object_all(other, attr) + ) +} +/** Holds if `ret` is the value of a `return` statement whose scope is a function that `call` is a call to. */ +predicate tracked_call_step(ControlFlowNode ret, ControlFlowNode call) { + exists(FunctionObject func, Return r | + func.getACall() = call and + func.getFunction() = r.getScope() and + r.getValue() = ret.getNode() + ) +} +/** Gets a flow node for the iterated expression of the `for` statement whose target is `f`, restricted to nodes whose basic block dominates the basic block of `f`. */ +ControlFlowNode sequence_for_iterator(ControlFlowNode f) { + exists(For for | f.getNode() = for.getTarget() | + result.getNode() = for.getIter() and + result.getBasicBlock().dominates(f.getBasicBlock()) + ) +} +/* One propagation step, from `src` to `dest`, used by `tracked_object_any` and `tracked_object_all`. */ +pragma [noinline] +private predicate tracking_step(ControlFlowNode src, ControlFlowNode dest) { + src = dest.(BinaryExprNode).getAnOperand() + or + src = dest.(UnaryExprNode).getOperand() + or + src = sequence_for_iterator(dest) + or + src = dest.(AttrNode).getObject() + or + src = 
dest.(SubscriptNode).getValue() + or + tracked_call_step(src, dest) + or + dest.refersTo((Object)src) +} diff --git a/python/ql/src/semmle/python/web/Http.qll b/python/ql/src/semmle/python/web/Http.qll new file mode 100644 index 00000000000..5789fda7d86 --- /dev/null +++ b/python/ql/src/semmle/python/web/Http.qll @@ -0,0 +1,25 @@ +import python +import semmle.python.security.TaintTracking +import semmle.python.security.strings.External + +/** A generic taint source from an HTTP request; it is a source of `ExternalStringKind` taint. */ +abstract class SimpleHttpRequestTaintSource extends TaintSource { + + override predicate isSourceOf(TaintKind kind) { + kind instanceof ExternalStringKind + } + +} + +/** Gets an HTTP verb, in upper case: one of GET, POST, PUT, PATCH, DELETE, OPTIONS or HEAD. */ +string httpVerb() { + result = "GET" or result = "POST" or + result = "PUT" or result = "PATCH" or + result = "DELETE" or result = "OPTIONS" or + result = "HEAD" +} + +/** Gets an HTTP verb (see `httpVerb()`), in lower case. */ +string httpVerbLower() { + result = httpVerb().toLowerCase() +} diff --git a/python/ql/src/semmle/python/web/HttpRedirect.qll b/python/ql/src/semmle/python/web/HttpRedirect.qll new file mode 100644 index 00000000000..f3df7cac80d --- /dev/null +++ b/python/ql/src/semmle/python/web/HttpRedirect.qll @@ -0,0 +1,8 @@ +import python + +import semmle.python.security.strings.Basic + +import semmle.python.web.django.Redirect +import semmle.python.web.flask.Redirect +import semmle.python.web.tornado.Redirect +import semmle.python.web.pyramid.Redirect diff --git a/python/ql/src/semmle/python/web/HttpRequest.qll b/python/ql/src/semmle/python/web/HttpRequest.qll new file mode 100644 index 00000000000..1566ac645dc --- /dev/null +++ b/python/ql/src/semmle/python/web/HttpRequest.qll @@ -0,0 +1,5 @@ +import semmle.python.web.django.Request +import semmle.python.web.flask.Request +import semmle.python.web.tornado.Request +import semmle.python.web.pyramid.Request +import semmle.python.web.twisted.Request diff --git a/python/ql/src/semmle/python/web/HttpResponse.qll 
b/python/ql/src/semmle/python/web/HttpResponse.qll new file mode 100644 index 00000000000..f38836d768b --- /dev/null +++ b/python/ql/src/semmle/python/web/HttpResponse.qll @@ -0,0 +1,5 @@ +import semmle.python.web.django.Response +import semmle.python.web.flask.Response +import semmle.python.web.pyramid.Response +import semmle.python.web.tornado.Response +import semmle.python.web.twisted.Response diff --git a/python/ql/src/semmle/python/web/django/Db.qll b/python/ql/src/semmle/python/web/django/Db.qll new file mode 100644 index 00000000000..e51955b154f --- /dev/null +++ b/python/ql/src/semmle/python/web/django/Db.qll @@ -0,0 +1,65 @@ +import python +import semmle.python.security.injection.Sql + +/** A taint kind representing a django cursor object. + */ +class DjangoDbCursor extends DbCursor { + + DjangoDbCursor() { + this = "django.db.connection.cursor" + } + +} +/* Gets the `connection` attribute of the `django.db` module. */ +private Object theDjangoConnectionObject() { + any(ModuleObject m | m.getName() = "django.db").getAttribute("connection") = result +} + +/** A kind of taint source representing sources of django cursor objects: a call to the `cursor` attribute of an object that refers to `django.db.connection`. + */ +class DjangoDbCursorSource extends DbConnectionSource { + + DjangoDbCursorSource() { + exists(AttrNode cursor | + this.(CallNode).getFunction()= cursor and + cursor.getObject("cursor").refersTo(theDjangoConnectionObject()) + ) + } + + override string toString() { + result = "django.db.connection.cursor" + } + + override predicate isSourceOf(TaintKind kind) { + kind instanceof DjangoDbCursor + } + +} + +/** Gets the `RawSQL` attribute of the `django.db.models.expressions` module. */ +ClassObject theDjangoRawSqlClass() { + result = any(ModuleObject m | m.getName() = "django.db.models.expressions").getAttribute("RawSQL") +} + +/** + * A sink of taint on calls to `django.db.models.expressions.RawSQL`. This + * allows arbitrary SQL statements to be executed, which is a security risk. 
+ */ + +class DjangoRawSqlSink extends TaintSink { + DjangoRawSqlSink() { /* The sink is the first positional argument of a call to the `RawSQL` class. */ + exists(CallNode call | + call = theDjangoRawSqlClass().getACall() and + this = call.getArg(0) + ) + } + + override predicate sinks(TaintKind kind) { + kind instanceof ExternalStringKind + } + + override string toString() { + result = "django.db.models.expressions.RawSQL(sink,...)" + } +} + diff --git a/python/ql/src/semmle/python/web/django/Model.qll b/python/ql/src/semmle/python/web/django/Model.qll new file mode 100644 index 00000000000..d3b145e3a46 --- /dev/null +++ b/python/ql/src/semmle/python/web/django/Model.qll @@ -0,0 +1,156 @@ +import python + +import semmle.python.security.TaintTracking +import semmle.python.security.strings.Basic +import semmle.python.web.Http + +/** A django model class: a class that has the `Model` attribute of the `django.db.models` module among its improper supertypes. */ +class DjangoModel extends ClassObject { + + DjangoModel() { + any(ModuleObject m | m.getName() = "django.db.models").getAttribute("Model") = this.getAnImproperSuperType() + } + +} + +/** A "taint" for django database tables */ +class DjangoDbTableObjects extends TaintKind { + + DjangoDbTableObjects() { + this = "django.db.models.Model.objects" + } + /* The result of calling any of the methods listed below on a value of this kind has this same kind. */ + override TaintKind getTaintOfMethodResult(string name) { + result = this and + ( + name = "filter" or + name = "exclude" or + name = "annotate" or + name = "order_by" or + name = "reverse" or + name = "distinct" or + name = "values" or + name = "values_list" or + name = "dates" or + name = "datetimes" or + name = "none" or + name = "all" or + name = "union" or + name = "intersection" or + name = "difference" or + name = "select_related" or + name = "prefetch_related" or + name = "extra" or + name = "defer" or + name = "only" or + name = "using" or + name = "select_for_update" or + name = "raw" + ) + } +} + +/** Django model objects, which are sources of django database table "taint": a load of the `objects` attribute of something that refers to a `DjangoModel`. */ +class DjangoModelObjects extends TaintSource { + + DjangoModelObjects() { + this.(AttrNode).isLoad() and 
this.(AttrNode).getObject("objects").refersTo(any(DjangoModel m)) + } + + override predicate isSourceOf(TaintKind kind) { + kind instanceof DjangoDbTableObjects + } + + override string toString() { + result = "django.db.models.Model.objects" + } + +} + +/** A write to a field of a django model, which is vulnerable to external data. Matches a store to any attribute of an object that refers to a `DjangoModel`. */ +class DjangoModelFieldWrite extends TaintSink { + + DjangoModelFieldWrite() { + exists(AttrNode attr, DjangoModel model | + this = attr and attr.isStore() and attr.getObject(_).refersTo(model) + ) + } + + override predicate sinks(TaintKind kind) { + kind instanceof ExternalStringKind + } + + override string toString() { + result = "django model field write" + } + +} + +/** A direct reference to a django model object, which is vulnerable to external data. Matches any argument of a call to the `get` attribute of a value tainted as `DjangoDbTableObjects`. */ +class DjangoModelDirectObjectReference extends TaintSink { + + DjangoModelDirectObjectReference() { + exists(CallNode objects_get_call, ControlFlowNode objects | + this = objects_get_call.getAnArg() | + objects_get_call.getFunction().(AttrNode).getObject("get") = objects and + any(DjangoDbTableObjects objs).taints(objects) + ) + } + + override predicate sinks(TaintKind kind) { + kind instanceof ExternalStringKind + } + + override string toString() { + result = "django model object reference" + } +} + +/** + * The first argument of a call to the `raw` method on a django model's table objects. This allows a raw SQL query + * to be sent to the database, which is a security risk. + */ + +class DjangoModelRawCall extends TaintSink { + + DjangoModelRawCall() { + exists(CallNode raw_call, ControlFlowNode queryset | + this = raw_call.getArg(0) | + raw_call.getFunction().(AttrNode).getObject("raw") = queryset and + any(DjangoDbTableObjects objs).taints(queryset) + ) + } + + override predicate sinks(TaintKind kind) { + kind instanceof ExternalStringKind + } + + override string toString() { + result = "django.models.QuerySet.raw(sink,...)" + } +} + +/** + * A call to the `extra` method on a django model. 
This allows a raw SQL query + * to be sent to the database, which is a security risk. + */ + + +class DjangoModelExtraCall extends TaintSink { + /* Only the first positional argument of `extra(...)`, called on a value tainted as `DjangoDbTableObjects`, is a sink. */ + DjangoModelExtraCall() { + exists(CallNode extra_call, ControlFlowNode queryset | + this = extra_call.getArg(0) | + extra_call.getFunction().(AttrNode).getObject("extra") = queryset and + any(DjangoDbTableObjects objs).taints(queryset) + ) + } + + override predicate sinks(TaintKind kind) { + kind instanceof ExternalStringKind + } + + override string toString() { + result = "django.models.QuerySet.extra(sink,...)" + } +} diff --git a/python/ql/src/semmle/python/web/django/Redirect.qll b/python/ql/src/semmle/python/web/django/Redirect.qll new file mode 100644 index 00000000000..a78c7a765ec --- /dev/null +++ b/python/ql/src/semmle/python/web/django/Redirect.qll @@ -0,0 +1,32 @@ +/** Provides a class representing arguments to the `django.shortcuts.redirect` function. + * This module is intended to be imported into a taint-tracking query + * to extend `TaintSink`. + */ +import python + +import semmle.python.security.TaintTracking +import semmle.python.security.strings.Basic +private import semmle.python.web.django.Shared + + +/** + * Represents an argument to the `django.shortcuts.redirect` function. 
+ */ +class DjangoRedirect extends TaintSink { + + override string toString() { + result = "django.redirect" + } + /* Any argument of a call to the function returned by `redirect()` is a sink. */ + DjangoRedirect() { + exists(CallNode call | + redirect().getACall() = call and + this = call.getAnArg() + ) + } + + override predicate sinks(TaintKind kind) { + kind instanceof StringKind + } + +} diff --git a/python/ql/src/semmle/python/web/django/Request.qll b/python/ql/src/semmle/python/web/django/Request.qll new file mode 100644 index 00000000000..67b82981fb2 --- /dev/null +++ b/python/ql/src/semmle/python/web/django/Request.qll @@ -0,0 +1,164 @@ +import python +import semmle.python.regex + +import semmle.python.security.TaintTracking +import semmle.python.web.Http + + +/** A django.request.HttpRequest object. Its `GET` and `POST` attributes are `DjangoQueryDict`s; its `body` and `path` attributes are external strings. */ +class DjangoRequest extends TaintKind { + + DjangoRequest() { + this = "django.request.HttpRequest" + } + /* `body` and `path` are attributes of Django's `HttpRequest`, not methods, so they are modelled here as attribute reads. */ + override TaintKind getTaintOfAttribute(string name) { + (name = "GET" or name = "POST") and result instanceof DjangoQueryDict + or (name = "body" or name = "path") and result instanceof ExternalStringKind + } + + override TaintKind getTaintOfMethodResult(string name) { + /* Kept unchanged for backward compatibility; attribute reads of `body` and `path` are handled by `getTaintOfAttribute`. */ + (name = "body" or name = "path") and + result instanceof ExternalStringKind + } +} + +/* Helper for getTaintForFlowStep(): holds if `sub` is a subscript of `obj`; `kind` is then any `ExternalStringKind`. */ +pragma [noinline] +private predicate subscript_taint(SubscriptNode sub, ControlFlowNode obj, TaintKind kind) { + sub.getValue() = obj and + kind instanceof ExternalStringKind +} + +/** A django.request.QueryDict object. Subscripting it, or calling its `get` method, yields an external string. */ +class DjangoQueryDict extends TaintKind { + + DjangoQueryDict() { + this = "django.http.request.QueryDict" + } + + override TaintKind getTaintForFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode) { + this.taints(fromnode) and + subscript_taint(tonode, fromnode, result) + } + + override TaintKind getTaintOfMethodResult(string name) { + name = "get" and result instanceof ExternalStringKind + } + +} +/** A taint source that yields a `DjangoRequest`. */ +abstract class DjangoRequestSource extends TaintSource { + + override string toString() { + result = "Django request source" + } + + override predicate isSourceOf(TaintKind kind) { + kind 
instanceof DjangoRequest + } + +} + +/** Function based views: the first parameter of a view function registered through `url_dispatch` is a request source. + * https://docs.djangoproject.com/en/1.11/topics/http/views/ + */ +private class DjangoFunctionBasedViewRequestArgument extends DjangoRequestSource { + + DjangoFunctionBasedViewRequestArgument() { + exists(FunctionObject view | + url_dispatch(_, _, view) and + this = view.getFunction().getArg(0).asName().getAFlowNode() + ) + } + +} + +/** Class based views: a class that has the `View` attribute of the `django.views.generic` module among its improper supertypes. + * https://docs.djangoproject.com/en/1.11/topics/class-based-views/ + * + */ +private class DjangoView extends ClassObject { + + DjangoView() { + any(ModuleObject m | m.getName() = "django.views.generic").getAttribute("View") = this.getAnImproperSuperType() + } +} +/* Gets a function found by looking up a lower-case HTTP verb (see `httpVerbLower()`) as an attribute of a `DjangoView`. */ +private FunctionObject djangoViewHttpMethod() { + exists(DjangoView view | + view.lookupAttribute(httpVerbLower()) = result + ) +} +/** The parameter at index 1 of an HTTP-verb method of a class based view (presumably the parameter following `self`), which is a request source. */ +class DjangoClassBasedViewRequestArgument extends DjangoRequestSource { + + DjangoClassBasedViewRequestArgument() { + this = djangoViewHttpMethod().getFunction().getArg(1).asName().getAFlowNode() + } + +} + + + + +/* *********** Routing ********* */ + + +/** Function based views: holds if `call` is a call to the `url` attribute of the `django.conf.urls` module, `regex` is its argument 0 and its argument 1 refers to `view`. */ +predicate url_dispatch(CallNode call, ControlFlowNode regex, FunctionObject view) { + exists(FunctionObject url | + any(ModuleObject m | m.getName() = "django.conf.urls").getAttribute("url") = url and + url.getArgumentForCall(call, 0) = regex and + url.getArgumentForCall(call, 1).refersTo(view) + ) +} + +/** A regex string that has a flow node used as the `regex` argument of a `url_dispatch` call. */ +class UrlRegex extends RegexString { + + UrlRegex() { + url_dispatch(_, this.getAFlowNode(), _) + } + +} +/** A call for which `url_dispatch` holds, that is, a routing entry. */ +class UrlRouting extends CallNode { + + UrlRouting() { + url_dispatch(this, _, _) + } + /** Gets the view function that this routing entry dispatches to. */ + FunctionObject getViewFunction() { + url_dispatch(this, _, result) + } + /** Gets the name of a named group in the regex of this routing entry. */ + string getNamedArgument() { + exists(UrlRegex regex | + url_dispatch(this, regex.getAFlowNode(), _) and + regex.getGroupName(_, _) = result + ) + } + +} + +/** An argument specified in a url routing table: a parameter of the view function whose name is a named group of the routing regex. */ +class HttpRequestParameter extends TaintSource { + + HttpRequestParameter() { + exists(UrlRouting url 
| + this.(ControlFlowNode).getNode() = + url.getViewFunction().getFunction().getArgByName(url.getNamedArgument()) + ) + } + + override predicate isSourceOf(TaintKind kind) { + kind instanceof ExternalStringKind + } + + override string toString() { + result = "django.http.request.parameter" + } +} + diff --git a/python/ql/src/semmle/python/web/django/Response.qll b/python/ql/src/semmle/python/web/django/Response.qll new file mode 100644 index 00000000000..ed3833c0279 --- /dev/null +++ b/python/ql/src/semmle/python/web/django/Response.qll @@ -0,0 +1,86 @@ +import python +import semmle.python.security.TaintTracking +import semmle.python.security.strings.Basic +private import semmle.python.web.django.Shared + + +/** A django.http.response.HttpResponse object + * This isn't really a "taint", but we use the value tracking machinery to + * track the flow of response objects. + */ +class DjangoResponse extends TaintKind { + + DjangoResponse() { + this = "django.response.HttpResponse" + } + +} +/* Gets the `HttpResponse` attribute of the `django.http.response` module, provided it is not `theDjangoHttpRedirectClass()`. */ +private ClassObject theDjangoHttpResponseClass() { + result = any(ModuleObject m | m.getName() = "django.http.response").getAttribute("HttpResponse") and + not result = theDjangoHttpRedirectClass() +} + +/** Instantiation of a django response. 
*/ +class DjangoResponseSource extends TaintSource { + /* A call to `HttpResponse` or to any class that has it as an improper supertype. */ + DjangoResponseSource() { + exists(ClassObject cls | + cls.getAnImproperSuperType() = theDjangoHttpResponseClass() and + cls.getACall() = this + ) + } + + override predicate isSourceOf(TaintKind kind) { kind instanceof DjangoResponse } + + override string toString() { + result = "django.http.response.HttpResponse" + } +} + +/** A write to a django response, which is vulnerable to external data (XSS). The sink is the first positional argument of a call to the `write` attribute of a value tainted as `DjangoResponse`. */ +class DjangoResponseWrite extends TaintSink { + + DjangoResponseWrite() { + exists(AttrNode meth, CallNode call | + call.getFunction() = meth and + any(DjangoResponse repsonse).taints(meth.getObject("write")) and + this = call.getArg(0) + ) + } + + override predicate sinks(TaintKind kind) { + kind instanceof StringKind + } + + override string toString() { + result = "django.Response.write(...)" + } + +} + +/** An argument to initialization of a django response, which is vulnerable to external data (XSS). The sink is the first positional argument, or the `content` keyword argument, of a call to a response class. */ +class DjangoResponseContent extends TaintSink { + + DjangoResponseContent() { + exists(CallNode call, ClassObject cls | + cls.getAnImproperSuperType() = theDjangoHttpResponseClass() and + call.getFunction().refersTo(cls) | + call.getArg(0) = this + or + call.getArgByName("content") = this + ) + } + + override predicate sinks(TaintKind kind) { + kind instanceof StringKind + } + + override string toString() { + result = "django.Response(...)" + } + +} + + + diff --git a/python/ql/src/semmle/python/web/django/Sanitizers.qll b/python/ql/src/semmle/python/web/django/Sanitizers.qll new file mode 100644 index 00000000000..db7f8aff8f8 --- /dev/null +++ b/python/ql/src/semmle/python/web/django/Sanitizers.qll @@ -0,0 +1,7 @@ +import python + + +/* Sanitizers + * No django sanitizers implemented yet. 
+ */ + diff --git a/python/ql/src/semmle/python/web/django/Shared.qll b/python/ql/src/semmle/python/web/django/Shared.qll new file mode 100644 index 00000000000..395c09ec958 --- /dev/null +++ b/python/ql/src/semmle/python/web/django/Shared.qll @@ -0,0 +1,9 @@ +import python +/** Gets the `redirect` attribute of the `django.shortcuts` module. */ +FunctionObject redirect() { + result = any(ModuleObject m | m.getName() = "django.shortcuts").getAttribute("redirect") +} +/** Gets the `HttpResponseRedirectBase` attribute of the `django.http.response` module. */ +ClassObject theDjangoHttpRedirectClass() { + result = any(ModuleObject m | m.getName() = "django.http.response").getAttribute("HttpResponseRedirectBase") +} diff --git a/python/ql/src/semmle/python/web/flask/General.qll b/python/ql/src/semmle/python/web/flask/General.qll new file mode 100644 index 00000000000..febe7372b9b --- /dev/null +++ b/python/ql/src/semmle/python/web/flask/General.qll @@ -0,0 +1,117 @@ +import python +import semmle.python.web.Http + +/** The flask module */ +ModuleObject theFlaskModule() { + result = any(ModuleObject m | m.getName() = "flask") +} + +/** The flask app class */ +ClassObject theFlaskClass() { + result = theFlaskModule().getAttribute("Flask") +} + +/** The flask MethodView class */ +ClassObject theFlaskMethodViewClass() { + result = any(ModuleObject m | m.getName() = "flask.views").getAttribute("MethodView") +} +/** The flask Response class (the `Response` attribute of the flask module). Note that the predicate name is spelled `Reponse`. */ +ClassObject theFlaskReponseClass() { + result = theFlaskModule().getAttribute("Response") +} + +/** Holds if `route` is routed to `func` + * by decorating `func` with `app.route(route)`, + * where `app` refers to an object whose class is the flask app class. + */ +predicate app_route(ControlFlowNode route, Function func) { + exists(CallNode route_call, CallNode decorator_call | + route_call.getFunction().(AttrNode).getObject("route").refersTo(_, theFlaskClass(), _) and + decorator_call.getFunction() = route_call and + route_call.getArg(0) = route and + decorator_call.getArg(0).getNode().(FunctionExpr).getInnerScope() = func + ) +} + +/* Helper for add_url_rule: `regex` is argument 0 of an `add_url_rule` call on a flask app, and `callable` is its argument 2 or its `view_func` keyword argument. */ +private predicate add_url_rule_call(ControlFlowNode regex, ControlFlowNode callable) { + exists(CallNode call | + 
call.getFunction().(AttrNode).getObject("add_url_rule").refersTo(_, theFlaskClass(), _) and + regex = call.getArg(0) | + callable = call.getArg(2) or + callable = call.getArgByName("view_func") + ) +} + +/** Holds if urls matching `regex` are routed to `func` by an `add_url_rule` call, where the registered callable either refers to `func` directly or is the result of `as_view()` on a `MethodView` subclass that has `func` as an HTTP-verb method. */ +predicate add_url_rule(ControlFlowNode regex, Function func) { + exists(ControlFlowNode callable | + add_url_rule_call(regex, callable) + | + exists(PyFunctionObject f | f.getFunction() = func and callable.refersTo(f)) + or + /* MethodView.as_view() */ + exists(MethodViewClass view_cls | + view_cls.asTaint().taints(callable) | + func = view_cls.lookupAttribute(httpVerbLower()).(FunctionObject).getFunction() + ) + /* TODO -- Handle Views that aren't MethodViews */ + ) +} + +/** Holds if urls matching `regex` are routed to `func` using + * any of flask's routing mechanisms. + */ +predicate flask_routing(ControlFlowNode regex, Function func) { + app_route(regex, func) + or + add_url_rule(regex, func) +} + +/** A class that extends flask.views.MethodView */ +private class MethodViewClass extends ClassObject { + + MethodViewClass() { + this.getAnImproperSuperType() = theFlaskMethodViewClass() + } + + /* As we are restricted to strings for taint kinds, we need to map these classes to strings. */ + string taintString() { + result = "flask/" + this.getQualifiedName() + ".as.view" + } + + /* Gets the taint kind whose string value is `taintString()` for this class. */ + TaintKind asTaint() { + result = this.taintString() + } +} +/* The taint kinds that stand for `MethodView` subclasses: one kind per class, named by `MethodViewClass.taintString()`. */ +private class MethodViewTaint extends TaintKind { + + MethodViewTaint() { + any(MethodViewClass cls).taintString() = this + } +} + +/** A source of method view "taint"s. 
*/ +private class AsView extends TaintSource { + /* A call to the `as_view` attribute of something that refers to a class with `MethodView` among its improper supertypes. */ + AsView() { + exists(ClassObject view_class | + view_class.getAnImproperSuperType() = theFlaskMethodViewClass() and + this.(CallNode).getFunction().(AttrNode).getObject("as_view").refersTo(view_class) + ) + } + + override string toString() { + result = "flask.MethodView.as_view()" + } + /* The kind produced is the one specific to the class on which `as_view` is called. */ + override predicate isSourceOf(TaintKind kind) { + exists(MethodViewClass view_class | + kind = view_class.asTaint() and + this.(CallNode).getFunction().(AttrNode).getObject("as_view").refersTo(view_class) + ) + } + +} + diff --git a/python/ql/src/semmle/python/web/flask/Redirect.qll b/python/ql/src/semmle/python/web/flask/Redirect.qll new file mode 100644 index 00000000000..81a6d4dc064 --- /dev/null +++ b/python/ql/src/semmle/python/web/flask/Redirect.qll @@ -0,0 +1,35 @@ +/** Provides a class representing arguments to the `flask.redirect` function. + * This module is intended to be imported into a taint-tracking query + * to extend `TaintSink`. + */ +import python + +import semmle.python.security.TaintTracking +import semmle.python.security.strings.Basic +import semmle.python.web.flask.General +/** Gets the `redirect` attribute of the flask module. */ +FunctionObject flask_redirect() { + result = theFlaskModule().getAttribute("redirect") +} + +/** + * Represents an argument to the `flask.redirect` function. 
+ */ +class FlaskRedirect extends TaintSink { + + override string toString() { + result = "flask.redirect" + } + /* Any argument of a call to `flask_redirect()` is a sink. */ + FlaskRedirect() { + exists(CallNode call | + flask_redirect().getACall() = call and + this = call.getAnArg() + ) + } + + override predicate sinks(TaintKind kind) { + kind instanceof StringKind + } + +} diff --git a/python/ql/src/semmle/python/web/flask/Request.qll b/python/ql/src/semmle/python/web/flask/Request.qll new file mode 100644 index 00000000000..caa388e0d77 --- /dev/null +++ b/python/ql/src/semmle/python/web/flask/Request.qll @@ -0,0 +1,75 @@ +import python + +import semmle.python.security.TaintTracking +import semmle.python.web.Http +import semmle.python.web.flask.General +/* Gets the `request` attribute of the flask module. */ +private Object theFlaskRequestObject() { + result = theFlaskModule().getAttribute("request") + +} + +/** Holds if `attr` is a load of attribute `name` of the flask request object */ +private predicate flask_request_attr(AttrNode attr, string name) { + attr.isLoad() and + attr.getObject(name).refersTo(theFlaskRequestObject()) +} + +/** Source of external data from a flask request: a load of the `path`, `full_path`, `base_url` or `url` attribute. */ +class FlaskRequestData extends SimpleHttpRequestTaintSource { + + FlaskRequestData() { + not this instanceof FlaskRequestArgs and + exists(string name | + flask_request_attr(this, name) | + name = "path" or name = "full_path" or + name = "base_url" or name = "url" + ) + } + + override string toString() { + result = "flask.request" + } + +} + +/** Source of dictionary whose values are externally controlled: a load of the `args`, `form`, `values`, `files`, `headers` or `json` attribute of the flask request. */ +class FlaskRequestArgs extends TaintSource { + + FlaskRequestArgs() { + exists(string attr | + flask_request_attr(this, attr) | + attr = "args" or attr = "form" or + attr = "values" or attr = "files" or + attr = "headers" or attr = "json" + ) + } + + override predicate isSourceOf(TaintKind kind) { + kind instanceof ExternalStringDictKind + } + + override string toString() { + result = "flask.request.args" + } + +} + + +/** Source of externally controlled JSON data: a load of the `json` attribute of the flask request */ 
+class FlaskRequestJson extends TaintSource { + + FlaskRequestJson() { + flask_request_attr(this, "json") + } + + override predicate isSourceOf(TaintKind kind) { + kind instanceof ExternalJsonKind + } + + override string toString() { + result = "flask.request.json" + } + +} + diff --git a/python/ql/src/semmle/python/web/flask/Response.qll b/python/ql/src/semmle/python/web/flask/Response.qll new file mode 100644 index 00000000000..13f51f6519b --- /dev/null +++ b/python/ql/src/semmle/python/web/flask/Response.qll @@ -0,0 +1,48 @@ +import python + + +import semmle.python.security.TaintTracking +import semmle.python.security.strings.Basic + +import semmle.python.web.flask.General + +/** A flask response (a node returned from a function routed by `flask_routing`), which is vulnerable to any sort of + * http response malice. */ +class FlaskRoutedResponse extends TaintSink { + + FlaskRoutedResponse() { + exists(PyFunctionObject response | + flask_routing(_, response.getFunction()) and + this = response.getAReturnedNode() + ) + } + + override predicate sinks(TaintKind kind) { + kind instanceof StringKind + } + + override string toString() { + result = "flask.routed.response" + } + +} + +/** The first positional argument of a call to something that refers to the flask `Response` class. */ +class FlaskResponseArgument extends TaintSink { + + FlaskResponseArgument() { + exists(CallNode call | + call.getFunction().refersTo(theFlaskReponseClass()) and + call.getArg(0) = this + ) + } + + override predicate sinks(TaintKind kind) { + kind instanceof StringKind + } + + override string toString() { + result = "flask.response.argument" + } + +} \ No newline at end of file diff --git a/python/ql/src/semmle/python/web/pyramid/Redirect.qll b/python/ql/src/semmle/python/web/pyramid/Redirect.qll new file mode 100644 index 00000000000..61f662232b4 --- /dev/null +++ b/python/ql/src/semmle/python/web/pyramid/Redirect.qll @@ -0,0 +1,42 @@ +/** Provides a class representing the location argument of the pyramid redirect classes `HTTPFound` and `HTTPTemporaryRedirect`. + * This module is intended to be imported into a taint-tracking query + * to extend `TaintSink`. 
+ */ +import python + +import semmle.python.security.TaintTracking +import semmle.python.security.strings.Basic +/* Gets the `HTTPFound` or the `HTTPTemporaryRedirect` attribute of the `pyramid.httpexceptions` module. */ +private ClassObject redirectClass() { + exists(ModuleObject ex | + ex.getName() = "pyramid.httpexceptions" | + ex.getAttribute("HTTPFound") = result + or + ex.getAttribute("HTTPTemporaryRedirect") = result + ) +} + +/** + * Represents the first positional argument, or the `location` keyword argument, of a call to a pyramid redirect class (see `redirectClass()`). + */ +class PyramidRedirect extends TaintSink { + + override string toString() { + result = "pyramid.redirect" + } + + PyramidRedirect() { + exists(CallNode call | + call.getFunction().refersTo(redirectClass()) + | + call.getArg(0) = this + or + call.getArgByName("location") = this + ) + } + + override predicate sinks(TaintKind kind) { + kind instanceof StringKind + } + +} diff --git a/python/ql/src/semmle/python/web/pyramid/Request.qll b/python/ql/src/semmle/python/web/pyramid/Request.qll new file mode 100644 index 00000000000..a35c2120353 --- /dev/null +++ b/python/ql/src/semmle/python/web/pyramid/Request.qll @@ -0,0 +1,39 @@ +import python + +import semmle.python.security.TaintTracking +import semmle.python.web.Http +private import semmle.python.web.webob.Request +private import semmle.python.web.pyramid.View +/** The taint kind for pyramid request objects; its class is the `Request` attribute of the `pyramid.request` module. */ +class PyramidRequest extends BaseWebobRequest { + + PyramidRequest() { + this = "pyramid.request" + } + + override ClassObject getClass() { + result = any(ModuleObject m | m.getName() = "pyramid.request").getAttribute("Request") + } + +} + +/** Source of pyramid request objects: the first parameter of a pyramid view function. */ +class PyramidViewArgument extends TaintSource { + + PyramidViewArgument() { + exists(Function view_func | + is_pyramid_view_function(view_func) and + this.(ControlFlowNode).getNode() = view_func.getArg(0) + ) + } + + override predicate isSourceOf(TaintKind kind) { + kind instanceof PyramidRequest + } + + override string toString() { + result = "pyramid.view.argument" + } + +} + diff --git a/python/ql/src/semmle/python/web/pyramid/Response.qll 
b/python/ql/src/semmle/python/web/pyramid/Response.qll new file mode 100644 index 00000000000..85f572c9eee --- /dev/null +++ b/python/ql/src/semmle/python/web/pyramid/Response.qll @@ -0,0 +1,28 @@ +import python + + +import semmle.python.security.TaintTracking +import semmle.python.security.strings.Basic + +private import semmle.python.web.pyramid.View + +/** A pyramid response (a node returned from a pyramid view function), which is vulnerable to any sort of + * http response malice. */ +class PyramidRoutedResponse extends TaintSink { + + PyramidRoutedResponse() { + exists(PyFunctionObject view | + is_pyramid_view_function(view.getFunction()) and + this = view.getAReturnedNode() + ) + } + + override predicate sinks(TaintKind kind) { + kind instanceof StringKind + } + + override string toString() { + result = "pyramid.routed.response" + } + +} diff --git a/python/ql/src/semmle/python/web/pyramid/View.qll b/python/ql/src/semmle/python/web/pyramid/View.qll new file mode 100644 index 00000000000..20e59c4c76d --- /dev/null +++ b/python/ql/src/semmle/python/web/pyramid/View.qll @@ -0,0 +1,14 @@ +import python +/** Gets the module object named `pyramid.view`. */ +ModuleObject thePyramidViewModule() { + result.getName() = "pyramid.view" +} +/** Gets the `view_config` attribute of the `pyramid.view` module. */ +Object thePyramidViewConfig() { + result = thePyramidViewModule().getAttribute("view_config") +} +/** Holds if `func` has a decorator for which `refersTo` holds with `thePyramidViewConfig()` as its second argument (presumably: the decorator is an instance of `view_config`). */ +predicate is_pyramid_view_function(Function func) { + func.getADecorator().refersTo(_, thePyramidViewConfig(), _) +} + diff --git a/python/ql/src/semmle/python/web/tornado/Redirect.qll b/python/ql/src/semmle/python/web/tornado/Redirect.qll new file mode 100644 index 00000000000..3bfd022df72 --- /dev/null +++ b/python/ql/src/semmle/python/web/tornado/Redirect.qll @@ -0,0 +1,33 @@ +/** Provides a class representing arguments to the `redirect` method of a tornado request handler. + * This module is intended to be imported into a taint-tracking query + * to extend `TaintSink`. 
+ */ +import python + +import semmle.python.security.TaintTracking +import semmle.python.security.strings.Basic +import Tornado + + +/** + * Represents an argument to a call of the `redirect` attribute of a tornado request handler instance. + */ +class TornadoRedirect extends TaintSink { + + override string toString() { + result = "tornado.redirect" + } + + TornadoRedirect() { + exists(CallNode call, ControlFlowNode node | + node = call.getFunction().(AttrNode).getObject("redirect") and + isTornadoRequestHandlerInstance(node) and + this = call.getAnArg() + ) + } + + override predicate sinks(TaintKind kind) { + kind instanceof StringKind + } + +} diff --git a/python/ql/src/semmle/python/web/tornado/Request.qll b/python/ql/src/semmle/python/web/tornado/Request.qll new file mode 100644 index 00000000000..bc28dba114d --- /dev/null +++ b/python/ql/src/semmle/python/web/tornado/Request.qll @@ -0,0 +1,93 @@ +import python + +import semmle.python.security.TaintTracking +import semmle.python.web.Http +import Tornado + +/** A tornado.request.HttpRequest object. Its `headers`, `arguments` and `cookies` attributes are external string dictionaries; its `path`, `query` and `body` attributes are external strings. */ +class TornadoRequest extends TaintKind { + + TornadoRequest() { + this = "tornado.request.HttpRequest" + } + + override TaintKind getTaintOfAttribute(string name) { + result instanceof ExternalStringDictKind and + ( + name = "headers" or + name = "arguments" or + name = "cookies" + ) + or + result instanceof ExternalStringKind and + ( + name = "path" or + name = "query" or + name = "body" + ) + } + +} + +/** A source of `TornadoRequest` objects: the `request` attribute of a tornado request handler instance. */ +class TornadoRequestSource extends TaintSource { + + TornadoRequestSource() { + isTornadoRequestHandlerInstance(this.(AttrNode).getObject("request")) + } + + override string toString() { + result = "Tornado request source" + } + + override predicate isSourceOf(TaintKind kind) { + kind instanceof TornadoRequest + } + +} +/** A source of external strings: a call, as found by `callToNamedTornadoRequestHandlerMethod`, to one of the single-value argument accessors listed below. */ +class TornadoExternalInputSource extends TaintSource { + + TornadoExternalInputSource() { + exists(string name | + name = "get_argument" or + name = "get_query_argument" or + name = "get_body_argument" or + name = "decode_argument" + | 
+ this = callToNamedTornadoRequestHandlerMethod(name) + ) + } + + override string toString() { + result = "Tornado request method" + } + + override predicate isSourceOf(TaintKind kind) { + kind instanceof ExternalStringKind + } + +} + +class TornadoExternalInputListSource extends TaintSource { + + TornadoExternalInputListSource() { + exists(string name | + name = "get_arguments" or + name = "get_query_arguments" or + name = "get_body_arguments" + | + this = callToNamedTornadoRequestHandlerMethod(name) + ) + } + + override string toString() { + result = "Tornado request method" + } + + override predicate isSourceOf(TaintKind kind) { + kind instanceof ExternalStringSequenceKind + } + +} + diff --git a/python/ql/src/semmle/python/web/tornado/Response.qll b/python/ql/src/semmle/python/web/tornado/Response.qll new file mode 100644 index 00000000000..242ea816082 --- /dev/null +++ b/python/ql/src/semmle/python/web/tornado/Response.qll @@ -0,0 +1,96 @@ +import python + + +import semmle.python.security.TaintTracking +import semmle.python.security.strings.Basic + +import Tornado + +class TornadoConnection extends TaintKind { + + TornadoConnection() { + this = "tornado.http.connection" + } + +} + +class TornadoConnectionSource extends TaintSource { + + TornadoConnectionSource() { + isTornadoRequestHandlerInstance(this.(AttrNode).getObject("connection")) + } + + override string toString() { + result = "Tornado http connection source" + } + + override predicate isSourceOf(TaintKind kind) { + kind instanceof TornadoConnection + } + +} + +class TornadoConnectionWrite extends TaintSink { + + override string toString() { + result = "tornado.connection.write" + } + + TornadoConnectionWrite() { + exists(CallNode call, ControlFlowNode conn | + conn = call.getFunction().(AttrNode).getObject("write") and + this = call.getAnArg() | + exists(TornadoConnection tc | tc.taints(conn)) + or + isTornadoRequestHandlerInstance(conn) + ) + } + + override predicate sinks(TaintKind kind) { + kind 
instanceof StringKind + } + +} + +class TornadoHttpRequestHandlerWrite extends TaintSink { + + override string toString() { + result = "tornado.HttpRequesHandler.write" + } + + TornadoHttpRequestHandlerWrite() { + exists(CallNode call, ControlFlowNode node | + node = call.getFunction().(AttrNode).getObject("write") and + isTornadoRequestHandlerInstance(node) and + this = call.getAnArg() + ) + } + + override predicate sinks(TaintKind kind) { + kind instanceof StringKind + } + +} + +class TornadoHttpRequestHandlerRedirect extends TaintSink { + + override string toString() { + result = "tornado.HttpRequesHandler.redirect" + } + + TornadoHttpRequestHandlerRedirect() { + exists(CallNode call, ControlFlowNode node | + node = call.getFunction().(AttrNode).getObject("redirect") and + isTornadoRequestHandlerInstance(node) and + this = call.getArg(0) + ) + } + + override predicate sinks(TaintKind kind) { + kind instanceof StringKind + } + +} + + + diff --git a/python/ql/src/semmle/python/web/tornado/Tornado.qll b/python/ql/src/semmle/python/web/tornado/Tornado.qll new file mode 100644 index 00000000000..d20e81953a0 --- /dev/null +++ b/python/ql/src/semmle/python/web/tornado/Tornado.qll @@ -0,0 +1,35 @@ +import python + +import semmle.python.security.TaintTracking + +private ClassObject theTornadoRequestHandlerClass() { + result = any(ModuleObject m | m.getName() = "tornado.web").getAttribute("RequestHandler") +} + +ClassObject aTornadoRequestHandlerClass() { + result.getASuperType() = theTornadoRequestHandlerClass() +} + +FunctionObject getTornadoRequestHandlerMethod(string name) { + result = theTornadoRequestHandlerClass().declaredAttribute(name) +} + +/** Holds if `node` is likely to refer to an instance of a tornado + * `RequestHandler` class. + */ + +predicate isTornadoRequestHandlerInstance(ControlFlowNode node) { + node.refersTo(_, aTornadoRequestHandlerClass(), _) + or + /* In some cases, the points-to analysis won't capture all instances we care + * about. 
For these, we use the following syntactic check. First, that + * `node` appears inside a method of a subclass of + * `tornado.web.RequestHandler`:*/ + node.getScope().getEnclosingScope().(Class).getClassObject() = aTornadoRequestHandlerClass() and + /* Secondly, that `node` refers to the `self` argument: */ + node.isLoad() and node.(NameNode).isSelf() +} + +CallNode callToNamedTornadoRequestHandlerMethod(string name) { + isTornadoRequestHandlerInstance(result.getFunction().(AttrNode).getObject(name)) +} \ No newline at end of file diff --git a/python/ql/src/semmle/python/web/twisted/Request.qll b/python/ql/src/semmle/python/web/twisted/Request.qll new file mode 100644 index 00000000000..8be5db7bb4d --- /dev/null +++ b/python/ql/src/semmle/python/web/twisted/Request.qll @@ -0,0 +1,54 @@ +import python + +import semmle.python.security.TaintTracking +import semmle.python.web.Http +import Twisted + +/** A twisted.web.http.Request object */ +class TwistedRequest extends TaintKind { + + TwistedRequest() { + this = "twisted.request.http.Request" + } + + override TaintKind getTaintOfAttribute(string name) { + result instanceof ExternalStringSequenceDictKind and + ( + name = "args" + ) + or + result instanceof ExternalStringKind and + ( + name = "uri" or + name = "path" + ) + } + + override TaintKind getTaintOfMethodResult(string name) { + ( + name = "getHeader" or + name = "getCookie" or + name = "getUser" or + name = "getPassword" + ) and + result instanceof ExternalStringKind + } + +} + + +class TwistedRequestSource extends TaintSource { + + TwistedRequestSource() { + isTwistedRequestInstance(this) + } + + override string toString() { + result = "Twisted request source" + } + + override predicate isSourceOf(TaintKind kind) { + kind instanceof TwistedRequest + } + +} diff --git a/python/ql/src/semmle/python/web/twisted/Response.qll b/python/ql/src/semmle/python/web/twisted/Response.qll new file mode 100644 index 00000000000..45c5ad56b35 --- /dev/null +++ 
b/python/ql/src/semmle/python/web/twisted/Response.qll @@ -0,0 +1,54 @@ +import python + +import semmle.python.security.TaintTracking +import semmle.python.web.Http +import semmle.python.security.strings.Basic +import Twisted +import Request + +class TwistedResponse extends TaintSink { + TwistedResponse() { + exists(PyFunctionObject func, string name | + isKnownRequestHandlerMethodName(name) and + name = func.getName() and + func = getTwistedRequestHandlerMethod(name) and + this = func.getAReturnedNode() + ) + } + + override predicate sinks(TaintKind kind) { + kind instanceof ExternalStringKind + } + + override string toString() { + result = "Twisted response" + } +} + +/** + * A sink of taint in the form of a "setter" method on a twisted request + * object, which affects the properties of the subsequent response sent to this + * request. + */ + class TwistedRequestSetter extends TaintSink { + TwistedRequestSetter() { + exists(CallNode call, ControlFlowNode node, string name | + ( + name = "setHeader" or + name = "addCookie" or + name = "write" + ) and + any(TwistedRequest t).taints(node) and + node = call.getFunction().(AttrNode).getObject(name) and + this = call.getAnArg() + ) + } + + override predicate sinks(TaintKind kind) { + kind instanceof ExternalStringKind + } + + override string toString() { + result = "Twisted request setter" + } +} \ No newline at end of file diff --git a/python/ql/src/semmle/python/web/twisted/Twisted.qll b/python/ql/src/semmle/python/web/twisted/Twisted.qll new file mode 100644 index 00000000000..13db1dc9a8e --- /dev/null +++ b/python/ql/src/semmle/python/web/twisted/Twisted.qll @@ -0,0 +1,52 @@ +import python + +import semmle.python.security.TaintTracking + +private ClassObject theTwistedHttpRequestClass() { + result = any(ModuleObject m | m.getName() = "twisted.web.http").getAttribute("Request") +} + +private ClassObject theTwistedHttpResourceClass() { + result = any(ModuleObject m | m.getName() = 
"twisted.web.resource").getAttribute("Resource") +} + +ClassObject aTwistedRequestHandlerClass() { + result.getASuperType() = theTwistedHttpResourceClass() +} + +FunctionObject getTwistedRequestHandlerMethod(string name) { + result = aTwistedRequestHandlerClass().declaredAttribute(name) +} + +bindingset[name] +predicate isKnownRequestHandlerMethodName(string name) { + name = "render" or + name.matches("render_%") +} + +/** Holds if `node` is likely to refer to an instance of the twisted + * `Request` class. + */ +predicate isTwistedRequestInstance(NameNode node) { + node.refersTo(_, theTwistedHttpRequestClass(), _) + or + /* If points-to analysis cannot infer that a given object is an instance of + * the `twisted.web.http.Request` class, we also include any parameter + * called `request` that appears inside a subclass of a request handler + * class, and the appropriate arguments of known request handler methods. + */ + exists(Function func | func = node.getScope() | + func.getEnclosingScope().(Class).getClassObject() = aTwistedRequestHandlerClass() + ) and + ( + /* Any parameter called `request` */ + node.getId() = "request" and + node.isParameter() + or + /* Any request parameter of a known request handler method */ + exists(FunctionObject func | node.getScope() = func.getFunction() | + isKnownRequestHandlerMethodName(func.getName()) and + node.getNode() = func.getFunction().getArg(1) + ) + ) +} diff --git a/python/ql/src/semmle/python/web/webob/Request.qll b/python/ql/src/semmle/python/web/webob/Request.qll new file mode 100644 index 00000000000..5cb11ba23a6 --- /dev/null +++ b/python/ql/src/semmle/python/web/webob/Request.qll @@ -0,0 +1,51 @@ +import python + +import semmle.python.security.TaintTracking +import semmle.python.web.Http + +abstract class BaseWebobRequest extends TaintKind { + + bindingset[this] + BaseWebobRequest() { any() } + + override TaintKind getTaintOfAttribute(string name) { + result instanceof ExternalStringDictKind and + ( + name = "GET" 
or + name = "POST" or + name = "headers" + ) + or + result instanceof ExternalStringKind and + ( + name = "body" + ) + } + + override TaintKind getTaintOfMethodResult(string name) { + result = this and + ( + name = "copy" or + name = "copy_get" or + name = "copy_body" + ) + or + result instanceof ExternalStringKind and + ( + name = "as_bytes" + ) + } + +} + +class WebobRequest extends BaseWebobRequest { + + WebobRequest() { + this = "webob.Request" + } + + override ClassObject getClass() { + result = any(ModuleObject m | m.getName() = "webob.request").getAttribute("Request") + } + +} diff --git a/python/ql/src/semmle/python/xml/XML.qll b/python/ql/src/semmle/python/xml/XML.qll new file mode 100755 index 00000000000..13cc8d84f37 --- /dev/null +++ b/python/ql/src/semmle/python/xml/XML.qll @@ -0,0 +1,282 @@ +/** + * A library for working with XML files and their content. + */ + +import semmle.python.Files + +/** An XML element that has a location. */ +abstract class XMLLocatable extends @xmllocatable { + /** The source location for this element. */ + Location getLocation() { xmllocations(this,result) } + + /** + * Whether this element has the specified location information, + * including file path, start line, start column, end line and end column. + */ + predicate hasLocationInfo(string filepath, int startline, int startcolumn, int endline, int endcolumn) { + exists(File f, Location l | l = this.getLocation() | + locations_default(l,f,startline,startcolumn,endline,endcolumn) and + filepath = f.getName() + ) + } + + /** A printable representation of this element. */ + abstract string toString(); +} + +/** + * An `XMLParent` is either an `XMLElement` or an `XMLFile`, + * both of which can contain other elements. + */ +class XMLParent extends @xmlparent { + /** + * A printable representation of this XML parent. + * (Intended to be overridden in subclasses.) + */ + /*abstract*/ string getName() { result = "parent" } + + /** The file to which this XML parent belongs. 
*/ + XMLFile getFile() { result = this or xmlElements(this,_,_,_,result) } + + /** The child element at a specified index of this XML parent. */ + XMLElement getChild(int index) { xmlElements(result, _, this, index, _) } + + /** A child element of this XML parent. */ + XMLElement getAChild() { xmlElements(result,_,this,_,_) } + + /** A child element of this XML parent with the given `name`. */ + XMLElement getAChild(string name) { xmlElements(result,_,this,_,_) and result.hasName(name) } + + /** A comment that is a child of this XML parent. */ + XMLComment getAComment() { xmlComments(result,_,this,_) } + + /** A character sequence that is a child of this XML parent. */ + XMLCharacters getACharactersSet() { xmlChars(result,_,this,_,_,_) } + + /** The depth in the tree. (Overridden in XMLElement.) */ + int getDepth() { result = 0 } + + /** The number of child XML elements of this XML parent. */ + int getNumberOfChildren() { + result = count(XMLElement e | xmlElements(e,_,this,_,_)) + } + + /** The number of places in the body of this XML parent where text occurs. */ + int getNumberOfCharacterSets() { + result = count(int pos | xmlChars(_,_,this,pos,_,_)) + } + + /** + * Append the character sequences of this XML parent from left to right, separated by a space, + * up to a specified (zero-based) index. + */ + string charsSetUpTo(int n) { + (n = 0 and xmlChars(_,result,this,0,_,_)) or + (n > 0 and exists(string chars | xmlChars(_,chars,this,n,_,_) | + result = this.charsSetUpTo(n-1) + " " + chars)) + } + + /** Append all the character sequences of this XML parent from left to right, separated by a space. */ + string allCharactersString() { + exists(int n | n = this.getNumberOfCharacterSets() | + (n = 0 and result = "") or + (n > 0 and result = this.charsSetUpTo(n-1)) + ) + } + + /** The text value contained in this XML parent. */ + string getTextValue() { + result = allCharactersString() + } + + /** A printable representation of this XML parent. 
*/ + string toString() { result = this.getName() } +} + +/** An XML file. */ +class XMLFile extends XMLParent, File { + XMLFile() { + xmlEncoding(this,_) + } + + /** A printable representation of this XML file. */ + override + string toString() { result = XMLParent.super.toString() } + + /** The name of this XML file. */ + override + string getName() { files(this,result,_,_,_) } + + /** The path of this XML file. */ + string getPath() { files(this,_,result,_,_) } + + /** The path of the folder that contains this XML file. */ + string getFolder() { + result = this.getPath().substring(0, this.getPath().length()-this.getName().length()) + } + + /** The encoding of this XML file. */ + string getEncoding() { xmlEncoding(this,result) } + + /** The XML file itself. */ + override + XMLFile getFile() { result = this } + + /** A top-most element in an XML file. */ + XMLElement getARootElement() { result = this.getAChild() } + + /** A DTD associated with this XML file. */ + XMLDTD getADTD() { xmlDTDs(result,_,_,_,this) } +} + +/** A "Document Type Definition" of an XML file. */ +class XMLDTD extends @xmldtd { + /** The name of the root element of this DTD. */ + string getRoot() { xmlDTDs(this,result,_,_,_) } + + /** The public ID of this DTD. */ + string getPublicId() { xmlDTDs(this,_,result,_,_) } + + /** The system ID of this DTD. */ + string getSystemId() { xmlDTDs(this,_,_,result,_) } + + /** Whether this DTD is public. */ + predicate isPublic() { not xmlDTDs(this,_,"",_,_) } + + /** The parent of this DTD. */ + XMLParent getParent() { xmlDTDs(this,_,_,_,result) } + + /** A printable representation of this DTD. */ + string toString() { + (this.isPublic() and result = this.getRoot() + " PUBLIC '" + + this.getPublicId() + "' '" + + this.getSystemId() + "'") or + (not this.isPublic() and result = this.getRoot() + + " SYSTEM '" + + this.getSystemId() + "'") + } +} + +/** An XML tag in an XML file. 
*/ +class XMLElement extends @xmlelement, XMLParent, XMLLocatable { + /** Whether this XML element has the given `name`. */ + predicate hasName(string name) { name = getName() } + + /** The name of this XML element. */ + override + string getName() { xmlElements(this,result,_,_,_) } + + /** The XML file in which this XML element occurs. */ + override + XMLFile getFile() { xmlElements(this,_,_,_,result) } + + /** The parent of this XML element. */ + XMLParent getParent() { xmlElements(this,_,result,_,_) } + + /** The index of this XML element among its parent's children. */ + int getIndex() { xmlElements(this, _, _, result, _) } + + /** Whether this XML element has a namespace. */ + predicate hasNamespace() { xmlHasNs(this,_,_) } + + /** The namespace of this XML element, if any. */ + XMLNamespace getNamespace() { xmlHasNs(this,result,_) } + + /** The index of this XML element among its parent's children. */ + int getElementPositionIndex() { xmlElements(this,_,_,result,_) } + + /** The depth of this element within the XML file tree structure. */ + override + int getDepth() { result = this.getParent().getDepth() + 1 } + + /** An XML attribute of this XML element. */ + XMLAttribute getAnAttribute() { result.getElement() = this } + + /** The attribute with the specified `name`, if any. */ + XMLAttribute getAttribute(string name) { + result.getElement() = this and result.getName() = name + } + + /** Whether this XML element has an attribute with the specified `name`. */ + predicate hasAttribute(string name) { + exists(XMLAttribute a| a = this.getAttribute(name)) + } + + /** The value of the attribute with the specified `name`, if any. */ + string getAttributeValue(string name) { + result = this.getAttribute(name).getValue() + } + + /** A printable representation of this XML element. */ + override + string toString() { result = XMLParent.super.toString() } +} + +/** An attribute that occurs inside an XML element. 
*/ +class XMLAttribute extends @xmlattribute, XMLLocatable { + /** The name of this attribute. */ + string getName() { xmlAttrs(this,_,result,_,_,_) } + + /** The XML element to which this attribute belongs. */ + XMLElement getElement() { xmlAttrs(this,result,_,_,_,_) } + + /** Whether this attribute has a namespace. */ + predicate hasNamespace() { xmlHasNs(this,_,_) } + + /** The namespace of this attribute, if any. */ + XMLNamespace getNamespace() { xmlHasNs(this,result,_) } + + /** The value of this attribute. */ + string getValue() { xmlAttrs(this,_,_,result,_,_) } + + /** A printable representation of this XML attribute. */ + override string toString() { result = this.getName() + "=" + this.getValue() } +} + +/** A namespace used in an XML file */ +class XMLNamespace extends @xmlnamespace { + /** The prefix of this namespace. */ + string getPrefix() { xmlNs(this,result,_,_) } + + /** The URI of this namespace. */ + string getURI() { xmlNs(this,_,result,_) } + + /** Whether this namespace has no prefix. */ + predicate isDefault() { this.getPrefix() = "" } + + /** A printable representation of this XML namespace. */ + string toString() { + (this.isDefault() and result = this.getURI()) or + (not this.isDefault() and result = this.getPrefix() + ":" + this.getURI()) + } +} + +/** A comment of the form `<!-- ... -->` is an XML comment. */ +class XMLComment extends @xmlcomment, XMLLocatable { + /** The text content of this XML comment. */ + string getText() { xmlComments(this,result,_,_) } + + /** The parent of this XML comment. */ + XMLParent getParent() { xmlComments(this,_,result,_) } + + /** A printable representation of this XML comment. */ + override string toString() { result = this.getText() } +} + +/** + * A sequence of characters that occurs between opening and + * closing tags of an XML element, excluding other elements. + */ +class XMLCharacters extends @xmlcharacters, XMLLocatable { + /** The content of this character sequence. 
*/ + string getCharacters() { xmlChars(this,result,_,_,_,_) } + + /** The parent of this character sequence. */ + XMLParent getParent() { xmlChars(this,_,result,_,_,_) } + + /** Whether this character sequence is CDATA. */ + predicate isCDATA() { xmlChars(this,_,_,_,1,_) } + + /** A printable representation of this XML character sequence. */ + override string toString() { result = this.getCharacters() } +} diff --git a/python/ql/src/semmlecode.python.dbscheme b/python/ql/src/semmlecode.python.dbscheme new file mode 100644 index 00000000000..62a30d37a72 --- /dev/null +++ b/python/ql/src/semmlecode.python.dbscheme @@ -0,0 +1,982 @@ +/* + * This dbscheme is auto-generated by 'semmle/dbscheme_gen.py'. + * WARNING: Any modifications to this file will be lost. + * Relations can be changed by modifying master.py or + * by adding rules to dbscheme.template + */ + + /* + * External artifacts + */ + +externalDefects( + unique int id : @externalDefect, + varchar(900) queryPath : string ref, + int location : @location ref, + varchar(900) message : string ref, + float severity : float ref +); + +externalMetrics( + unique int id : @externalMetric, + varchar(900) queryPath : string ref, + int location : @location ref, + float value : float ref +); + +externalData( + int id : @externalDataElement, + varchar(900) queryPath : string ref, + int column: int ref, + varchar(900) data : string ref +); + +snapshotDate(unique date snapshotDate : date ref); + +sourceLocationPrefix(varchar(900) prefix : string ref); + + +/* + * Duplicate code + */ + +duplicateCode( + unique int id : @duplication, + varchar(900) relativePath : string ref, + int equivClass : int ref); + +similarCode( + unique int id : @similarity, + varchar(900) relativePath : string ref, + int equivClass : int ref); + +@duplication_or_similarity = @duplication | @similarity + +tokens( + int id : @duplication_or_similarity ref, + int offset : int ref, + int beginLine : int ref, + int beginColumn : int ref, + int endLine : int 
ref, + int endColumn : int ref); + +/* + * Line metrics + */ +py_codelines(int id : @py_scope ref, + int count : int ref); + +py_commentlines(int id : @py_scope ref, + int count : int ref); + +py_docstringlines(int id : @py_scope ref, + int count : int ref); + +py_alllines(int id : @py_scope ref, + int count : int ref); + +/* + * Version history + */ + +svnentries( + int id : @svnentry, + varchar(500) revision : string ref, + varchar(500) author : string ref, + date revisionDate : date ref, + int changeSize : int ref +) + +svnaffectedfiles( + int id : @svnentry ref, + int file : @file ref, + varchar(500) action : string ref +) + +svnentrymsg( + int id : @svnentry ref, + varchar(500) message : string ref +) + +svnchurn( + int commit : @svnentry ref, + int file : @file ref, + int addedLines : int ref, + int deletedLines : int ref +) + +/**************************** + Python dbscheme +****************************/ + +/* fromSource is ignored */ +files(unique int id: @file, + varchar(900) name: string ref, + varchar(900) simple: string ref, + varchar(900) ext: string ref, + int fromSource: int ref); + +folders(unique int id: @folder, + varchar(900) name: string ref, + varchar(900) simple: string ref); + +@container = @folder | @file; + +containerparent(int parent: @container ref, + unique int child: @container ref); + +@sourceline = @file | @py_Module | @xmllocatable; + +numlines(int element_id: @sourceline ref, + int num_lines: int ref, + int num_code: int ref, + int num_comment: int ref + ); + +@location = @location_ast | @location_default ; + +locations_default(unique int id: @location_default, + int file: @file ref, + int beginLine: int ref, + int beginColumn: int ref, + int endLine: int ref, + int endColumn: int ref); + +locations_ast(unique int id: @location_ast, + int module: @py_Module ref, + int beginLine: int ref, + int beginColumn: int ref, + int endLine: int ref, + int endColumn: int ref); + +file_contents(unique int file: @file ref, string contents: string 
ref); + +py_module_path(int module: @py_Module ref, int file: @container ref); + +variable(unique int id : @py_variable, + int scope : @py_scope ref, + varchar(1) name : string ref); + +py_line_lengths(unique int id : @py_line, + int file: @py_Module ref, + int line : int ref, + int length : int ref); + +py_extracted_version(int module : @py_Module ref, + varchar(1) version : string ref); + +/* AUTO GENERATED PART STARTS HERE */ + + +/* AnnAssign.location = 0, location */ +/* AnnAssign.value = 1, expr */ +/* AnnAssign.annotation = 2, expr */ +/* AnnAssign.target = 3, expr */ + +/* Assert.location = 0, location */ +/* Assert.test = 1, expr */ +/* Assert.msg = 2, expr */ + +/* Assign.location = 0, location */ +/* Assign.value = 1, expr */ +/* Assign.targets = 2, expr_list */ + +/* AssignExpr.location = 0, location */ +/* AssignExpr.parenthesised = 1, bool */ +/* AssignExpr.value = 2, expr */ +/* AssignExpr.target = 3, expr */ + +/* Attribute.location = 0, location */ +/* Attribute.parenthesised = 1, bool */ +/* Attribute.value = 2, expr */ +/* Attribute.attr = 3, str */ +/* Attribute.ctx = 4, expr_context */ + +/* AugAssign.location = 0, location */ +/* AugAssign.operation = 1, BinOp */ + +/* Await.location = 0, location */ +/* Await.parenthesised = 1, bool */ +/* Await.value = 2, expr */ + +/* BinaryExpr.location = 0, location */ +/* BinaryExpr.parenthesised = 1, bool */ +/* BinaryExpr.left = 2, expr */ +/* BinaryExpr.op = 3, operator */ +/* BinaryExpr.right = 4, expr */ +/* BinaryExpr = AugAssign */ + +/* BoolExpr.location = 0, location */ +/* BoolExpr.parenthesised = 1, bool */ +/* BoolExpr.op = 2, boolop */ +/* BoolExpr.values = 3, expr_list */ + +/* Break.location = 0, location */ + +/* Bytes.location = 0, location */ +/* Bytes.parenthesised = 1, bool */ +/* Bytes.s = 2, bytes */ +/* Bytes.prefix = 3, bytes */ +/* Bytes.implicitly_concatenated_parts = 4, StringPart_list */ + +/* Call.location = 0, location */ +/* Call.parenthesised = 1, bool */ +/* Call.func = 
2, expr */ +/* Call.positional_args = 3, expr_list */ +/* Call.named_args = 4, dict_item_list */ + +/* Class.name = 0, str */ +/* Class.body = 1, stmt_list */ +/* Class = ClassExpr */ + +/* ClassExpr.location = 0, location */ +/* ClassExpr.parenthesised = 1, bool */ +/* ClassExpr.name = 2, str */ +/* ClassExpr.bases = 3, expr_list */ +/* ClassExpr.keywords = 4, dict_item_list */ +/* ClassExpr.inner_scope = 5, Class */ + +/* Compare.location = 0, location */ +/* Compare.parenthesised = 1, bool */ +/* Compare.left = 2, expr */ +/* Compare.ops = 3, cmpop_list */ +/* Compare.comparators = 4, expr_list */ + +/* Continue.location = 0, location */ + +/* Delete.location = 0, location */ +/* Delete.targets = 1, expr_list */ + +/* Dict.location = 0, location */ +/* Dict.parenthesised = 1, bool */ +/* Dict.items = 2, dict_item_list */ + +/* DictComp.location = 0, location */ +/* DictComp.parenthesised = 1, bool */ +/* DictComp.function = 2, Function */ +/* DictComp.iterable = 3, expr */ + +/* DictUnpacking.location = 0, location */ +/* DictUnpacking.value = 1, expr */ + +/* Ellipsis.location = 0, location */ +/* Ellipsis.parenthesised = 1, bool */ + +/* ExceptStmt.location = 0, location */ +/* ExceptStmt.type = 1, expr */ +/* ExceptStmt.name = 2, expr */ +/* ExceptStmt.body = 3, stmt_list */ + +/* Exec.location = 0, location */ +/* Exec.body = 1, expr */ +/* Exec.globals = 2, expr */ +/* Exec.locals = 3, expr */ + +/* ExprStmt.location = 0, location */ +/* ExprStmt.value = 1, expr */ + +/* Filter.location = 0, location */ +/* Filter.parenthesised = 1, bool */ +/* Filter.value = 2, expr */ +/* Filter.filter = 3, expr */ + +/* For.location = 0, location */ +/* For.target = 1, expr */ +/* For.iter = 2, expr */ +/* For.body = 3, stmt_list */ +/* For.orelse = 4, stmt_list */ +/* For.is_async = 5, bool */ + +/* FormattedValue.location = 0, location */ +/* FormattedValue.parenthesised = 1, bool */ +/* FormattedValue.value = 2, expr */ +/* FormattedValue.conversion = 3, str */ +/* 
FormattedValue.format_spec = 4, JoinedStr */ + +/* Function.name = 0, str */ +/* Function.args = 1, parameter_list */ +/* Function.vararg = 2, expr */ +/* Function.kwonlyargs = 3, expr_list */ +/* Function.kwarg = 4, expr */ +/* Function.body = 5, stmt_list */ +/* Function.is_async = 6, bool */ +/* Function = FunctionParent */ + +/* FunctionExpr.location = 0, location */ +/* FunctionExpr.parenthesised = 1, bool */ +/* FunctionExpr.name = 2, str */ +/* FunctionExpr.args = 3, arguments */ +/* FunctionExpr.returns = 4, expr */ +/* FunctionExpr.inner_scope = 5, Function */ + +/* GeneratorExp.location = 0, location */ +/* GeneratorExp.parenthesised = 1, bool */ +/* GeneratorExp.function = 2, Function */ +/* GeneratorExp.iterable = 3, expr */ + +/* Global.location = 0, location */ +/* Global.names = 1, str_list */ + +/* If.location = 0, location */ +/* If.test = 1, expr */ +/* If.body = 2, stmt_list */ +/* If.orelse = 3, stmt_list */ + +/* IfExp.location = 0, location */ +/* IfExp.parenthesised = 1, bool */ +/* IfExp.test = 2, expr */ +/* IfExp.body = 3, expr */ +/* IfExp.orelse = 4, expr */ + +/* Import.location = 0, location */ +/* Import.names = 1, alias_list */ + +/* ImportExpr.location = 0, location */ +/* ImportExpr.parenthesised = 1, bool */ +/* ImportExpr.level = 2, int */ +/* ImportExpr.name = 3, str */ +/* ImportExpr.top = 4, bool */ + +/* ImportStar.location = 0, location */ +/* ImportStar.module = 1, expr */ + +/* ImportMember.location = 0, location */ +/* ImportMember.parenthesised = 1, bool */ +/* ImportMember.module = 2, expr */ +/* ImportMember.name = 3, str */ + +/* Fstring.location = 0, location */ +/* Fstring.parenthesised = 1, bool */ +/* Fstring.values = 2, expr_list */ +/* Fstring = FormattedValue */ + +/* KeyValuePair.location = 0, location */ +/* KeyValuePair.value = 1, expr */ +/* KeyValuePair.key = 2, expr */ + +/* Lambda.location = 0, location */ +/* Lambda.parenthesised = 1, bool */ +/* Lambda.args = 2, arguments */ +/* Lambda.inner_scope = 3, 
Function */ + +/* List.location = 0, location */ +/* List.parenthesised = 1, bool */ +/* List.elts = 2, expr_list */ +/* List.ctx = 3, expr_context */ + +/* ListComp.location = 0, location */ +/* ListComp.parenthesised = 1, bool */ +/* ListComp.function = 2, Function */ +/* ListComp.iterable = 3, expr */ +/* ListComp.generators = 4, comprehension_list */ +/* ListComp.elt = 5, expr */ + +/* Module.name = 0, str */ +/* Module.hash = 1, str */ +/* Module.body = 2, stmt_list */ +/* Module.kind = 3, str */ + +/* Name.location = 0, location */ +/* Name.parenthesised = 1, bool */ +/* Name.variable = 2, variable */ +/* Name.ctx = 3, expr_context */ +/* Name = ParameterList */ + +/* Nonlocal.location = 0, location */ +/* Nonlocal.names = 1, str_list */ + +/* Num.location = 0, location */ +/* Num.parenthesised = 1, bool */ +/* Num.n = 2, number */ +/* Num.text = 3, number */ + +/* Pass.location = 0, location */ + +/* PlaceHolder.location = 0, location */ +/* PlaceHolder.parenthesised = 1, bool */ +/* PlaceHolder.variable = 2, variable */ +/* PlaceHolder.ctx = 3, expr_context */ + +/* Print.location = 0, location */ +/* Print.dest = 1, expr */ +/* Print.values = 2, expr_list */ +/* Print.nl = 3, bool */ + +/* Raise.location = 0, location */ +/* Raise.exc = 1, expr */ +/* Raise.cause = 2, expr */ +/* Raise.type = 3, expr */ +/* Raise.inst = 4, expr */ +/* Raise.tback = 5, expr */ + +/* Repr.location = 0, location */ +/* Repr.parenthesised = 1, bool */ +/* Repr.value = 2, expr */ + +/* Return.location = 0, location */ +/* Return.value = 1, expr */ + +/* Set.location = 0, location */ +/* Set.parenthesised = 1, bool */ +/* Set.elts = 2, expr_list */ + +/* SetComp.location = 0, location */ +/* SetComp.parenthesised = 1, bool */ +/* SetComp.function = 2, Function */ +/* SetComp.iterable = 3, expr */ + +/* Slice.location = 0, location */ +/* Slice.parenthesised = 1, bool */ +/* Slice.start = 2, expr */ +/* Slice.stop = 3, expr */ +/* Slice.step = 4, expr */ + +/* Starred.location = 
0, location */ +/* Starred.parenthesised = 1, bool */ +/* Starred.value = 2, expr */ +/* Starred.ctx = 3, expr_context */ + +/* Str.location = 0, location */ +/* Str.parenthesised = 1, bool */ +/* Str.s = 2, str */ +/* Str.prefix = 3, str */ +/* Str.implicitly_concatenated_parts = 4, StringPart_list */ + +/* StringPart.text = 0, str */ +/* StringPart.location = 1, location */ +/* StringPart = StringPartList */ +/* StringPartList = BytesOrStr */ + +/* Subscript.location = 0, location */ +/* Subscript.parenthesised = 1, bool */ +/* Subscript.value = 2, expr */ +/* Subscript.index = 3, expr */ +/* Subscript.ctx = 4, expr_context */ + +/* TemplateDottedNotation.location = 0, location */ +/* TemplateDottedNotation.parenthesised = 1, bool */ +/* TemplateDottedNotation.value = 2, expr */ +/* TemplateDottedNotation.attr = 3, str */ +/* TemplateDottedNotation.ctx = 4, expr_context */ + +/* TemplateWrite.location = 0, location */ +/* TemplateWrite.value = 1, expr */ + +/* Try.location = 0, location */ +/* Try.body = 1, stmt_list */ +/* Try.orelse = 2, stmt_list */ +/* Try.handlers = 3, stmt_list */ +/* Try.finalbody = 4, stmt_list */ + +/* Tuple.location = 0, location */ +/* Tuple.parenthesised = 1, bool */ +/* Tuple.elts = 2, expr_list */ +/* Tuple.ctx = 3, expr_context */ +/* Tuple = ParameterList */ + +/* UnaryExpr.location = 0, location */ +/* UnaryExpr.parenthesised = 1, bool */ +/* UnaryExpr.op = 2, unaryop */ +/* UnaryExpr.operand = 3, expr */ + +/* While.location = 0, location */ +/* While.test = 1, expr */ +/* While.body = 2, stmt_list */ +/* While.orelse = 3, stmt_list */ + +/* With.location = 0, location */ +/* With.context_expr = 1, expr */ +/* With.optional_vars = 2, expr */ +/* With.body = 3, stmt_list */ +/* With.is_async = 4, bool */ + +/* Yield.location = 0, location */ +/* Yield.parenthesised = 1, bool */ +/* Yield.value = 2, expr */ + +/* YieldFrom.location = 0, location */ +/* YieldFrom.parenthesised = 1, bool */ +/* YieldFrom.value = 2, expr */ + +/* 
Alias.value = 0, expr */ +/* Alias.asname = 1, expr */ +/* Alias = AliasList */ +/* AliasList = Import */ + +/* Arguments.kw_defaults = 0, expr_list */ +/* Arguments.defaults = 1, expr_list */ +/* Arguments.annotations = 2, expr_list */ +/* Arguments.varargannotation = 3, expr */ +/* Arguments.kwargannotation = 4, expr */ +/* Arguments.kw_annotations = 5, expr_list */ +/* Arguments = ArgumentsParent */ +/* boolean = BoolParent */ +/* Boolop = BoolExpr */ +/* string = Bytes */ +/* Cmpop = CmpopList */ +/* CmpopList = Compare */ + +/* Comprehension.location = 0, location */ +/* Comprehension.iter = 1, expr */ +/* Comprehension.target = 2, expr */ +/* Comprehension.ifs = 3, expr_list */ +/* Comprehension = ComprehensionList */ +/* ComprehensionList = ListComp */ +/* DictItem = DictItemList */ +/* DictItemList = DictItemListParent */ + +/* Expr.location = 0, location */ +/* Expr.parenthesised = 1, bool */ +/* Expr = ExprParent */ +/* ExprContext = ExprContextParent */ +/* ExprList = ExprListParent */ +/* int = ImportExpr */ + +/* Keyword.location = 0, location */ +/* Keyword.value = 1, expr */ +/* Keyword.arg = 2, str */ +/* Location = LocationParent */ +/* string = Num */ +/* Operator = BinaryExpr */ +/* ParameterList = Function */ + +/* Stmt.location = 0, location */ +/* Stmt = StmtList */ +/* StmtList = StmtListParent */ +/* string = StrParent */ +/* StringList = StrListParent */ +/* Unaryop = UnaryExpr */ +/* Variable = VariableParent */ +py_Classes(unique int id : @py_Class, + unique int parent : @py_ClassExpr ref); + +py_Functions(unique int id : @py_Function, + unique int parent : @py_Function_parent ref); + +py_Modules(unique int id : @py_Module); + +py_StringParts(unique int id : @py_StringPart, + int parent : @py_StringPart_list ref, + int idx : int ref); + +py_StringPart_lists(unique int id : @py_StringPart_list, + unique int parent : @py_Bytes_or_Str ref); + +py_aliases(unique int id : @py_alias, + int parent : @py_alias_list ref, + int idx : int ref); + 
+py_alias_lists(unique int id : @py_alias_list, + unique int parent : @py_Import ref); + +py_arguments(unique int id : @py_arguments, + unique int parent : @py_arguments_parent ref); + +py_bools(int parent : @py_bool_parent ref, + int idx : int ref); + +py_boolops(unique int id : @py_boolop, + int kind: int ref, + unique int parent : @py_BoolExpr ref); + +py_bytes(varchar(1) id : string ref, + int parent : @py_Bytes ref, + int idx : int ref); + +py_cmpops(unique int id : @py_cmpop, + int kind: int ref, + int parent : @py_cmpop_list ref, + int idx : int ref); + +py_cmpop_lists(unique int id : @py_cmpop_list, + unique int parent : @py_Compare ref); + +py_comprehensions(unique int id : @py_comprehension, + int parent : @py_comprehension_list ref, + int idx : int ref); + +py_comprehension_lists(unique int id : @py_comprehension_list, + unique int parent : @py_ListComp ref); + +py_dict_items(unique int id : @py_dict_item, + int kind: int ref, + int parent : @py_dict_item_list ref, + int idx : int ref); + +py_dict_item_lists(unique int id : @py_dict_item_list, + unique int parent : @py_dict_item_list_parent ref); + +py_exprs(unique int id : @py_expr, + int kind: int ref, + int parent : @py_expr_parent ref, + int idx : int ref); + +py_expr_contexts(unique int id : @py_expr_context, + int kind: int ref, + unique int parent : @py_expr_context_parent ref); + +py_expr_lists(unique int id : @py_expr_list, + int parent : @py_expr_list_parent ref, + int idx : int ref); + +py_ints(int id : int ref, + unique int parent : @py_ImportExpr ref); + +py_locations(unique int id : @location ref, + unique int parent : @py_location_parent ref); + +py_numbers(varchar(1) id : string ref, + int parent : @py_Num ref, + int idx : int ref); + +py_operators(unique int id : @py_operator, + int kind: int ref, + unique int parent : @py_BinaryExpr ref); + +py_parameter_lists(unique int id : @py_parameter_list, + unique int parent : @py_Function ref); + +py_stmts(unique int id : @py_stmt, + int kind: 
int ref, + int parent : @py_stmt_list ref, + int idx : int ref); + +py_stmt_lists(unique int id : @py_stmt_list, + int parent : @py_stmt_list_parent ref, + int idx : int ref); + +py_strs(varchar(1) id : string ref, + int parent : @py_str_parent ref, + int idx : int ref); + +py_str_lists(unique int id : @py_str_list, + unique int parent : @py_str_list_parent ref); + +py_unaryops(unique int id : @py_unaryop, + int kind: int ref, + unique int parent : @py_UnaryExpr ref); + +py_variables(int id : @py_variable ref, + unique int parent : @py_variable_parent ref); + +case @py_boolop.kind of + 0 = @py_And +| 1 = @py_Or; + +case @py_cmpop.kind of + 0 = @py_Eq +| 1 = @py_Gt +| 2 = @py_GtE +| 3 = @py_In +| 4 = @py_Is +| 5 = @py_IsNot +| 6 = @py_Lt +| 7 = @py_LtE +| 8 = @py_NotEq +| 9 = @py_NotIn; + +case @py_dict_item.kind of + 0 = @py_DictUnpacking +| 1 = @py_KeyValuePair +| 2 = @py_keyword; + +case @py_expr.kind of + 0 = @py_Attribute +| 1 = @py_BinaryExpr +| 2 = @py_BoolExpr +| 3 = @py_Bytes +| 4 = @py_Call +| 5 = @py_ClassExpr +| 6 = @py_Compare +| 7 = @py_Dict +| 8 = @py_DictComp +| 9 = @py_Ellipsis +| 10 = @py_FunctionExpr +| 11 = @py_GeneratorExp +| 12 = @py_IfExp +| 13 = @py_ImportExpr +| 14 = @py_ImportMember +| 15 = @py_Lambda +| 16 = @py_List +| 17 = @py_ListComp +| 18 = @py_Name +| 19 = @py_Num +| 20 = @py_Repr +| 21 = @py_Set +| 22 = @py_SetComp +| 23 = @py_Slice +| 24 = @py_Starred +| 25 = @py_Str +| 26 = @py_Subscript +| 27 = @py_Tuple +| 28 = @py_UnaryExpr +| 29 = @py_Yield +| 30 = @py_YieldFrom +| 31 = @py_TemplateDottedNotation +| 32 = @py_Filter +| 33 = @py_PlaceHolder +| 34 = @py_Await +| 35 = @py_Fstring +| 36 = @py_FormattedValue +| 37 = @py_AssignExpr; + +case @py_expr_context.kind of + 0 = @py_AugLoad +| 1 = @py_AugStore +| 2 = @py_Del +| 3 = @py_Load +| 4 = @py_Param +| 5 = @py_Store; + +case @py_operator.kind of + 0 = @py_Add +| 1 = @py_BitAnd +| 2 = @py_BitOr +| 3 = @py_BitXor +| 4 = @py_Div +| 5 = @py_FloorDiv +| 6 = @py_LShift +| 7 = @py_Mod +| 8 
= @py_Mult +| 9 = @py_Pow +| 10 = @py_RShift +| 11 = @py_Sub +| 12 = @py_MatMult; + +case @py_stmt.kind of + 0 = @py_Assert +| 1 = @py_Assign +| 2 = @py_AugAssign +| 3 = @py_Break +| 4 = @py_Continue +| 5 = @py_Delete +| 6 = @py_ExceptStmt +| 7 = @py_Exec +| 8 = @py_Expr_stmt +| 9 = @py_For +| 10 = @py_Global +| 11 = @py_If +| 12 = @py_Import +| 13 = @py_ImportStar +| 14 = @py_Nonlocal +| 15 = @py_Pass +| 16 = @py_Print +| 17 = @py_Raise +| 18 = @py_Return +| 19 = @py_Try +| 20 = @py_While +| 21 = @py_With +| 22 = @py_TemplateWrite +| 23 = @py_AnnAssign; + +case @py_unaryop.kind of + 0 = @py_Invert +| 1 = @py_Not +| 2 = @py_UAdd +| 3 = @py_USub; + +@py_Bytes_or_Str = @py_Bytes | @py_Str; + +@py_Function_parent = @py_DictComp | @py_FunctionExpr | @py_GeneratorExp | @py_Lambda | @py_ListComp | @py_SetComp; + +@py_arguments_parent = @py_FunctionExpr | @py_Lambda; + +@py_ast_node = @py_Class | @py_Function | @py_Module | @py_StringPart | @py_comprehension | @py_dict_item | @py_expr | @py_stmt; + +@py_bool_parent = @py_For | @py_Function | @py_Print | @py_With | @py_expr; + +@py_dict_item_list_parent = @py_Call | @py_ClassExpr | @py_Dict; + +@py_expr_context_parent = @py_Attribute | @py_List | @py_Name | @py_PlaceHolder | @py_Starred | @py_Subscript | @py_TemplateDottedNotation | @py_Tuple; + +@py_expr_list_parent = @py_Assign | @py_BoolExpr | @py_Call | @py_ClassExpr | @py_Compare | @py_Delete | @py_Fstring | @py_Function | @py_List | @py_Print | @py_Set | @py_Tuple | @py_arguments | @py_comprehension; + +@py_expr_or_stmt = @py_expr | @py_stmt; + +@py_expr_parent = @py_AnnAssign | @py_Assert | @py_Assign | @py_AssignExpr | @py_Attribute | @py_AugAssign | @py_Await | @py_BinaryExpr | @py_Call | @py_Compare | @py_DictComp | @py_DictUnpacking | @py_ExceptStmt | @py_Exec | @py_Expr_stmt | @py_Filter | @py_For | @py_FormattedValue | @py_Function | @py_FunctionExpr | @py_GeneratorExp | @py_If | @py_IfExp | @py_ImportMember | @py_ImportStar | @py_KeyValuePair | @py_ListComp | 
@py_Print | @py_Raise | @py_Repr | @py_Return | @py_SetComp | @py_Slice | @py_Starred | @py_Subscript | @py_TemplateDottedNotation | @py_TemplateWrite | @py_UnaryExpr | @py_While | @py_With | @py_Yield | @py_YieldFrom | @py_alias | @py_arguments | @py_comprehension | @py_expr_list | @py_keyword | @py_parameter_list; + +@py_location_parent = @py_DictUnpacking | @py_KeyValuePair | @py_StringPart | @py_comprehension | @py_expr | @py_keyword | @py_stmt; + +@py_parameter = @py_Name | @py_Tuple; + +@py_scope = @py_Class | @py_Function | @py_Module; + +@py_stmt_list_parent = @py_Class | @py_ExceptStmt | @py_For | @py_Function | @py_If | @py_Module | @py_Try | @py_While | @py_With; + +@py_str_list_parent = @py_Global | @py_Nonlocal; + +@py_str_parent = @py_Attribute | @py_Class | @py_ClassExpr | @py_FormattedValue | @py_Function | @py_FunctionExpr | @py_ImportExpr | @py_ImportMember | @py_Module | @py_Str | @py_StringPart | @py_TemplateDottedNotation | @py_keyword | @py_str_list; + +@py_variable_parent = @py_Name | @py_PlaceHolder; + + +/* + * End of auto-generated part + */ + + + +/* Map relative names to absolute names for imports */ +py_absolute_names(int module : @py_Module ref, + varchar(1) relname : string ref, + varchar(1) absname : string ref); + +py_exports(int id : @py_Module ref, + varchar(1) name : string ref); + +/* Successor information */ +py_successors(int predecessor : @py_flow_node ref, + int successor : @py_flow_node ref); + +py_true_successors(int predecessor : @py_flow_node ref, + int successor : @py_flow_node ref); + +py_exception_successors(int predecessor : @py_flow_node ref, + int successor : @py_flow_node ref); + +py_false_successors(int predecessor : @py_flow_node ref, + int successor : @py_flow_node ref); + +py_flow_bb_node(unique int flownode : @py_flow_node, + int realnode : @py_ast_node ref, + int basicblock : @py_flow_node ref, + int index : int ref); + +py_scope_flow(int flow : @py_flow_node ref, + int scope : @py_scope ref, + int kind : 
int ref); + +py_idoms(unique int node : @py_flow_node ref, + int immediate_dominator : @py_flow_node ref); + +py_ssa_phi(int phi : @py_ssa_var ref, + int arg: @py_ssa_var ref); + +py_ssa_var(unique int id : @py_ssa_var, + int var : @py_variable ref); + +py_ssa_use(int node: @py_flow_node ref, + int var : @py_ssa_var ref); + +py_ssa_defn(unique int id : @py_ssa_var ref, + int node: @py_flow_node ref); + +@py_base_var = @py_variable | @py_ssa_var; + +py_scopes(unique int node : @py_expr_or_stmt ref, + int scope : @py_scope ref); + +py_scope_location(unique int id : @location ref, + unique int scope : @py_scope ref); + +py_flags_versioned(varchar(1) name : string ref, + varchar(1) value : string ref, + varchar(1) version : string ref); + +py_syntax_error_versioned(unique int id : @location ref, + varchar(1) message : string ref, + varchar(1) version : string ref); + +py_comments(unique int id : @py_comment, + varchar(1) text : string ref, + unique int location : @location ref); + +/* Type information support */ + +py_cobjects(unique int obj : @py_cobject); + +py_cobjecttypes(unique int obj : @py_cobject ref, + int typeof : @py_cobject ref); + +py_cobjectnames(unique int obj : @py_cobject ref, + varchar(1) name : string ref); + +/* Kind should be 0 for introspection, > 0 from source, as follows: + 1 from C extension source + */ +py_cobject_sources(int obj : @py_cobject ref, + int kind : int ref); + +py_cmembers_versioned(int object : @py_cobject ref, + varchar(1) name : string ref, + int member : @py_cobject ref, + varchar(1) version : string ref); + +py_citems(int object : @py_cobject ref, + int index : int ref, + int member : @py_cobject ref); + +ext_argtype(int funcid : @py_object ref, + int arg : int ref, + int typeid : @py_object ref); + +ext_rettype(int funcid : @py_object ref, + int typeid : @py_object ref); + +ext_proptype(int propid : @py_object ref, + int typeid : @py_object ref); + +ext_argreturn(int funcid : @py_object ref, + int arg : int ref); + 
+py_special_objects(unique int obj : @py_cobject ref, + unique varchar(1) name : string ref); + +py_decorated_object(int object : @py_object ref, + int level: int ref); + +@py_object = @py_cobject | @py_flow_node; + +@py_source_element = @py_ast_node | @container; + +/* XML Files */ + +xmlEncoding (unique int id: @file ref, varchar(900) encoding: string ref); + +xmlDTDs (unique int id: @xmldtd, + varchar(900) root: string ref, + varchar(900) publicId: string ref, + varchar(900) systemId: string ref, + int fileid: @file ref); + +xmlElements (unique int id: @xmlelement, + varchar(900) name: string ref, + int parentid: @xmlparent ref, + int idx: int ref, + int fileid: @file ref); + +xmlAttrs (unique int id: @xmlattribute, + int elementid: @xmlelement ref, + varchar(900) name: string ref, + varchar(3600) value: string ref, + int idx: int ref, + int fileid: @file ref); + +xmlNs (int id: @xmlnamespace, + varchar(900) prefixName: string ref, + varchar(900) URI: string ref, + int fileid: @file ref); + +xmlHasNs (int elementId: @xmlnamespaceable ref, + int nsId: @xmlnamespace ref, + int fileid: @file ref); + +xmlComments (unique int id: @xmlcomment, + varchar(3600) text: string ref, + int parentid: @xmlparent ref, + int fileid: @file ref); + +xmlChars (unique int id: @xmlcharacters, + varchar(3600) text: string ref, + int parentid: @xmlparent ref, + int idx: int ref, + int isCDATA: int ref, + int fileid: @file ref); + +@xmlparent = @file | @xmlelement; +@xmlnamespaceable = @xmlelement | @xmlattribute; + +xmllocations(int xmlElement: @xmllocatable ref, + int location: @location_default ref); + +@xmllocatable = @xmlcharacters | @xmlelement | @xmlcomment | @xmlattribute | @xmldtd | @file | @xmlnamespace; diff --git a/python/ql/src/semmlecode.python.dbscheme.stats b/python/ql/src/semmlecode.python.dbscheme.stats new file mode 100644 index 00000000000..a8a501f7660 --- /dev/null +++ b/python/ql/src/semmlecode.python.dbscheme.stats @@ -0,0 +1,17472 @@ + + +@externalDefect +100 + + 
+@externalMetric +100 + + +@externalDataElement +20 + + +@duplication +890 + + +@similarity +5591 + + +@svnentry +100 + + +@file +3066 + + +@folder +686 + + +@location_default +100 + + +@location_ast +2310679 + + +@py_variable +242770 + + +@py_line +100 + + +@py_Class +10244 + + +@py_Function +44860 + + +@py_Module +5983 + + +@py_StringPart +6399 + + +@py_StringPart_list +2296 + + +@py_alias +21374 + + +@py_alias_list +14396 + + +@py_arguments +41982 + + +@py_boolop +10907 + + +@py_And +7243 + + +@py_Or +3663 + + +@py_cmpop +38007 + + +@py_Eq +11370 + + +@py_Gt +1999 + + +@py_GtE +1306 + + +@py_In +4743 + + +@py_Is +6368 + + +@py_IsNot +4541 + + +@py_Lt +1920 + + +@py_LtE +1128 + + +@py_NotEq +3050 + + +@py_NotIn +1672 + + +@py_cmpop_list +37666 + + +@py_comprehension +1688 + + +@py_comprehension_list +1682 + + +@py_dict_item +167901 + + +@py_DictUnpacking +1521 + + +@py_KeyValuePair +92837 + + +@py_keyword +74612 + + +@py_dict_item_list +33758 + + +@py_expr +1684031 + + +@py_Attribute +249565 + + +@py_BinaryExpr +28868 + + +@py_BoolExpr +10907 + + +@py_Bytes +105600 + + +@py_Call +198138 + + +@py_ClassExpr +10244 + + +@py_Compare +37666 + + +@py_Dict +9635 + + +@py_DictComp +99 + + +@py_Ellipsis +115 + + +@py_Fstring +100 + + +@py_FormattedValue +100 + + +@py_FunctionExpr +41531 + + +@py_GeneratorExp +1066 + + +@py_IfExp +923 + + +@py_ImportExpr +21532 + + +@py_ImportMember +17714 + + +@py_Lambda +870 + + +@py_List +23200 + + +@py_ListComp +1690 + + +@py_Name +845963 + + +@py_Num +58723 + + +@py_Set +261 + + +@py_SetComp +49 + + +@py_Slice +5316 + + +@py_Starred +1265 + + +@py_Str +288427 + + +@py_Subscript +31583 + + +@py_Tuple +27693 + + +@py_UnaryExpr +13295 + + +@py_Yield +3941 + + +@py_YieldFrom +398 + + +@py_Repr +100 + + +@py_TemplateDottedNotation +100 + + +@py_Filter +100 + + +@py_PlaceHolder +100 + + +@py_Await +500 + + +@py_AssignExpr +200 + + +@py_expr_context +1140675 + + +@py_Del +1324 + + +@py_Load +853094 + + +@py_Param +96047 + + +@py_Store 
+198700 + + +@py_AugLoad +100 + + +@py_AugStore +100 + + +@py_expr_list +430986 + + +@py_operator +28868 + + +@py_Add +13603 + + +@py_BitAnd +796 + + +@py_BitOr +799 + + +@py_BitXor +190 + + +@py_Div +393 + + +@py_FloorDiv +362 + + +@py_LShift +279 + + +@py_Mod +8234 + + +@py_Mult +2218 + + +@py_Pow +501 + + +@py_RShift +157 + + +@py_Sub +3136 + + +@py_MatMult +100 + + +@py_parameter_list +43271 + + +@py_stmt +372643 + + +@py_Assert +1999 + + +@py_Assign +151576 + + +@py_AugAssign +3656 + + +@py_Break +1699 + + +@py_Continue +1199 + + +@py_Delete +1149 + + +@py_ExceptStmt +5610 + + +@py_Expr_stmt +76750 + + +@py_For +11495 + + +@py_Global +392 + + +@py_If +53619 + + +@py_Import +14396 + + +@py_ImportStar +158 + + +@py_Nonlocal +35 + + +@py_Pass +2872 + + +@py_Raise +7794 + + +@py_Return +36127 + + +@py_Try +6210 + + +@py_While +2138 + + +@py_With +4193 + + +@py_Exec +43 + + +@py_Print +1032 + + +@py_TemplateWrite +100 + + +@py_AnnAssign +100 + + +@py_stmt_list +156700 + + +@py_str_list +427 + + +@py_unaryop +13295 + + +@py_Invert +107 + + +@py_Not +8655 + + +@py_UAdd +14 + + +@py_USub +4565 + + +@py_flow_node +2323431 + + +@py_ssa_var +272292 + + +@py_comment +77830 + + +@py_cobject +112856 + + +@xmldtd +100 + + +@xmlelement +100 + + +@xmlattribute +100 + + +@xmlnamespace +100 + + +@xmlcomment +100 + + +@xmlcharacters +100 + + + +externalDefects +100 + + +id +100 + + +queryPath +100 + + +location +100 + + +message +100 + + +severity +100 + + + + +id +queryPath + + +12 + + +1 +2 +2 + + + + + + +id +location + + +12 + + +1 +2 +2 + + + + + + +id +message + + +12 + + +1 +2 +2 + + + + + + +id +severity + + +12 + + +1 +2 +2 + + + + + + +queryPath +id + + +12 + + + + + +queryPath +location + + +12 + + + + + +queryPath +message + + +12 + + + + + +queryPath +severity + + +12 + + + + + +location +id + + +12 + + + + + +location +queryPath + + +12 + + + + + +location +message + + +12 + + + + + +location +severity + + +12 + + + + + +message +id + + +12 + + + + + +message 
+queryPath + + +12 + + + + + +message +location + + +12 + + + + + +message +severity + + +12 + + + + + +severity +id + + +12 + + + + + +severity +queryPath + + +12 + + + + + +severity +location + + +12 + + + + + +severity +message + + +12 + + + + + + + +externalMetrics +100 + + +id +100 + + +queryPath +100 + + +location +100 + + +value +100 + + + + +id +queryPath + + +12 + + +1 +2 +1 + + + + + + +id +location + + +12 + + +1 +2 +1 + + + + + + +id +value + + +12 + + +1 +2 +1 + + + + + + +queryPath +id + + +12 + + + + + +queryPath +location + + +12 + + + + + +queryPath +value + + +12 + + + + + +location +id + + +12 + + + + + +location +queryPath + + +12 + + + + + +location +value + + +12 + + + + + +value +id + + +12 + + + + + +value +queryPath + + +12 + + + + + +value +location + + +12 + + + + + + + +externalData +41 + + +id +20 + + +queryPath +2 + + +column +5 + + +data +41 + + + + +id +queryPath + + +12 + + +1 +2 +20 + + + + + + +id +column + + +12 + + +2 +3 +20 + + + + + + +id +data + + +12 + + +2 +3 +20 + + + + + + +queryPath +id + + +12 + + +7 +8 +2 + + + + + + +queryPath +column + + +12 + + +2 +3 +2 + + + + + + +queryPath +data + + +12 + + +14 +15 +2 + + + + + + +column +id + + +12 + + +7 +8 +5 + + + + + + +column +queryPath + + +12 + + +1 +2 +5 + + + + + + +column +data + + +12 + + +7 +8 +5 + + + + + + +data +id + + +12 + + +1 +2 +41 + + + + + + +data +queryPath + + +12 + + +1 +2 +41 + + + + + + +data +column + + +12 + + +1 +2 +41 + + + + + + + + +snapshotDate +2 + + +snapshotDate +2 + + + + + +sourceLocationPrefix +2 + + +prefix +2 + + + + + +duplicateCode +890 + + +id +890 + + +relativePath +91 + + +equivClass +415 + + + + +id +relativePath + + +12 + + +1 +2 +890 + + + + + + +id +equivClass + + +12 + + +1 +2 +890 + + + + + + +relativePath +id + + +12 + + +1 +2 +30 + + +2 +3 +16 + + +3 +4 +4 + + +4 +5 +8 + + +6 +8 +6 + + +8 +12 +6 + + +12 +19 +6 + + +23 +47 +6 + + +48 +109 +4 + + + + + + +relativePath +equivClass + + +12 + + +1 +2 +38 + + +2 +3 +12 + + +3 +4 
+6 + + +4 +5 +8 + + +6 +10 +8 + + +10 +15 +6 + + +15 +46 +6 + + +92 +105 +2 + + + + + + +equivClass +id + + +12 + + +2 +3 +371 + + +3 +4 +31 + + +4 +7 +12 + + + + + + +equivClass +relativePath + + +12 + + +1 +2 +95 + + +2 +3 +288 + + +3 +5 +31 + + + + + + + + +similarCode +5591 + + +id +5591 + + +relativePath +347 + + +equivClass +1696 + + + + +id +relativePath + + +12 + + +1 +2 +5591 + + + + + + +id +equivClass + + +12 + + +1 +2 +5591 + + + + + + +relativePath +id + + +12 + + +1 +2 +44 + + +2 +3 +33 + + +3 +5 +31 + + +5 +7 +30 + + +7 +9 +18 + + +9 +11 +26 + + +11 +13 +26 + + +13 +18 +29 + + +18 +23 +29 + + +23 +30 +24 + + +30 +42 +26 + + +45 +155 +26 + + +161 +162 +1 + + + + + + +relativePath +equivClass + + +12 + + +1 +2 +66 + + +2 +3 +19 + + +3 +4 +20 + + +4 +5 +18 + + +5 +6 +18 + + +6 +8 +27 + + +8 +10 +30 + + +10 +13 +26 + + +13 +18 +26 + + +18 +23 +26 + + +23 +31 +31 + + +31 +53 +26 + + +54 +145 +9 + + + + + + +equivClass +id + + +12 + + +2 +3 +937 + + +3 +4 +260 + + +4 +5 +166 + + +5 +6 +88 + + +6 +8 +138 + + +8 +11 +105 + + + + + + +equivClass +relativePath + + +12 + + +1 +2 +358 + + +2 +3 +733 + + +3 +4 +216 + + +4 +5 +139 + + +5 +7 +110 + + +7 +10 +127 + + +10 +11 +9 + + + + + + + + +tokens +889686 + + +id +6481 + + +offset +10514 + + +beginLine +9882 + + +beginColumn +1197 + + +endLine +9882 + + +endColumn +1207 + + + + +id +offset + + +12 + + +100 +101 +394 + + +101 +102 +750 + + +102 +103 +347 + + +103 +104 +414 + + +104 +105 +405 + + +105 +107 +528 + + +107 +108 +414 + + +108 +111 +513 + + +111 +117 +555 + + +117 +127 +494 + + +127 +145 +490 + + +145 +176 +487 + + +176 +284 +488 + + +289 +7594 +196 + + + + + + +id +beginLine + + +12 + + +5 +9 +396 + + +9 +10 +299 + + +10 +11 +559 + + +11 +12 +432 + + +12 +13 +598 + + +13 +14 +747 + + +14 +15 +541 + + +15 +17 +564 + + +17 +20 +589 + + +20 +24 +573 + + +24 +28 +526 + + +28 +51 +498 + + +51 +1520 +155 + + + + + + +id +beginColumn + + +12 + + +9 +17 +516 + + +17 +22 +488 + + +22 +31 +563 + + +31 +37 +566 
+ + +37 +43 +585 + + +43 +46 +472 + + +46 +49 +591 + + +49 +51 +438 + + +51 +54 +571 + + +54 +56 +443 + + +56 +59 +484 + + +59 +68 +524 + + +68 +131 +234 + + + + + + +id +endLine + + +12 + + +5 +9 +396 + + +9 +10 +299 + + +10 +11 +559 + + +11 +12 +432 + + +12 +13 +598 + + +13 +14 +747 + + +14 +15 +541 + + +15 +17 +564 + + +17 +20 +589 + + +20 +24 +573 + + +24 +28 +526 + + +28 +51 +502 + + +51 +1520 +150 + + + + + + +id +endColumn + + +12 + + +10 +18 +450 + + +18 +23 +523 + + +23 +33 +531 + + +33 +39 +495 + + +39 +44 +504 + + +44 +48 +533 + + +48 +51 +544 + + +51 +54 +549 + + +54 +56 +492 + + +56 +58 +458 + + +58 +61 +508 + + +61 +67 +498 + + +67 +133 +391 + + + + + + +offset +id + + +12 + + +2 +3 +6935 + + +4 +5 +693 + + +6 +11 +706 + + +12 +15 +887 + + +16 +93 +790 + + +94 +4682 +499 + + + + + + +offset +beginLine + + +12 + + +2 +3 +6935 + + +4 +5 +693 + + +6 +11 +706 + + +12 +15 +891 + + +16 +91 +789 + + +91 +1817 +497 + + + + + + +offset +beginColumn + + +12 + + +1 +2 +6952 + + +2 +3 +722 + + +3 +5 +674 + + +5 +8 +969 + + +8 +41 +797 + + +41 +169 +397 + + + + + + +offset +endLine + + +12 + + +2 +3 +6935 + + +4 +5 +693 + + +6 +11 +706 + + +12 +15 +891 + + +16 +91 +789 + + +91 +1817 +497 + + + + + + +offset +endColumn + + +12 + + +1 +2 +6973 + + +2 +3 +696 + + +3 +6 +929 + + +6 +9 +801 + + +9 +57 +798 + + +57 +172 +314 + + + + + + +beginLine +id + + +12 + + +1 +2 +1613 + + +2 +3 +1931 + + +3 +4 +987 + + +4 +5 +650 + + +5 +7 +825 + + +7 +9 +744 + + +9 +12 +772 + + +12 +17 +836 + + +17 +37 +749 + + +37 +148 +742 + + +151 +217 +29 + + + + + + +beginLine +offset + + +12 + + +1 +4 +697 + + +4 +8 +882 + + +8 +11 +746 + + +11 +15 +883 + + +15 +20 +801 + + +20 +25 +756 + + +25 +32 +757 + + +32 +42 +743 + + +42 +55 +742 + + +55 +72 +778 + + +72 +98 +747 + + +98 +148 +751 + + +148 +211 +594 + + + + + + +beginLine +beginColumn + + +12 + + +1 +3 +749 + + +3 +6 +686 + + +6 +8 +605 + + +8 +10 +779 + + +10 +12 +733 + + +12 +14 +714 + + +14 +17 +726 + + +17 +21 +880 + + +21 +26 
+872 + + +26 +32 +852 + + +32 +40 +810 + + +40 +54 +771 + + +54 +184 +699 + + + + + + +beginLine +endLine + + +12 + + +1 +2 +9740 + + +2 +4 +142 + + + + + + +beginLine +endColumn + + +12 + + +1 +3 +750 + + +3 +6 +666 + + +6 +8 +621 + + +8 +10 +722 + + +10 +12 +720 + + +12 +14 +699 + + +14 +17 +721 + + +17 +21 +890 + + +21 +26 +862 + + +26 +32 +839 + + +32 +40 +794 + + +40 +53 +790 + + +53 +81 +746 + + +81 +185 +56 + + + + + + +beginColumn +id + + +12 + + +1 +2 +389 + + +2 +3 +200 + + +3 +4 +80 + + +4 +7 +105 + + +7 +8 +90 + + +8 +11 +91 + + +11 +45 +91 + + +48 +2322 +90 + + +2328 +3928 +59 + + + + + + +beginColumn +offset + + +12 + + +1 +2 +404 + + +2 +3 +206 + + +3 +4 +65 + + +4 +7 +101 + + +7 +8 +88 + + +8 +11 +94 + + +11 +33 +90 + + +33 +345 +90 + + +360 +2645 +58 + + + + + + +beginColumn +beginLine + + +12 + + +1 +2 +628 + + +2 +3 +204 + + +3 +4 +90 + + +4 +10 +99 + + +10 +750 +90 + + +762 +5047 +84 + + + + + + +beginColumn +endLine + + +12 + + +1 +2 +628 + + +2 +3 +204 + + +3 +4 +90 + + +4 +10 +99 + + +10 +750 +90 + + +762 +5046 +84 + + + + + + +beginColumn +endColumn + + +12 + + +1 +2 +822 + + +2 +3 +152 + + +3 +6 +95 + + +6 +31 +92 + + +31 +99 +34 + + + + + + +endLine +id + + +12 + + +1 +2 +1613 + + +2 +3 +1931 + + +3 +4 +987 + + +4 +5 +652 + + +5 +7 +823 + + +7 +9 +744 + + +9 +12 +772 + + +12 +17 +836 + + +17 +37 +749 + + +37 +148 +742 + + +151 +217 +29 + + + + + + +endLine +offset + + +12 + + +1 +4 +702 + + +4 +8 +876 + + +8 +11 +749 + + +11 +15 +883 + + +15 +20 +801 + + +20 +25 +756 + + +25 +32 +753 + + +32 +42 +744 + + +42 +55 +743 + + +55 +72 +779 + + +72 +98 +746 + + +98 +148 +751 + + +148 +211 +594 + + + + + + +endLine +beginLine + + +12 + + +1 +2 +9734 + + +2 +3 +148 + + + + + + +endLine +beginColumn + + +12 + + +1 +3 +749 + + +3 +6 +685 + + +6 +8 +607 + + +8 +10 +782 + + +10 +12 +728 + + +12 +14 +714 + + +14 +17 +728 + + +17 +21 +880 + + +21 +26 +873 + + +26 +32 +851 + + +32 +40 +810 + + +40 +54 +771 + + +54 +184 +699 + + + + + + +endLine +endColumn 
+ + +12 + + +1 +3 +750 + + +3 +6 +664 + + +6 +8 +625 + + +8 +10 +721 + + +10 +12 +718 + + +12 +14 +702 + + +14 +17 +721 + + +17 +21 +883 + + +21 +26 +862 + + +26 +32 +841 + + +32 +40 +797 + + +40 +53 +792 + + +53 +81 +743 + + +81 +185 +56 + + + + + + +endColumn +id + + +12 + + +1 +2 +391 + + +2 +3 +192 + + +3 +4 +84 + + +4 +7 +102 + + +7 +8 +92 + + +8 +11 +98 + + +11 +47 +91 + + +50 +2174 +91 + + +2189 +4114 +62 + + + + + + +endColumn +offset + + +12 + + +1 +2 +408 + + +2 +3 +193 + + +3 +4 +74 + + +4 +7 +95 + + +7 +8 +85 + + +8 +11 +103 + + +11 +36 +91 + + +37 +353 +91 + + +364 +1140 +62 + + + + + + +endColumn +beginLine + + +12 + + +1 +2 +625 + + +2 +3 +211 + + +3 +4 +84 + + +4 +8 +91 + + +8 +405 +91 + + +414 +3303 +91 + + +3320 +3523 +11 + + + + + + +endColumn +beginColumn + + +12 + + +1 +2 +812 + + +2 +3 +167 + + +3 +8 +95 + + +8 +33 +92 + + +33 +42 +38 + + + + + + +endColumn +endLine + + +12 + + +1 +2 +625 + + +2 +3 +211 + + +3 +4 +84 + + +4 +8 +91 + + +8 +405 +91 + + +414 +3303 +91 + + +3320 +3523 +11 + + + + + + + + +py_codelines +52985 + + +id +52985 + + +count +732 + + + + +id +count + + +12 + + +1 +2 +52985 + + + + + + +count +id + + +12 + + +1 +2 +307 + + +2 +3 +116 + + +3 +4 +59 + + +4 +6 +61 + + +6 +11 +62 + + +11 +28 +57 + + +28 +612 +55 + + +631 +13079 +15 + + + + + + + + +py_commentlines +52983 + + +id +52983 + + +count +198 + + + + +id +count + + +12 + + +1 +2 +52983 + + + + + + +count +id + + +12 + + +1 +2 +78 + + +2 +3 +26 + + +3 +4 +11 + + +4 +6 +16 + + +6 +10 +15 + + +10 +19 +15 + + +19 +48 +15 + + +49 +351 +15 + + +494 +40367 +7 + + + + + + + + +py_docstringlines +52983 + + +id +52983 + + +count +123 + + + + +id +count + + +12 + + +1 +2 +52983 + + + + + + +count +id + + +12 + + +1 +2 +20 + + +2 +3 +11 + + +3 +4 +9 + + +4 +5 +10 + + +5 +8 +11 + + +8 +13 +10 + + +14 +22 +11 + + +22 +29 +10 + + +29 +54 +10 + + +56 +175 +10 + + +232 +5368 +10 + + +36413 +36414 +1 + + + + + + + + +py_alllines +52983 + + +id +52983 + + +count +829 + + + + +id +count 
+ + +12 + + +1 +2 +52983 + + + + + + +count +id + + +12 + + +1 +2 +361 + + +2 +3 +108 + + +3 +4 +68 + + +4 +5 +47 + + +5 +8 +69 + + +8 +17 +65 + + +17 +93 +64 + + +113 +9596 +47 + + + + + + + + +svnentries +100 + + +id +100 + + +revision +100 + + +author +100 + + +revisionDate +100 + + +changeSize +100 + + + + +id +revision + + +12 + + + + + +id +author + + +12 + + + + + +id +revisionDate + + +12 + + + + + +id +changeSize + + +12 + + + + + +revision +id + + +12 + + + + + +revision +author + + +12 + + + + + +revision +revisionDate + + +12 + + + + + +revision +changeSize + + +12 + + + + + +author +id + + +12 + + + + + +author +revision + + +12 + + + + + +author +revisionDate + + +12 + + + + + +author +changeSize + + +12 + + + + + +revisionDate +id + + +12 + + + + + +revisionDate +revision + + +12 + + + + + +revisionDate +author + + +12 + + + + + +revisionDate +changeSize + + +12 + + + + + +changeSize +id + + +12 + + + + + +changeSize +revision + + +12 + + + + + +changeSize +author + + +12 + + + + + +changeSize +revisionDate + + +12 + + + + + + + +svnaffectedfiles +100 + + +id +100 + + +file +100 + + +action +100 + + + + +id +file + + +12 + + + + + +id +action + + +12 + + + + + +file +id + + +12 + + + + + +file +action + + +12 + + + + + +action +id + + +12 + + + + + +action +file + + +12 + + + + + + + +svnentrymsg +100 + + +id +100 + + +message +100 + + + + +id +message + + +12 + + + + + +message +id + + +12 + + + + + + + +svnchurn +100 + + +commit +100 + + +file +100 + + +addedLines +100 + + +deletedLines +100 + + + + +commit +file + + +12 + + + + + +commit +addedLines + + +12 + + + + + +commit +deletedLines + + +12 + + + + + +file +commit + + +12 + + + + + +file +addedLines + + +12 + + + + + +file +deletedLines + + +12 + + + + + +addedLines +commit + + +12 + + + + + +addedLines +file + + +12 + + + + + +addedLines +deletedLines + + +12 + + + + + +deletedLines +commit + + +12 + + + + + +deletedLines +file + + +12 + + + + + +deletedLines +addedLines + + +12 + + + + + + 
+ +files +3066 + + +id +3066 + + +name +3066 + + +simple +1294 + + +ext +1 + + +fromSource +1 + + + + +id +name + + +12 + + +1 +2 +3066 + + + + + + +id +simple + + +12 + + +1 +2 +3066 + + + + + + +id +ext + + +12 + + +1 +2 +3066 + + + + + + +id +fromSource + + +12 + + +1 +2 +3066 + + + + + + +name +id + + +12 + + +1 +2 +3066 + + + + + + +name +simple + + +12 + + +1 +2 +3066 + + + + + + +name +ext + + +12 + + +1 +2 +3066 + + + + + + +name +fromSource + + +12 + + +1 +2 +3066 + + + + + + +simple +id + + +12 + + +1 +2 +1058 + + +2 +3 +132 + + +3 +38 +98 + + +47 +646 +6 + + + + + + +simple +name + + +12 + + +1 +2 +1058 + + +2 +3 +132 + + +3 +38 +98 + + +47 +646 +6 + + + + + + +simple +ext + + +12 + + +1 +2 +1294 + + + + + + +simple +fromSource + + +12 + + +1 +2 +1294 + + + + + + +ext +id + + +12 + + +3066 +3067 +1 + + + + + + +ext +name + + +12 + + +3066 +3067 +1 + + + + + + +ext +simple + + +12 + + +1294 +1295 +1 + + + + + + +ext +fromSource + + +12 + + +1 +2 +1 + + + + + + +fromSource +id + + +12 + + +3066 +3067 +1 + + + + + + +fromSource +name + + +12 + + +3066 +3067 +1 + + + + + + +fromSource +simple + + +12 + + +1294 +1295 +1 + + + + + + +fromSource +ext + + +12 + + +1 +2 +1 + + + + + + + + +folders +686 + + +id +686 + + +name +686 + + +simple +538 + + + + +id +name + + +12 + + +1 +2 +686 + + + + + + +id +simple + + +12 + + +1 +2 +686 + + + + + + +name +id + + +12 + + +1 +2 +686 + + + + + + +name +simple + + +12 + + +1 +2 +686 + + + + + + +simple +id + + +12 + + +1 +2 +481 + + +2 +4 +45 + + +4 +27 +12 + + + + + + +simple +name + + +12 + + +1 +2 +481 + + +2 +4 +45 + + +4 +27 +12 + + + + + + + + +containerparent +3750 + + +parent +685 + + +child +3750 + + + + +parent +child + + +12 + + +1 +2 +53 + + +2 +3 +202 + + +3 +4 +176 + + +4 +5 +57 + + +5 +6 +34 + + +6 +8 +56 + + +8 +13 +54 + + +13 +149 +52 + + +204 +205 +1 + + + + + + +child +parent + + +12 + + +1 +2 +3750 + + + + + + + + +numlines +2553 + + +element_id +2553 + + +num_lines +687 + + +num_code +648 + + 
+num_comment +193 + + + + +element_id +num_lines + + +12 + + +1 +2 +2553 + + + + + + +element_id +num_code + + +12 + + +1 +2 +2553 + + + + + + +element_id +num_comment + + +12 + + +1 +2 +2553 + + + + + + +num_lines +element_id + + +12 + + +1 +2 +345 + + +2 +3 +129 + + +3 +4 +44 + + +4 +6 +57 + + +6 +11 +54 + + +11 +34 +52 + + +35 +60 +6 + + + + + + +num_lines +num_code + + +12 + + +1 +2 +348 + + +2 +3 +134 + + +3 +4 +46 + + +4 +5 +41 + + +5 +6 +39 + + +6 +9 +60 + + +9 +17 +19 + + + + + + +num_lines +num_comment + + +12 + + +1 +2 +348 + + +2 +3 +134 + + +3 +4 +46 + + +4 +5 +41 + + +5 +6 +39 + + +6 +9 +60 + + +9 +17 +19 + + + + + + +num_code +element_id + + +12 + + +1 +2 +319 + + +2 +3 +110 + + +3 +4 +53 + + +4 +6 +56 + + +6 +11 +54 + + +11 +36 +49 + + +36 +56 +7 + + + + + + +num_code +num_lines + + +12 + + +1 +2 +321 + + +2 +3 +110 + + +3 +4 +62 + + +4 +5 +38 + + +5 +7 +52 + + +7 +10 +51 + + +10 +14 +14 + + + + + + +num_code +num_comment + + +12 + + +1 +2 +321 + + +2 +3 +110 + + +3 +4 +62 + + +4 +5 +38 + + +5 +7 +52 + + +7 +10 +51 + + +10 +14 +14 + + + + + + +num_comment +element_id + + +12 + + +1 +2 +72 + + +2 +3 +29 + + +3 +4 +16 + + +4 +5 +15 + + +5 +8 +12 + + +8 +13 +15 + + +13 +29 +16 + + +30 +98 +15 + + +112 +578 +3 + + + + + + +num_comment +num_lines + + +12 + + +1 +2 +72 + + +2 +3 +29 + + +3 +4 +16 + + +4 +5 +15 + + +5 +8 +12 + + +8 +13 +15 + + +13 +26 +15 + + +27 +75 +16 + + +75 +112 +3 + + + + + + +num_comment +num_code + + +12 + + +1 +2 +72 + + +2 +3 +29 + + +3 +4 +16 + + +4 +5 +15 + + +5 +8 +12 + + +8 +13 +15 + + +13 +26 +15 + + +27 +75 +16 + + +75 +112 +3 + + + + + + + + +locations_default +100 + + +id +100 + + +file +100 + + +beginLine +100 + + +beginColumn +100 + + +endLine +100 + + +endColumn +100 + + + + +id +file + + +12 + + +1 +2 +2 + + + + + + +id +beginLine + + +12 + + +1 +2 +2 + + + + + + +id +beginColumn + + +12 + + +1 +2 +2 + + + + + + +id +endLine + + +12 + + +1 +2 +2 + + + + + + +id +endColumn + + +12 + + +1 +2 +2 + + + + + + +file +id + + 
+12 + + + + + +file +beginLine + + +12 + + + + + +file +beginColumn + + +12 + + + + + +file +endLine + + +12 + + + + + +file +endColumn + + +12 + + + + + +beginLine +id + + +12 + + + + + +beginLine +file + + +12 + + + + + +beginLine +beginColumn + + +12 + + + + + +beginLine +endLine + + +12 + + + + + +beginLine +endColumn + + +12 + + + + + +beginColumn +id + + +12 + + + + + +beginColumn +file + + +12 + + + + + +beginColumn +beginLine + + +12 + + + + + +beginColumn +endLine + + +12 + + + + + +beginColumn +endColumn + + +12 + + + + + +endLine +id + + +12 + + + + + +endLine +file + + +12 + + + + + +endLine +beginLine + + +12 + + + + + +endLine +beginColumn + + +12 + + + + + +endLine +endColumn + + +12 + + + + + +endColumn +id + + +12 + + + + + +endColumn +file + + +12 + + + + + +endColumn +beginLine + + +12 + + + + + +endColumn +beginColumn + + +12 + + + + + +endColumn +endLine + + +12 + + + + + + + +locations_ast +2310679 + + +id +2310679 + + +module +1527 + + +beginLine +12546 + + +beginColumn +2819 + + +endLine +12539 + + +endColumn +2939 + + + + +id +module + + +12 + + +1 +2 +2310679 + + + + + + +id +beginLine + + +12 + + +1 +2 +2310679 + + + + + + +id +beginColumn + + +12 + + +1 +2 +2310679 + + + + + + +id +endLine + + +12 + + +1 +2 +2310679 + + + + + + +id +endColumn + + +12 + + +1 +2 +2310679 + + + + + + +module +id + + +12 + + +1 +2 +288 + + +2 +30 +114 + + +30 +159 +114 + + +159 +276 +114 + + +279 +427 +116 + + +434 +716 +114 + + +719 +1003 +114 + + +1007 +1409 +116 + + +1426 +1860 +114 + + +1862 +2782 +114 + + +2798 +5578 +114 + + +5667 +58828 +87 + + + + + + +module +beginLine + + +12 + + +1 +2 +288 + + +2 +17 +116 + + +17 +42 +114 + + +42 +72 +116 + + +72 +113 +116 + + +114 +165 +116 + + +167 +231 +116 + + +232 +314 +114 + + +314 +411 +114 + + +413 +634 +114 + + +640 +1326 +114 + + +1326 +6932 +83 + + + + + + +module +beginColumn + + +12 + + +1 +2 +288 + + +2 +7 +114 + + +7 +29 +117 + + +29 +41 +119 + + +41 +49 +126 + + +49 +56 +137 + + +56 +60 +110 + + 
+60 +64 +123 + + +64 +68 +117 + + +68 +74 +127 + + +74 +91 +116 + + +91 +1405 +29 + + + + + + +module +endLine + + +12 + + +1 +2 +288 + + +2 +17 +117 + + +17 +43 +119 + + +44 +74 +121 + + +74 +117 +114 + + +117 +173 +114 + + +173 +238 +114 + + +238 +322 +114 + + +326 +421 +114 + + +421 +666 +116 + + +668 +1461 +114 + + +1472 +6948 +74 + + + + + + +module +endColumn + + +12 + + +1 +2 +288 + + +2 +18 +116 + + +18 +45 +114 + + +45 +59 +130 + + +59 +65 +131 + + +65 +69 +108 + + +69 +72 +109 + + +72 +75 +114 + + +75 +79 +121 + + +79 +86 +120 + + +86 +99 +120 + + +99 +1425 +51 + + + + + + +beginLine +id + + +12 + + +1 +8 +783 + + +8 +11 +960 + + +11 +15 +1027 + + +15 +20 +1012 + + +20 +27 +1050 + + +27 +36 +995 + + +36 +49 +1003 + + +49 +66 +977 + + +66 +107 +951 + + +107 +170 +949 + + +170 +297 +947 + + +297 +636 +941 + + +637 +2279 +941 + + +2283 +2351 +2 + + + + + + +beginLine +module + + +12 + + +1 +2 +1188 + + +2 +3 +1761 + + +3 +4 +510 + + +4 +5 +792 + + +5 +6 +792 + + +6 +9 +1114 + + +9 +11 +726 + + +11 +14 +1084 + + +14 +25 +955 + + +25 +42 +942 + + +42 +71 +976 + + +71 +177 +942 + + +177 +1104 +758 + + + + + + +beginLine +beginColumn + + +12 + + +1 +6 +995 + + +6 +8 +486 + + +8 +9 +780 + + +9 +11 +1091 + + +11 +13 +952 + + +13 +16 +1093 + + +16 +19 +954 + + +19 +23 +1128 + + +23 +29 +954 + + +29 +38 +972 + + +38 +47 +980 + + +47 +59 +976 + + +59 +75 +984 + + +75 +542 +196 + + + + + + +beginLine +endLine + + +12 + + +1 +2 +3511 + + +2 +3 +3490 + + +3 +4 +1501 + + +4 +5 +767 + + +5 +7 +1110 + + +7 +10 +988 + + +10 +17 +1010 + + +17 +51 +166 + + + + + + +beginLine +endColumn + + +12 + + +1 +5 +672 + + +5 +7 +785 + + +7 +9 +868 + + +9 +12 +1028 + + +12 +16 +1156 + + +16 +20 +952 + + +20 +25 +1052 + + +25 +30 +983 + + +30 +40 +1003 + + +40 +52 +959 + + +52 +64 +1026 + + +64 +74 +951 + + +74 +89 +965 + + +89 +546 +141 + + + + + + +beginColumn +id + + +12 + + +1 +2 +1542 + + +2 +3 +877 + + +3 +5 +213 + + +5 +250154 +185 + + + + + + +beginColumn +module + + +12 + + +1 
+2 +2376 + + +2 +3 +238 + + +3 +1104 +204 + + + + + + +beginColumn +beginLine + + +12 + + +1 +2 +1542 + + +2 +3 +882 + + +3 +6 +220 + + +6 +7984 +174 + + + + + + +beginColumn +endLine + + +12 + + +1 +2 +1542 + + +2 +3 +882 + + +3 +6 +220 + + +6 +7972 +174 + + + + + + +beginColumn +endColumn + + +12 + + +1 +2 +2295 + + +2 +3 +304 + + +3 +114 +211 + + +120 +161 +6 + + + + + + +endLine +id + + +12 + + +1 +8 +793 + + +8 +11 +965 + + +11 +15 +996 + + +15 +20 +1005 + + +20 +27 +1056 + + +27 +36 +1016 + + +36 +49 +981 + + +49 +65 +966 + + +65 +106 +956 + + +106 +169 +951 + + +169 +295 +947 + + +295 +626 +941 + + +627 +2214 +941 + + +2217 +2349 +19 + + + + + + +endLine +module + + +12 + + +1 +2 +1210 + + +2 +3 +1754 + + +3 +4 +526 + + +4 +5 +797 + + +5 +6 +760 + + +6 +9 +1109 + + +9 +11 +732 + + +11 +14 +1078 + + +14 +25 +947 + + +25 +42 +956 + + +42 +70 +942 + + +70 +170 +941 + + +170 +1104 +782 + + + + + + +endLine +beginLine + + +12 + + +1 +2 +4048 + + +2 +3 +3046 + + +3 +4 +1345 + + +4 +5 +851 + + +5 +7 +1021 + + +7 +10 +1010 + + +10 +17 +1010 + + +17 +34 +203 + + + + + + +endLine +beginColumn + + +12 + + +1 +6 +999 + + +6 +9 +1140 + + +9 +11 +1056 + + +11 +13 +933 + + +13 +16 +1154 + + +16 +19 +992 + + +19 +23 +1129 + + +23 +29 +999 + + +29 +38 +981 + + +38 +47 +983 + + +47 +59 +985 + + +59 +75 +988 + + +75 +542 +192 + + + + + + +endLine +endColumn + + +12 + + +1 +6 +1045 + + +6 +8 +1010 + + +8 +11 +1073 + + +11 +14 +933 + + +14 +18 +1055 + + +18 +23 +1084 + + +23 +28 +1020 + + +28 +36 +984 + + +36 +48 +999 + + +48 +60 +991 + + +60 +70 +959 + + +70 +84 +963 + + +84 +547 +418 + + + + + + +endColumn +id + + +12 + + +1 +2 +1505 + + +2 +3 +972 + + +3 +5 +227 + + +5 +41083 +221 + + +42453 +55223 +13 + + + + + + +endColumn +module + + +12 + + +1 +2 +2435 + + +2 +3 +264 + + +3 +782 +221 + + +782 +1104 +18 + + + + + + +endColumn +beginLine + + +12 + + +1 +2 +1606 + + +2 +3 +902 + + +3 +6 +228 + + +6 +6777 +202 + + + + + + +endColumn +beginColumn + + +12 + + +1 +2 +2250 + + +2 
+3 +408 + + +3 +56 +221 + + +56 +79 +59 + + + + + + +endColumn +endLine + + +12 + + +1 +2 +1606 + + +2 +3 +902 + + +3 +6 +228 + + +6 +6726 +202 + + + + + + + + +py_module_path +3066 + + +module +3066 + + +file +3066 + + + + +module +file + + +12 + + +1 +2 +3066 + + + + + + +file +module + + +12 + + +1 +2 +3066 + + + + + + + + +file_contents +100 + + +file +3066 + + +contents +100 + + + + +file +contents + + +12 + + +1 +2 +100 + + + + + + +contents +file + + +12 + + +1 +2 +100 + + + + + + + + +variable +242770 + + +id +242770 + + +scope +50174 + + +name +54891 + + + + +id +scope + + +12 + + +1 +2 +242770 + + + + + + +id +name + + +12 + + +1 +2 +242770 + + + + + + +scope +id + + +12 + + +1 +2 +10764 + + +2 +3 +14394 + + +3 +4 +7657 + + +4 +5 +4580 + + +5 +6 +2991 + + +6 +9 +4606 + + +9 +22 +3819 + + +22 +233 +1360 + + + + + + +scope +name + + +12 + + +1 +2 +10764 + + +2 +3 +14394 + + +3 +4 +7657 + + +4 +5 +4580 + + +5 +6 +2991 + + +6 +9 +4606 + + +9 +22 +3819 + + +22 +233 +1360 + + + + + + +name +id + + +12 + + +1 +2 +36525 + + +2 +3 +8506 + + +3 +5 +4396 + + +5 +20 +4134 + + +20 +10542 +1327 + + + + + + +name +scope + + +12 + + +1 +2 +36525 + + +2 +3 +8506 + + +3 +5 +4396 + + +5 +20 +4134 + + +20 +10542 +1327 + + + + + + + + +py_line_lengths +100 + + +id +100 + + +file +100 + + +line +100 + + +length +100 + + + + +id +file + + +12 + + +1 +2 +2 + + + + + + +id +line + + +12 + + +1 +2 +2 + + + + + + +id +length + + +12 + + +1 +2 +2 + + + + + + +file +id + + +12 + + + + + +file +line + + +12 + + + + + +file +length + + +12 + + + + + +line +id + + +12 + + + + + +line +file + + +12 + + + + + +line +length + + +12 + + + + + +length +id + + +12 + + + + + +length +file + + +12 + + + + + +length +line + + +12 + + + + + + + +py_Classes +10244 + + +id +10244 + + +parent +10244 + + + + +id +parent + + +12 + + +1 +2 +10244 + + + + + + +parent +id + + +12 + + +1 +2 +10244 + + + + + + + + +py_Functions +44860 + + +id +44860 + + +parent +44860 + + + + +id +parent + + +12 + + +1 +2 
+44860 + + + + + + +parent +id + + +12 + + +1 +2 +44860 + + + + + + + + +py_Modules +5983 + + +id +5983 + + + + + +py_extracted_version +3337 + + +module +3337 + + +version +1 + + + + +module +version + + +12 + + +1 +2 +3337 + + + + + + +version +module + + +12 + + +3337 +3338 +1 + + + + + + + + +py_StringParts +6399 + + +id +6399 + + +parent +2296 + + +idx +62 + + + + +id +parent + + +12 + + +1 +2 +6399 + + + + + + +id +idx + + +12 + + +1 +2 +6399 + + + + + + +parent +id + + +12 + + +2 +3 +1598 + + +3 +4 +380 + + +4 +5 +142 + + +5 +63 +176 + + + + + + +parent +idx + + +12 + + +2 +3 +1598 + + +3 +4 +380 + + +4 +5 +142 + + +5 +63 +176 + + + + + + +idx +id + + +12 + + +4 +5 +17 + + +5 +6 +23 + + +6 +9 +5 + + +9 +14 +5 + + +16 +59 +5 + + +72 +699 +5 + + +2296 +2297 +2 + + + + + + +idx +parent + + +12 + + +4 +5 +17 + + +5 +6 +23 + + +6 +9 +5 + + +9 +14 +5 + + +16 +59 +5 + + +72 +699 +5 + + +2296 +2297 +2 + + + + + + + + +py_StringPart_lists +2296 + + +id +2296 + + +parent +2296 + + + + +id +parent + + +12 + + +1 +2 +2296 + + + + + + +parent +id + + +12 + + +1 +2 +2296 + + + + + + + + +py_aliases +21374 + + +id +21374 + + +parent +14396 + + +idx +110 + + + + +id +parent + + +12 + + +1 +2 +21374 + + + + + + +id +idx + + +12 + + +1 +2 +21374 + + + + + + +parent +id + + +12 + + +1 +2 +11488 + + +2 +3 +1597 + + +3 +7 +1116 + + +7 +111 +195 + + + + + + +parent +idx + + +12 + + +1 +2 +11488 + + +2 +3 +1597 + + +3 +7 +1116 + + +7 +111 +195 + + + + + + +idx +id + + +12 + + +1 +2 +21 + + +2 +3 +2 + + +3 +4 +30 + + +4 +5 +4 + + +5 +6 +9 + + +6 +9 +10 + + +9 +15 +8 + + +18 +32 +9 + + +36 +113 +9 + + +142 +14397 +8 + + + + + + +idx +parent + + +12 + + +1 +2 +21 + + +2 +3 +2 + + +3 +4 +30 + + +4 +5 +4 + + +5 +6 +9 + + +6 +9 +10 + + +9 +15 +8 + + +18 +32 +9 + + +36 +113 +9 + + +142 +14397 +8 + + + + + + + + +py_alias_lists +14396 + + +id +14396 + + +parent +14396 + + + + +id +parent + + +12 + + +1 +2 +14396 + + + + + + +parent +id + + +12 + + +1 +2 +14396 + + + + + + + + 
+py_arguments +41982 + + +id +41982 + + +parent +41982 + + + + +id +parent + + +12 + + +1 +2 +41982 + + + + + + +parent +id + + +12 + + +1 +2 +41982 + + + + + + + + +py_bools +26986 + + +parent +26986 + + +idx +3 + + + + +parent +idx + + +12 + + +1 +2 +26986 + + + + + + +idx +parent + + +12 + + +964 +965 +1 + + +3487 +3488 +1 + + +22535 +22536 +1 + + + + + + + + +py_boolops +10907 + + +id +10907 + + +kind +2 + + +parent +10907 + + + + +id +kind + + +12 + + +1 +2 +10907 + + + + + + +id +parent + + +12 + + +1 +2 +10907 + + + + + + +kind +id + + +12 + + +2646 +2647 +1 + + +5231 +5232 +1 + + + + + + +kind +parent + + +12 + + +2646 +2647 +1 + + +5231 +5232 +1 + + + + + + +parent +id + + +12 + + +1 +2 +10907 + + + + + + +parent +kind + + +12 + + +1 +2 +10907 + + + + + + + + +py_bytes +211200 + + +id +48658 + + +parent +105600 + + +idx +2 + + + + +id +parent + + +12 + + +1 +2 +37453 + + +2 +3 +6003 + + +3 +8 +3791 + + +8 +71667 +1411 + + + + + + +id +idx + + +12 + + +1 +2 +48644 + + +2 +3 +14 + + + + + + +parent +id + + +12 + + +1 +2 +14 + + +2 +3 +105586 + + + + + + +parent +idx + + +12 + + +2 +3 +105600 + + + + + + +idx +id + + +12 + + +14 +15 +1 + + +48658 +48659 +1 + + + + + + +idx +parent + + +12 + + +105600 +105601 +2 + + + + + + + + +py_cmpops +38007 + + +id +38007 + + +kind +29 + + +parent +37666 + + +idx +8 + + + + +id +kind + + +12 + + +1 +2 +38007 + + + + + + +id +parent + + +12 + + +1 +2 +38007 + + + + + + +id +idx + + +12 + + +1 +2 +38007 + + + + + + +kind +id + + +12 + + +380 +381 +2 + + +440 +441 +2 + + +563 +564 +2 + + +615 +616 +2 + + +673 +674 +2 + + +1027 +1028 +2 + + +1529 +1530 +2 + + +1597 +1598 +2 + + +2144 +2145 +2 + + +3828 +3829 +2 + + + + + + +kind +parent + + +12 + + +317 +318 +2 + + +439 +440 +2 + + +563 +564 +2 + + +612 +613 +2 + + +669 +670 +2 + + +1027 +1028 +2 + + +1529 +1530 +2 + + +1597 +1598 +2 + + +2144 +2145 +2 + + +3819 +3820 +2 + + + + + + +kind +idx + + +12 + + +1 +2 +11 + + +2 +3 +14 + + +3 +4 +2 + + + + + + +parent +id + + +12 + 
+ +1 +2 +37330 + + +2 +4 +335 + + + + + + +parent +kind + + +12 + + +1 +2 +37562 + + +2 +3 +103 + + + + + + +parent +idx + + +12 + + +1 +2 +37330 + + +2 +4 +335 + + + + + + +idx +id + + +12 + + +2 +3 +2 + + +113 +114 +2 + + +12681 +12682 +2 + + + + + + +idx +kind + + +12 + + +1 +2 +2 + + +6 +7 +2 + + +10 +11 +2 + + + + + + +idx +parent + + +12 + + +2 +3 +2 + + +113 +114 +2 + + +12681 +12682 +2 + + + + + + + + +py_cmpop_lists +37666 + + +id +37666 + + +parent +37666 + + + + +id +parent + + +12 + + +1 +2 +37666 + + + + + + +parent +id + + +12 + + +1 +2 +37666 + + + + + + + + +py_comprehensions +1688 + + +id +1688 + + +parent +1682 + + +idx +2 + + + + +id +parent + + +12 + + +1 +2 +1688 + + + + + + +id +idx + + +12 + + +1 +2 +1688 + + + + + + +parent +id + + +12 + + +1 +2 +1676 + + +2 +3 +6 + + + + + + +parent +idx + + +12 + + +1 +2 +1676 + + +2 +3 +6 + + + + + + +idx +id + + +12 + + +6 +7 +1 + + +1682 +1683 +1 + + + + + + +idx +parent + + +12 + + +6 +7 +1 + + +1682 +1683 +1 + + + + + + + + +py_comprehension_lists +1682 + + +id +1682 + + +parent +1682 + + + + +id +parent + + +12 + + +1 +2 +1682 + + + + + + +parent +id + + +12 + + +1 +2 +1682 + + + + + + + + +py_dict_items +167901 + + +id +167901 + + +kind +4 + + +parent +19804 + + +idx +7730 + + + + +id +kind + + +12 + + +1 +2 +167901 + + + + + + +id +parent + + +12 + + +1 +2 +167901 + + + + + + +id +idx + + +12 + + +1 +2 +167901 + + + + + + +kind +id + + +12 + + +326 +327 +1 + + +53883 +53884 +1 + + +67045 +67046 +1 + + + + + + +kind +parent + + +12 + + +326 +327 +1 + + +1881 +1882 +1 + + +12123 +12124 +1 + + + + + + +kind +idx + + +12 + + +7 +8 +1 + + +18 +19 +1 + + +5583 +5584 +1 + + + + + + +parent +id + + +12 + + +1 +2 +5811 + + +2 +3 +1851 + + +3 +6 +1700 + + +6 +7 +8083 + + +7 +12 +1826 + + +12 +5584 +530 + + + + + + +parent +kind + + +12 + + +1 +2 +19765 + + +2 +3 +38 + + + + + + +parent +idx + + +12 + + +1 +2 +5811 + + +2 +3 +1851 + + +3 +6 +1700 + + +6 +7 +8083 + + +7 +12 +1826 + + +12 +5584 +530 + + + + + + 
+idx +id + + +12 + + +1 +2 +1654 + + +2 +3 +1982 + + +3 +4 +811 + + +4 +6 +192 + + +6 +7 +753 + + +7 +8 +962 + + +8 +20 +610 + + +20 +69 +584 + + +69 +14303 +178 + + + + + + +idx +kind + + +12 + + +1 +2 +7705 + + +2 +4 +24 + + + + + + +idx +parent + + +12 + + +1 +2 +1654 + + +2 +3 +1982 + + +3 +4 +811 + + +4 +6 +192 + + +6 +7 +753 + + +7 +8 +962 + + +8 +20 +610 + + +20 +69 +584 + + +69 +14303 +178 + + + + + + + + +py_dict_item_lists +33758 + + +id +33758 + + +parent +33758 + + + + +id +parent + + +12 + + +1 +2 +33758 + + + + + + +parent +id + + +12 + + +1 +2 +33758 + + + + + + + + +py_exprs +1684031 + + +id +1684031 + + +kind +89 + + +parent +1380134 + + +idx +597 + + + + +id +kind + + +12 + + +1 +2 +1684031 + + + + + + +id +parent + + +12 + + +1 +2 +1684031 + + + + + + +id +idx + + +12 + + +1 +2 +1684031 + + + + + + +kind +id + + +12 + + +15 +28 +5 + + +39 +89 +5 + + +134 +189 +5 + + +281 +360 +5 + + +426 +570 +5 + + +1056 +1205 +5 + + +1327 +1791 +5 + + +1942 +3179 +5 + + +3398 +4019 +5 + + +4476 +4980 +5 + + +8519 +9720 +5 + + +10633 +12682 +5 + + +13945 +16376 +5 + + +46173 +58988 +5 + + +75624 +284809 +5 + + + + + + +kind +parent + + +12 + + +15 +28 +5 + + +39 +87 +5 + + +134 +175 +5 + + +271 +359 +5 + + +426 +560 +5 + + +1036 +1119 +5 + + +1327 +1791 +5 + + +1942 +3179 +5 + + +3357 +3716 +5 + + +4285 +4980 +5 + + +8177 +9473 +5 + + +10060 +11624 +5 + + +13945 +15094 +5 + + +35526 +57772 +5 + + +72662 +245283 +5 + + + + + + +kind +idx + + +12 + + +1 +2 +8 + + +2 +3 +17 + + +3 +4 +2 + + +5 +6 +5 + + +6 +7 +11 + + +8 +9 +2 + + +9 +10 +5 + + +11 +12 +5 + + +12 +13 +5 + + +15 +18 +5 + + +23 +27 +5 + + +37 +127 +5 + + +201 +202 +2 + + + + + + +parent +id + + +12 + + +1 +2 +1147073 + + +2 +3 +197316 + + +3 +202 +35744 + + + + + + +parent +kind + + +12 + + +1 +2 +1255206 + + +2 +3 +120198 + + +3 +11 +4728 + + + + + + +parent +idx + + +12 + + +1 +2 +1147073 + + +2 +3 +197316 + + +3 +202 +35744 + + + + + + +idx +id + + +12 + + +1 +2 +23 + + +2 +3 +199 + + +3 +4 +148 + 
+ +4 +6 +35 + + +6 +8 +50 + + +9 +26 +47 + + +26 +102 +47 + + +113 +197687 +44 + + + + + + +idx +kind + + +12 + + +1 +2 +222 + + +2 +3 +258 + + +3 +4 +8 + + +4 +5 +47 + + +5 +21 +47 + + +22 +29 +11 + + + + + + +idx +parent + + +12 + + +1 +2 +23 + + +2 +3 +199 + + +3 +4 +148 + + +4 +6 +35 + + +6 +8 +50 + + +9 +26 +47 + + +26 +102 +47 + + +113 +197687 +44 + + + + + + + + +py_expr_contexts +1140675 + + +id +1140675 + + +kind +11 + + +parent +1140675 + + + + +id +kind + + +12 + + +1 +2 +1140675 + + + + + + +id +parent + + +12 + + +1 +2 +1140675 + + + + + + +kind +id + + +12 + + +446 +447 +2 + + +29477 +29478 +2 + + +66896 +66897 +2 + + +287209 +287210 +2 + + + + + + +kind +parent + + +12 + + +446 +447 +2 + + +29477 +29478 +2 + + +66896 +66897 +2 + + +287209 +287210 +2 + + + + + + +parent +id + + +12 + + +1 +2 +1140675 + + + + + + +parent +kind + + +12 + + +1 +2 +1140675 + + + + + + + + +py_expr_lists +430986 + + +id +430986 + + +parent +423623 + + +idx +17 + + + + +id +parent + + +12 + + +1 +2 +430986 + + + + + + +id +idx + + +12 + + +1 +2 +430986 + + + + + + +parent +id + + +12 + + +1 +2 +416966 + + +2 +5 +6656 + + + + + + +parent +idx + + +12 + + +1 +2 +416966 + + +2 +5 +6656 + + + + + + +idx +id + + +12 + + +175 +176 +5 + + +2522 +2523 +2 + + +12681 +12682 +2 + + +54095 +54096 +2 + + +75451 +75452 +2 + + + + + + +idx +parent + + +12 + + +175 +176 +5 + + +2522 +2523 +2 + + +12681 +12682 +2 + + +54095 +54096 +2 + + +75451 +75452 +2 + + + + + + + + +py_ints +21532 + + +id +4 + + +parent +21532 + + + + +id +parent + + +12 + + +2 +3 +1 + + +207 +208 +1 + + +2770 +2771 +1 + + +18553 +18554 +1 + + + + + + +parent +id + + +12 + + +1 +2 +21532 + + + + + + + + +py_locations +2184728 + + +id +2184728 + + +parent +2184728 + + + + +id +parent + + +12 + + +1 +2 +2184728 + + + + + + +parent +id + + +12 + + +1 +2 +2184728 + + + + + + + + +py_numbers +117446 + + +id +4249 + + +parent +58723 + + +idx +2 + + + + +id +parent + + +12 + + +1 +2 +2830 + + +2 +3 +632 + + +3 +4 +291 + + +4 
+11 +320 + + +11 +15704 +176 + + + + + + +id +idx + + +12 + + +1 +2 +1355 + + +2 +3 +2894 + + + + + + +parent +id + + +12 + + +1 +2 +57251 + + +2 +3 +1472 + + + + + + +parent +idx + + +12 + + +2 +3 +58723 + + + + + + +idx +id + + +12 + + +3302 +3303 +1 + + +3841 +3842 +1 + + + + + + +idx +parent + + +12 + + +58723 +58724 +2 + + + + + + + + +py_operators +28868 + + +id +28868 + + +kind +35 + + +parent +28868 + + + + +id +kind + + +12 + + +1 +2 +28868 + + + + + + +id +parent + + +12 + + +1 +2 +28868 + + + + + + +kind +id + + +12 + + +53 +54 +2 + + +64 +65 +2 + + +94 +95 +2 + + +121 +122 +2 + + +122 +123 +2 + + +169 +170 +2 + + +268 +269 +2 + + +269 +270 +2 + + +747 +748 +2 + + +1056 +1057 +2 + + +2176 +2177 +2 + + +4580 +4581 +2 + + + + + + +kind +parent + + +12 + + +53 +54 +2 + + +64 +65 +2 + + +94 +95 +2 + + +121 +122 +2 + + +122 +123 +2 + + +169 +170 +2 + + +268 +269 +2 + + +269 +270 +2 + + +747 +748 +2 + + +1056 +1057 +2 + + +2176 +2177 +2 + + +4580 +4581 +2 + + + + + + +parent +id + + +12 + + +1 +2 +28868 + + + + + + +parent +kind + + +12 + + +1 +2 +28868 + + + + + + + + +py_parameter_lists +43271 + + +id +43271 + + +parent +43271 + + + + +id +parent + + +12 + + +1 +2 +43271 + + + + + + +parent +id + + +12 + + +1 +2 +43271 + + + + + + + + +py_stmts +372643 + + +id +372643 + + +kind +59 + + +parent +156700 + + +idx +888 + + + + +id +kind + + +12 + + +1 +2 +372643 + + + + + + +id +parent + + +12 + + +1 +2 +372643 + + + + + + +id +idx + + +12 + + +1 +2 +372643 + + + + + + +kind +id + + +12 + + +12 +13 +2 + + +47 +48 +2 + + +132 +133 +2 + + +387 +388 +2 + + +404 +405 +2 + + +559 +560 +2 + + +572 +573 +2 + + +673 +674 +2 + + +720 +721 +2 + + +967 +968 +2 + + +1231 +1232 +2 + + +1889 +1890 +2 + + +2091 +2092 +2 + + +2624 +2625 +2 + + +3001 +3002 +2 + + +3870 +3871 +2 + + +12163 +12164 +2 + + +18052 +18053 +2 + + +25032 +25033 +2 + + +51031 +51032 +2 + + + + + + +kind +parent + + +12 + + +12 +13 +2 + + +37 +38 +2 + + +123 +124 +2 + + +356 +357 +2 + + +404 +405 +2 + + 
+471 +472 +2 + + +557 +558 +2 + + +572 +573 +2 + + +677 +678 +2 + + +967 +968 +2 + + +984 +985 +2 + + +1094 +1095 +2 + + +1777 +1778 +2 + + +1895 +1896 +2 + + +2624 +2625 +2 + + +3544 +3545 +2 + + +12163 +12164 +2 + + +12758 +12759 +2 + + +18445 +18446 +2 + + +20426 +20427 +2 + + + + + + +kind +idx + + +12 + + +2 +3 +5 + + +6 +7 +2 + + +7 +8 +5 + + +8 +9 +2 + + +13 +14 +2 + + +15 +16 +2 + + +18 +19 +5 + + +21 +22 +2 + + +27 +28 +2 + + +33 +34 +2 + + +37 +38 +2 + + +38 +39 +2 + + +42 +43 +2 + + +51 +52 +2 + + +84 +85 +2 + + +187 +188 +2 + + +293 +294 +2 + + + + + + +parent +id + + +12 + + +1 +2 +96284 + + +2 +3 +25704 + + +3 +4 +11789 + + +4 +7 +14376 + + +7 +300 +8545 + + + + + + +parent +kind + + +12 + + +1 +2 +106000 + + +2 +3 +31003 + + +3 +4 +12071 + + +4 +9 +7624 + + + + + + +parent +idx + + +12 + + +1 +2 +96284 + + +2 +3 +25704 + + +3 +4 +11789 + + +4 +7 +14376 + + +7 +300 +8545 + + + + + + +idx +id + + +12 + + +1 +2 +335 + + +2 +5 +59 + + +5 +6 +83 + + +6 +14 +74 + + +14 +25 +68 + + +25 +53 +68 + + +53 +103 +68 + + +107 +335 +68 + + +369 +52757 +62 + + + + + + +idx +kind + + +12 + + +1 +2 +344 + + +2 +3 +267 + + +3 +4 +83 + + +4 +5 +62 + + +5 +10 +71 + + +10 +21 +59 + + + + + + +idx +parent + + +12 + + +1 +2 +335 + + +2 +5 +59 + + +5 +6 +83 + + +6 +14 +74 + + +14 +25 +68 + + +25 +53 +68 + + +53 +103 +68 + + +107 +335 +68 + + +369 +52757 +62 + + + + + + + + +py_stmt_lists +156700 + + +id +156700 + + +parent +132647 + + +idx +14 + + + + +id +parent + + +12 + + +1 +2 +156700 + + + + + + +id +idx + + +12 + + +1 +2 +156700 + + + + + + +parent +id + + +12 + + +1 +2 +109538 + + +2 +3 +22179 + + +3 +5 +929 + + + + + + +parent +idx + + +12 + + +1 +2 +109538 + + +2 +3 +22179 + + +3 +5 +929 + + + + + + +idx +id + + +12 + + +460 +461 +2 + + +4033 +4034 +2 + + +13686 +13687 +2 + + +15103 +15104 +2 + + +19474 +19475 +2 + + + + + + +idx +parent + + +12 + + +460 +461 +2 + + +4033 +4034 +2 + + +13686 +13687 +2 + + +15103 +15104 +2 + + +19474 +19475 +2 + + + + + + + + 
+py_strs +985327 + + +id +140335 + + +parent +695288 + + +idx +5 + + + + +id +parent + + +12 + + +1 +2 +79968 + + +2 +3 +31802 + + +3 +4 +9602 + + +4 +8 +11026 + + +8 +143732 +7935 + + + + + + +id +idx + + +12 + + +1 +2 +106110 + + +2 +3 +22027 + + +3 +4 +12190 + + +4 +5 +6 + + + + + + +parent +id + + +12 + + +1 +2 +405951 + + +2 +3 +289317 + + +3 +5 +19 + + + + + + +parent +idx + + +12 + + +1 +2 +405275 + + +2 +3 +289993 + + +3 +5 +19 + + + + + + +idx +id + + +12 + + +34 +35 +1 + + +17059 +17060 +1 + + +25371 +25372 +1 + + +92414 +92415 +1 + + + + + + +idx +parent + + +12 + + +42 +43 +1 + + +37559 +37560 +1 + + +294366 +294367 +1 + + +379612 +379613 +1 + + + + + + + + +py_str_lists +427 + + +id +427 + + +parent +427 + + + + +id +parent + + +12 + + +1 +2 +427 + + + + + + +parent +id + + +12 + + +1 +2 +427 + + + + + + + + +py_unaryops +13295 + + +id +13295 + + +kind +11 + + +parent +13295 + + + + +id +kind + + +12 + + +1 +2 +13295 + + + + + + +id +parent + + +12 + + +1 +2 +13295 + + + + + + +kind +id + + +12 + + +5 +6 +2 + + +20 +21 +2 + + +1537 +1538 +2 + + +2914 +2915 +2 + + + + + + +kind +parent + + +12 + + +5 +6 +2 + + +20 +21 +2 + + +1537 +1538 +2 + + +2914 +2915 +2 + + + + + + +parent +id + + +12 + + +1 +2 +13295 + + + + + + +parent +kind + + +12 + + +1 +2 +13295 + + + + + + + + +py_variables +845963 + + +id +242770 + + +parent +845963 + + + + +id +parent + + +12 + + +1 +2 +61149 + + +2 +3 +77254 + + +3 +4 +38584 + + +4 +5 +21392 + + +5 +7 +20913 + + +7 +15 +18418 + + +15 +318 +5058 + + + + + + +parent +id + + +12 + + +1 +2 +845963 + + + + + + + + +py_absolute_names +100 + + +module +100 + + +relname +100 + + +absname +100 + + + + +module +relname + + +12 + + + + + +module +absname + + +12 + + + + + +relname +module + + +12 + + + + + +relname +absname + + +12 + + + + + +absname +module + + +12 + + + + + +absname +relname + + +12 + + + + + + + +py_exports +19755 + + +id +1138 + + +name +16813 + + + + +id +name + + +12 + + +1 +2 +141 + + +2 +3 +164 + + +3 +4 
+109 + + +4 +5 +112 + + +5 +7 +103 + + +7 +10 +91 + + +10 +14 +88 + + +14 +20 +90 + + +20 +33 +94 + + +33 +53 +90 + + +53 +2260 +52 + + + + + + +name +id + + +12 + + +1 +2 +16070 + + +2 +143 +742 + + + + + + + + +py_successors +2366367 + + +predecessor +2270167 + + +successor +2275369 + + + + +predecessor +successor + + +12 + + +1 +2 +2177926 + + +2 +9 +92240 + + + + + + +successor +predecessor + + +12 + + +1 +2 +2225590 + + +2 +173 +49778 + + + + + + + + +py_true_successors +70315 + + +predecessor +70315 + + +successor +67897 + + + + +predecessor +successor + + +12 + + +1 +2 +70315 + + + + + + +successor +predecessor + + +12 + + +1 +2 +65747 + + +2 +7 +2150 + + + + + + + + +py_exception_successors +43951 + + +predecessor +39261 + + +successor +6911 + + + + +predecessor +successor + + +12 + + +1 +2 +35379 + + +2 +3 +3448 + + +3 +7 +433 + + + + + + +successor +predecessor + + +12 + + +1 +2 +1045 + + +2 +3 +1497 + + +3 +4 +1271 + + +4 +5 +760 + + +5 +6 +463 + + +6 +8 +519 + + +8 +12 +525 + + +12 +27 +534 + + +27 +173 +294 + + + + + + + + +py_false_successors +69439 + + +predecessor +69439 + + +successor +59260 + + + + +predecessor +successor + + +12 + + +1 +2 +69439 + + + + + + +successor +predecessor + + +12 + + +1 +2 +51296 + + +2 +3 +6510 + + +3 +13 +1452 + + + + + + + + +py_flow_bb_node +2323431 + + +flownode +2323431 + + +realnode +2208164 + + +basicblock +215280 + + +index +23948 + + + + +flownode +realnode + + +12 + + +1 +2 +2323431 + + + + + + +flownode +basicblock + + +12 + + +1 +2 +2323431 + + + + + + +flownode +index + + +12 + + +1 +2 +2323431 + + + + + + +realnode +flownode + + +12 + + +1 +2 +2102771 + + +2 +9 +105392 + + + + + + +realnode +basicblock + + +12 + + +1 +2 +2135213 + + +2 +7 +72950 + + + + + + +realnode +index + + +12 + + +1 +2 +2155174 + + +2 +5 +52989 + + + + + + +basicblock +flownode + + +12 + + +1 +2 +37515 + + +2 +3 +17987 + + +3 +4 +19072 + + +4 +5 +17365 + + +5 +6 +17931 + + +6 +7 +13664 + + +7 +8 +10900 + + +8 +10 +16975 + + +10 +13 
+17232 + + +13 +19 +17763 + + +19 +26 +16605 + + +26 +17296 +12265 + + + + + + +basicblock +realnode + + +12 + + +1 +2 +37832 + + +2 +3 +17905 + + +3 +4 +19216 + + +4 +5 +18823 + + +5 +6 +16929 + + +6 +7 +13644 + + +7 +8 +11703 + + +8 +10 +16817 + + +10 +13 +16741 + + +13 +19 +17322 + + +19 +26 +16368 + + +26 +17295 +11973 + + + + + + +basicblock +index + + +12 + + +1 +2 +37515 + + +2 +3 +17987 + + +3 +4 +19072 + + +4 +5 +17365 + + +5 +6 +17931 + + +6 +7 +13664 + + +7 +8 +10900 + + +8 +10 +16975 + + +10 +13 +17232 + + +13 +19 +17763 + + +19 +26 +16605 + + +26 +17296 +12265 + + + + + + +index +flownode + + +12 + + +1 +2 +4957 + + +2 +3 +4220 + + +3 +4 +1805 + + +4 +6 +1253 + + +6 +8 +1750 + + +8 +9 +2240 + + +9 +10 +2678 + + +10 +19 +1819 + + +19 +60 +1815 + + +60 +155471 +1408 + + + + + + +index +realnode + + +12 + + +1 +2 +4957 + + +2 +3 +4220 + + +3 +4 +1805 + + +4 +6 +1253 + + +6 +8 +1750 + + +8 +9 +2240 + + +9 +10 +2678 + + +10 +19 +1819 + + +19 +60 +1815 + + +60 +141411 +1408 + + + + + + +index +basicblock + + +12 + + +1 +2 +4957 + + +2 +3 +4220 + + +3 +4 +1805 + + +4 +6 +1253 + + +6 +8 +1750 + + +8 +9 +2240 + + +9 +10 +2678 + + +10 +19 +1819 + + +19 +60 +1815 + + +60 +155471 +1408 + + + + + + + + +py_scope_flow +405895 + + +flow +405895 + + +scope +56616 + + +kind +4 + + + + +flow +scope + + +12 + + +1 +2 +405895 + + + + + + +flow +kind + + +12 + + +1 +2 +405895 + + + + + + +scope +flow + + +12 + + +2 +3 +15663 + + +3 +4 +8677 + + +4 +5 +7135 + + +5 +6 +4823 + + +6 +7 +3426 + + +7 +9 +4807 + + +9 +13 +5102 + + +13 +23 +4277 + + +23 +767 +2706 + + + + + + +scope +kind + + +12 + + +2 +3 +16115 + + +3 +4 +39685 + + +4 +5 +816 + + + + + + +kind +flow + + +12 + + +18869 +18870 +1 + + +37919 +37920 +1 + + +56616 +56617 +1 + + +292491 +292492 +1 + + + + + + +kind +scope + + +12 + + +18869 +18870 +1 + + +37919 +37920 +1 + + +41145 +41146 +1 + + +56616 +56617 +1 + + + + + + + + +py_idoms +2275369 + + +node +2275369 + + +immediate_dominator +2207166 + + + + +node 
+immediate_dominator + + +12 + + +1 +2 +2275369 + + + + + + +immediate_dominator +node + + +12 + + +1 +2 +2153132 + + +2 +11 +54033 + + + + + + + + +py_ssa_phi +46687 + + +phi +21496 + + +arg +44830 + + + + +phi +arg + + +12 + + +1 +2 +1782 + + +2 +3 +16149 + + +3 +4 +2560 + + +4 +23 +1003 + + + + + + +arg +phi + + +12 + + +1 +2 +43208 + + +2 +8 +1621 + + + + + + + + +py_ssa_var +272292 + + +id +272292 + + +var +217265 + + + + +id +var + + +12 + + +1 +2 +272292 + + + + + + +var +id + + +12 + + +1 +2 +194518 + + +2 +4 +16728 + + +4 +35 +6017 + + + + + + + + +py_ssa_use +487906 + + +node +421169 + + +var +239604 + + + + +node +var + + +12 + + +1 +2 +416004 + + +2 +185 +5165 + + + + + + +var +node + + +12 + + +1 +2 +151110 + + +2 +3 +42380 + + +3 +4 +18095 + + +4 +7 +18656 + + +7 +203 +9362 + + + + + + + + +py_ssa_defn +267795 + + +id +267795 + + +node +261828 + + + + +id +node + + +12 + + +1 +2 +267795 + + + + + + +node +id + + +12 + + +1 +2 +258774 + + +2 +81 +3053 + + + + + + + + +py_scopes +2056674 + + +node +2056674 + + +scope +51911 + + + + +node +scope + + +12 + + +1 +2 +2056674 + + + + + + +scope +node + + +12 + + +1 +5 +3923 + + +5 +7 +3611 + + +7 +9 +3715 + + +9 +11 +3941 + + +11 +14 +4776 + + +14 +17 +3965 + + +17 +22 +4491 + + +22 +28 +4078 + + +28 +37 +4161 + + +37 +50 +3938 + + +50 +72 +3914 + + +72 +118 +3953 + + +118 +5003 +3439 + + + + + + + + +py_scope_location +56618 + + +id +56618 + + +scope +56618 + + + + +id +scope + + +12 + + +1 +2 +56618 + + + + + + +scope +id + + +12 + + +1 +2 +56618 + + + + + + + + +py_flags_versioned +136 + + +name +136 + + +value +83 + + +version +2 + + + + +name +value + + +12 + + +1 +2 +136 + + + + + + +value +name + + +12 + + +1 +2 +68 + + +2 +3 +11 + + +15 +16 +2 + + + + + + + + +py_syntax_error_versioned +30 + + +id +30 + + +message +4 + + +version +2 + + + + +id +message + + +12 + + +1 +2 +30 + + + + + + +message +id + + +12 + + +1 +2 +1 + + +4 +5 +1 + + +17 +18 +1 + + + + + + + + +py_comments +77830 + + +id +77830 + 
+ +text +61555 + + +location +77830 + + + + +id +text + + +12 + + +1 +2 +77830 + + + + + + +id +location + + +12 + + +1 +2 +77830 + + + + + + +text +id + + +12 + + +1 +2 +56275 + + +2 +5 +4845 + + +5 +942 +434 + + + + + + +text +location + + +12 + + +1 +2 +56275 + + +2 +5 +4845 + + +5 +942 +434 + + + + + + +location +id + + +12 + + +1 +2 +77830 + + + + + + +location +text + + +12 + + +1 +2 +77830 + + + + + + + + +py_cobjects +112856 + + +obj +112856 + + + + + +py_cobjecttypes +111600 + + +obj +111600 + + +typeof +65 + + + + +obj +typeof + + +12 + + +1 +2 +111600 + + + + + + +typeof +obj + + +12 + + +1 +2 +27 + + +2 +3 +4 + + +3 +5 +5 + + +6 +19 +5 + + +19 +54 +5 + + +58 +295 +5 + + +325 +857 +5 + + +923 +73625 +5 + + + + + + + + +py_cobjectnames +111600 + + +obj +111600 + + +name +106332 + + + + +obj +name + + +12 + + +1 +2 +111600 + + + + + + +name +obj + + +12 + + +1 +2 +105898 + + +2 +413 +434 + + + + + + + + +py_cobject_sources +114955 + + +obj +112856 + + +kind +2 + + + + +obj +kind + + +12 + + +1 +2 +110757 + + +2 +3 +2099 + + + + + + +kind +obj + + +12 + + +2423 +2424 +1 + + +80595 +80596 +1 + + + + + + + + +py_cmembers_versioned +21362 + + +object +1681 + + +name +8322 + + +member +15501 + + +version +2 + + + + +object +name + + +12 + + +3 +4 +59 + + +4 +5 +448 + + +5 +8 +118 + + +8 +9 +582 + + +9 +12 +154 + + +12 +20 +133 + + +20 +50 +127 + + +58 +312 +56 + + + + + + +object +member + + +12 + + +3 +4 +59 + + +4 +5 +448 + + +5 +8 +118 + + +8 +9 +591 + + +9 +12 +154 + + +12 +20 +133 + + +21 +59 +127 + + +60 +206 +47 + + + + + + +name +object + + +12 + + +1 +2 +7390 + + +2 +6 +656 + + +6 +567 +276 + + + + + + +name +member + + +12 + + +1 +2 +7407 + + +2 +6 +647 + + +6 +280 +267 + + + + + + +member +object + + +12 + + +1 +2 +14765 + + +2 +249 +736 + + + + + + +member +name + + +12 + + +1 +2 +14803 + + +2 +84 +698 + + + + + + + + +py_citems +3959 + + +object +213 + + +index +593 + + +member +1906 + + + + +object +index + + +12 + + +1 +2 +41 + + +2 +3 +37 + + +3 
+4 +37 + + +4 +5 +7 + + +5 +6 +29 + + +6 +12 +16 + + +12 +22 +16 + + +24 +42 +16 + + +42 +594 +14 + + + + + + +object +member + + +12 + + +1 +2 +41 + + +2 +3 +40 + + +3 +4 +34 + + +4 +5 +20 + + +5 +6 +16 + + +6 +12 +16 + + +12 +22 +16 + + +24 +42 +16 + + +42 +546 +14 + + + + + + +index +object + + +12 + + +1 +2 +186 + + +2 +3 +62 + + +3 +4 +89 + + +4 +6 +44 + + +6 +8 +41 + + +8 +9 +83 + + +9 +14 +46 + + +14 +214 +42 + + + + + + +index +member + + +12 + + +1 +2 +186 + + +2 +3 +62 + + +3 +4 +89 + + +4 +6 +44 + + +6 +8 +41 + + +8 +9 +83 + + +9 +14 +46 + + +14 +158 +42 + + + + + + +member +object + + +12 + + +1 +2 +1112 + + +2 +3 +215 + + +3 +4 +303 + + +4 +5 +101 + + +5 +7 +166 + + +7 +21 +9 + + + + + + +member +index + + +12 + + +1 +2 +1139 + + +2 +3 +212 + + +3 +4 +298 + + +4 +5 +92 + + +5 +9 +165 + + + + + + + + +ext_argtype +6320 + + +funcid +4069 + + +arg +50 + + +typeid +466 + + + + +funcid +arg + + +12 + + +1 +2 +2726 + + +2 +3 +932 + + +3 +4 +329 + + +4 +18 +80 + + + + + + +funcid +typeid + + +12 + + +1 +2 +2694 + + +2 +3 +1149 + + +3 +6 +225 + + + + + + +arg +funcid + + +12 + + +1 +2 +23 + + +2 +3 +5 + + +3 +4 +2 + + +7 +8 +2 + + +10 +11 +2 + + +31 +32 +2 + + +141 +142 +2 + + +449 +450 +2 + + +1365 +1366 +2 + + + + + + +arg +typeid + + +12 + + +1 +2 +26 + + +2 +3 +8 + + +3 +4 +2 + + +4 +5 +2 + + +8 +9 +2 + + +12 +13 +2 + + +157 +158 +2 + + + + + + +typeid +funcid + + +12 + + +1 +2 +68 + + +2 +3 +86 + + +3 +4 +68 + + +4 +5 +38 + + +5 +6 +26 + + +6 +8 +29 + + +8 +10 +35 + + +10 +16 +41 + + +16 +22 +35 + + +24 +505 +35 + + + + + + +typeid +arg + + +12 + + +1 +2 +424 + + +2 +5 +35 + + +9 +17 +5 + + + + + + + + +ext_rettype +4719 + + +funcid +4321 + + +typeid +154 + + + + +funcid +typeid + + +12 + + +1 +2 +4042 + + +2 +11 +279 + + + + + + +typeid +funcid + + +12 + + +1 +2 +59 + + +2 +3 +14 + + +3 +4 +23 + + +4 +6 +8 + + +8 +14 +11 + + +22 +40 +11 + + +43 +115 +11 + + +116 +454 +11 + + + + + + + + +ext_proptype +398 + + +propid +386 + + +typeid +32 + + + + +propid 
+typeid + + +12 + + +1 +2 +374 + + +2 +3 +11 + + + + + + +typeid +propid + + +12 + + +1 +2 +11 + + +2 +3 +2 + + +7 +8 +5 + + +8 +9 +2 + + +19 +20 +2 + + +35 +36 +2 + + +52 +53 +2 + + + + + + + + +ext_argreturn +26 + + +funcid +26 + + +arg +5 + + + + +funcid +arg + + +12 + + +1 +2 +26 + + + + + + +arg +funcid + + +12 + + +2 +3 +2 + + +7 +8 +2 + + + + + + + + +py_special_objects +40 + + +obj +40 + + +name +40 + + + + +obj +name + + +12 + + +1 +2 +40 + + + + + + +name +obj + + +12 + + +1 +2 +40 + + + + + + + + +py_decorated_object +100 + + +object +100 + + +level +100 + + + + +object +level + + +12 + + + + + +level +object + + +12 + + + + + + + +xmlEncoding +100 + + +id +100 + + +encoding +100 + + + + +id +encoding + + +12 + + +1 +2 +2 + + + + + + +encoding +id + + +12 + + + + + + + +xmlDTDs +100 + + +id +100 + + +root +100 + + +publicId +100 + + +systemId +100 + + +fileid +100 + + + + +id +root + + +12 + + +1 +2 +2 + + + + + + +id +publicId + + +12 + + +1 +2 +2 + + + + + + +id +systemId + + +12 + + +1 +2 +2 + + + + + + +id +fileid + + +12 + + +1 +2 +2 + + + + + + +root +id + + +12 + + + + + +root +publicId + + +12 + + + + + +root +systemId + + +12 + + + + + +root +fileid + + +12 + + + + + +publicId +id + + +12 + + + + + +publicId +root + + +12 + + + + + +publicId +systemId + + +12 + + + + + +publicId +fileid + + +12 + + + + + +systemId +id + + +12 + + + + + +systemId +root + + +12 + + + + + +systemId +publicId + + +12 + + + + + +systemId +fileid + + +12 + + + + + +fileid +id + + +12 + + + + + +fileid +root + + +12 + + + + + +fileid +publicId + + +12 + + + + + +fileid +systemId + + +12 + + + + + + + +xmlElements +100 + + +id +100 + + +name +100 + + +parentid +100 + + +idx +100 + + +fileid +100 + + + + +id +name + + +12 + + +1 +2 +2 + + + + + + +id +parentid + + +12 + + +1 +2 +2 + + + + + + +id +idx + + +12 + + +1 +2 +2 + + + + + + +id +fileid + + +12 + + +1 +2 +2 + + + + + + +name +id + + +12 + + + + + +name +parentid + + +12 + + + + + +name +idx + + +12 + + + + + 
+name +fileid + + +12 + + + + + +parentid +id + + +12 + + + + + +parentid +name + + +12 + + + + + +parentid +idx + + +12 + + + + + +parentid +fileid + + +12 + + + + + +idx +id + + +12 + + + + + +idx +name + + +12 + + + + + +idx +parentid + + +12 + + + + + +idx +fileid + + +12 + + + + + +fileid +id + + +12 + + + + + +fileid +name + + +12 + + + + + +fileid +parentid + + +12 + + + + + +fileid +idx + + +12 + + + + + + + +xmlAttrs +100 + + +id +100 + + +elementid +100 + + +name +100 + + +value +100 + + +idx +100 + + +fileid +100 + + + + +id +elementid + + +12 + + +1 +2 +1 + + + + + + +id +name + + +12 + + +1 +2 +1 + + + + + + +id +value + + +12 + + +1 +2 +1 + + + + + + +id +idx + + +12 + + +1 +2 +1 + + + + + + +id +fileid + + +12 + + +1 +2 +1 + + + + + + +elementid +id + + +12 + + + + + +elementid +name + + +12 + + + + + +elementid +value + + +12 + + + + + +elementid +idx + + +12 + + + + + +elementid +fileid + + +12 + + + + + +name +id + + +12 + + + + + +name +elementid + + +12 + + + + + +name +value + + +12 + + + + + +name +idx + + +12 + + + + + +name +fileid + + +12 + + + + + +value +id + + +12 + + + + + +value +elementid + + +12 + + + + + +value +name + + +12 + + + + + +value +idx + + +12 + + + + + +value +fileid + + +12 + + + + + +idx +id + + +12 + + + + + +idx +elementid + + +12 + + + + + +idx +name + + +12 + + + + + +idx +value + + +12 + + + + + +idx +fileid + + +12 + + + + + +fileid +id + + +12 + + + + + +fileid +elementid + + +12 + + + + + +fileid +name + + +12 + + + + + +fileid +value + + +12 + + + + + +fileid +idx + + +12 + + + + + + + +xmlNs +100 + + +id +100 + + +prefixName +100 + + +URI +100 + + +fileid +100 + + + + +id +prefixName + + +12 + + + + + +id +URI + + +12 + + + + + +id +fileid + + +12 + + + + + +prefixName +id + + +12 + + + + + +prefixName +URI + + +12 + + + + + +prefixName +fileid + + +12 + + + + + +URI +id + + +12 + + + + + +URI +prefixName + + +12 + + + + + +URI +fileid + + +12 + + + + + +fileid +id + + +12 + + + + + +fileid +prefixName + + 
+12 + + + + + +fileid +URI + + +12 + + + + + + + +xmlHasNs +100 + + +elementId +100 + + +nsId +100 + + +fileid +100 + + + + +elementId +nsId + + +12 + + + + + +elementId +fileid + + +12 + + + + + +nsId +elementId + + +12 + + + + + +nsId +fileid + + +12 + + + + + +fileid +elementId + + +12 + + + + + +fileid +nsId + + +12 + + + + + + + +xmlComments +100 + + +id +100 + + +text +100 + + +parentid +100 + + +fileid +100 + + + + +id +text + + +12 + + +1 +2 +2 + + + + + + +id +parentid + + +12 + + +1 +2 +2 + + + + + + +id +fileid + + +12 + + +1 +2 +2 + + + + + + +text +id + + +12 + + + + + +text +parentid + + +12 + + + + + +text +fileid + + +12 + + + + + +parentid +id + + +12 + + + + + +parentid +text + + +12 + + + + + +parentid +fileid + + +12 + + + + + +fileid +id + + +12 + + + + + +fileid +text + + +12 + + + + + +fileid +parentid + + +12 + + + + + + + +xmlChars +100 + + +id +100 + + +text +100 + + +parentid +100 + + +idx +100 + + +isCDATA +100 + + +fileid +100 + + + + +id +text + + +12 + + +1 +2 +1 + + + + + + +id +parentid + + +12 + + +1 +2 +1 + + + + + + +id +idx + + +12 + + +1 +2 +1 + + + + + + +id +isCDATA + + +12 + + +1 +2 +1 + + + + + + +id +fileid + + +12 + + +1 +2 +1 + + + + + + +text +id + + +12 + + + + + +text +parentid + + +12 + + + + + +text +idx + + +12 + + + + + +text +isCDATA + + +12 + + + + + +text +fileid + + +12 + + + + + +parentid +id + + +12 + + + + + +parentid +text + + +12 + + + + + +parentid +idx + + +12 + + + + + +parentid +isCDATA + + +12 + + + + + +parentid +fileid + + +12 + + + + + +idx +id + + +12 + + + + + +idx +text + + +12 + + + + + +idx +parentid + + +12 + + + + + +idx +isCDATA + + +12 + + + + + +idx +fileid + + +12 + + + + + +isCDATA +id + + +12 + + + + + +isCDATA +text + + +12 + + + + + +isCDATA +parentid + + +12 + + + + + +isCDATA +idx + + +12 + + + + + +isCDATA +fileid + + +12 + + + + + +fileid +id + + +12 + + + + + +fileid +text + + +12 + + + + + +fileid +parentid + + +12 + + + + + +fileid +idx + + +12 + + + + + +fileid +isCDATA + + 
+12 + + + + + + + +xmllocations +100 + + +xmlElement +100 + + +location +100 + + + + +xmlElement +location + + +12 + + + + + +location +xmlElement + + +12 + + + + + + + + diff --git a/python/ql/src/site.qll b/python/ql/src/site.qll new file mode 100644 index 00000000000..3a301497828 --- /dev/null +++ b/python/ql/src/site.qll @@ -0,0 +1,7 @@ +/** Site library + * + * Include predicates and classes here, if they are required to customize all analysis. + * + */ + +