diff --git a/python/ql/src/semmle/python/objects/Callables.qll b/python/ql/src/semmle/python/objects/Callables.qll index aac3dfcdc5c..6eefaf7a762 100644 --- a/python/ql/src/semmle/python/objects/Callables.qll +++ b/python/ql/src/semmle/python/objects/Callables.qll @@ -156,6 +156,8 @@ class PythonFunctionObjectInternal extends CallableObjectInternal, TPythonFuncti function = this and offset = 0 } + override predicate useOriginAsLegacyObject() { none() } + } @@ -277,6 +279,8 @@ class BuiltinFunctionObjectInternal extends CallableObjectInternal, TBuiltinFunc function = this and offset = 0 } + override predicate useOriginAsLegacyObject() { none() } + } /** Class representing methods of built-in classes (otherwise known as method-descriptors) such as `list.append`. @@ -367,6 +371,8 @@ class BuiltinMethodObjectInternal extends CallableObjectInternal, TBuiltinMethod function = this and offset = 0 } + override predicate useOriginAsLegacyObject() { none() } + } /** Class representing bound-methods. 
@@ -453,6 +459,8 @@ class BoundMethodObjectInternal extends CallableObjectInternal, TBoundMethod { function = this.getFunction() and offset = 1 } + override predicate useOriginAsLegacyObject() { any() } + } diff --git a/python/ql/src/semmle/python/objects/Classes.qll b/python/ql/src/semmle/python/objects/Classes.qll index d852e1ce86f..a1b278d37a2 100644 --- a/python/ql/src/semmle/python/objects/Classes.qll +++ b/python/ql/src/semmle/python/objects/Classes.qll @@ -89,6 +89,9 @@ abstract class ClassObjectInternal extends ObjectInternal { } override predicate subscriptUnknown() { none() } + + override predicate useOriginAsLegacyObject() { none() } + } /** Class representing Python source classes */ diff --git a/python/ql/src/semmle/python/objects/Constants.qll b/python/ql/src/semmle/python/objects/Constants.qll index eeddfbe559c..39eceb6fee7 100644 --- a/python/ql/src/semmle/python/objects/Constants.qll +++ b/python/ql/src/semmle/python/objects/Constants.qll @@ -69,6 +69,8 @@ abstract class ConstantObjectInternal extends ObjectInternal { override string getName() { none() } + override predicate useOriginAsLegacyObject() { none() } + } private abstract class BooleanObjectInternal extends ConstantObjectInternal { diff --git a/python/ql/src/semmle/python/objects/Descriptors.qll b/python/ql/src/semmle/python/objects/Descriptors.qll index 3dd31ce7e23..9ed37ab2e05 100644 --- a/python/ql/src/semmle/python/objects/Descriptors.qll +++ b/python/ql/src/semmle/python/objects/Descriptors.qll @@ -91,6 +91,7 @@ class PropertyInternal extends ObjectInternal, TProperty { ) } + override predicate useOriginAsLegacyObject() { none() } } /** A class representing classmethods in Python */ @@ -176,6 +177,8 @@ class ClassMethodObjectInternal extends ObjectInternal, TClassMethod { result = this.getFunction().getName() } + override predicate useOriginAsLegacyObject() { none() } + } class StaticMethodObjectInternal extends ObjectInternal, TStaticMethod { @@ -247,4 +250,6 @@ class 
StaticMethodObjectInternal extends ObjectInternal, TStaticMethod { result = this.getFunction().getName() } + override predicate useOriginAsLegacyObject() { none() } + } diff --git a/python/ql/src/semmle/python/objects/Instances.qll b/python/ql/src/semmle/python/objects/Instances.qll index b2dc7eb07d0..7109bffc7c5 100644 --- a/python/ql/src/semmle/python/objects/Instances.qll +++ b/python/ql/src/semmle/python/objects/Instances.qll @@ -160,6 +160,8 @@ class SpecificInstanceInternal extends TSpecificInstance, InstanceObject { ) } + override predicate useOriginAsLegacyObject() { none() } + } /** A class representing context-free instances represented by `self` in the source code @@ -262,6 +264,8 @@ class SelfInstanceInternal extends TSelfInstance, InstanceObject { this.getClass().attribute("__init__", init, _) } + override predicate useOriginAsLegacyObject() { none() } + } /** A class representing a value that has a known class, but no other information */ @@ -366,6 +370,8 @@ class UnknownInstanceInternal extends TUnknownInstance, ObjectInternal { override string getName() { none() } + override predicate useOriginAsLegacyObject() { any() } + } private int lengthFromClass(ClassObjectInternal cls) { @@ -472,5 +478,7 @@ class SuperInstance extends TSuperInstance, ObjectInternal { override string getName() { none() } + override predicate useOriginAsLegacyObject() { any() } + } diff --git a/python/ql/src/semmle/python/objects/Modules.qll b/python/ql/src/semmle/python/objects/Modules.qll index 5edc710c2b3..b0f5cb78661 100644 --- a/python/ql/src/semmle/python/objects/Modules.qll +++ b/python/ql/src/semmle/python/objects/Modules.qll @@ -52,6 +52,8 @@ abstract class ModuleObjectInternal extends ObjectInternal { any(PackageObjectInternal package).getInitModule() = this } + override predicate useOriginAsLegacyObject() { none() } + } /** A class representing built-in modules */ @@ -308,10 +310,6 @@ class AbsentModuleObjectInternal extends ModuleObjectInternal, TAbsentModule { 
none() } - override predicate isMissing() { - any() - } - } /** A class representing an attribute of a missing module. */ @@ -397,12 +395,10 @@ class AbsentModuleAttributeObjectInternal extends ObjectInternal, TAbsentModuleA override predicate subscriptUnknown() { any() } - override predicate isMissing() { - any() - } - /* We know what this is called, but not its innate name */ override string getName() { none() } + override predicate useOriginAsLegacyObject() { none() } + } diff --git a/python/ql/src/semmle/python/objects/ObjectAPI.qll b/python/ql/src/semmle/python/objects/ObjectAPI.qll index 477aa1aac68..36cf48afdd7 100644 --- a/python/ql/src/semmle/python/objects/ObjectAPI.qll +++ b/python/ql/src/semmle/python/objects/ObjectAPI.qll @@ -78,14 +78,6 @@ class Value extends TObject { this.(ObjectInternal).isBuiltin() } - /** Holds if this value represents an entity that is inferred to exist, - * but missing from the database. - * Most commonly, this is a module that is imported, but wasn't present during extraction. 
- */ - predicate isMissing() { - this.(ObjectInternal).isMissing() - } - predicate hasLocationInfo(string filepath, int bl, int bc, int el, int ec) { this.(ObjectInternal).getOrigin().getLocation().hasLocationInfo(filepath, bl, bc, el, ec) or diff --git a/python/ql/src/semmle/python/objects/ObjectInternal.qll b/python/ql/src/semmle/python/objects/ObjectInternal.qll index b5cf48eab04..ee62308abe2 100644 --- a/python/ql/src/semmle/python/objects/ObjectInternal.qll +++ b/python/ql/src/semmle/python/objects/ObjectInternal.qll @@ -155,11 +155,12 @@ class ObjectInternal extends TObject { */ predicate functionAndOffset(CallableObjectInternal function, int offset) { none() } - /** Holds if this 'object' represents an entity that is inferred to exist - * but is missing from the database */ - predicate isMissing() { - none() - } + /** Holds if this 'object' represents an entity that should be exposed to the legacy points_to API. + * This should hold for almost all objects that do not have an underlying DB object representing their source, + * for example `super` objects and bound-methods. This should not hold for objects that are inferred to exist by + * an import statement or the like, but which aren't in the database. */ + /* This predicate can be removed when the legacy points_to API is removed. */ + abstract predicate useOriginAsLegacyObject(); /** Gets the name of this of this object if it has a meaningful name. 
* Note that the name of an object is not necessarily the name by which it is called @@ -249,6 +250,9 @@ class BuiltinOpaqueObjectInternal extends ObjectInternal, TBuiltinOpaqueObject { override string getName() { result = this.getBuiltin().getName() } + + override predicate useOriginAsLegacyObject() { none() } + } @@ -326,6 +330,8 @@ class UnknownInternal extends ObjectInternal, TUnknown { override string getName() { none() } + override predicate useOriginAsLegacyObject() { none() } + } class UndefinedInternal extends ObjectInternal, TUndefined { @@ -404,6 +410,8 @@ class UndefinedInternal extends ObjectInternal, TUndefined { override string getName() { none() } + override predicate useOriginAsLegacyObject() { none() } + } module ObjectInternal { @@ -498,6 +506,7 @@ module ObjectInternal { result.(BuiltinTupleObjectInternal).length() = 0 } + } /** Helper for boolean predicates returning both `true` and `false` */ diff --git a/python/ql/src/semmle/python/objects/Sequences.qll b/python/ql/src/semmle/python/objects/Sequences.qll index 7c72c7ade2f..4b062c46dfe 100644 --- a/python/ql/src/semmle/python/objects/Sequences.qll +++ b/python/ql/src/semmle/python/objects/Sequences.qll @@ -117,6 +117,9 @@ class BuiltinTupleObjectInternal extends TBuiltinTuple, TupleObjectInternal { result = count(int n | exists(b.getItem(n))) ) } + + override predicate useOriginAsLegacyObject() { none() } + } /** A tuple declared by a tuple expression in the Python source code */ @@ -148,6 +151,8 @@ class PythonTupleObjectInternal extends TPythonTuple, TupleObjectInternal { ) } + override predicate useOriginAsLegacyObject() { none() } + } /** A tuple created by a `*` parameter */ @@ -176,6 +181,9 @@ class VarargsTupleObjectInternal extends TVarargsTuple, TupleObjectInternal { override int length() { this = TVarargsTuple(_, _, _, result) } + + override predicate useOriginAsLegacyObject() { any() } + } @@ -256,4 +264,6 @@ class SysVersionInfoObjectInternal extends TSysVersionInfo, 
SequenceObjectIntern override predicate functionAndOffset(CallableObjectInternal function, int offset) { none() } + override predicate useOriginAsLegacyObject() { any() } + } diff --git a/python/ql/src/semmle/python/pointsto/PointsTo.qll b/python/ql/src/semmle/python/pointsto/PointsTo.qll index f750d31ad3f..786aae79916 100644 --- a/python/ql/src/semmle/python/pointsto/PointsTo.qll +++ b/python/ql/src/semmle/python/pointsto/PointsTo.qll @@ -130,7 +130,7 @@ module PointsTo { PointsToInternal::pointsTo(f, context, value, origin) and cls = value.getClass().getSource() | obj = value.getSource() or - not exists(value.getSource()) and not value.isMissing() and obj = origin + value.useOriginAsLegacyObject() and obj = origin ) or /* Backwards compatibility for *args and **kwargs */ @@ -145,7 +145,7 @@ module PointsTo { PointsToInternal::pointsTo(f.(DefinitionNode).getValue(), context, value, origin) and cls = value.getClass().getSource() | obj = value.getSource() or - not exists(value.getSource()) and obj = origin + value.useOriginAsLegacyObject() and obj = origin ) } diff --git a/python/ql/src/semmle/python/regex.qll b/python/ql/src/semmle/python/regex.qll index 652c89147a2..04ac4b407a8 100644 --- a/python/ql/src/semmle/python/regex.qll +++ b/python/ql/src/semmle/python/regex.qll @@ -1,4 +1,5 @@ import python +import semmle.python.objects.ObjectInternal private predicate re_module_function(string name, int flags) { name = "compile" and flags = 1 or @@ -14,44 +15,42 @@ private predicate re_module_function(string name, int flags) { predicate used_as_regex(Expr s, string mode) { (s instanceof Bytes or s instanceof Unicode) and - exists(ModuleObject re | re.getName() = "re" | + exists(ModuleValue re | re.getName() = "re" | /* Call to re.xxx(regex, ... 
[mode]) */ exists(CallNode call, string name | call.getArg(0).refersTo(_, _, s.getAFlowNode()) and - call.getFunction().refersTo(re.attr(name)) | + call.getFunction().pointsTo(re.attr(name)) | mode = "None" or - exists(Object obj | + exists(Value obj | mode = mode_from_mode_object(obj) | exists(int flags_arg | re_module_function(name, flags_arg) and - call.getArg(flags_arg).refersTo(obj) + call.getArg(flags_arg).pointsTo(obj) ) or - call.getArgByName("flags").refersTo(obj) + call.getArgByName("flags").pointsTo(obj) ) ) ) } -string mode_from_mode_object(Object obj) { +string mode_from_mode_object(Value obj) { ( result = "DEBUG" or result = "IGNORECASE" or result = "LOCALE" or result = "MULTILINE" or result = "DOTALL" or result = "UNICODE" or result = "VERBOSE" ) and - obj = ModuleObject::named("sre_constants").attr("SRE_FLAG_" + result) - or - exists(BinaryExpr be, Object sub | obj.getOrigin() = be | - be.getOp() instanceof BitOr and - be.getASubExpression().refersTo(sub) and - result = mode_from_mode_object(sub) + exists(int flag | + flag = Value::named("sre_constants.SRE_FLAG_" + result).(ObjectInternal).intValue() + and + obj.(ObjectInternal).intValue().bitAnd(flag) = flag ) } /** A StrConst used as a regular expression */ abstract class RegexString extends Expr { - + RegexString() { (this instanceof Bytes or this instanceof Unicode) } diff --git a/python/ql/src/semmle/python/types/Extensions.qll b/python/ql/src/semmle/python/types/Extensions.qll index b4e01e47285..a97c620951c 100644 --- a/python/ql/src/semmle/python/types/Extensions.qll +++ b/python/ql/src/semmle/python/types/Extensions.qll @@ -13,9 +13,11 @@ import python private import semmle.python.pointsto.PointsTo private import semmle.python.pointsto.PointsToContext private import semmle.python.objects.TObject -private import semmle.python.objects.ObjectInternal private import semmle.python.web.HttpConstants +/* Make ObjectInternal visible to save extra imports in user code */ +import 
semmle.python.objects.ObjectInternal + abstract class PointsToExtension extends @py_flow_node { string toString() { none() } diff --git a/python/ql/test/3/library-tests/PointsTo/attributes/Test.expected b/python/ql/test/3/library-tests/PointsTo/attributes/Test.expected index 7286fb20959..0adb7c86bef 100644 --- a/python/ql/test/3/library-tests/PointsTo/attributes/Test.expected +++ b/python/ql/test/3/library-tests/PointsTo/attributes/Test.expected @@ -1,4 +1,3 @@ -| 1 | ControlFlowNode for unicode_literals | ImportMember | 1 | | 2 | ControlFlowNode for C | class C | 2 | | 2 | ControlFlowNode for ClassExpr | class C | 2 | | 2 | ControlFlowNode for object | builtin-class object | 2 | diff --git a/python/ql/test/library-tests/PointsTo/extensions/Extend.expected b/python/ql/test/library-tests/PointsTo/extensions/Extend.expected index c728fb74233..791f854e105 100644 --- a/python/ql/test/library-tests/PointsTo/extensions/Extend.expected +++ b/python/ql/test/library-tests/PointsTo/extensions/Extend.expected @@ -1,6 +1,8 @@ -WARNING: Type CustomPointsToAttribute has been deprecated and may be removed in future (Extend.ql:26,35-58) -WARNING: Type CustomPointsToObjectFact has been deprecated and may be removed in future (Extend.ql:41,32-56) -WARNING: Type CustomPointsToOriginFact has been deprecated and may be removed in future (Extend.ql:8,28-52) +WARNING: Predicate points_to has been deprecated and may be removed in future (Extend.ql:58,9-28) +WARNING: Type CustomPointsToAttribute has been deprecated and may be removed in future (Extend.ql:27,35-58) +WARNING: Type CustomPointsToObjectFact has been deprecated and may be removed in future (Extend.ql:42,32-56) +WARNING: Type CustomPointsToOriginFact has been deprecated and may be removed in future (Extend.ql:9,28-52) +WARNING: Type CustomPointsToOriginFact has been deprecated and may be removed in future (Extend.ql:55,38-62) | test.py:4:1:4:3 | ControlFlowNode for one | int 1 | | test.py:5:1:5:3 | ControlFlowNode for two | int 
2 | | test.py:8:1:8:1 | ControlFlowNode for IntegerLiteral | int 1 | diff --git a/python/ql/test/library-tests/PointsTo/extensions/Extend.ql b/python/ql/test/library-tests/PointsTo/extensions/Extend.ql index b947bd0a900..af4a696ea4f 100644 --- a/python/ql/test/library-tests/PointsTo/extensions/Extend.ql +++ b/python/ql/test/library-tests/PointsTo/extensions/Extend.ql @@ -2,6 +2,7 @@ import python +import semmle.python.pointsto.PointsTo private import semmle.python.types.Extensions @@ -50,6 +51,18 @@ class NoClassExtension extends CustomPointsToObjectFact { } +/* Check that we can use old API without causing non-monotonic recursion */ +class RecurseIntoOldPointsTo extends CustomPointsToOriginFact { + + RecurseIntoOldPointsTo() { + PointsTo::points_to(this, _, unknownValue(), _, _) + } + + override predicate pointsTo(Object value, ClassObject cls) { + value = unknownValue() and cls = theUnknownType() + } +} + from ControlFlowNode f, Object o where f.getLocation().getFile().getBaseName() = "test.py" and f.refersTo(o) diff --git a/python/ql/test/library-tests/PointsTo/general/GlobalPointsTo.expected b/python/ql/test/library-tests/PointsTo/general/GlobalPointsTo.expected index 806114fee02..e394f72918f 100644 --- a/python/ql/test/library-tests/PointsTo/general/GlobalPointsTo.expected +++ b/python/ql/test/library-tests/PointsTo/general/GlobalPointsTo.expected @@ -98,7 +98,6 @@ | Module pointsto_test | 76 | ControlFlowNode for sys | Module sys | | Module pointsto_test | 76 | ControlFlowNode for type | builtin-class type | | Module pointsto_test | 76 | ControlFlowNode for type() | builtin-class module | -| Module pointsto_test | 77 | ControlFlowNode for unknown | ImportMember | | Module pointsto_test | 78 | ControlFlowNode for type | builtin-class type | | Module pointsto_test | 79 | ControlFlowNode for Dict | Dict | | Module pointsto_test | 79 | ControlFlowNode for Tuple | Tuple | diff --git a/python/ql/test/library-tests/PointsTo/general/LocalPointsTo.expected 
b/python/ql/test/library-tests/PointsTo/general/LocalPointsTo.expected index 787c36bded4..1960aa00095 100644 --- a/python/ql/test/library-tests/PointsTo/general/LocalPointsTo.expected +++ b/python/ql/test/library-tests/PointsTo/general/LocalPointsTo.expected @@ -106,7 +106,6 @@ | 76 | ControlFlowNode for sys | Module sys | | 76 | ControlFlowNode for type | builtin-class type | | 76 | ControlFlowNode for type() | builtin-class module | -| 77 | ControlFlowNode for unknown | ImportMember | | 78 | ControlFlowNode for type | builtin-class type | | 79 | ControlFlowNode for Dict | Dict | | 79 | ControlFlowNode for Tuple | Tuple | diff --git a/python/ql/test/library-tests/PointsTo/new/PointsToWithContext.expected b/python/ql/test/library-tests/PointsTo/new/PointsToWithContext.expected index 13d6eaa88f1..5b4b438bbab 100644 --- a/python/ql/test/library-tests/PointsTo/new/PointsToWithContext.expected +++ b/python/ql/test/library-tests/PointsTo/new/PointsToWithContext.expected @@ -1142,7 +1142,6 @@ WARNING: Predicate points_to has been deprecated and may be removed in future (P | t_type.py:7 | ControlFlowNode for sys | Module sys | builtin-class module | 1 | import | | t_type.py:7 | ControlFlowNode for type | builtin-class type | builtin-class type | 7 | import | | t_type.py:7 | ControlFlowNode for type() | builtin-class module | builtin-class type | 7 | import | -| t_type.py:8 | ControlFlowNode for unknown | ImportMember | *UNKNOWN TYPE* | 8 | import | | t_type.py:9 | ControlFlowNode for type | builtin-class type | builtin-class type | 9 | import | | t_type.py:9 | ControlFlowNode for type() | *UNKNOWN TYPE* | *UNKNOWN TYPE* | 9 | import | | t_type.py:10 | ControlFlowNode for Dict | Dict | builtin-class dict | 10 | import | diff --git a/python/ql/test/library-tests/regex/options b/python/ql/test/library-tests/regex/options index e7a518df45b..be048160aeb 100644 --- a/python/ql/test/library-tests/regex/options +++ b/python/ql/test/library-tests/regex/options @@ -1 +1,2 @@ 
semmle-extractor-options: --max-import-depth=3 +optimize: true