From 30b340de68eddc8be9939e8f7fca250d57ac8d68 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 3 Jun 2019 12:00:59 +0100 Subject: [PATCH 1/5] Python points-to: Handle varargs in callee. --- python/ql/src/semmle/python/Exprs.qll | 4 ++ .../src/semmle/python/objects/Sequences.qll | 32 ++++++++++++++- .../ql/src/semmle/python/objects/TObject.qll | 5 +++ .../src/semmle/python/pointsto/PointsTo.qll | 39 ++++++++++++++----- .../library-tests/PointsTo/new/Call.expected | 3 ++ .../PointsTo/new/NameSpace.expected | 3 ++ .../PointsTo/new/PointsToWithContext.expected | 37 ++++++++++++++++++ .../PointsTo/new/PointsToWithType.expected | 33 ++++++++++++++++ .../PointsTo/new/Values.expected | 32 +++++++++++++++ .../PointsTo/new/code/l_calls.py | 13 +++++++ 10 files changed, 191 insertions(+), 10 deletions(-) diff --git a/python/ql/src/semmle/python/Exprs.qll b/python/ql/src/semmle/python/Exprs.qll index 82c5bfd5219..b471afef95d 100644 --- a/python/ql/src/semmle/python/Exprs.qll +++ b/python/ql/src/semmle/python/Exprs.qll @@ -227,6 +227,10 @@ class Call extends Call_ { result = this.getKwargs().(Dict).getAKey().(StrConst).getText() } + int getPositionalArgumentCount() { + count(this.getStarargs()) < 2 and + result = count(this.getAPositionalArg()) + } } /** A conditional expression such as, `body if test else orelse` */ diff --git a/python/ql/src/semmle/python/objects/Sequences.qll b/python/ql/src/semmle/python/objects/Sequences.qll index 17ec3ec6db6..e274f6f40a2 100644 --- a/python/ql/src/semmle/python/objects/Sequences.qll +++ b/python/ql/src/semmle/python/objects/Sequences.qll @@ -39,7 +39,9 @@ abstract class TupleObjectInternal extends SequenceObjectInternal { } private string contents(int n) { - n = this.length() and result = "" + n < 4 and n = this.length() and result = "" + or + n = 4 and n < this.length() and result = "... 
" + (this.length()-4).toString() + " more" or result = this.getItem(n).toString() + ", " + this.contents(n+1) } @@ -145,6 +147,34 @@ class PythonTupleObjectInternal extends TPythonTuple, TupleObjectInternal { } +class VarargsTupleObjectInternal extends TVarargsTuple, TupleObjectInternal { + + override predicate introducedAt(ControlFlowNode node, PointsToContext context) { + none() + } + + override Builtin getBuiltin() { + none() + } + + override ControlFlowNode getOrigin() { + none() + } + + override ObjectInternal getItem(int n) { + exists(CallNode call, PointsToContext context, int offset, int length | + this = TVarargsTuple(call, context, offset, length) and + n < length and + PointsToInternal::pointsTo(call.getArg(offset+n), context, result, _) + ) + } + + override int length() { + this = TVarargsTuple(_, _, _, result) + } +} + + /** The `sys.version_info` object. We treat this specially to prevent premature pruning and * false positives when we are unsure of the actual version of Python that the code is expecting. 
*/ diff --git a/python/ql/src/semmle/python/objects/TObject.qll b/python/ql/src/semmle/python/objects/TObject.qll index c6e398e5691..3b2ef393716 100644 --- a/python/ql/src/semmle/python/objects/TObject.qll +++ b/python/ql/src/semmle/python/objects/TObject.qll @@ -179,6 +179,11 @@ cached newtype TObject = context.appliesTo(origin) } or + /* Varargs tuple */ + TVarargsTuple(CallNode call, PointsToContext context, int offset, int length) { + InterProceduralPointsTo::varargs_tuple(call, _, context, _, offset, length) + } + or /* `type` */ TType() or diff --git a/python/ql/src/semmle/python/pointsto/PointsTo.qll b/python/ql/src/semmle/python/pointsto/PointsTo.qll index af5133ca3c8..a0e33021744 100644 --- a/python/ql/src/semmle/python/pointsto/PointsTo.qll +++ b/python/ql/src/semmle/python/pointsto/PointsTo.qll @@ -899,17 +899,38 @@ module InterProceduralPointsTo { pragma [noinline] private predicate special_parameter_points_to(ParameterDefinition def, PointsToContext context, ObjectInternal value, ControlFlowNode origin) { special_parameter_value(def, value) and - ( - context.isRuntime() - or - exists(PointsToContext caller, CallNode call | - context.fromCall(call, caller) and - context.appliesToScope(def.getScope()) and - not exists(call.getArg(def.getParameter().getPosition())) and - not exists(call.getArgByName(def.getParameter().getName())) - ) + context.isRuntime() and + origin = def.getDefiningNode() + or + exists(CallNode call, Function scope, PointsToContext caller, int offset, int length | + varargs_tuple(call, scope, caller, context, offset, length) and + value = TVarargsTuple(call, caller, offset, length) and + def.getScope() = scope ) and origin = def.getDefiningNode() + or + exists(Function scope | + varargs_empty_tuple(scope, context) and + value.(BuiltinTupleObjectInternal).length() = 0 and + def.getScope() = scope + ) and + origin = def.getDefiningNode() + } + + predicate varargs_tuple(CallNode call, Function scope, PointsToContext caller, 
PointsToContext callee, int startOffset, int length) { + exists(int parameter_offset | + callsite_calls_function(call, caller, scope, callee, parameter_offset) and + startOffset = scope.getPositionalParameterCount() - parameter_offset and + length = call.getNode().getPositionalArgumentCount() - startOffset and + length > 0 + ) + } + + predicate varargs_empty_tuple(Function scope, PointsToContext callee) { + exists(CallNode call, PointsToContext caller, int parameter_offset | + callsite_calls_function(call, caller, scope, callee, parameter_offset) and + scope.getPositionalParameterCount() - parameter_offset >= call.getNode().getPositionalArgumentCount() + ) } /** Helper predicate for special_parameter_points_to */ diff --git a/python/ql/test/library-tests/PointsTo/new/Call.expected b/python/ql/test/library-tests/PointsTo/new/Call.expected index b97734a8302..70b024ae874 100644 --- a/python/ql/test/library-tests/PointsTo/new/Call.expected +++ b/python/ql/test/library-tests/PointsTo/new/Call.expected @@ -16,6 +16,9 @@ | l_calls.py:10 | ControlFlowNode for bar() | bar | | l_calls.py:24 | ControlFlowNode for Attribute() | Owner.cm | | l_calls.py:25 | ControlFlowNode for Attribute() | Owner.cm2 | +| l_calls.py:37 | ControlFlowNode for f() | f | +| l_calls.py:38 | ControlFlowNode for Attribute() | E.m | +| l_calls.py:39 | ControlFlowNode for Attribute() | E.m | | q_super.py:4 | ControlFlowNode for Attribute() | object.__init__ | | q_super.py:12 | ControlFlowNode for Attribute() | Base2.__init__ | | q_super.py:22 | ControlFlowNode for Attribute() | Base1.meth | diff --git a/python/ql/test/library-tests/PointsTo/new/NameSpace.expected b/python/ql/test/library-tests/PointsTo/new/NameSpace.expected index 481b25a8258..bb6901f1213 100644 --- a/python/ql/test/library-tests/PointsTo/new/NameSpace.expected +++ b/python/ql/test/library-tests/PointsTo/new/NameSpace.expected @@ -112,12 +112,15 @@ | k_getsetattr.py:0 | Module code.k_getsetattr | k | Function k | | k_getsetattr.py:4 | 
Class C | meth1 | Function meth1 | | k_getsetattr.py:4 | Class C | meth2 | Function meth2 | +| l_calls.py:0 | Module code.l_calls | E | class E | | l_calls.py:0 | Module code.l_calls | Owner | class Owner | | l_calls.py:0 | Module code.l_calls | bar | Function bar | +| l_calls.py:0 | Module code.l_calls | f | Function f | | l_calls.py:0 | Module code.l_calls | foo | Function foo | | l_calls.py:12 | Class Owner | cm | classmethod() | | l_calls.py:12 | Class Owner | cm2 | classmethod() | | l_calls.py:12 | Class Owner | m | Function m | +| l_calls.py:32 | Class E | m | Function m | | o_no_returns.py:0 | Module code.o_no_returns | bar | Function bar | | o_no_returns.py:0 | Module code.o_no_returns | fail | Function fail | | o_no_returns.py:0 | Module code.o_no_returns | foo | Function foo | diff --git a/python/ql/test/library-tests/PointsTo/new/PointsToWithContext.expected b/python/ql/test/library-tests/PointsTo/new/PointsToWithContext.expected index 0a933761e78..f69685edcde 100755 --- a/python/ql/test/library-tests/PointsTo/new/PointsToWithContext.expected +++ b/python/ql/test/library-tests/PointsTo/new/PointsToWithContext.expected @@ -598,6 +598,43 @@ WARNING: Predicate points_to has been deprecated and may be removed in future (P | l_calls.py:25 | ControlFlowNode for Attribute() | int 1 | builtin-class int | 25 | runtime | | l_calls.py:25 | ControlFlowNode for IntegerLiteral | int 1 | builtin-class int | 25 | runtime | | l_calls.py:25 | ControlFlowNode for a | class Owner | builtin-class type | 12 | runtime | +| l_calls.py:29 | ControlFlowNode for FunctionExpr | Function f | builtin-class function | 29 | import | +| l_calls.py:29 | ControlFlowNode for args | args | builtin-class tuple | 29 | runtime | +| l_calls.py:29 | ControlFlowNode for f | Function f | builtin-class function | 29 | import | +| l_calls.py:30 | ControlFlowNode for args | args | builtin-class tuple | 29 | code/l_calls.py:37 from import | +| l_calls.py:30 | ControlFlowNode for args | args | 
builtin-class tuple | 29 | runtime | +| l_calls.py:32 | ControlFlowNode for ClassExpr | class E | builtin-class type | 32 | import | +| l_calls.py:32 | ControlFlowNode for E | class E | builtin-class type | 32 | import | +| l_calls.py:32 | ControlFlowNode for object | builtin-class object | builtin-class type | 32 | import | +| l_calls.py:33 | ControlFlowNode for FunctionExpr | Function m | builtin-class function | 33 | import | +| l_calls.py:33 | ControlFlowNode for args | args | builtin-class tuple | 33 | runtime | +| l_calls.py:33 | ControlFlowNode for m | Function m | builtin-class function | 33 | import | +| l_calls.py:34 | ControlFlowNode for self | E() | class E | 38 | code/l_calls.py:38 from import | +| l_calls.py:34 | ControlFlowNode for self | int 3 | builtin-class int | 39 | code/l_calls.py:39 from import | +| l_calls.py:34 | ControlFlowNode for self | self | builtin-class tuple | 33 | code/l_calls.py:38 from import | +| l_calls.py:34 | ControlFlowNode for self | self | builtin-class tuple | 33 | code/l_calls.py:39 from import | +| l_calls.py:34 | ControlFlowNode for self | self | class E | 33 | runtime | +| l_calls.py:35 | ControlFlowNode for args | args | builtin-class tuple | 33 | code/l_calls.py:38 from import | +| l_calls.py:35 | ControlFlowNode for args | args | builtin-class tuple | 33 | code/l_calls.py:39 from import | +| l_calls.py:35 | ControlFlowNode for args | args | builtin-class tuple | 33 | runtime | +| l_calls.py:37 | ControlFlowNode for IntegerLiteral | int 1 | builtin-class int | 37 | import | +| l_calls.py:37 | ControlFlowNode for IntegerLiteral | int 2 | builtin-class int | 37 | import | +| l_calls.py:37 | ControlFlowNode for IntegerLiteral | int 3 | builtin-class int | 37 | import | +| l_calls.py:37 | ControlFlowNode for f | Function f | builtin-class function | 29 | import | +| l_calls.py:37 | ControlFlowNode for f() | args | builtin-class tuple | 29 | import | +| l_calls.py:38 | ControlFlowNode for Attribute | Attribute | 
builtin-class method | 38 | import | +| l_calls.py:38 | ControlFlowNode for Attribute() | args | builtin-class tuple | 33 | import | +| l_calls.py:38 | ControlFlowNode for E | class E | builtin-class type | 32 | import | +| l_calls.py:38 | ControlFlowNode for E() | E() | class E | 38 | import | +| l_calls.py:38 | ControlFlowNode for IntegerLiteral | int 2 | builtin-class int | 38 | import | +| l_calls.py:38 | ControlFlowNode for IntegerLiteral | int 3 | builtin-class int | 38 | import | +| l_calls.py:38 | ControlFlowNode for IntegerLiteral | int 4 | builtin-class int | 38 | import | +| l_calls.py:39 | ControlFlowNode for Attribute | Function m | builtin-class function | 33 | import | +| l_calls.py:39 | ControlFlowNode for Attribute() | args | builtin-class tuple | 33 | import | +| l_calls.py:39 | ControlFlowNode for E | class E | builtin-class type | 32 | import | +| l_calls.py:39 | ControlFlowNode for IntegerLiteral | int 3 | builtin-class int | 39 | import | +| l_calls.py:39 | ControlFlowNode for IntegerLiteral | int 4 | builtin-class int | 39 | import | +| l_calls.py:39 | ControlFlowNode for IntegerLiteral | int 5 | builtin-class int | 39 | import | | m_attributes.py:3 | ControlFlowNode for C | class C | builtin-class type | 3 | import | | m_attributes.py:3 | ControlFlowNode for ClassExpr | class C | builtin-class type | 3 | import | | m_attributes.py:3 | ControlFlowNode for object | builtin-class object | builtin-class type | 3 | import | diff --git a/python/ql/test/library-tests/PointsTo/new/PointsToWithType.expected b/python/ql/test/library-tests/PointsTo/new/PointsToWithType.expected index 999cebadfa7..0700fe3cb48 100644 --- a/python/ql/test/library-tests/PointsTo/new/PointsToWithType.expected +++ b/python/ql/test/library-tests/PointsTo/new/PointsToWithType.expected @@ -675,6 +675,39 @@ WARNING: Predicate points_to has been deprecated and may be removed in future (P | l_calls.py:25 | ControlFlowNode for Attribute() | int 1 | builtin-class int | 25 | | 
l_calls.py:25 | ControlFlowNode for IntegerLiteral | int 1 | builtin-class int | 25 | | l_calls.py:25 | ControlFlowNode for a | class Owner | builtin-class type | 12 | +| l_calls.py:29 | ControlFlowNode for FunctionExpr | Function f | builtin-class function | 29 | +| l_calls.py:29 | ControlFlowNode for args | args | builtin-class tuple | 29 | +| l_calls.py:29 | ControlFlowNode for f | Function f | builtin-class function | 29 | +| l_calls.py:30 | ControlFlowNode for args | args | builtin-class tuple | 29 | +| l_calls.py:32 | ControlFlowNode for ClassExpr | class E | builtin-class type | 32 | +| l_calls.py:32 | ControlFlowNode for E | class E | builtin-class type | 32 | +| l_calls.py:32 | ControlFlowNode for object | builtin-class object | builtin-class type | 32 | +| l_calls.py:33 | ControlFlowNode for FunctionExpr | Function m | builtin-class function | 33 | +| l_calls.py:33 | ControlFlowNode for args | args | builtin-class tuple | 33 | +| l_calls.py:33 | ControlFlowNode for m | Function m | builtin-class function | 33 | +| l_calls.py:34 | ControlFlowNode for self | E() | class E | 38 | +| l_calls.py:34 | ControlFlowNode for self | int 3 | builtin-class int | 39 | +| l_calls.py:34 | ControlFlowNode for self | self | builtin-class tuple | 33 | +| l_calls.py:34 | ControlFlowNode for self | self | class E | 33 | +| l_calls.py:35 | ControlFlowNode for args | args | builtin-class tuple | 33 | +| l_calls.py:37 | ControlFlowNode for IntegerLiteral | int 1 | builtin-class int | 37 | +| l_calls.py:37 | ControlFlowNode for IntegerLiteral | int 2 | builtin-class int | 37 | +| l_calls.py:37 | ControlFlowNode for IntegerLiteral | int 3 | builtin-class int | 37 | +| l_calls.py:37 | ControlFlowNode for f | Function f | builtin-class function | 29 | +| l_calls.py:37 | ControlFlowNode for f() | args | builtin-class tuple | 29 | +| l_calls.py:38 | ControlFlowNode for Attribute | Attribute | builtin-class method | 38 | +| l_calls.py:38 | ControlFlowNode for Attribute() | args | 
builtin-class tuple | 33 | +| l_calls.py:38 | ControlFlowNode for E | class E | builtin-class type | 32 | +| l_calls.py:38 | ControlFlowNode for E() | E() | class E | 38 | +| l_calls.py:38 | ControlFlowNode for IntegerLiteral | int 2 | builtin-class int | 38 | +| l_calls.py:38 | ControlFlowNode for IntegerLiteral | int 3 | builtin-class int | 38 | +| l_calls.py:38 | ControlFlowNode for IntegerLiteral | int 4 | builtin-class int | 38 | +| l_calls.py:39 | ControlFlowNode for Attribute | Function m | builtin-class function | 33 | +| l_calls.py:39 | ControlFlowNode for Attribute() | args | builtin-class tuple | 33 | +| l_calls.py:39 | ControlFlowNode for E | class E | builtin-class type | 32 | +| l_calls.py:39 | ControlFlowNode for IntegerLiteral | int 3 | builtin-class int | 39 | +| l_calls.py:39 | ControlFlowNode for IntegerLiteral | int 4 | builtin-class int | 39 | +| l_calls.py:39 | ControlFlowNode for IntegerLiteral | int 5 | builtin-class int | 39 | | s_scopes.py:4 | ControlFlowNode for True | bool True | builtin-class bool | 4 | | s_scopes.py:4 | ControlFlowNode for float | bool True | builtin-class bool | 4 | | s_scopes.py:7 | ControlFlowNode for C2 | class C2 | builtin-class type | 7 | diff --git a/python/ql/test/library-tests/PointsTo/new/Values.expected b/python/ql/test/library-tests/PointsTo/new/Values.expected index 650515e70a8..7fe8471cf38 100644 --- a/python/ql/test/library-tests/PointsTo/new/Values.expected +++ b/python/ql/test/library-tests/PointsTo/new/Values.expected @@ -466,6 +466,38 @@ | l_calls.py:25 | ControlFlowNode for Attribute() | runtime | int 1 | builtin-class int | | l_calls.py:25 | ControlFlowNode for IntegerLiteral | runtime | int 1 | builtin-class int | | l_calls.py:25 | ControlFlowNode for a | runtime | class Owner | builtin-class type | +| l_calls.py:29 | ControlFlowNode for FunctionExpr | import | Function f | builtin-class function | +| l_calls.py:30 | ControlFlowNode for args | code/l_calls.py:37 from import | (int 1, int 2, int 3, 
) | builtin-class tuple | +| l_calls.py:30 | ControlFlowNode for args | runtime | instance of tuple | builtin-class tuple | +| l_calls.py:32 | ControlFlowNode for ClassExpr | import | class E | builtin-class type | +| l_calls.py:32 | ControlFlowNode for object | import | builtin-class object | builtin-class type | +| l_calls.py:33 | ControlFlowNode for FunctionExpr | import | Function E.m | builtin-class function | +| l_calls.py:34 | ControlFlowNode for self | code/l_calls.py:38 from import | (int 2, int 3, int 4, ) | builtin-class tuple | +| l_calls.py:34 | ControlFlowNode for self | code/l_calls.py:38 from import | E() | class E | +| l_calls.py:34 | ControlFlowNode for self | code/l_calls.py:39 from import | (int 4, int 5, ) | builtin-class tuple | +| l_calls.py:34 | ControlFlowNode for self | code/l_calls.py:39 from import | int 3 | builtin-class int | +| l_calls.py:34 | ControlFlowNode for self | runtime | self instance of E | class E | +| l_calls.py:35 | ControlFlowNode for args | code/l_calls.py:38 from import | (int 2, int 3, int 4, ) | builtin-class tuple | +| l_calls.py:35 | ControlFlowNode for args | code/l_calls.py:39 from import | (int 4, int 5, ) | builtin-class tuple | +| l_calls.py:35 | ControlFlowNode for args | runtime | instance of tuple | builtin-class tuple | +| l_calls.py:37 | ControlFlowNode for IntegerLiteral | import | int 1 | builtin-class int | +| l_calls.py:37 | ControlFlowNode for IntegerLiteral | import | int 2 | builtin-class int | +| l_calls.py:37 | ControlFlowNode for IntegerLiteral | import | int 3 | builtin-class int | +| l_calls.py:37 | ControlFlowNode for f | import | Function f | builtin-class function | +| l_calls.py:37 | ControlFlowNode for f() | import | (int 1, int 2, int 3, ) | builtin-class tuple | +| l_calls.py:38 | ControlFlowNode for Attribute | import | Method(Function E.m, E()) | builtin-class method | +| l_calls.py:38 | ControlFlowNode for Attribute() | import | (int 2, int 3, int 4, ) | builtin-class tuple | +| 
l_calls.py:38 | ControlFlowNode for E | import | class E | builtin-class type | +| l_calls.py:38 | ControlFlowNode for E() | import | E() | class E | +| l_calls.py:38 | ControlFlowNode for IntegerLiteral | import | int 2 | builtin-class int | +| l_calls.py:38 | ControlFlowNode for IntegerLiteral | import | int 3 | builtin-class int | +| l_calls.py:38 | ControlFlowNode for IntegerLiteral | import | int 4 | builtin-class int | +| l_calls.py:39 | ControlFlowNode for Attribute | import | Function E.m | builtin-class function | +| l_calls.py:39 | ControlFlowNode for Attribute() | import | (int 4, int 5, ) | builtin-class tuple | +| l_calls.py:39 | ControlFlowNode for E | import | class E | builtin-class type | +| l_calls.py:39 | ControlFlowNode for IntegerLiteral | import | int 3 | builtin-class int | +| l_calls.py:39 | ControlFlowNode for IntegerLiteral | import | int 4 | builtin-class int | +| l_calls.py:39 | ControlFlowNode for IntegerLiteral | import | int 5 | builtin-class int | | m_attributes.py:3 | ControlFlowNode for ClassExpr | import | class C | builtin-class type | | m_attributes.py:3 | ControlFlowNode for object | import | builtin-class object | builtin-class type | | m_attributes.py:5 | ControlFlowNode for FunctionExpr | import | Function C.__init__ | builtin-class function | diff --git a/python/ql/test/library-tests/PointsTo/new/code/l_calls.py b/python/ql/test/library-tests/PointsTo/new/code/l_calls.py index d49f373cec4..7d5b9e348a6 100644 --- a/python/ql/test/library-tests/PointsTo/new/code/l_calls.py +++ b/python/ql/test/library-tests/PointsTo/new/code/l_calls.py @@ -24,3 +24,16 @@ class Owner(object): a = self.cm(0) return a.cm2(1) +# *args + +def f(*args): + return args + +class E(object): + def m(self, *args): + self + return args + +f(1, 2, 3) +E().m(2, 3, 4) +E.m(3, 4, 5) From 5b0652473d729dd6f87161f77c050181d6299a97 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 3 Jun 2019 18:50:03 +0100 Subject: [PATCH 2/5] Python points-to: Handle 
varargs in caller --- python/ql/src/semmle/python/Exprs.qll | 14 ++++-- python/ql/src/semmle/python/Flow.qll | 5 +++ .../src/semmle/python/objects/Sequences.qll | 4 +- .../src/semmle/python/pointsto/PointsTo.qll | 45 ++++++++++++++----- .../library-tests/PointsTo/new/Call.expected | 4 ++ .../PointsTo/new/NameSpace.expected | 4 ++ .../PointsTo/new/PointsToNone.expected | 1 + .../PointsTo/new/PointsToWithContext.expected | 32 +++++++++++++ .../PointsTo/new/PointsToWithType.expected | 31 +++++++++++++ .../PointsTo/new/Values.expected | 28 ++++++++++++ .../PointsTo/new/code/l_calls.py | 15 +++++++ 11 files changed, 168 insertions(+), 15 deletions(-) diff --git a/python/ql/src/semmle/python/Exprs.qll b/python/ql/src/semmle/python/Exprs.qll index b471afef95d..794456744f6 100644 --- a/python/ql/src/semmle/python/Exprs.qll +++ b/python/ql/src/semmle/python/Exprs.qll @@ -168,12 +168,12 @@ class Call extends Call_ { override CallNode getAFlowNode() { result = super.getAFlowNode() } - /** Gets a tuple (*) argument of this class definition. */ + /** Gets a tuple (*) argument of this call. */ Expr getStarargs() { result = this.getAPositionalArg().(Starred).getValue() } - /** Gets a dictionary (**) argument of this class definition. */ + /** Gets a dictionary (**) argument of this call. */ Expr getKwargs() { result = this.getANamedArg().(DictUnpacking).getValue() } @@ -227,10 +227,18 @@ class Call extends Call_ { result = this.getKwargs().(Dict).getAKey().(StrConst).getText() } + /** Gets the positional argument count of this call, provided there is no more than one tuple (*) argument. */ int getPositionalArgumentCount() { count(this.getStarargs()) < 2 and - result = count(this.getAPositionalArg()) + result = count(Expr arg | arg = this.getAPositionalArg() and not arg instanceof Starred) } + + /** Gets the tuple (*) argument of this call, provided there is exactly one. 
*/ + Expr getStarArg() { + count(this.getStarargs()) < 2 and + result = getStarargs() + } + } /** A conditional expression such as, `body if test else orelse` */ diff --git a/python/ql/src/semmle/python/Flow.qll b/python/ql/src/semmle/python/Flow.qll index d2cb71fd547..5b8f3dabd67 100755 --- a/python/ql/src/semmle/python/Flow.qll +++ b/python/ql/src/semmle/python/Flow.qll @@ -481,6 +481,11 @@ class CallNode extends ControlFlowNode { ) } + ControlFlowNode getStarArg() { + result.getNode() = this.getNode().getStarArg() and + result.getBasicBlock().dominates(this.getBasicBlock()) + } + } /** A control flow corresponding to an attribute expression, such as `value.attr` */ diff --git a/python/ql/src/semmle/python/objects/Sequences.qll b/python/ql/src/semmle/python/objects/Sequences.qll index e274f6f40a2..691d4743686 100644 --- a/python/ql/src/semmle/python/objects/Sequences.qll +++ b/python/ql/src/semmle/python/objects/Sequences.qll @@ -41,7 +41,7 @@ abstract class TupleObjectInternal extends SequenceObjectInternal { private string contents(int n) { n < 4 and n = this.length() and result = "" or - n = 4 and n < this.length() and result = "... " + (this.length()-4).toString() + " more" + n = 3 and this.length() > 3 and result = (this.length()-3).toString() + " more..." 
or result = this.getItem(n).toString() + ", " + this.contents(n+1) } @@ -165,7 +165,7 @@ class VarargsTupleObjectInternal extends TVarargsTuple, TupleObjectInternal { exists(CallNode call, PointsToContext context, int offset, int length | this = TVarargsTuple(call, context, offset, length) and n < length and - PointsToInternal::pointsTo(call.getArg(offset+n), context, result, _) + InterProceduralPointsTo::positional_argument_points_to(call, offset+n, context, result, _) ) } diff --git a/python/ql/src/semmle/python/pointsto/PointsTo.qll b/python/ql/src/semmle/python/pointsto/PointsTo.qll index a0e33021744..3112d9ee68e 100644 --- a/python/ql/src/semmle/python/pointsto/PointsTo.qll +++ b/python/ql/src/semmle/python/pointsto/PointsTo.qll @@ -844,11 +844,13 @@ module InterProceduralPointsTo { private predicate normal_parameter_points_to(ParameterDefinition def, PointsToContext context, ObjectInternal value, ControlFlowNode origin) { exists(PointsToContext caller, ControlFlowNode arg | PointsToInternal::pointsTo(arg, caller, value, origin) and - callsite_argument_transfer(arg, caller, def, context) + named_argument_transfer(arg, caller, def, context) ) or not def.isSelf() and not def.isVarargs() and not def.isKwargs() and context.isRuntime() and value = ObjectInternal::unknown() and origin = def.getDefiningNode() + or + positional_parameter_points_to(def, context, value, origin) } pragma [noinline] @@ -921,7 +923,7 @@ module InterProceduralPointsTo { exists(int parameter_offset | callsite_calls_function(call, caller, scope, callee, parameter_offset) and startOffset = scope.getPositionalParameterCount() - parameter_offset and - length = call.getNode().getPositionalArgumentCount() - startOffset and + length = positional_argument_count(call, caller) - startOffset and length > 0 ) } @@ -929,7 +931,7 @@ module InterProceduralPointsTo { predicate varargs_empty_tuple(Function scope, PointsToContext callee) { exists(CallNode call, PointsToContext caller, int parameter_offset | 
callsite_calls_function(call, caller, scope, callee, parameter_offset) and - scope.getPositionalParameterCount() - parameter_offset >= call.getNode().getPositionalArgumentCount() + scope.getPositionalParameterCount() - parameter_offset >= positional_argument_count(call, caller) ) } @@ -940,16 +942,39 @@ module InterProceduralPointsTo { p.isKwargs() and value = TUnknownInstance(ObjectInternal::builtin("dict")) } - /** Holds if the `(argument, caller)` pair matches up with `(param, callee)` pair across call. */ - cached predicate callsite_argument_transfer(ControlFlowNode argument, PointsToContext caller, ParameterDefinition param, PointsToContext callee) { + predicate positional_argument_points_to(CallNode call, int argument, PointsToContext caller, ObjectInternal value, ControlFlowNode origin) { + PointsToInternal::pointsTo(call.getArg(argument), caller, value, origin) + or + exists(SequenceObjectInternal arg, int pos | + pos = call.getNode().getPositionalArgumentCount() and + PointsToInternal::pointsTo(origin, caller, arg, _) and + value = arg.getItem(argument-pos) and + origin = call.getStarArg() + ) + } + + private int positional_argument_count(CallNode call, PointsToContext caller) { + result = call.getNode().getPositionalArgumentCount() and not exists(call.getStarArg()) and caller.appliesTo(call) + or + exists(SequenceObjectInternal arg, int pos | + pos = call.getNode().getPositionalArgumentCount() and + PointsToInternal::pointsTo(call.getStarArg(), caller, arg, _) and + result = pos + arg.length() + ) + } + + predicate positional_parameter_points_to(ParameterDefinition def, PointsToContext context, ObjectInternal value, ControlFlowNode origin) { + exists(CallNode call, int argument, PointsToContext caller, Function func, int offset | + positional_argument_points_to(call, argument, caller, value, origin) and + callsite_calls_function(call, caller, func, context, offset) and + def.getParameter() = func.getArg(argument+offset) + ) + } + + cached predicate 
named_argument_transfer(ControlFlowNode argument, PointsToContext caller, ParameterDefinition param, PointsToContext callee) { exists(CallNode call, Function func, int offset | callsite_calls_function(call, caller, func, callee, offset) | - exists(int n | - argument = call.getArg(n) and - param.getParameter() = func.getArg(n+offset) - ) - or exists(string name | argument = call.getArgByName(name) and param.getParameter() = func.getArgByName(name) diff --git a/python/ql/test/library-tests/PointsTo/new/Call.expected b/python/ql/test/library-tests/PointsTo/new/Call.expected index 70b024ae874..dbd95c9d356 100644 --- a/python/ql/test/library-tests/PointsTo/new/Call.expected +++ b/python/ql/test/library-tests/PointsTo/new/Call.expected @@ -19,6 +19,10 @@ | l_calls.py:37 | ControlFlowNode for f() | f | | l_calls.py:38 | ControlFlowNode for Attribute() | E.m | | l_calls.py:39 | ControlFlowNode for Attribute() | E.m | +| l_calls.py:42 | ControlFlowNode for f() | f | +| l_calls.py:51 | ControlFlowNode for g() | g | +| l_calls.py:52 | ControlFlowNode for Attribute() | F.m | +| l_calls.py:53 | ControlFlowNode for Attribute() | F.m | | q_super.py:4 | ControlFlowNode for Attribute() | object.__init__ | | q_super.py:12 | ControlFlowNode for Attribute() | Base2.__init__ | | q_super.py:22 | ControlFlowNode for Attribute() | Base1.meth | diff --git a/python/ql/test/library-tests/PointsTo/new/NameSpace.expected b/python/ql/test/library-tests/PointsTo/new/NameSpace.expected index bb6901f1213..bcf7d969dd7 100644 --- a/python/ql/test/library-tests/PointsTo/new/NameSpace.expected +++ b/python/ql/test/library-tests/PointsTo/new/NameSpace.expected @@ -113,14 +113,18 @@ | k_getsetattr.py:4 | Class C | meth1 | Function meth1 | | k_getsetattr.py:4 | Class C | meth2 | Function meth2 | | l_calls.py:0 | Module code.l_calls | E | class E | +| l_calls.py:0 | Module code.l_calls | F | class F | | l_calls.py:0 | Module code.l_calls | Owner | class Owner | | l_calls.py:0 | Module code.l_calls | bar | 
Function bar | | l_calls.py:0 | Module code.l_calls | f | Function f | | l_calls.py:0 | Module code.l_calls | foo | Function foo | +| l_calls.py:0 | Module code.l_calls | g | Function g | +| l_calls.py:0 | Module code.l_calls | t | Tuple | | l_calls.py:12 | Class Owner | cm | classmethod() | | l_calls.py:12 | Class Owner | cm2 | classmethod() | | l_calls.py:12 | Class Owner | m | Function m | | l_calls.py:32 | Class E | m | Function m | +| l_calls.py:47 | Class F | m | Function m | | o_no_returns.py:0 | Module code.o_no_returns | bar | Function bar | | o_no_returns.py:0 | Module code.o_no_returns | fail | Function fail | | o_no_returns.py:0 | Module code.o_no_returns | foo | Function foo | diff --git a/python/ql/test/library-tests/PointsTo/new/PointsToNone.expected b/python/ql/test/library-tests/PointsTo/new/PointsToNone.expected index a72036844ab..b26108780af 100644 --- a/python/ql/test/library-tests/PointsTo/new/PointsToNone.expected +++ b/python/ql/test/library-tests/PointsTo/new/PointsToNone.expected @@ -75,6 +75,7 @@ | k_getsetattr.py:15 | ControlFlowNode for Attribute() | 6 | | l_calls.py:4 | ControlFlowNode for Attribute() | 4 | | l_calls.py:9 | ControlFlowNode for foo() | 4 | +| l_calls.py:48 | ControlFlowNode for None | 48 | | m_attributes.py:12 | ControlFlowNode for Attribute() | 8 | | m_attributes.py:13 | ControlFlowNode for Attribute() | 8 | | o_no_returns.py:7 | ControlFlowNode for fail() | 10 | diff --git a/python/ql/test/library-tests/PointsTo/new/PointsToWithContext.expected b/python/ql/test/library-tests/PointsTo/new/PointsToWithContext.expected index f69685edcde..0dec2c5da97 100755 --- a/python/ql/test/library-tests/PointsTo/new/PointsToWithContext.expected +++ b/python/ql/test/library-tests/PointsTo/new/PointsToWithContext.expected @@ -602,6 +602,7 @@ WARNING: Predicate points_to has been deprecated and may be removed in future (P | l_calls.py:29 | ControlFlowNode for args | args | builtin-class tuple | 29 | runtime | | l_calls.py:29 | 
ControlFlowNode for f | Function f | builtin-class function | 29 | import | | l_calls.py:30 | ControlFlowNode for args | args | builtin-class tuple | 29 | code/l_calls.py:37 from import | +| l_calls.py:30 | ControlFlowNode for args | args | builtin-class tuple | 29 | code/l_calls.py:42 from import | | l_calls.py:30 | ControlFlowNode for args | args | builtin-class tuple | 29 | runtime | | l_calls.py:32 | ControlFlowNode for ClassExpr | class E | builtin-class type | 32 | import | | l_calls.py:32 | ControlFlowNode for E | class E | builtin-class type | 32 | import | @@ -635,6 +636,37 @@ WARNING: Predicate points_to has been deprecated and may be removed in future (P | l_calls.py:39 | ControlFlowNode for IntegerLiteral | int 3 | builtin-class int | 39 | import | | l_calls.py:39 | ControlFlowNode for IntegerLiteral | int 4 | builtin-class int | 39 | import | | l_calls.py:39 | ControlFlowNode for IntegerLiteral | int 5 | builtin-class int | 39 | import | +| l_calls.py:41 | ControlFlowNode for Str | 'a' | builtin-class str | 41 | import | +| l_calls.py:41 | ControlFlowNode for Str | 'b' | builtin-class str | 41 | import | +| l_calls.py:41 | ControlFlowNode for Str | 'c' | builtin-class str | 41 | import | +| l_calls.py:41 | ControlFlowNode for Tuple | Tuple | builtin-class tuple | 41 | import | +| l_calls.py:41 | ControlFlowNode for t | Tuple | builtin-class tuple | 41 | import | +| l_calls.py:42 | ControlFlowNode for f | Function f | builtin-class function | 29 | import | +| l_calls.py:42 | ControlFlowNode for f() | args | builtin-class tuple | 29 | import | +| l_calls.py:42 | ControlFlowNode for t | Tuple | builtin-class tuple | 41 | import | +| l_calls.py:44 | ControlFlowNode for FunctionExpr | Function g | builtin-class function | 44 | import | +| l_calls.py:44 | ControlFlowNode for g | Function g | builtin-class function | 44 | import | +| l_calls.py:45 | ControlFlowNode for a | 'a' | builtin-class str | 51 | code/l_calls.py:51 from import | +| l_calls.py:47 | 
ControlFlowNode for ClassExpr | class F | builtin-class type | 47 | import | +| l_calls.py:47 | ControlFlowNode for F | class F | builtin-class type | 47 | import | +| l_calls.py:47 | ControlFlowNode for object | builtin-class object | builtin-class type | 47 | import | +| l_calls.py:48 | ControlFlowNode for FunctionExpr | Function m | builtin-class function | 48 | import | +| l_calls.py:48 | ControlFlowNode for None | NoneType None | builtin-class NoneType | 48 | import | +| l_calls.py:48 | ControlFlowNode for m | Function m | builtin-class function | 48 | import | +| l_calls.py:49 | ControlFlowNode for x | 'a' | builtin-class str | 52 | code/l_calls.py:52 from import | +| l_calls.py:49 | ControlFlowNode for x | 'b' | builtin-class str | 53 | code/l_calls.py:53 from import | +| l_calls.py:51 | ControlFlowNode for g | Function g | builtin-class function | 44 | import | +| l_calls.py:51 | ControlFlowNode for g() | 'a' | builtin-class str | 51 | import | +| l_calls.py:51 | ControlFlowNode for t | Tuple | builtin-class tuple | 41 | import | +| l_calls.py:52 | ControlFlowNode for Attribute | Attribute | builtin-class method | 52 | import | +| l_calls.py:52 | ControlFlowNode for Attribute() | 'a' | builtin-class str | 52 | import | +| l_calls.py:52 | ControlFlowNode for F | class F | builtin-class type | 47 | import | +| l_calls.py:52 | ControlFlowNode for F() | F() | class F | 52 | import | +| l_calls.py:52 | ControlFlowNode for t | Tuple | builtin-class tuple | 41 | import | +| l_calls.py:53 | ControlFlowNode for Attribute | Function m | builtin-class function | 48 | import | +| l_calls.py:53 | ControlFlowNode for Attribute() | 'b' | builtin-class str | 53 | import | +| l_calls.py:53 | ControlFlowNode for F | class F | builtin-class type | 47 | import | +| l_calls.py:53 | ControlFlowNode for t | Tuple | builtin-class tuple | 41 | import | | m_attributes.py:3 | ControlFlowNode for C | class C | builtin-class type | 3 | import | | m_attributes.py:3 | ControlFlowNode for 
ClassExpr | class C | builtin-class type | 3 | import | | m_attributes.py:3 | ControlFlowNode for object | builtin-class object | builtin-class type | 3 | import | diff --git a/python/ql/test/library-tests/PointsTo/new/PointsToWithType.expected b/python/ql/test/library-tests/PointsTo/new/PointsToWithType.expected index 0700fe3cb48..2f681f63d03 100644 --- a/python/ql/test/library-tests/PointsTo/new/PointsToWithType.expected +++ b/python/ql/test/library-tests/PointsTo/new/PointsToWithType.expected @@ -708,6 +708,37 @@ WARNING: Predicate points_to has been deprecated and may be removed in future (P | l_calls.py:39 | ControlFlowNode for IntegerLiteral | int 3 | builtin-class int | 39 | | l_calls.py:39 | ControlFlowNode for IntegerLiteral | int 4 | builtin-class int | 39 | | l_calls.py:39 | ControlFlowNode for IntegerLiteral | int 5 | builtin-class int | 39 | +| l_calls.py:41 | ControlFlowNode for Str | 'a' | builtin-class str | 41 | +| l_calls.py:41 | ControlFlowNode for Str | 'b' | builtin-class str | 41 | +| l_calls.py:41 | ControlFlowNode for Str | 'c' | builtin-class str | 41 | +| l_calls.py:41 | ControlFlowNode for Tuple | Tuple | builtin-class tuple | 41 | +| l_calls.py:41 | ControlFlowNode for t | Tuple | builtin-class tuple | 41 | +| l_calls.py:42 | ControlFlowNode for f | Function f | builtin-class function | 29 | +| l_calls.py:42 | ControlFlowNode for f() | args | builtin-class tuple | 29 | +| l_calls.py:42 | ControlFlowNode for t | Tuple | builtin-class tuple | 41 | +| l_calls.py:44 | ControlFlowNode for FunctionExpr | Function g | builtin-class function | 44 | +| l_calls.py:44 | ControlFlowNode for g | Function g | builtin-class function | 44 | +| l_calls.py:45 | ControlFlowNode for a | 'a' | builtin-class str | 51 | +| l_calls.py:47 | ControlFlowNode for ClassExpr | class F | builtin-class type | 47 | +| l_calls.py:47 | ControlFlowNode for F | class F | builtin-class type | 47 | +| l_calls.py:47 | ControlFlowNode for object | builtin-class object | 
builtin-class type | 47 | +| l_calls.py:48 | ControlFlowNode for FunctionExpr | Function m | builtin-class function | 48 | +| l_calls.py:48 | ControlFlowNode for None | NoneType None | builtin-class NoneType | 48 | +| l_calls.py:48 | ControlFlowNode for m | Function m | builtin-class function | 48 | +| l_calls.py:49 | ControlFlowNode for x | 'a' | builtin-class str | 52 | +| l_calls.py:49 | ControlFlowNode for x | 'b' | builtin-class str | 53 | +| l_calls.py:51 | ControlFlowNode for g | Function g | builtin-class function | 44 | +| l_calls.py:51 | ControlFlowNode for g() | 'a' | builtin-class str | 51 | +| l_calls.py:51 | ControlFlowNode for t | Tuple | builtin-class tuple | 41 | +| l_calls.py:52 | ControlFlowNode for Attribute | Attribute | builtin-class method | 52 | +| l_calls.py:52 | ControlFlowNode for Attribute() | 'a' | builtin-class str | 52 | +| l_calls.py:52 | ControlFlowNode for F | class F | builtin-class type | 47 | +| l_calls.py:52 | ControlFlowNode for F() | F() | class F | 52 | +| l_calls.py:52 | ControlFlowNode for t | Tuple | builtin-class tuple | 41 | +| l_calls.py:53 | ControlFlowNode for Attribute | Function m | builtin-class function | 48 | +| l_calls.py:53 | ControlFlowNode for Attribute() | 'b' | builtin-class str | 53 | +| l_calls.py:53 | ControlFlowNode for F | class F | builtin-class type | 47 | +| l_calls.py:53 | ControlFlowNode for t | Tuple | builtin-class tuple | 41 | | s_scopes.py:4 | ControlFlowNode for True | bool True | builtin-class bool | 4 | | s_scopes.py:4 | ControlFlowNode for float | bool True | builtin-class bool | 4 | | s_scopes.py:7 | ControlFlowNode for C2 | class C2 | builtin-class type | 7 | diff --git a/python/ql/test/library-tests/PointsTo/new/Values.expected b/python/ql/test/library-tests/PointsTo/new/Values.expected index 7fe8471cf38..f58be65ea7a 100644 --- a/python/ql/test/library-tests/PointsTo/new/Values.expected +++ b/python/ql/test/library-tests/PointsTo/new/Values.expected @@ -468,6 +468,7 @@ | l_calls.py:25 
| ControlFlowNode for a | runtime | class Owner | builtin-class type | | l_calls.py:29 | ControlFlowNode for FunctionExpr | import | Function f | builtin-class function | | l_calls.py:30 | ControlFlowNode for args | code/l_calls.py:37 from import | (int 1, int 2, int 3, ) | builtin-class tuple | +| l_calls.py:30 | ControlFlowNode for args | code/l_calls.py:42 from import | ('a', 'b', 'c', ) | builtin-class tuple | | l_calls.py:30 | ControlFlowNode for args | runtime | instance of tuple | builtin-class tuple | | l_calls.py:32 | ControlFlowNode for ClassExpr | import | class E | builtin-class type | | l_calls.py:32 | ControlFlowNode for object | import | builtin-class object | builtin-class type | @@ -498,6 +499,33 @@ | l_calls.py:39 | ControlFlowNode for IntegerLiteral | import | int 3 | builtin-class int | | l_calls.py:39 | ControlFlowNode for IntegerLiteral | import | int 4 | builtin-class int | | l_calls.py:39 | ControlFlowNode for IntegerLiteral | import | int 5 | builtin-class int | +| l_calls.py:41 | ControlFlowNode for Str | import | 'a' | builtin-class str | +| l_calls.py:41 | ControlFlowNode for Str | import | 'b' | builtin-class str | +| l_calls.py:41 | ControlFlowNode for Str | import | 'c' | builtin-class str | +| l_calls.py:41 | ControlFlowNode for Tuple | import | ('a', 'b', 'c', ) | builtin-class tuple | +| l_calls.py:42 | ControlFlowNode for f | import | Function f | builtin-class function | +| l_calls.py:42 | ControlFlowNode for f() | import | ('a', 'b', 'c', ) | builtin-class tuple | +| l_calls.py:42 | ControlFlowNode for t | import | ('a', 'b', 'c', ) | builtin-class tuple | +| l_calls.py:44 | ControlFlowNode for FunctionExpr | import | Function g | builtin-class function | +| l_calls.py:45 | ControlFlowNode for a | code/l_calls.py:51 from import | 'a' | builtin-class str | +| l_calls.py:47 | ControlFlowNode for ClassExpr | import | class F | builtin-class type | +| l_calls.py:47 | ControlFlowNode for object | import | builtin-class object | 
builtin-class type | +| l_calls.py:48 | ControlFlowNode for FunctionExpr | import | Function F.m | builtin-class function | +| l_calls.py:48 | ControlFlowNode for None | import | None | builtin-class NoneType | +| l_calls.py:49 | ControlFlowNode for x | code/l_calls.py:52 from import | 'a' | builtin-class str | +| l_calls.py:49 | ControlFlowNode for x | code/l_calls.py:53 from import | 'b' | builtin-class str | +| l_calls.py:51 | ControlFlowNode for g | import | Function g | builtin-class function | +| l_calls.py:51 | ControlFlowNode for g() | import | 'a' | builtin-class str | +| l_calls.py:51 | ControlFlowNode for t | import | ('a', 'b', 'c', ) | builtin-class tuple | +| l_calls.py:52 | ControlFlowNode for Attribute | import | Method(Function F.m, F()) | builtin-class method | +| l_calls.py:52 | ControlFlowNode for Attribute() | import | 'a' | builtin-class str | +| l_calls.py:52 | ControlFlowNode for F | import | class F | builtin-class type | +| l_calls.py:52 | ControlFlowNode for F() | import | F() | class F | +| l_calls.py:52 | ControlFlowNode for t | import | ('a', 'b', 'c', ) | builtin-class tuple | +| l_calls.py:53 | ControlFlowNode for Attribute | import | Function F.m | builtin-class function | +| l_calls.py:53 | ControlFlowNode for Attribute() | import | 'b' | builtin-class str | +| l_calls.py:53 | ControlFlowNode for F | import | class F | builtin-class type | +| l_calls.py:53 | ControlFlowNode for t | import | ('a', 'b', 'c', ) | builtin-class tuple | | m_attributes.py:3 | ControlFlowNode for ClassExpr | import | class C | builtin-class type | | m_attributes.py:3 | ControlFlowNode for object | import | builtin-class object | builtin-class type | | m_attributes.py:5 | ControlFlowNode for FunctionExpr | import | Function C.__init__ | builtin-class function | diff --git a/python/ql/test/library-tests/PointsTo/new/code/l_calls.py b/python/ql/test/library-tests/PointsTo/new/code/l_calls.py index 7d5b9e348a6..2e866cfe89d 100644 --- 
a/python/ql/test/library-tests/PointsTo/new/code/l_calls.py +++ b/python/ql/test/library-tests/PointsTo/new/code/l_calls.py @@ -37,3 +37,18 @@ class E(object): f(1, 2, 3) E().m(2, 3, 4) E.m(3, 4, 5) + +t = 'a', 'b', 'c' +f(*t) + +def g(a, b, c): + return a + +class F(object): + def m(self, x, y, z=None): + return x + +g(*t) +F().m(*t) +F.m(*t) + From 53ddfce3599dce5658110dd3449d4b1ca5b5770a Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 4 Jun 2019 11:43:30 +0100 Subject: [PATCH 3/5] Python: Clarify and document points-to and object model for calls involving starargs. --- python/ql/src/semmle/python/Flow.qll | 1 + .../semmle/python/objects/ObjectInternal.qll | 4 ++ .../src/semmle/python/objects/Sequences.qll | 4 +- .../ql/src/semmle/python/objects/TObject.qll | 2 +- .../src/semmle/python/pointsto/PointsTo.qll | 39 +++++++++++++------ 5 files changed, 37 insertions(+), 13 deletions(-) diff --git a/python/ql/src/semmle/python/Flow.qll b/python/ql/src/semmle/python/Flow.qll index 5b8f3dabd67..8f6d34cfed3 100755 --- a/python/ql/src/semmle/python/Flow.qll +++ b/python/ql/src/semmle/python/Flow.qll @@ -481,6 +481,7 @@ class CallNode extends ControlFlowNode { ) } + /** Gets the tuple (*) argument of this call, provided there is exactly one. 
*/ ControlFlowNode getStarArg() { result.getNode() = this.getNode().getStarArg() and result.getBasicBlock().dominates(this.getBasicBlock()) diff --git a/python/ql/src/semmle/python/objects/ObjectInternal.qll b/python/ql/src/semmle/python/objects/ObjectInternal.qll index 973d7dca998..a59b57b70fd 100644 --- a/python/ql/src/semmle/python/objects/ObjectInternal.qll +++ b/python/ql/src/semmle/python/objects/ObjectInternal.qll @@ -481,6 +481,10 @@ module ObjectInternal { result = TBuiltinClassObject(Builtin::special("ClassType")) } + ObjectInternal emptyTuple() { + result.(BuiltinTupleObjectInternal).length() = 0 + } + } /** Helper for boolean predicates returning both `true` and `false` */ diff --git a/python/ql/src/semmle/python/objects/Sequences.qll b/python/ql/src/semmle/python/objects/Sequences.qll index 691d4743686..93e2dc2d2fe 100644 --- a/python/ql/src/semmle/python/objects/Sequences.qll +++ b/python/ql/src/semmle/python/objects/Sequences.qll @@ -90,6 +90,7 @@ abstract class TupleObjectInternal extends SequenceObjectInternal { } +/** A tuple built-in to the interpreter, including the empty tuple. 
*/ class BuiltinTupleObjectInternal extends TBuiltinTuple, TupleObjectInternal { override predicate introducedAt(ControlFlowNode node, PointsToContext context) { @@ -116,7 +117,7 @@ class BuiltinTupleObjectInternal extends TBuiltinTuple, TupleObjectInternal { } } - +/** A tuple declared by a tuple expression in the Python source code */ class PythonTupleObjectInternal extends TPythonTuple, TupleObjectInternal { override predicate introducedAt(ControlFlowNode node, PointsToContext context) { @@ -147,6 +148,7 @@ class PythonTupleObjectInternal extends TPythonTuple, TupleObjectInternal { } +/** A tuple created by a `*` parameter */ class VarargsTupleObjectInternal extends TVarargsTuple, TupleObjectInternal { override predicate introducedAt(ControlFlowNode node, PointsToContext context) { diff --git a/python/ql/src/semmle/python/objects/TObject.qll b/python/ql/src/semmle/python/objects/TObject.qll index 3b2ef393716..5dbe08e8e7f 100644 --- a/python/ql/src/semmle/python/objects/TObject.qll +++ b/python/ql/src/semmle/python/objects/TObject.qll @@ -181,7 +181,7 @@ cached newtype TObject = or /* Varargs tuple */ TVarargsTuple(CallNode call, PointsToContext context, int offset, int length) { - InterProceduralPointsTo::varargs_tuple(call, _, context, _, offset, length) + InterProceduralPointsTo::varargs_tuple(call, context, _, _, offset, length) } or /* `type` */ diff --git a/python/ql/src/semmle/python/pointsto/PointsTo.qll b/python/ql/src/semmle/python/pointsto/PointsTo.qll index 3112d9ee68e..7b2aaf3f3d7 100644 --- a/python/ql/src/semmle/python/pointsto/PointsTo.qll +++ b/python/ql/src/semmle/python/pointsto/PointsTo.qll @@ -900,38 +900,46 @@ module InterProceduralPointsTo { /** Helper for parameter_points_to */ pragma [noinline] private predicate special_parameter_points_to(ParameterDefinition def, PointsToContext context, ObjectInternal value, ControlFlowNode origin) { + /* Runtime: Just an unknown tuple (or dict for `**` args) */ special_parameter_value(def, value) and 
context.isRuntime() and origin = def.getDefiningNode() or + /* A tuple constructed from positional arguments for a `*` parameter. */ exists(CallNode call, Function scope, PointsToContext caller, int offset, int length | - varargs_tuple(call, scope, caller, context, offset, length) and + varargs_tuple(call, caller, scope, context, offset, length) and value = TVarargsTuple(call, caller, offset, length) and def.getScope() = scope ) and origin = def.getDefiningNode() or + /* A `*` parameter with no surplus positional arguments; an empty tuple */ exists(Function scope | varargs_empty_tuple(scope, context) and - value.(BuiltinTupleObjectInternal).length() = 0 and + value = ObjectInternal::emptyTuple() and def.getScope() = scope ) and origin = def.getDefiningNode() } - predicate varargs_tuple(CallNode call, Function scope, PointsToContext caller, PointsToContext callee, int startOffset, int length) { + /** Holds if `call` in context `caller` calls into the function scope `func` in context `callee` and + * that the number of position arguments (including expansion of `*` argument) exceeds the number of positional arguments by + * `length` and that the excess arguments start at `start`. + */ + predicate varargs_tuple(CallNode call, PointsToContext caller, Function scope, PointsToContext callee, int start, int length) { exists(int parameter_offset | callsite_calls_function(call, caller, scope, callee, parameter_offset) and - startOffset = scope.getPositionalParameterCount() - parameter_offset and - length = positional_argument_count(call, caller) - startOffset and + start = scope.getPositionalParameterCount() - parameter_offset and + length = positional_argument_count(call, caller) - start and length > 0 ) } - predicate varargs_empty_tuple(Function scope, PointsToContext callee) { + /** Holds if for function scope `func` in context `callee` the `*` parameter will hold the empty tuple. 
*/ + predicate varargs_empty_tuple(Function func, PointsToContext callee) { exists(CallNode call, PointsToContext caller, int parameter_offset | - callsite_calls_function(call, caller, scope, callee, parameter_offset) and - scope.getPositionalParameterCount() - parameter_offset >= positional_argument_count(call, caller) + callsite_calls_function(call, caller, func, callee, parameter_offset) and + func.getPositionalParameterCount() - parameter_offset >= positional_argument_count(call, caller) ) } @@ -942,17 +950,21 @@ module InterProceduralPointsTo { p.isKwargs() and value = TUnknownInstance(ObjectInternal::builtin("dict")) } - predicate positional_argument_points_to(CallNode call, int argument, PointsToContext caller, ObjectInternal value, ControlFlowNode origin) { - PointsToInternal::pointsTo(call.getArg(argument), caller, value, origin) + /** Holds if the `n`th argument in call `call` with context `caller` points-to `value` from `origin`, including values in tuples + * expanded by a `*` argument. 
For example, for the call `f('a', *('x', 'y'))` the arguments are `('a', 'x', 'y')` + */ + predicate positional_argument_points_to(CallNode call, int n, PointsToContext caller, ObjectInternal value, ControlFlowNode origin) { + PointsToInternal::pointsTo(call.getArg(n), caller, value, origin) or exists(SequenceObjectInternal arg, int pos | pos = call.getNode().getPositionalArgumentCount() and PointsToInternal::pointsTo(origin, caller, arg, _) and - value = arg.getItem(argument-pos) and + value = arg.getItem(n-pos) and origin = call.getStarArg() ) } + /** Gets the number of positional arguments including values in tuples expanded by a `*` argument.*/ private int positional_argument_count(CallNode call, PointsToContext caller) { result = call.getNode().getPositionalArgumentCount() and not exists(call.getStarArg()) and caller.appliesTo(call) or @@ -963,6 +975,7 @@ module InterProceduralPointsTo { ) } + /** Holds if the parameter definition `def` points-to `value` from `origin` given the context `context` */ predicate positional_parameter_points_to(ParameterDefinition def, PointsToContext context, ObjectInternal value, ControlFlowNode origin) { exists(CallNode call, int argument, PointsToContext caller, Function func, int offset | positional_argument_points_to(call, argument, caller, value, origin) and @@ -971,6 +984,7 @@ module InterProceduralPointsTo { ) } + /** Holds if the named `argument` given the context `caller` is transferred to the parameter `param` with context `callee` by a call.
*/ cached predicate named_argument_transfer(ControlFlowNode argument, PointsToContext caller, ParameterDefinition param, PointsToContext callee) { exists(CallNode call, Function func, int offset | callsite_calls_function(call, caller, func, callee, offset) @@ -982,6 +996,9 @@ module InterProceduralPointsTo { ) } + /** Holds if the `call` with context `caller` calls the function `scope` in context `callee` + * and the offset from argument to parameter is `parameter_offset` + */ cached predicate callsite_calls_function(CallNode call, PointsToContext caller, Function scope, PointsToContext callee, int parameter_offset) { exists(ObjectInternal func | callWithContext(call, caller, func, callee) and From cec4d55b2ad5635af41341455cffddac790ec05c Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 5 Jun 2019 11:10:38 +0100 Subject: [PATCH 4/5] Python points-to: Make sure that vararg tuples are only assigned to vararg parameters. --- python/ql/src/semmle/python/pointsto/PointsTo.qll | 2 ++ .../library-tests/PointsTo/new/PointsToWithContext.expected | 2 -- .../test/library-tests/PointsTo/new/PointsToWithType.expected | 1 - python/ql/test/library-tests/PointsTo/new/Values.expected | 2 -- 4 files changed, 2 insertions(+), 5 deletions(-) diff --git a/python/ql/src/semmle/python/pointsto/PointsTo.qll b/python/ql/src/semmle/python/pointsto/PointsTo.qll index 7b2aaf3f3d7..235aaa0a0b6 100644 --- a/python/ql/src/semmle/python/pointsto/PointsTo.qll +++ b/python/ql/src/semmle/python/pointsto/PointsTo.qll @@ -906,6 +906,7 @@ module InterProceduralPointsTo { origin = def.getDefiningNode() or /* A tuple constructed from positional arguments for a `*` parameter. 
*/ + def.isVarargs() and exists(CallNode call, Function scope, PointsToContext caller, int offset, int length | varargs_tuple(call, caller, scope, context, offset, length) and value = TVarargsTuple(call, caller, offset, length) and @@ -914,6 +915,7 @@ module InterProceduralPointsTo { origin = def.getDefiningNode() or /* A `*` parameter with no surplus positional arguments; an empty tuple */ + def.isVarargs() and exists(Function scope | varargs_empty_tuple(scope, context) and value = ObjectInternal::emptyTuple() and diff --git a/python/ql/test/library-tests/PointsTo/new/PointsToWithContext.expected b/python/ql/test/library-tests/PointsTo/new/PointsToWithContext.expected index 0dec2c5da97..b7cced59a58 100755 --- a/python/ql/test/library-tests/PointsTo/new/PointsToWithContext.expected +++ b/python/ql/test/library-tests/PointsTo/new/PointsToWithContext.expected @@ -612,8 +612,6 @@ WARNING: Predicate points_to has been deprecated and may be removed in future (P | l_calls.py:33 | ControlFlowNode for m | Function m | builtin-class function | 33 | import | | l_calls.py:34 | ControlFlowNode for self | E() | class E | 38 | code/l_calls.py:38 from import | | l_calls.py:34 | ControlFlowNode for self | int 3 | builtin-class int | 39 | code/l_calls.py:39 from import | -| l_calls.py:34 | ControlFlowNode for self | self | builtin-class tuple | 33 | code/l_calls.py:38 from import | -| l_calls.py:34 | ControlFlowNode for self | self | builtin-class tuple | 33 | code/l_calls.py:39 from import | | l_calls.py:34 | ControlFlowNode for self | self | class E | 33 | runtime | | l_calls.py:35 | ControlFlowNode for args | args | builtin-class tuple | 33 | code/l_calls.py:38 from import | | l_calls.py:35 | ControlFlowNode for args | args | builtin-class tuple | 33 | code/l_calls.py:39 from import | diff --git a/python/ql/test/library-tests/PointsTo/new/PointsToWithType.expected b/python/ql/test/library-tests/PointsTo/new/PointsToWithType.expected index 2f681f63d03..131957da9fb 100644 --- 
a/python/ql/test/library-tests/PointsTo/new/PointsToWithType.expected +++ b/python/ql/test/library-tests/PointsTo/new/PointsToWithType.expected @@ -687,7 +687,6 @@ WARNING: Predicate points_to has been deprecated and may be removed in future (P | l_calls.py:33 | ControlFlowNode for m | Function m | builtin-class function | 33 | | l_calls.py:34 | ControlFlowNode for self | E() | class E | 38 | | l_calls.py:34 | ControlFlowNode for self | int 3 | builtin-class int | 39 | -| l_calls.py:34 | ControlFlowNode for self | self | builtin-class tuple | 33 | | l_calls.py:34 | ControlFlowNode for self | self | class E | 33 | | l_calls.py:35 | ControlFlowNode for args | args | builtin-class tuple | 33 | | l_calls.py:37 | ControlFlowNode for IntegerLiteral | int 1 | builtin-class int | 37 | diff --git a/python/ql/test/library-tests/PointsTo/new/Values.expected b/python/ql/test/library-tests/PointsTo/new/Values.expected index f58be65ea7a..d89f6a4fdef 100644 --- a/python/ql/test/library-tests/PointsTo/new/Values.expected +++ b/python/ql/test/library-tests/PointsTo/new/Values.expected @@ -473,9 +473,7 @@ | l_calls.py:32 | ControlFlowNode for ClassExpr | import | class E | builtin-class type | | l_calls.py:32 | ControlFlowNode for object | import | builtin-class object | builtin-class type | | l_calls.py:33 | ControlFlowNode for FunctionExpr | import | Function E.m | builtin-class function | -| l_calls.py:34 | ControlFlowNode for self | code/l_calls.py:38 from import | (int 2, int 3, int 4, ) | builtin-class tuple | | l_calls.py:34 | ControlFlowNode for self | code/l_calls.py:38 from import | E() | class E | -| l_calls.py:34 | ControlFlowNode for self | code/l_calls.py:39 from import | (int 4, int 5, ) | builtin-class tuple | | l_calls.py:34 | ControlFlowNode for self | code/l_calls.py:39 from import | int 3 | builtin-class int | | l_calls.py:34 | ControlFlowNode for self | runtime | self instance of E | class E | | l_calls.py:35 | ControlFlowNode for args | code/l_calls.py:38 from 
import | (int 2, int 3, int 4, ) | builtin-class tuple | From 03d296a6176b9f9d10e218b4c66567ed40f7dfcd Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 5 Jun 2019 17:54:09 +0100 Subject: [PATCH 5/5] Fix typo in qldoc. --- python/ql/src/semmle/python/pointsto/PointsTo.qll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/ql/src/semmle/python/pointsto/PointsTo.qll b/python/ql/src/semmle/python/pointsto/PointsTo.qll index 235aaa0a0b6..771ada01956 100644 --- a/python/ql/src/semmle/python/pointsto/PointsTo.qll +++ b/python/ql/src/semmle/python/pointsto/PointsTo.qll @@ -924,7 +924,7 @@ module InterProceduralPointsTo { origin = def.getDefiningNode() } - /** Holds if `call` in context `caller` calls into the function scope `func` in context `callee` and + /** Holds if `call` in context `caller` calls into the function scope `scope` in context `callee` and * that the number of position arguments (including expansion of `*` argument) exceeds the number of positional arguments by * `length` and that the excess arguments start at `start`. */