Merge branch 'master' into python-modernise-statements

2025-12-19 10:23:15 +01:00 · 2020-03-10 14:53:44 +01:00
parent 2382b42bbe dd0ce1c607
commit b1d1974a0f
575 changed files with 18497 additions and 6785 deletions
--- a/python/ql/src/Classes/WrongNameForArgumentInClassInstantiation.ql
+++ b/python/ql/src/Classes/WrongNameForArgumentInClassInstantiation.ql
@@ -20,8 +20,8 @@ import Expressions.CallArgs

 from Call call, ClassObject cls, string name, FunctionObject init
 where
-    illegally_named_parameter(call, cls, name)
-    and init = get_function_or_initializer(cls)
+    illegally_named_parameter_objectapi(call, cls, name)
+    and init = get_function_or_initializer_objectapi(cls)
 select
    call, "Keyword argument '" + name + "' is not a supported parameter name of $@.", init, init.getQualifiedName()

--- a/python/ql/src/Classes/WrongNumberArgumentsInClassInstantiation.ql
+++ b/python/ql/src/Classes/WrongNumberArgumentsInClassInstantiation.ql
@@ -18,8 +18,8 @@ import Expressions.CallArgs
 from Call call, ClassObject cls, string too, string should, int limit, FunctionObject init
 where
 (
-    too_many_args(call, cls, limit) and too = "too many arguments" and should = "no more than "
+    too_many_args_objectapi(call, cls, limit) and too = "too many arguments" and should = "no more than "
    or
-    too_few_args(call, cls, limit) and too = "too few arguments" and should = "no fewer than "
-) and init = get_function_or_initializer(cls)
+    too_few_args_objectapi(call, cls, limit) and too = "too few arguments" and should = "no fewer than "
+) and init = get_function_or_initializer_objectapi(cls)
 select call, "Call to $@ with " + too + "; should be " + should + limit.toString() + ".", init, init.getQualifiedName()
--- a/python/ql/src/Exceptions/CatchingBaseException.ql
+++ b/python/ql/src/Exceptions/CatchingBaseException.ql
@@ -19,7 +19,7 @@ predicate doesnt_reraise(ExceptStmt ex) {
 }

 predicate catches_base_exception(ExceptStmt ex) {
-     ex.getType().refersTo(theBaseExceptionType())
+     ex.getType().pointsTo(ClassValue::baseException())
     or
     not exists(ex.getType())
 }
--- a/python/ql/src/Exceptions/EmptyExcept.ql
+++ b/python/ql/src/Exceptions/EmptyExcept.ql
@@ -31,7 +31,7 @@ predicate no_comment(ExceptStmt ex) {
 }

 predicate non_local_control_flow(ExceptStmt ex) {
-    ex.getType().refersTo(theStopIterationType())
+    ex.getType().pointsTo(ClassValue::stopIteration())
 }

 predicate try_has_normal_exit(Try try) {
@@ -64,32 +64,29 @@ predicate subscript(Stmt s) {
    s.(Delete).getATarget() instanceof Subscript
 }

-predicate encode_decode(Expr ex, ClassObject type) {
+predicate encode_decode(Call ex, ClassValue type) {
    exists(string name |
-        ex.(Call).getFunc().(Attribute).getName() = name |
-        name = "encode" and type = Object::builtin("UnicodeEncodeError")
+        ex.getFunc().(Attribute).getName() = name |
+        name = "encode" and type = ClassValue::unicodeEncodeError()
        or
-        name = "decode" and type = Object::builtin("UnicodeDecodeError")
+        name = "decode" and type = ClassValue::unicodeDecodeError()
    )
 }

-predicate small_handler(ExceptStmt ex, Stmt s, ClassObject type) {
+predicate small_handler(ExceptStmt ex, Stmt s, ClassValue type) {
    not exists(ex.getTry().getStmt(1)) and
    s = ex.getTry().getStmt(0) and
-    ex.getType().refersTo(type)
+    ex.getType().pointsTo(type)
 }

-/** Holds if this exception handler is sufficiently small in scope to not need a comment
- * as to what it is doing.
- */
 predicate focussed_handler(ExceptStmt ex) {
-    exists(Stmt s, ClassObject type |
+    exists(Stmt s, ClassValue type |
        small_handler(ex, s, type) |
-        subscript(s) and type.getAnImproperSuperType() = theLookupErrorType()
+        subscript(s) and type.getASuperType() = ClassValue::lookupError()
        or
-        attribute_access(s) and type = theAttributeErrorType()
+        attribute_access(s) and type = ClassValue::attributeError()
        or
-        s.(ExprStmt).getValue() instanceof Name and type = theNameErrorType()
+        s.(ExprStmt).getValue() instanceof Name and type = ClassValue::nameError()
        or
        encode_decode(s.(ExprStmt).getValue(), type)
    )
--- a/python/ql/src/Exceptions/IllegalRaise.ql
+++ b/python/ql/src/Exceptions/IllegalRaise.ql
@@ -15,7 +15,7 @@ import python
 import Raising
 import Exceptions.NotImplemented

-from Raise r, ClassObject t
-where type_or_typeof(r, t, _) and not t.isLegalExceptionType() and not t.failedInference() and not use_of_not_implemented_in_raise(r, _)
+from Raise r, ClassValue t
+where type_or_typeof(r, t, _) and not t.isLegalExceptionType() and not t.failedInference(_) and not use_of_not_implemented_in_raise(r, _)
 select r, "Illegal class '" + t.getName() + "' raised; will result in a TypeError being raised instead."

--- a/python/ql/src/Exceptions/NotImplemented.qll
+++ b/python/ql/src/Exceptions/NotImplemented.qll
@@ -3,7 +3,7 @@ import python

 /** Holds if `notimpl` refers to `NotImplemented` or `NotImplemented()` in the `raise` statement */
 predicate use_of_not_implemented_in_raise(Raise raise, Expr notimpl) {
-    notimpl.refersTo(Object::notImplemented()) and
+    notimpl.pointsTo(Value::named("NotImplemented")) and
    (
        notimpl = raise.getException() or
        notimpl = raise.getException().(Call).getFunc()
--- a/python/ql/src/Exceptions/Raising.qll
+++ b/python/ql/src/Exceptions/Raising.qll
@@ -1,14 +1,16 @@
 import python

 /** Whether the raise statement 'r' raises 'type' from origin 'orig' */ 
-predicate type_or_typeof(Raise r, ClassObject type, AstNode orig) {
+predicate type_or_typeof(Raise r, ClassValue type, AstNode orig) {
     exists(Expr exception |
        exception = r.getRaised() |
-        exception.refersTo(type, _, orig)
+        exception.pointsTo(type, orig)
        or
-        not exists(ClassObject exc_type | exception.refersTo(exc_type)) and
-        not type = theTypeType() and // First value is an unknown exception type
-        exception.refersTo(_, type, orig)
+        not exists(ClassValue exc_type | exception.pointsTo(exc_type)) and
+        not type = ClassValue::type() and // First value is an unknown exception type
+        exists(Value val | exception.pointsTo(val, orig) |
+          val.getClass() = type
+        )
    )

 }
--- a/python/ql/src/Exceptions/RaisingTuple.ql
+++ b/python/ql/src/Exceptions/RaisingTuple.ql
@@ -11,8 +11,9 @@

 import python

-from Raise r, AstNode origin
-where r.getException().refersTo(_, theTupleType(), origin) and
+from Raise r, Value v, AstNode origin
+where r.getException().pointsTo(v, origin) and
+v.getClass() = ClassValue::tuple() and
 major_version() = 2 /* Raising a tuple is a type error in Python 3, so is handled by the IllegalRaise query. */

 select r, "Raising $@ will result in the first element (recursively) being raised and all other elements being discarded.", origin, "a tuple"
--- a/python/ql/src/Expressions/CallArgs.qll
+++ b/python/ql/src/Expressions/CallArgs.qll
@@ -2,7 +2,7 @@ import python

 import Testing.Mox

-private int varargs_length(Call call) {
+private int varargs_length_objectapi(Call call) {
    not exists(call.getStarargs()) and result = 0
    or
    exists(TupleObject t |
@@ -13,67 +13,131 @@ private int varargs_length(Call call) {
    result = count(call.getStarargs().(List).getAnElt())
 }

+private int varargs_length(Call call) {
+    not exists(call.getStarargs()) and result = 0
+    or
+    exists(TupleValue t |
+        call.getStarargs().pointsTo(t) |
+        result = t.length()
+    )
+    or
+    result = count(call.getStarargs().(List).getAnElt())
+}
+
 /** Gets a keyword argument that is not a keyword-only parameter. */
-private Keyword not_keyword_only_arg(Call call, FunctionObject func) {
+private Keyword not_keyword_only_arg_objectapi(Call call, FunctionObject func) {
    func.getACall().getNode() = call and
    result = call.getAKeyword() and
    not func.getFunction().getAKeywordOnlyArg().getId() = result.getArg()
 }

+/** Gets a keyword argument that is not a keyword-only parameter. */
+private Keyword not_keyword_only_arg(Call call, FunctionValue func) {
+    func.getACall().getNode() = call and
+    result = call.getAKeyword() and
+    not func.getScope().getAKeywordOnlyArg().getId() = result.getArg()
+}
+
 /** Gets the count of arguments that are passed as positional parameters even if they
 *  are named in the call.
 *  This is the sum of the number of positional arguments, the number of elements in any explicit tuple passed as *arg
 *  plus the number of keyword arguments that do not match keyword-only arguments (if the function does not take **kwargs).
 */

-private int positional_arg_count_for_call(Call call, Object callable) {
-    call = get_a_call(callable).getNode() and
+private int positional_arg_count_for_call_objectapi(Call call, Object callable) {
+    call = get_a_call_objectapi(callable).getNode() and
    exists(int positional_keywords |
-      exists(FunctionObject func | func = get_function_or_initializer(callable) |
+      exists(FunctionObject func | func = get_function_or_initializer_objectapi(callable) |
          not func.getFunction().hasKwArg() and
-          positional_keywords = count(not_keyword_only_arg(call, func))
+          positional_keywords = count(not_keyword_only_arg_objectapi(call, func))
        or
          func.getFunction().hasKwArg() and positional_keywords = 0
      )
      |
-      result = count(call.getAnArg()) + varargs_length(call) + positional_keywords
+      result = count(call.getAnArg()) + varargs_length_objectapi(call) + positional_keywords
    )
 }

+/** Gets the count of arguments that are passed as positional parameters even if they
+ *  are named in the call.
+ *  This is the sum of the number of positional arguments, the number of elements in any explicit tuple passed as *arg
+ *  plus the number of keyword arguments that do not match keyword-only arguments (if the function does not take **kwargs).
+ */
+
+private int positional_arg_count_for_call(Call call, Value callable) {
+    call = get_a_call(callable).getNode() and
+    exists(int positional_keywords |
+      exists(FunctionValue func | func = get_function_or_initializer(callable) |
+          not func.getScope().hasKwArg() and
+          positional_keywords = count(not_keyword_only_arg(call, func))
+        or
+          func.getScope().hasKwArg() and positional_keywords = 0
+      )
+      |
+      result = count(call.getAnArg()) + varargs_length_objectapi(call) + positional_keywords
+    )
+}
+
+int arg_count_objectapi(Call call) {
+    result = count(call.getAnArg()) + varargs_length_objectapi(call) + count(call.getAKeyword())
+}
+
 int arg_count(Call call) {
    result = count(call.getAnArg()) + varargs_length(call) + count(call.getAKeyword())
 }

 /* Gets a call corresponding to the given class or function*/
-private ControlFlowNode get_a_call(Object callable) {
+private ControlFlowNode get_a_call_objectapi(Object callable) {
  result = callable.(ClassObject).getACall()
  or
  result = callable.(FunctionObject).getACall()
 }

+/* Gets a call corresponding to the given class or function*/
+private ControlFlowNode get_a_call(Value callable) {
+  result = callable.(ClassValue).getACall()
+  or
+  result = callable.(FunctionValue).getACall()
+}
+
 /* Gets the function object corresponding to the given class or function*/
-FunctionObject get_function_or_initializer(Object func_or_cls) {
+FunctionObject get_function_or_initializer_objectapi(Object func_or_cls) {
  result = func_or_cls.(FunctionObject)
  or
  result = func_or_cls.(ClassObject).declaredAttribute("__init__")
 }

+/* Gets the function object corresponding to the given class or function*/
+FunctionValue get_function_or_initializer(Value func_or_cls) {
+  result = func_or_cls.(FunctionValue)
+  or
+  result = func_or_cls.(ClassValue).declaredAttribute("__init__")
+}
+

 /**Whether there is an illegally named parameter called `name` in the `call` to `func` */
-predicate illegally_named_parameter(Call call, Object func, string name) {
+predicate illegally_named_parameter_objectapi(Call call, Object func, string name) {
    not func.isC() and
    name = call.getANamedArgumentName() and
+    call.getAFlowNode() = get_a_call_objectapi(func) and
+    not get_function_or_initializer_objectapi(func).isLegalArgumentName(name)
+}
+
+/**Whether there is an illegally named parameter called `name` in the `call` to `func` */
+predicate illegally_named_parameter(Call call, Value func, string name) {
+    not func.isBuiltin() and
+    name = call.getANamedArgumentName() and
    call.getAFlowNode() = get_a_call(func) and
    not get_function_or_initializer(func).isLegalArgumentName(name)
 }

 /**Whether there are too few arguments in the `call` to `callable` where `limit` is the lowest number of legal arguments */
-predicate too_few_args(Call call, Object callable, int limit) {
+predicate too_few_args_objectapi(Call call, Object callable, int limit) {
    // Exclude cases where an incorrect name is used as that is covered by 'Wrong name for an argument in a call'
-    not illegally_named_parameter(call, callable, _) and
+    not illegally_named_parameter_objectapi(call, callable, _) and
    not exists(call.getStarargs()) and not exists(call.getKwargs()) and
-    arg_count(call) < limit and
-    exists(FunctionObject func | func = get_function_or_initializer(callable) |
+    arg_count_objectapi(call) < limit and
+    exists(FunctionObject func | func = get_function_or_initializer_objectapi(callable) |
      call = func.getAFunctionCall().getNode() and limit = func.minParameters() and
      /* The combination of misuse of `mox.Mox().StubOutWithMock()`
       * and a bug in mox's implementation of methods results in having to
@@ -84,16 +148,37 @@ predicate too_few_args(Call call, Object callable, int limit) {
      call = func.getAMethodCall().getNode() and limit = func.minParameters() - 1
      or
      callable instanceof ClassObject and
+      call.getAFlowNode() = get_a_call_objectapi(callable) and limit = func.minParameters() - 1
+    )
+}
+
+/**Whether there are too few arguments in the `call` to `callable` where `limit` is the lowest number of legal arguments */
+predicate too_few_args(Call call, Value callable, int limit) {
+    // Exclude cases where an incorrect name is used as that is covered by 'Wrong name for an argument in a call'
+    not illegally_named_parameter(call, callable, _) and
+    not exists(call.getStarargs()) and not exists(call.getKwargs()) and
+    arg_count(call) < limit and
+    exists(FunctionValue func | func = get_function_or_initializer(callable) |
+      call = func.getACall().getNode() and limit = func.minParameters() and
+      /* The combination of misuse of `mox.Mox().StubOutWithMock()`
+       * and a bug in mox's implementation of methods results in having to
+       * pass 1 too few arguments to the mocked function.
+       */
+      not (useOfMoxInModule(call.getEnclosingModule()) and func.isNormalMethod())
+      or
+      call = func.getACall().getNode() and limit = func.minParameters() - 1
+      or
+      callable instanceof ClassValue and
      call.getAFlowNode() = get_a_call(callable) and limit = func.minParameters() - 1
    )
 }

 /**Whether there are too many arguments in the `call` to `func` where `limit` is the highest number of legal arguments */
-predicate too_many_args(Call call, Object callable, int limit) {
+predicate too_many_args_objectapi(Call call, Object callable, int limit) {
    // Exclude cases where an incorrect name is used as that is covered by 'Wrong name for an argument in a call'
-    not illegally_named_parameter(call, callable, _) and
+    not illegally_named_parameter_objectapi(call, callable, _) and
    exists(FunctionObject func | 
-      func = get_function_or_initializer(callable) and
+      func = get_function_or_initializer_objectapi(callable) and
      not func.getFunction().hasVarArg() and limit >= 0 
      |
        call = func.getAFunctionCall().getNode() and limit = func.maxParameters()
@@ -101,13 +186,38 @@ predicate too_many_args(Call call, Object callable, int limit) {
        call = func.getAMethodCall().getNode() and limit = func.maxParameters() - 1
      or
        callable instanceof ClassObject and
+        call.getAFlowNode() = get_a_call_objectapi(callable) and limit = func.maxParameters() - 1
+    ) and
+    positional_arg_count_for_call_objectapi(call, callable) > limit
+}
+
+/**Whether there are too many arguments in the `call` to `func` where `limit` is the highest number of legal arguments */
+predicate too_many_args(Call call, Value callable, int limit) {
+    // Exclude cases where an incorrect name is used as that is covered by 'Wrong name for an argument in a call'
+    not illegally_named_parameter(call, callable, _) and
+    exists(FunctionValue func | 
+      func = get_function_or_initializer(callable) and
+      not func.getScope().hasVarArg() and limit >= 0 
+      |
+        call = func.getACall().getNode() and limit = func.maxParameters()
+      or
+        call = func.getACall().getNode() and limit = func.maxParameters() - 1
+      or
+        callable instanceof ClassValue and
        call.getAFlowNode() = get_a_call(callable) and limit = func.maxParameters() - 1
    ) and
    positional_arg_count_for_call(call, callable) > limit
 }

 /** Holds if `call` has too many or too few arguments for `func` */
-predicate wrong_args(Call call, FunctionObject func, int limit, string too) {
+predicate wrong_args_objectapi(Call call, FunctionObject func, int limit, string too) {
+    too_few_args_objectapi(call, func, limit) and too = "too few"
+    or
+    too_many_args_objectapi(call, func, limit) and too = "too many"
+}
+
+/** Holds if `call` has too many or too few arguments for `func` */
+predicate wrong_args(Call call, FunctionValue func, int limit, string too) {
    too_few_args(call, func, limit) and too = "too few"
    or
    too_many_args(call, func, limit) and too = "too many"
@@ -117,14 +227,31 @@ predicate wrong_args(Call call, FunctionObject func, int limit, string too) {
 * Implies nothing about whether `call` could call `func`.
 */
 bindingset[call, func]
-predicate correct_args_if_called_as_method(Call call, FunctionObject func) {
+predicate correct_args_if_called_as_method_objectapi(Call call, FunctionObject func) {
+    arg_count_objectapi(call)+1 >= func.minParameters()
+    and
+    arg_count_objectapi(call) < func.maxParameters()
+}
+
+/** Holds if `call` has correct number of arguments for `func`.
+ * Implies nothing about whether `call` could call `func`.
+ */
+ bindingset[call, func]
+predicate correct_args_if_called_as_method(Call call, FunctionValue func) {
    arg_count(call)+1 >= func.minParameters()
    and
    arg_count(call) < func.maxParameters()
 }

 /** Holds if `call` is a call to `overriding`, which overrides `func`. */
-predicate overridden_call(FunctionObject func, FunctionObject overriding, Call call)  {
+predicate overridden_call_objectapi(FunctionObject func, FunctionObject overriding, Call call)  {
    overriding.overrides(func) and
    overriding.getACall().getNode() = call
 }
+
+/** Holds if `call` is a call to `overriding`, which overrides `func`. */
+predicate overridden_call(FunctionValue func, FunctionValue overriding, Call call)  {
+    overriding.overrides(func) and
+    overriding.getACall().getNode() = call
+}
+
--- a/python/ql/src/Expressions/CallToSuperWrongClass.ql
+++ b/python/ql/src/Expressions/CallToSuperWrongClass.ql
@@ -17,13 +17,13 @@ import python
 from CallNode call_to_super, string name
 where
 exists(GlobalVariable gv, ControlFlowNode cn |
-    call_to_super = theSuperType().getACall() and
+    call_to_super = ClassValue::super_().getACall() and
    gv.getId() = "super" and
    cn = call_to_super.getArg(0) and
    name = call_to_super.getScope().getScope().(Class).getName() and
-    exists(ClassObject other | 
-        cn.refersTo(other) and
-        not other.getPyClass().getName() = name
+    exists(ClassValue other | 
+        cn.pointsTo(other) and
+        not other.getScope().getName() = name
    )
 )
 select call_to_super.getNode(), "First argument to super() should be " + name + "."
--- a/python/ql/src/Expressions/Formatting/AdvancedFormatting.qll
+++ b/python/ql/src/Expressions/Formatting/AdvancedFormatting.qll
@@ -107,10 +107,10 @@ private predicate brace_pair(PossibleAdvancedFormatString fmt, int start, int en
 private predicate advanced_format_call(Call format_expr, PossibleAdvancedFormatString fmt, int args) {
    exists(CallNode call | 
        call = format_expr.getAFlowNode() |
-        call.getFunction().refersTo(Object::builtin("format")) and call.getArg(0).refersTo(_, fmt.getAFlowNode()) and
+        call.getFunction().pointsTo(Value::named("format")) and call.getArg(0).pointsTo(_, fmt.getAFlowNode()) and
        args = count(format_expr.getAnArg()) - 1
        or
-        call.getFunction().(AttrNode).getObject("format").refersTo(_, fmt.getAFlowNode()) and
+        call.getFunction().(AttrNode).getObject("format").pointsTo(_, fmt.getAFlowNode()) and
        args = count(format_expr.getAnArg())
    )
 }
@@ -139,4 +139,3 @@ class AdvancedFormattingCall extends Call {
    }

 }
-
--- a/python/ql/src/Expressions/HashedButNoHash.ql
+++ b/python/ql/src/Expressions/HashedButNoHash.ql
@@ -69,7 +69,7 @@ predicate is_unhashable(ControlFlowNode f, ClassValue cls, ControlFlowNode origi
 predicate typeerror_is_caught(ControlFlowNode f) {
    exists (Try try |
        try.getBody().contains(f.getNode()) and
-        try.getAHandler().getType().refersTo(theTypeErrorType()))
+        try.getAHandler().getType().pointsTo(ClassValue::typeError()))
 }

 from ControlFlowNode f, ClassValue c, ControlFlowNode origin
--- a/python/ql/src/Expressions/IsComparisons.qll
+++ b/python/ql/src/Expressions/IsComparisons.qll
@@ -28,14 +28,14 @@ predicate probablySingleton(ClassValue cls) {

 predicate invalid_to_use_is_portably(ClassValue c) {
    overrides_eq_or_cmp(c) and
-    /* Exclude type/builtin-function/bool as it is legitimate to compare them using 'is' but they implement __eq__ */
+    // Exclude type/builtin-function/bool as it is legitimate to compare them using 'is' but they implement __eq__
    not c = Value::named("type") and not c = ClassValue::builtinFunction() and not c = Value::named("bool") and
-    /* OK to compare with 'is' if a singleton */
+    // OK to compare with 'is' if a singleton
    not probablySingleton(c)
 }

 predicate simple_constant(ControlFlowNode f) {
-    exists(Object obj | f.refersTo(obj) |  obj = theTrueObject() or obj = theFalseObject() or obj = theNoneObject())
+    exists(Value val | f.pointsTo(val) |  val = Value::named("True") or val = Value::named("False") or val = Value::named("None"))
 }

 private predicate cpython_interned_value(Expr e) {
@@ -66,14 +66,14 @@ private predicate universally_interned_value(Expr e) {

 predicate cpython_interned_constant(Expr e) {
    exists(Expr const | 
-        e.refersTo(_, const) | 
+        e.pointsTo(_, const) | 
        cpython_interned_value(const)
    )
 }

 predicate universally_interned_constant(Expr e) {
    exists(Expr const | 
-        e.refersTo(_, const) | 
+        e.pointsTo(_, const) | 
        universally_interned_value(const)
    )
 }
@@ -95,7 +95,7 @@ private predicate comparison_one_type(Compare comp, Cmpop op, ClassValue cls) {
 }

 predicate invalid_portable_is_comparison(Compare comp, Cmpop op, ClassValue cls) {
-    /* OK to use 'is' when defining '__eq__' */
+    // OK to use 'is' when defining '__eq__'
    not exists(Function eq | eq.getName() = "__eq__" or eq.getName() = "__ne__" | eq = comp.getScope().getScope*())
    and
    (
@@ -107,24 +107,24 @@ predicate invalid_portable_is_comparison(Compare comp, Cmpop op, ClassValue cls)
        )
    )
    and
-    /* OK to use 'is' when comparing items from a known set of objects */
-    not exists(Expr left, Expr right, Object obj |
+    // OK to use 'is' when comparing items from a known set of objects
+    not exists(Expr left, Expr right, Value val |
        comp.compares(left, op, right) and
-        exists(ImmutableLiteral il | il.getLiteralObject() = obj) |
-        left.refersTo(obj) and right.refersTo(obj)
+        exists(ImmutableLiteral il | il.getLiteralValue() = val) |
+        left.pointsTo(val) and right.pointsTo(val)
        or
-        /* Simple constant in module, probably some sort of sentinel */
+        // Simple constant in module, probably some sort of sentinel
        exists(AstNode origin |
-            not left.refersTo(_) and right.refersTo(obj, origin) and
+            not left.pointsTo(_) and right.pointsTo(val, origin) and
            origin.getScope().getEnclosingModule() = comp.getScope().getEnclosingModule()
        )
    )
    and
-    /* OK to use 'is' when comparing with a member of an enum */
+    // OK to use 'is' when comparing with a member of an enum
    not exists(Expr left, Expr right, AstNode origin |
        comp.compares(left, op, right) and
        enum_member(origin) |
-        left.refersTo(_, origin) or right.refersTo(_, origin)
+        left.pointsTo(_, origin) or right.pointsTo(_, origin)
    )
 }

@@ -135,4 +135,3 @@ private predicate enum_member(AstNode obj) {
        asgn.getValue() = obj
    )
 }
-
--- a/python/ql/src/Expressions/TruncatedDivision.ql
+++ b/python/ql/src/Expressions/TruncatedDivision.ql
@@ -18,19 +18,20 @@ where
    // Only relevant for Python 2, as all later versions implement true division
    major_version() = 2
    and
-    exists(BinaryExprNode bin, Object lobj, Object robj |
+    exists(BinaryExprNode bin, Value lval, Value rval |
        bin = div.getAFlowNode()
        and bin.getNode().getOp() instanceof Div
-        and bin.getLeft().refersTo(lobj, theIntType(), left)
-        and bin.getRight().refersTo(robj, theIntType(), right)
+        and bin.getLeft().pointsTo(lval, left)
+        and lval.getClass() = ClassValue::int_()
+        and bin.getRight().pointsTo(rval, right)
+        and rval.getClass() = ClassValue::int_()
        // Ignore instances where integer division leaves no remainder
-        and not lobj.(NumericObject).intValue() % robj.(NumericObject).intValue() = 0
+        and not lval.(NumericValue).getIntValue() % rval.(NumericValue).getIntValue() = 0
        and not bin.getNode().getEnclosingModule().hasFromFuture("division")
        // Filter out results wrapped in `int(...)`
-        and not exists(CallNode c, ClassObject cls |
-            c.getAnArg() = bin
-            and c.getFunction().refersTo(cls)
-            and cls.getName() = "int"
+        and not exists(CallNode c |
+            c = ClassValue::int_().getACall()
+            and c.getAnArg() = bin
        )
    )
 select div, "Result of division may be truncated as its $@ and $@ arguments may both be integers.",
--- a/python/ql/src/Expressions/UnnecessaryLambda.ql
+++ b/python/ql/src/Expressions/UnnecessaryLambda.ql
@@ -38,14 +38,14 @@ predicate unnecessary_lambda(Lambda l, Expr e) {
    simple_wrapper(l, e) and 
    (
      /* plain class */
-      exists(ClassObject c | e.refersTo(c))
+      exists(ClassValue c | e.pointsTo(c))
      or
      /* plain function */
-      exists(FunctionObject f | e.refersTo(f))
+      exists(FunctionValue f | e.pointsTo(f))
      or
      /* bound-method of enclosing instance */
-      exists(ClassObject cls, Attribute a | 
-          cls.getPyClass() = l.getScope().getScope() and a = e |
+      exists(ClassValue cls, Attribute a | 
+          cls.getScope() = l.getScope().getScope() and a = e |
          ((Name)a.getObject()).getId() = "self" and
          cls.hasAttribute(a.getName())
      )
--- a/python/ql/src/Expressions/UseofApply.ql
+++ b/python/ql/src/Expressions/UseofApply.ql
@@ -10,8 +10,8 @@
 */

 import python
+private import semmle.python.types.Builtins

 from CallNode call, ControlFlowNode func
-where
-major_version() = 2 and call.getFunction() = func and func.refersTo(Object::builtin("apply"))
+where major_version() = 2 and call.getFunction() = func and func.pointsTo(Value::named("apply"))
 select call, "Call to the obsolete builtin function 'apply'."
--- a/python/ql/src/Expressions/UseofInput.qhelp
+++ b/python/ql/src/Expressions/UseofInput.qhelp
@@ -3,20 +3,20 @@
  "qhelp.dtd">
 <qhelp>
 <overview>
-<p>A call to the input() function, <code>input(prompt)</code> is equivalent to <code>eval(raw_input(prompt))</code>. Evaluating user input without any checking can be a serious security flaw.</p>
+<p>In Python 2, a call to the <code>input()</code> function, <code>input(prompt)</code> is equivalent to <code>eval(raw_input(prompt))</code>. Evaluating user input without any checking can be a serious security flaw.</p>

 </overview>
 <recommendation>

-<p> Get user input with <code>raw_input(prompt)</code> and then validate that input before evaluating. If the expected input is a number or
+<p>Get user input with <code>raw_input(prompt)</code> and then validate that input before evaluating. If the expected input is a number or
 string, then <code>ast.literal_eval()</code> can always be used safely.</p>


 </recommendation>
 <references>

-  <li>Python Standard Library: <a href="http://docs.python.org/library/functions.html#input">input</a>,
-  <a href="http://docs.python.org/library/ast.html#ast.literal_eval">ast.literal_eval</a>.</li>
+  <li>Python Standard Library: <a href="http://docs.python.org/2/library/functions.html#input">input</a>,
+  <a href="http://docs.python.org/2/library/ast.html#ast.literal_eval">ast.literal_eval</a>.</li>
  <li>Wikipedia: <a href="http://en.wikipedia.org/wiki/Data_validation">Data validation</a>.</li>

 </references>
--- a/python/ql/src/Expressions/UseofInput.ql
+++ b/python/ql/src/Expressions/UseofInput.ql
@@ -1,6 +1,6 @@
 /**
- * @name 'input' function used
- * @description The built-in function 'input' is used which can allow arbitrary code to be run.
+ * @name 'input' function used in Python 2
+ * @description The built-in function 'input' is used which, in Python 2, can allow arbitrary code to be run.
 * @kind problem
 * @tags security
 *       correctness
@@ -18,4 +18,4 @@ where
    call.getFunction() = func and
    func.pointsTo(context, Value::named("input"), _) and
    not func.pointsTo(context, Value::named("raw_input"), _)
-select call, "The unsafe built-in function 'input' is used."
+select call, "The unsafe built-in function 'input' is used in Python 2."
--- a/python/ql/src/Expressions/WrongNameForArgumentInCall.ql
+++ b/python/ql/src/Expressions/WrongNameForArgumentInCall.ql
@@ -19,7 +19,7 @@ import Expressions.CallArgs

 from Call call, FunctionObject func, string name
 where
-illegally_named_parameter(call, func, name) and
+illegally_named_parameter_objectapi(call, func, name) and
 not func.isAbstract() and
 not exists(FunctionObject overridden | func.overrides(overridden) and overridden.getFunction().getAnArg().(Name).getId() = name)
 select
--- a/python/ql/src/Expressions/WrongNumberArgumentsInCall.ql
+++ b/python/ql/src/Expressions/WrongNumberArgumentsInCall.ql
@@ -17,12 +17,12 @@ import CallArgs
 from Call call, FunctionObject func, string too, string should, int limit
 where
 (
-    too_many_args(call, func, limit) and too = "too many arguments" and should = "no more than "
+    too_many_args_objectapi(call, func, limit) and too = "too many arguments" and should = "no more than "
    or
-    too_few_args(call, func, limit) and too = "too few arguments" and should = "no fewer than "
+    too_few_args_objectapi(call, func, limit) and too = "too few arguments" and should = "no fewer than "
 ) and
 not func.isAbstract() and
-not exists(FunctionObject overridden | func.overrides(overridden) and correct_args_if_called_as_method(call, overridden))
+not exists(FunctionObject overridden | func.overrides(overridden) and correct_args_if_called_as_method_objectapi(call, overridden))
 /* The semantics of `__new__` can be a bit subtle, so we simply exclude `__new__` methods */
 and not func.getName() = "__new__"

--- a/python/ql/src/Functions/IncorrectlyOverriddenMethod.ql
+++ b/python/ql/src/Functions/IncorrectlyOverriddenMethod.ql
@@ -15,10 +15,10 @@ import Expressions.CallArgs
 from Call call, FunctionObject func, FunctionObject overridden, string problem
 where
 func.overrides(overridden) and (
-    wrong_args(call, func, _, problem) and correct_args_if_called_as_method(call, overridden)
+    wrong_args_objectapi(call, func, _, problem) and correct_args_if_called_as_method_objectapi(call, overridden)
    or
    exists(string name | 
-        illegally_named_parameter(call, func, name) and problem = "an argument named '" + name + "'" and
+        illegally_named_parameter_objectapi(call, func, name) and problem = "an argument named '" + name + "'" and
        overridden.getFunction().getAnArg().(Name).getId() = name
    )
 )
--- a/python/ql/src/Functions/IncorrectlySpecifiedOverriddenMethod.ql
+++ b/python/ql/src/Functions/IncorrectlySpecifiedOverriddenMethod.ql
@@ -18,11 +18,11 @@ where
 not func.getName() = "__init__" and
 overriding.overrides(func) and
 call = overriding.getAMethodCall().getNode() and
-correct_args_if_called_as_method(call, overriding) and
+correct_args_if_called_as_method_objectapi(call, overriding) and
 (
-    arg_count(call)+1 < func.minParameters() and problem = "too few arguments"
+    arg_count_objectapi(call)+1 < func.minParameters() and problem = "too few arguments"
    or
-    arg_count(call) >= func.maxParameters() and problem = "too many arguments"
+    arg_count_objectapi(call) >= func.maxParameters() and problem = "too many arguments"
    or
    exists(string name | call.getAKeyword().getArg() = name and 
        overriding.getFunction().getAnArg().(Name).getId() = name and
--- a/python/ql/src/Functions/UseImplicitNoneReturnValue.ql
+++ b/python/ql/src/Functions/UseImplicitNoneReturnValue.ql
@@ -23,12 +23,12 @@ predicate is_used(Call c) {
    )
 }

-from Call c, FunctionObject func
+from Call c, FunctionValue func
 where 
 /* Call result is used, but callee is a procedure */
-is_used(c) and c.getFunc().refersTo(func) and func.getFunction().isProcedure() and
+is_used(c) and c.getFunc().pointsTo(func) and func.getScope().isProcedure() and
 /* All callees are procedures */
-forall(FunctionObject callee | c.getFunc().refersTo(callee) | callee.getFunction().isProcedure()) and
+forall(FunctionValue callee | c.getFunc().pointsTo(callee) | callee.getScope().isProcedure()) and
 /* Mox return objects have an `AndReturn` method */
 not useOfMoxInModule(c.getEnclosingModule())
 select c, "The result of '$@' is used even though it is always None.", func, func.getQualifiedName()
--- a/python/ql/src/experimental/README.md
+++ b/python/ql/src/experimental/README.md
@@ -0,0 +1 @@
+This directory contains [experimental](../../../../docs/experimental.md) CodeQL queries and libraries.
--- a/python/ql/src/semmle/python/AstGenerated.qll
+++ b/python/ql/src/semmle/python/AstGenerated.qll
@@ -1,3 +1,8 @@
+/*
+ *      This library file is auto-generated by 'semmle/query_gen.py'.
+ *      WARNING: Any modifications to this file will be lost.
+ *      Relations can be changed by modifying master.py.
+ */
 import python

 library class Add_ extends @py_Add, Operator {
@@ -1781,6 +1786,37 @@ library class Slice_ extends @py_Slice, Expr {

 }

+library class SpecialOperation_ extends @py_SpecialOperation, Expr {
+
+
+    /** Gets the name of this special operation. */
+    string getName() {
+        py_strs(result, this, 2)
+    }
+
+
+    /** Gets the arguments of this special operation. */
+    ExprList getArguments() {
+        py_expr_lists(result, this, 3)
+    }
+
+
+    /** Gets the nth argument of this special operation. */
+    Expr getArgument(int index) {
+        result = this.getArguments().getItem(index)
+    }
+
+    /** Gets an argument of this special operation. */
+    Expr getAnArgument() {
+        result = this.getArguments().getAnItem()
+    }
+
+    override string toString() {
+        result = "SpecialOperation"
+    }
+
+}
+
 library class Starred_ extends @py_Starred, Expr {


--- a/python/ql/src/semmle/python/Flow.qll
+++ b/python/ql/src/semmle/python/Flow.qll
@@ -910,6 +910,10 @@ private AstNode assigned_value(Expr lhs) {

 predicate nested_sequence_assign(Expr left_parent, Expr right_parent,
        Expr left_result, Expr right_result) {
+    exists(Assign a |
+        a.getATarget().getASubExpression*() = left_parent and
+        a.getValue().getASubExpression*() = right_parent
+    ) and
    exists(int i, Expr left_elem, Expr right_elem
    |
        (
--- a/python/ql/src/semmle/python/dataflow/Implementation.qll
+++ b/python/ql/src/semmle/python/dataflow/Implementation.qll
@@ -726,6 +726,8 @@ private class EssaTaintTracking extends string {
    private TaintKind iterable_unpacking_descent(
        SequenceNode left_parent, ControlFlowNode left_defn, CollectionKind parent_kind
    ) {
+        //TODO: Fix the cartesian product in this predicate
+        none() and
        left_parent.getAnElement() = left_defn and
        // Handle `a, *b = some_iterable`
        if left_defn instanceof StarredNode
--- a/python/ql/src/semmle/python/objects/ObjectAPI.qll
+++ b/python/ql/src/semmle/python/objects/ObjectAPI.qll
@@ -532,6 +532,15 @@ class ClassValue extends Value {
        this.(ClassObjectInternal).getClassDeclaration().declaresAttribute(name)
    }

+    /** Whether this class is a legal exception class.
+     *  What constitutes a legal exception class differs between major versions */
+    predicate isLegalExceptionType() {
+        not this.isNewStyle() or
+        this.getASuperType() = ClassValue::baseException()
+        or
+        major_version() = 2 and this = ClassValue::tuple()
+    }
+
 }


@@ -555,6 +564,28 @@ abstract class FunctionValue extends CallableValue {
    predicate isOverriddenMethod() {
        exists(Value f | f.overrides(this))
    }
+
+    /** Whether `name` is a legal argument name for this function */
+    bindingset[name]
+    predicate isLegalArgumentName(string name) {
+        this.getScope().getAnArg().asName().getId() = name
+        or
+        this.getScope().getAKeywordOnlyArg().getId() = name
+        or
+        this.getScope().hasKwArg()
+    }
+
+    /** Whether this is a "normal" method, that is, it is exists as a class attribute
+     *  which is not a lambda and not the __new__ method. */
+    predicate isNormalMethod() {
+        exists(ClassValue cls, string name |
+            cls.declaredAttribute(name) = this and
+            name != "__new__" and
+            exists(Expr expr, AstNode origin | expr.pointsTo(this, origin) |
+              not origin instanceof Lambda
+            )
+        )
+    }
 }

 /** Class representing Python functions */
@@ -917,7 +948,7 @@ module ClassValue {

    /** Get the `ClassValue` for the `StopIteration` class. */
    ClassValue stopIteration() {
-        result = TBuiltinClassObject(Builtin::special("StopIteration"))
+        result = TBuiltinClassObject(Builtin::builtin("StopIteration"))
    }

    /** Get the `ClassValue` for the class of modules. */
@@ -960,6 +991,11 @@ module ClassValue {
        result = TBuiltinClassObject(Builtin::builtin("KeyError"))
    }

+    /** Get the `ClassValue` for the `LookupError` class. */
+    ClassValue lookupError() {
+        result = TBuiltinClassObject(Builtin::builtin("LookupError"))
+    }
+
    /** Get the `ClassValue` for the `IOError` class. */
    ClassValue ioError() {
        result = TBuiltinClassObject(Builtin::builtin("IOError"))
@@ -975,4 +1011,14 @@ module ClassValue {
        result = TBuiltinClassObject(Builtin::builtin("ImportError"))
    }

+    /** Get the `ClassValue` for the `UnicodeEncodeError` class. */
+    ClassValue unicodeEncodeError() {
+        result = TBuiltinClassObject(Builtin::builtin("UnicodeEncodeError"))
+    }
+
+    /** Get the `ClassValue` for the `UnicodeDecodeError` class. */
+    ClassValue unicodeDecodeError() {
+        result = TBuiltinClassObject(Builtin::builtin("UnicodeDecodeError"))
+    }
+
 }
--- a/python/ql/src/semmle/python/regex.qll
+++ b/python/ql/src/semmle/python/regex.qll
@@ -12,25 +12,27 @@ private predicate re_module_function(string name, int flags) {
    name = "subn" and flags = 4
 }

+/**
+ * Holds if `s` is used as a regex with the `re` module, with the regex-mode `mode` (if known).
+ * If regex mode is not known, `mode` will be `"None"`.
+ */
 predicate used_as_regex(Expr s, string mode) {
    (s instanceof Bytes or s instanceof Unicode)
    and
-    exists(ModuleValue re | re.getName() = "re" |
-        /* Call to re.xxx(regex, ... [mode]) */
-        exists(CallNode call, string name |
-            call.getArg(0).refersTo(_, _, s.getAFlowNode()) and
-            call.getFunction().pointsTo(re.attr(name)) |
-            mode = "None"
-            or
-            exists(Value obj |
-                mode = mode_from_mode_object(obj) |
-                exists(int flags_arg |
-                    re_module_function(name, flags_arg) and
-                    call.getArg(flags_arg).pointsTo(obj)
-                )
-                or
-                call.getArgByName("flags").pointsTo(obj)
+    /* Call to re.xxx(regex, ... [mode]) */
+    exists(CallNode call, string name |
+        call.getArg(0).refersTo(_, _, s.getAFlowNode()) and
+        call.getFunction().pointsTo(Module::named("re").attr(name)) |
+        mode = "None"
+        or
+        exists(Value obj |
+            mode = mode_from_mode_object(obj) |
+            exists(int flags_arg |
+                re_module_function(name, flags_arg) and
+                call.getArg(flags_arg).pointsTo(obj)
            )
+            or
+            call.getArgByName("flags").pointsTo(obj)
        )
    )
 }
--- a/python/ql/src/semmle/python/security/strings/External.qll
+++ b/python/ql/src/semmle/python/security/strings/External.qll
@@ -2,45 +2,40 @@ import python
 import Basic
 private import Common

-/** An extensible kind of taint representing an externally controlled string.
+/**
+ * An extensible kind of taint representing an externally controlled string.
 */
 abstract class ExternalStringKind extends StringKind {
-
    bindingset[this]
-    ExternalStringKind() {
-        this = this
-    }
+    ExternalStringKind() { this = this }

    override TaintKind getTaintForFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode) {
        result = StringKind.super.getTaintForFlowStep(fromnode, tonode)
        or
-        tonode.(SequenceNode).getElement(_) = fromnode and result.(ExternalStringSequenceKind).getItem() = this
+        tonode.(SequenceNode).getElement(_) = fromnode and
+        result.(ExternalStringSequenceKind).getItem() = this
        or
        json_load(fromnode, tonode) and result.(ExternalJsonKind).getValue() = this
        or
        tonode.(DictNode).getAValue() = fromnode and result.(ExternalStringDictKind).getValue() = this
+        or
+        urlsplit(fromnode, tonode) and result.(ExternalUrlSplitResult).getItem() = this
+        or
+        urlparse(fromnode, tonode) and result.(ExternalUrlParseResult).getItem() = this
    }
-
 }

 /** A kind of "taint", representing a sequence, with a "taint" member */
 class ExternalStringSequenceKind extends SequenceKind {
-
-    ExternalStringSequenceKind() {
-        this.getItem() instanceof ExternalStringKind
-    }
-
+    ExternalStringSequenceKind() { this.getItem() instanceof ExternalStringKind }
 }

-/** An hierachical dictionary or list where the entire structure is externally controlled
+/**
+ * An hierachical dictionary or list where the entire structure is externally controlled
 * This is typically a parsed JSON object.
 */
 class ExternalJsonKind extends TaintKind {
-
-    ExternalJsonKind() {
-        this = "json[" + any(ExternalStringKind key) + "]"
-    }
-
+    ExternalJsonKind() { this = "json[" + any(ExternalStringKind key) + "]" }

    /** Gets the taint kind for item in this sequence */
    TaintKind getValue() {
@@ -54,65 +49,225 @@ class ExternalJsonKind extends TaintKind {
        json_subscript_taint(tonode, fromnode, this, result)
        or
        result = this and copy_call(fromnode, tonode)
-     }
+    }

    override TaintKind getTaintOfMethodResult(string name) {
        name = "get" and result = this.getValue()
-     }
-
+    }
 }

 /** A kind of "taint", representing a dictionary mapping str->"taint" */
 class ExternalStringDictKind extends DictKind {
-
-    ExternalStringDictKind() {
-        this.getValue() instanceof ExternalStringKind
-    }
-
+    ExternalStringDictKind() { this.getValue() instanceof ExternalStringKind }
 }

-/** A kind of "taint", representing a dictionary mapping strings to sequences of
- *  tainted strings */
-
+/**
+ * A kind of "taint", representing a dictionary mapping strings to sequences of
+ *  tainted strings
+ */
 class ExternalStringSequenceDictKind extends DictKind {
-    ExternalStringSequenceDictKind() {
-       this.getValue() instanceof ExternalStringSequenceKind
+    ExternalStringSequenceDictKind() { this.getValue() instanceof ExternalStringSequenceKind }
+}
+
+/** TaintKind for the result of `urlsplit(tainted_string)` */
+class ExternalUrlSplitResult extends ExternalStringSequenceKind {
+    // https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlsplit
+    override TaintKind getTaintOfAttribute(string name) {
+        result = super.getTaintOfAttribute(name)
+        or
+        (
+            // namedtuple field names
+            name = "scheme" or
+            name = "netloc" or
+            name = "path" or
+            name = "query" or
+            name = "fragment" or
+            // class methods
+            name = "username" or
+            name = "password" or
+            name = "hostname"
+        ) and
+        result instanceof ExternalStringKind
+    }
+
+    override TaintKind getTaintOfMethodResult(string name) {
+        result = super.getTaintOfMethodResult(name)
+        or
+        name = "geturl" and
+        result instanceof ExternalStringKind
+    }
+}
+
+/** TaintKind for the result of `urlparse(tainted_string)` */
+class ExternalUrlParseResult extends ExternalStringSequenceKind {
+    // https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlparse
+    override TaintKind getTaintOfAttribute(string name) {
+        result = super.getTaintOfAttribute(name)
+        or
+        (
+            // namedtuple field names
+            name = "scheme" or
+            name = "netloc" or
+            name = "path" or
+            name = "params" or
+            name = "query" or
+            name = "fragment" or
+            // class methods
+            name = "username" or
+            name = "password" or
+            name = "hostname"
+        ) and
+        result instanceof ExternalStringKind
+    }
+
+    override TaintKind getTaintOfMethodResult(string name) {
+        result = super.getTaintOfMethodResult(name)
+        or
+        name = "geturl" and
+        result instanceof ExternalStringKind
    }
 }

 /* Helper for getTaintForStep() */
-pragma [noinline]
-private predicate json_subscript_taint(SubscriptNode sub, ControlFlowNode obj, ExternalJsonKind seq, TaintKind key) {
+pragma[noinline]
+private predicate json_subscript_taint(
+    SubscriptNode sub, ControlFlowNode obj, ExternalJsonKind seq, TaintKind key
+) {
    sub.isLoad() and
    sub.getValue() = obj and
    key = seq.getValue()
 }

-
 private predicate json_load(ControlFlowNode fromnode, CallNode tonode) {
    exists(FunctionObject json_loads |
        ModuleObject::named("json").attr("loads") = json_loads and
-        json_loads.getACall() = tonode and tonode.getArg(0) = fromnode
+        json_loads.getACall() = tonode and
+        tonode.getArg(0) = fromnode
+    )
+}
+
+private predicate urlsplit(ControlFlowNode fromnode, CallNode tonode) {
+    // This could be implemented as `exists(FunctionValue` without the explicit six part,
+    // but then our tests will need to import +100 modules, so for now this slightly
+    // altered version gets to live on.
+    exists(Value urlsplit |
+        (
+            urlsplit = Value::named("six.moves.urllib.parse.urlsplit")
+            or
+            // Python 2
+            urlsplit = Value::named("urlparse.urlsplit")
+            or
+            // Python 3
+            urlsplit = Value::named("urllib.parse.urlsplit")
+        ) and
+        tonode = urlsplit.getACall() and
+        tonode.getArg(0) = fromnode
+    )
+}
+
+private predicate urlparse(ControlFlowNode fromnode, CallNode tonode) {
+    // This could be implemented as `exists(FunctionValue` without the explicit six part,
+    // but then our tests will need to import +100 modules, so for now this slightly
+    // altered version gets to live on.
+    exists(Value urlparse |
+        (
+            urlparse = Value::named("six.moves.urllib.parse.urlparse")
+            or
+            // Python 2
+            urlparse = Value::named("urlparse.urlparse")
+            or
+            // Python 3
+            urlparse = Value::named("urllib.parse.urlparse")
+        ) and
+        tonode = urlparse.getACall() and
+        tonode.getArg(0) = fromnode
    )
 }

 /** A kind of "taint", representing an open file-like object from an external source. */
 class ExternalFileObject extends TaintKind {
-
-    ExternalFileObject() {
-        this = "file[" + any(ExternalStringKind key) + "]"
-    }
-
+    ExternalFileObject() { this = "file[" + any(ExternalStringKind key) + "]" }

    /** Gets the taint kind for the contents of this file */
-    TaintKind getValue() {
-        this = "file[" + result + "]"
-    }
+    TaintKind getValue() { this = "file[" + result + "]" }

    override TaintKind getTaintOfMethodResult(string name) {
        name = "read" and result = this.getValue()
    }
-
 }

+/**
+ * Temporary sanitizer for the tainted result from `urlsplit` and `urlparse`. Can be used to reduce FPs until
+ * we have better support for namedtuples.
+ *
+ * Will clear **all** taint on a test of the kind. That is, on the true edge of any matching test,
+ * all fields/indexes will be cleared of taint.
+ *
+ * Handles:
+ * - `if splitres.netloc == "KNOWN_VALUE"`
+ * - `if splitres[0] == "KNOWN_VALUE"`
+ */
+class UrlsplitUrlparseTempSanitizer extends Sanitizer {
+    // TODO: remove this once we have better support for named tuples

+    UrlsplitUrlparseTempSanitizer() { this = "UrlsplitUrlparseTempSanitizer" }
+
+    override predicate sanitizingEdge(TaintKind taint, PyEdgeRefinement test) {
+        (
+            taint instanceof ExternalUrlSplitResult
+            or
+            taint instanceof ExternalUrlParseResult
+        ) and
+        exists(ControlFlowNode full_use |
+            full_use.(SubscriptNode).getObject() = test.getInput().getAUse()
+            or
+            full_use.(AttrNode).getObject() = test.getInput().getAUse()
+        |
+            clears_taint(full_use, test.getTest(), test.getSense())
+        )
+    }
+
+    private predicate clears_taint(ControlFlowNode tainted, ControlFlowNode test, boolean sense) {
+        test_equality_with_const(test, tainted, sense)
+        or
+        test_in_const_seq(test, tainted, sense)
+        or
+        test.(UnaryExprNode).getNode().getOp() instanceof Not and
+        exists(ControlFlowNode nested_test |
+            nested_test = test.(UnaryExprNode).getOperand() and
+            clears_taint(tainted, nested_test, sense.booleanNot())
+        )
+    }
+
+    /** holds for `== "KNOWN_VALUE"` on `true` edge, and `!= "KNOWN_VALUE"` on `false` edge */
+    private predicate test_equality_with_const(CompareNode cmp, ControlFlowNode tainted, boolean sense) {
+        exists(ControlFlowNode const, Cmpop op |
+            const.getNode() instanceof StrConst
+        |
+            (
+                cmp.operands(const, op, tainted)
+                or
+                cmp.operands(tainted, op, const)
+            ) and
+            (
+                op instanceof Eq and sense = true
+                or
+                op instanceof NotEq and sense = false
+            )
+        )
+    }
+
+    /** holds for `in ["KNOWN_VALUE", ...]` on `true` edge, and `not in ["KNOWN_VALUE", ...]` on `false` edge */
+    private predicate test_in_const_seq(CompareNode cmp, ControlFlowNode tainted, boolean sense) {
+        exists(SequenceNode const_seq, Cmpop op |
+            forall(ControlFlowNode elem | elem = const_seq.getAnElement() | elem.getNode() instanceof StrConst)
+        |
+            cmp.operands(tainted, op, const_seq) and
+            (
+                op instanceof In and sense = true
+                or
+                op instanceof NotIn and sense = false
+            )
+        )
+    }
+}
--- a/python/ql/src/semmle/python/web/ClientHttpRequest.qll
+++ b/python/ql/src/semmle/python/web/ClientHttpRequest.qll
@@ -0,0 +1,2 @@
+import semmle.python.web.client.StdLib
+import semmle.python.web.client.Requests
--- a/python/ql/src/semmle/python/web/Http.qll
+++ b/python/ql/src/semmle/python/web/Http.qll
@@ -89,7 +89,7 @@ abstract class CookieSet extends CookieOperation {}
 /** Generic taint sink in a http response */
 abstract class HttpResponseTaintSink extends TaintSink {

-    override predicate sinks(TaintKind kind) { 
+    override predicate sinks(TaintKind kind) {
        kind instanceof ExternalStringKind
    }

@@ -97,9 +97,51 @@ abstract class HttpResponseTaintSink extends TaintSink {

 abstract class HttpRedirectTaintSink extends TaintSink {

-    override predicate sinks(TaintKind kind) { 
+    override predicate sinks(TaintKind kind) {
        kind instanceof ExternalStringKind
    }

 }

+module Client {
+
+    // TODO: user-input in other than URL:
+    // - `data`, `json` for `requests.post`
+    // - `body` for `HTTPConnection.request`
+    // - headers?
+
+    // TODO: Add more library support
+    // - urllib3 https://github.com/urllib3/urllib3
+    // - httpx https://github.com/encode/httpx
+
+    /**
+      * An outgoing http request
+      *
+      * For example:
+      * conn = HTTPConnection('example.com')
+        conn.request('GET', '/path')
+      */
+    abstract class HttpRequest extends ControlFlowNode {
+
+        /** Get any ControlFlowNode that is used to construct the final URL.
+          *
+          * In the HTTPConnection example, there is a result for both `'example.com'` and for `'/path'`.
+          */
+        abstract ControlFlowNode getAUrlPart();
+
+        abstract string getMethodUpper();
+    }
+
+    /** Taint sink for the URL-part of an outgoing http request */
+    class HttpRequestUrlTaintSink extends TaintSink {
+
+        HttpRequestUrlTaintSink() {
+            this = any(HttpRequest r).getAUrlPart()
+        }
+
+        override predicate sinks(TaintKind kind) {
+            kind instanceof ExternalStringKind
+        }
+
+    }
+}
--- a/python/ql/src/semmle/python/web/client/Requests.qll
+++ b/python/ql/src/semmle/python/web/client/Requests.qll
@@ -0,0 +1,22 @@
+/**
+ * Modeling outgoing HTTP requests using the `requests` package
+ * https://pypi.org/project/requests/
+ */
+
+import python
+private import semmle.python.web.Http
+
+class RequestsHttpRequest extends Client::HttpRequest, CallNode {
+    CallableValue func;
+    string method;
+
+    RequestsHttpRequest() {
+        method = httpVerbLower() and
+        func = Module::named("requests").attr(method) and
+        this = func.getACall()
+    }
+
+    override ControlFlowNode getAUrlPart() { result = func.getNamedArgumentForCall(this, "url") }
+
+    override string getMethodUpper() { result = method.toUpperCase() }
+}
--- a/python/ql/src/semmle/python/web/client/StdLib.qll
+++ b/python/ql/src/semmle/python/web/client/StdLib.qll
@@ -0,0 +1,55 @@
+import python
+private import semmle.python.web.Http
+
+ClassValue httpConnectionClass() {
+    // Python 2
+    result = Value::named("httplib.HTTPConnection")
+    or
+    result = Value::named("httplib.HTTPSConnection")
+    or
+    // Python 3
+    result = Value::named("http.client.HTTPConnection")
+    or
+    result = Value::named("http.client.HTTPSConnection")
+    or
+    // six
+    result = Value::named("six.moves.http_client.HTTPConnection")
+    or
+    result = Value::named("six.moves.http_client.HTTPSConnection")
+}
+
+class HttpConnectionHttpRequest extends Client::HttpRequest, CallNode {
+    CallNode constructor_call;
+    CallableValue func;
+
+    HttpConnectionHttpRequest() {
+        exists(ClassValue cls, AttrNode call_origin, Value constructor_call_value |
+            cls = httpConnectionClass() and
+            func = cls.lookup("request") and
+            this = func.getACall() and
+            // since you can do `r = conn.request; r('GET', path)`, we need to find the origin
+            this.getFunction().pointsTo(_, _, call_origin) and
+            // Since HTTPSConnection is a subtype of HTTPConnection, up until this point, `cls` could be either class,
+            // because `HTTPSConnection.request == HTTPConnection.request`. To avoid generating 2 results, we filter
+            // on the actual class used as the constructor
+            call_origin.getObject().pointsTo(_, constructor_call_value, constructor_call) and
+            cls = constructor_call_value.getClass() and
+            constructor_call = cls.getACall()
+        )
+    }
+
+    override ControlFlowNode getAUrlPart() {
+        result = func.getNamedArgumentForCall(this, "url")
+        or
+        result = constructor_call.getArg(0)
+        or
+        result = constructor_call.getArgByName("host")
+    }
+
+    override string getMethodUpper() {
+        exists(string method |
+            result = method.toUpperCase() and
+            func.getNamedArgumentForCall(this, "method").pointsTo(Value::forString(method))
+        )
+    }
+}
				`@@ -0,0 +1 @@`
				`This directory contains [experimental](../../../../docs/experimental.md) CodeQL queries and libraries.`