Merge branch 'master' into python-fix-multi-assign-points-to

This commit is contained in:
Taus Brock-Nannestad
2020-03-12 18:50:10 +01:00
618 changed files with 15540 additions and 4952 deletions

View File

@@ -20,8 +20,8 @@ import Expressions.CallArgs
from Call call, ClassObject cls, string name, FunctionObject init
where
illegally_named_parameter(call, cls, name)
and init = get_function_or_initializer(cls)
illegally_named_parameter_objectapi(call, cls, name)
and init = get_function_or_initializer_objectapi(cls)
select
call, "Keyword argument '" + name + "' is not a supported parameter name of $@.", init, init.getQualifiedName()

View File

@@ -18,8 +18,8 @@ import Expressions.CallArgs
from Call call, ClassObject cls, string too, string should, int limit, FunctionObject init
where
(
too_many_args(call, cls, limit) and too = "too many arguments" and should = "no more than "
too_many_args_objectapi(call, cls, limit) and too = "too many arguments" and should = "no more than "
or
too_few_args(call, cls, limit) and too = "too few arguments" and should = "no fewer than "
) and init = get_function_or_initializer(cls)
too_few_args_objectapi(call, cls, limit) and too = "too few arguments" and should = "no fewer than "
) and init = get_function_or_initializer_objectapi(cls)
select call, "Call to $@ with " + too + "; should be " + should + limit.toString() + ".", init, init.getQualifiedName()

View File

@@ -19,7 +19,7 @@ predicate doesnt_reraise(ExceptStmt ex) {
}
predicate catches_base_exception(ExceptStmt ex) {
ex.getType().refersTo(theBaseExceptionType())
ex.getType().pointsTo(ClassValue::baseException())
or
not exists(ex.getType())
}

View File

@@ -31,7 +31,7 @@ predicate no_comment(ExceptStmt ex) {
}
predicate non_local_control_flow(ExceptStmt ex) {
ex.getType().refersTo(theStopIterationType())
ex.getType().pointsTo(ClassValue::stopIteration())
}
predicate try_has_normal_exit(Try try) {
@@ -64,32 +64,29 @@ predicate subscript(Stmt s) {
s.(Delete).getATarget() instanceof Subscript
}
predicate encode_decode(Expr ex, ClassObject type) {
predicate encode_decode(Call ex, ClassValue type) {
exists(string name |
ex.(Call).getFunc().(Attribute).getName() = name |
name = "encode" and type = Object::builtin("UnicodeEncodeError")
ex.getFunc().(Attribute).getName() = name |
name = "encode" and type = ClassValue::unicodeEncodeError()
or
name = "decode" and type = Object::builtin("UnicodeDecodeError")
name = "decode" and type = ClassValue::unicodeDecodeError()
)
}
predicate small_handler(ExceptStmt ex, Stmt s, ClassObject type) {
predicate small_handler(ExceptStmt ex, Stmt s, ClassValue type) {
not exists(ex.getTry().getStmt(1)) and
s = ex.getTry().getStmt(0) and
ex.getType().refersTo(type)
ex.getType().pointsTo(type)
}
/** Holds if this exception handler is sufficiently small in scope to not need a comment
* as to what it is doing.
*/
predicate focussed_handler(ExceptStmt ex) {
exists(Stmt s, ClassObject type |
exists(Stmt s, ClassValue type |
small_handler(ex, s, type) |
subscript(s) and type.getAnImproperSuperType() = theLookupErrorType()
subscript(s) and type.getASuperType() = ClassValue::lookupError()
or
attribute_access(s) and type = theAttributeErrorType()
attribute_access(s) and type = ClassValue::attributeError()
or
s.(ExprStmt).getValue() instanceof Name and type = theNameErrorType()
s.(ExprStmt).getValue() instanceof Name and type = ClassValue::nameError()
or
encode_decode(s.(ExprStmt).getValue(), type)
)

View File

@@ -15,7 +15,7 @@ import python
import Raising
import Exceptions.NotImplemented
from Raise r, ClassObject t
where type_or_typeof(r, t, _) and not t.isLegalExceptionType() and not t.failedInference() and not use_of_not_implemented_in_raise(r, _)
from Raise r, ClassValue t
where type_or_typeof(r, t, _) and not t.isLegalExceptionType() and not t.failedInference(_) and not use_of_not_implemented_in_raise(r, _)
select r, "Illegal class '" + t.getName() + "' raised; will result in a TypeError being raised instead."

View File

@@ -3,7 +3,7 @@ import python
/** Holds if `notimpl` refers to `NotImplemented` or `NotImplemented()` in the `raise` statement */
predicate use_of_not_implemented_in_raise(Raise raise, Expr notimpl) {
notimpl.refersTo(Object::notImplemented()) and
notimpl.pointsTo(Value::named("NotImplemented")) and
(
notimpl = raise.getException() or
notimpl = raise.getException().(Call).getFunc()

View File

@@ -1,14 +1,16 @@
import python
/** Whether the raise statement 'r' raises 'type' from origin 'orig' */
predicate type_or_typeof(Raise r, ClassObject type, AstNode orig) {
predicate type_or_typeof(Raise r, ClassValue type, AstNode orig) {
exists(Expr exception |
exception = r.getRaised() |
exception.refersTo(type, _, orig)
exception.pointsTo(type, orig)
or
not exists(ClassObject exc_type | exception.refersTo(exc_type)) and
not type = theTypeType() and // First value is an unknown exception type
exception.refersTo(_, type, orig)
not exists(ClassValue exc_type | exception.pointsTo(exc_type)) and
not type = ClassValue::type() and // First value is an unknown exception type
exists(Value val | exception.pointsTo(val, orig) |
val.getClass() = type
)
)
}

View File

@@ -11,8 +11,9 @@
import python
from Raise r, AstNode origin
where r.getException().refersTo(_, theTupleType(), origin) and
from Raise r, Value v, AstNode origin
where r.getException().pointsTo(v, origin) and
v.getClass() = ClassValue::tuple() and
major_version() = 2 /* Raising a tuple is a type error in Python 3, so is handled by the IllegalRaise query. */
select r, "Raising $@ will result in the first element (recursively) being raised and all other elements being discarded.", origin, "a tuple"

View File

@@ -2,7 +2,7 @@ import python
import Testing.Mox
private int varargs_length(Call call) {
private int varargs_length_objectapi(Call call) {
not exists(call.getStarargs()) and result = 0
or
exists(TupleObject t |
@@ -13,67 +13,131 @@ private int varargs_length(Call call) {
result = count(call.getStarargs().(List).getAnElt())
}
private int varargs_length(Call call) {
not exists(call.getStarargs()) and result = 0
or
exists(TupleValue t |
call.getStarargs().pointsTo(t) |
result = t.length()
)
or
result = count(call.getStarargs().(List).getAnElt())
}
/** Gets a keyword argument that is not a keyword-only parameter. */
private Keyword not_keyword_only_arg(Call call, FunctionObject func) {
private Keyword not_keyword_only_arg_objectapi(Call call, FunctionObject func) {
func.getACall().getNode() = call and
result = call.getAKeyword() and
not func.getFunction().getAKeywordOnlyArg().getId() = result.getArg()
}
/** Gets a keyword argument that is not a keyword-only parameter. */
private Keyword not_keyword_only_arg(Call call, FunctionValue func) {
func.getACall().getNode() = call and
result = call.getAKeyword() and
not func.getScope().getAKeywordOnlyArg().getId() = result.getArg()
}
/** Gets the count of arguments that are passed as positional parameters even if they
* are named in the call.
* This is the sum of the number of positional arguments, the number of elements in any explicit tuple passed as *arg
* plus the number of keyword arguments that do not match keyword-only arguments (if the function does not take **kwargs).
*/
private int positional_arg_count_for_call(Call call, Object callable) {
call = get_a_call(callable).getNode() and
private int positional_arg_count_for_call_objectapi(Call call, Object callable) {
call = get_a_call_objectapi(callable).getNode() and
exists(int positional_keywords |
exists(FunctionObject func | func = get_function_or_initializer(callable) |
exists(FunctionObject func | func = get_function_or_initializer_objectapi(callable) |
not func.getFunction().hasKwArg() and
positional_keywords = count(not_keyword_only_arg(call, func))
positional_keywords = count(not_keyword_only_arg_objectapi(call, func))
or
func.getFunction().hasKwArg() and positional_keywords = 0
)
|
result = count(call.getAnArg()) + varargs_length(call) + positional_keywords
result = count(call.getAnArg()) + varargs_length_objectapi(call) + positional_keywords
)
}
/** Gets the count of arguments that are passed as positional parameters even if they
* are named in the call.
* This is the sum of the number of positional arguments, the number of elements in any explicit tuple passed as *arg
* plus the number of keyword arguments that do not match keyword-only arguments (if the function does not take **kwargs).
*/
private int positional_arg_count_for_call(Call call, Value callable) {
call = get_a_call(callable).getNode() and
exists(int positional_keywords |
exists(FunctionValue func | func = get_function_or_initializer(callable) |
not func.getScope().hasKwArg() and
positional_keywords = count(not_keyword_only_arg(call, func))
or
func.getScope().hasKwArg() and positional_keywords = 0
)
|
result = count(call.getAnArg()) + varargs_length_objectapi(call) + positional_keywords
)
}
int arg_count_objectapi(Call call) {
result = count(call.getAnArg()) + varargs_length_objectapi(call) + count(call.getAKeyword())
}
int arg_count(Call call) {
result = count(call.getAnArg()) + varargs_length(call) + count(call.getAKeyword())
}
/* Gets a call corresponding to the given class or function*/
private ControlFlowNode get_a_call(Object callable) {
private ControlFlowNode get_a_call_objectapi(Object callable) {
result = callable.(ClassObject).getACall()
or
result = callable.(FunctionObject).getACall()
}
/* Gets a call corresponding to the given class or function*/
private ControlFlowNode get_a_call(Value callable) {
result = callable.(ClassValue).getACall()
or
result = callable.(FunctionValue).getACall()
}
/* Gets the function object corresponding to the given class or function*/
FunctionObject get_function_or_initializer(Object func_or_cls) {
FunctionObject get_function_or_initializer_objectapi(Object func_or_cls) {
result = func_or_cls.(FunctionObject)
or
result = func_or_cls.(ClassObject).declaredAttribute("__init__")
}
/* Gets the function object corresponding to the given class or function*/
FunctionValue get_function_or_initializer(Value func_or_cls) {
result = func_or_cls.(FunctionValue)
or
result = func_or_cls.(ClassValue).declaredAttribute("__init__")
}
/**Whether there is an illegally named parameter called `name` in the `call` to `func` */
predicate illegally_named_parameter(Call call, Object func, string name) {
predicate illegally_named_parameter_objectapi(Call call, Object func, string name) {
not func.isC() and
name = call.getANamedArgumentName() and
call.getAFlowNode() = get_a_call_objectapi(func) and
not get_function_or_initializer_objectapi(func).isLegalArgumentName(name)
}
/**Whether there is an illegally named parameter called `name` in the `call` to `func` */
predicate illegally_named_parameter(Call call, Value func, string name) {
not func.isBuiltin() and
name = call.getANamedArgumentName() and
call.getAFlowNode() = get_a_call(func) and
not get_function_or_initializer(func).isLegalArgumentName(name)
}
/**Whether there are too few arguments in the `call` to `callable` where `limit` is the lowest number of legal arguments */
predicate too_few_args(Call call, Object callable, int limit) {
predicate too_few_args_objectapi(Call call, Object callable, int limit) {
// Exclude cases where an incorrect name is used as that is covered by 'Wrong name for an argument in a call'
not illegally_named_parameter(call, callable, _) and
not illegally_named_parameter_objectapi(call, callable, _) and
not exists(call.getStarargs()) and not exists(call.getKwargs()) and
arg_count(call) < limit and
exists(FunctionObject func | func = get_function_or_initializer(callable) |
arg_count_objectapi(call) < limit and
exists(FunctionObject func | func = get_function_or_initializer_objectapi(callable) |
call = func.getAFunctionCall().getNode() and limit = func.minParameters() and
/* The combination of misuse of `mox.Mox().StubOutWithMock()`
* and a bug in mox's implementation of methods results in having to
@@ -84,16 +148,37 @@ predicate too_few_args(Call call, Object callable, int limit) {
call = func.getAMethodCall().getNode() and limit = func.minParameters() - 1
or
callable instanceof ClassObject and
call.getAFlowNode() = get_a_call_objectapi(callable) and limit = func.minParameters() - 1
)
}
/**Whether there are too few arguments in the `call` to `callable` where `limit` is the lowest number of legal arguments */
predicate too_few_args(Call call, Value callable, int limit) {
// Exclude cases where an incorrect name is used as that is covered by 'Wrong name for an argument in a call'
not illegally_named_parameter(call, callable, _) and
not exists(call.getStarargs()) and not exists(call.getKwargs()) and
arg_count(call) < limit and
exists(FunctionValue func | func = get_function_or_initializer(callable) |
call = func.getACall().getNode() and limit = func.minParameters() and
/* The combination of misuse of `mox.Mox().StubOutWithMock()`
* and a bug in mox's implementation of methods results in having to
* pass 1 too few arguments to the mocked function.
*/
not (useOfMoxInModule(call.getEnclosingModule()) and func.isNormalMethod())
or
call = func.getACall().getNode() and limit = func.minParameters() - 1
or
callable instanceof ClassValue and
call.getAFlowNode() = get_a_call(callable) and limit = func.minParameters() - 1
)
}
/**Whether there are too many arguments in the `call` to `func` where `limit` is the highest number of legal arguments */
predicate too_many_args(Call call, Object callable, int limit) {
predicate too_many_args_objectapi(Call call, Object callable, int limit) {
// Exclude cases where an incorrect name is used as that is covered by 'Wrong name for an argument in a call'
not illegally_named_parameter(call, callable, _) and
not illegally_named_parameter_objectapi(call, callable, _) and
exists(FunctionObject func |
func = get_function_or_initializer(callable) and
func = get_function_or_initializer_objectapi(callable) and
not func.getFunction().hasVarArg() and limit >= 0
|
call = func.getAFunctionCall().getNode() and limit = func.maxParameters()
@@ -101,13 +186,38 @@ predicate too_many_args(Call call, Object callable, int limit) {
call = func.getAMethodCall().getNode() and limit = func.maxParameters() - 1
or
callable instanceof ClassObject and
call.getAFlowNode() = get_a_call_objectapi(callable) and limit = func.maxParameters() - 1
) and
positional_arg_count_for_call_objectapi(call, callable) > limit
}
/**Whether there are too many arguments in the `call` to `func` where `limit` is the highest number of legal arguments */
predicate too_many_args(Call call, Value callable, int limit) {
// Exclude cases where an incorrect name is used as that is covered by 'Wrong name for an argument in a call'
not illegally_named_parameter(call, callable, _) and
exists(FunctionValue func |
func = get_function_or_initializer(callable) and
not func.getScope().hasVarArg() and limit >= 0
|
call = func.getACall().getNode() and limit = func.maxParameters()
or
call = func.getACall().getNode() and limit = func.maxParameters() - 1
or
callable instanceof ClassValue and
call.getAFlowNode() = get_a_call(callable) and limit = func.maxParameters() - 1
) and
positional_arg_count_for_call(call, callable) > limit
}
/** Holds if `call` has too many or too few arguments for `func` */
predicate wrong_args(Call call, FunctionObject func, int limit, string too) {
predicate wrong_args_objectapi(Call call, FunctionObject func, int limit, string too) {
too_few_args_objectapi(call, func, limit) and too = "too few"
or
too_many_args_objectapi(call, func, limit) and too = "too many"
}
/** Holds if `call` has too many or too few arguments for `func` */
predicate wrong_args(Call call, FunctionValue func, int limit, string too) {
too_few_args(call, func, limit) and too = "too few"
or
too_many_args(call, func, limit) and too = "too many"
@@ -117,14 +227,31 @@ predicate wrong_args(Call call, FunctionObject func, int limit, string too) {
* Implies nothing about whether `call` could call `func`.
*/
bindingset[call, func]
predicate correct_args_if_called_as_method(Call call, FunctionObject func) {
predicate correct_args_if_called_as_method_objectapi(Call call, FunctionObject func) {
arg_count_objectapi(call)+1 >= func.minParameters()
and
arg_count_objectapi(call) < func.maxParameters()
}
/** Holds if `call` has correct number of arguments for `func`.
* Implies nothing about whether `call` could call `func`.
*/
bindingset[call, func]
predicate correct_args_if_called_as_method(Call call, FunctionValue func) {
arg_count(call)+1 >= func.minParameters()
and
arg_count(call) < func.maxParameters()
}
/** Holds if `call` is a call to `overriding`, which overrides `func`. */
predicate overridden_call(FunctionObject func, FunctionObject overriding, Call call) {
predicate overridden_call_objectapi(FunctionObject func, FunctionObject overriding, Call call) {
overriding.overrides(func) and
overriding.getACall().getNode() = call
}
/** Holds if `call` is a call to `overriding`, which overrides `func`. */
predicate overridden_call(FunctionValue func, FunctionValue overriding, Call call) {
overriding.overrides(func) and
overriding.getACall().getNode() = call
}

View File

@@ -17,13 +17,13 @@ import python
from CallNode call_to_super, string name
where
exists(GlobalVariable gv, ControlFlowNode cn |
call_to_super = theSuperType().getACall() and
call_to_super = ClassValue::super_().getACall() and
gv.getId() = "super" and
cn = call_to_super.getArg(0) and
name = call_to_super.getScope().getScope().(Class).getName() and
exists(ClassObject other |
cn.refersTo(other) and
not other.getPyClass().getName() = name
exists(ClassValue other |
cn.pointsTo(other) and
not other.getScope().getName() = name
)
)
select call_to_super.getNode(), "First argument to super() should be " + name + "."

View File

@@ -13,7 +13,14 @@
import python
import semmle.python.strings
from Expr e, ClassObject t
where exists(BinaryExpr b | b.getOp() instanceof Mod and format_string(b.getLeft()) and e = b.getRight() and
mapping_format(b.getLeft()) and e.refersTo(_, t, _) and not t.isMapping())
from Expr e, ClassValue t
where
exists(BinaryExpr b |
b.getOp() instanceof Mod and
format_string(b.getLeft()) and
e = b.getRight() and
mapping_format(b.getLeft()) and
e.pointsTo().getClass() = t and
not t.isMapping()
)
select e, "Right hand side of a % operator must be a mapping, not class $@.", t, t.getName()

View File

@@ -107,10 +107,10 @@ private predicate brace_pair(PossibleAdvancedFormatString fmt, int start, int en
private predicate advanced_format_call(Call format_expr, PossibleAdvancedFormatString fmt, int args) {
exists(CallNode call |
call = format_expr.getAFlowNode() |
call.getFunction().refersTo(Object::builtin("format")) and call.getArg(0).refersTo(_, fmt.getAFlowNode()) and
call.getFunction().pointsTo(Value::named("format")) and call.getArg(0).pointsTo(_, fmt.getAFlowNode()) and
args = count(format_expr.getAnArg()) - 1
or
call.getFunction().(AttrNode).getObject("format").refersTo(_, fmt.getAFlowNode()) and
call.getFunction().(AttrNode).getObject("format").pointsTo(_, fmt.getAFlowNode()) and
args = count(format_expr.getAnArg())
)
}
@@ -139,4 +139,3 @@ class AdvancedFormattingCall extends Call {
}
}

View File

@@ -69,7 +69,7 @@ predicate is_unhashable(ControlFlowNode f, ClassValue cls, ControlFlowNode origi
predicate typeerror_is_caught(ControlFlowNode f) {
exists (Try try |
try.getBody().contains(f.getNode()) and
try.getAHandler().getType().refersTo(theTypeErrorType()))
try.getAHandler().getType().pointsTo(ClassValue::typeError()))
}
from ControlFlowNode f, ClassValue c, ControlFlowNode origin

View File

@@ -28,14 +28,14 @@ predicate probablySingleton(ClassValue cls) {
predicate invalid_to_use_is_portably(ClassValue c) {
overrides_eq_or_cmp(c) and
/* Exclude type/builtin-function/bool as it is legitimate to compare them using 'is' but they implement __eq__ */
// Exclude type/builtin-function/bool as it is legitimate to compare them using 'is' but they implement __eq__
not c = Value::named("type") and not c = ClassValue::builtinFunction() and not c = Value::named("bool") and
/* OK to compare with 'is' if a singleton */
// OK to compare with 'is' if a singleton
not probablySingleton(c)
}
predicate simple_constant(ControlFlowNode f) {
exists(Object obj | f.refersTo(obj) | obj = theTrueObject() or obj = theFalseObject() or obj = theNoneObject())
exists(Value val | f.pointsTo(val) | val = Value::named("True") or val = Value::named("False") or val = Value::named("None"))
}
private predicate cpython_interned_value(Expr e) {
@@ -66,14 +66,14 @@ private predicate universally_interned_value(Expr e) {
predicate cpython_interned_constant(Expr e) {
exists(Expr const |
e.refersTo(_, const) |
e.pointsTo(_, const) |
cpython_interned_value(const)
)
}
predicate universally_interned_constant(Expr e) {
exists(Expr const |
e.refersTo(_, const) |
e.pointsTo(_, const) |
universally_interned_value(const)
)
}
@@ -95,7 +95,7 @@ private predicate comparison_one_type(Compare comp, Cmpop op, ClassValue cls) {
}
predicate invalid_portable_is_comparison(Compare comp, Cmpop op, ClassValue cls) {
/* OK to use 'is' when defining '__eq__' */
// OK to use 'is' when defining '__eq__'
not exists(Function eq | eq.getName() = "__eq__" or eq.getName() = "__ne__" | eq = comp.getScope().getScope*())
and
(
@@ -107,24 +107,24 @@ predicate invalid_portable_is_comparison(Compare comp, Cmpop op, ClassValue cls)
)
)
and
/* OK to use 'is' when comparing items from a known set of objects */
not exists(Expr left, Expr right, Object obj |
// OK to use 'is' when comparing items from a known set of objects
not exists(Expr left, Expr right, Value val |
comp.compares(left, op, right) and
exists(ImmutableLiteral il | il.getLiteralObject() = obj) |
left.refersTo(obj) and right.refersTo(obj)
exists(ImmutableLiteral il | il.getLiteralValue() = val) |
left.pointsTo(val) and right.pointsTo(val)
or
/* Simple constant in module, probably some sort of sentinel */
// Simple constant in module, probably some sort of sentinel
exists(AstNode origin |
not left.refersTo(_) and right.refersTo(obj, origin) and
not left.pointsTo(_) and right.pointsTo(val, origin) and
origin.getScope().getEnclosingModule() = comp.getScope().getEnclosingModule()
)
)
and
/* OK to use 'is' when comparing with a member of an enum */
// OK to use 'is' when comparing with a member of an enum
not exists(Expr left, Expr right, AstNode origin |
comp.compares(left, op, right) and
enum_member(origin) |
left.refersTo(_, origin) or right.refersTo(_, origin)
left.pointsTo(_, origin) or right.pointsTo(_, origin)
)
}
@@ -135,4 +135,3 @@ private predicate enum_member(AstNode obj) {
asgn.getValue() = obj
)
}

View File

@@ -18,19 +18,20 @@ where
// Only relevant for Python 2, as all later versions implement true division
major_version() = 2
and
exists(BinaryExprNode bin, Object lobj, Object robj |
exists(BinaryExprNode bin, Value lval, Value rval |
bin = div.getAFlowNode()
and bin.getNode().getOp() instanceof Div
and bin.getLeft().refersTo(lobj, theIntType(), left)
and bin.getRight().refersTo(robj, theIntType(), right)
and bin.getLeft().pointsTo(lval, left)
and lval.getClass() = ClassValue::int_()
and bin.getRight().pointsTo(rval, right)
and rval.getClass() = ClassValue::int_()
// Ignore instances where integer division leaves no remainder
and not lobj.(NumericObject).intValue() % robj.(NumericObject).intValue() = 0
and not lval.(NumericValue).getIntValue() % rval.(NumericValue).getIntValue() = 0
and not bin.getNode().getEnclosingModule().hasFromFuture("division")
// Filter out results wrapped in `int(...)`
and not exists(CallNode c, ClassObject cls |
c.getAnArg() = bin
and c.getFunction().refersTo(cls)
and cls.getName() = "int"
and not exists(CallNode c |
c = ClassValue::int_().getACall()
and c.getAnArg() = bin
)
)
select div, "Result of division may be truncated as its $@ and $@ arguments may both be integers.",

View File

@@ -38,14 +38,14 @@ predicate unnecessary_lambda(Lambda l, Expr e) {
simple_wrapper(l, e) and
(
/* plain class */
exists(ClassObject c | e.refersTo(c))
exists(ClassValue c | e.pointsTo(c))
or
/* plain function */
exists(FunctionObject f | e.refersTo(f))
exists(FunctionValue f | e.pointsTo(f))
or
/* bound-method of enclosing instance */
exists(ClassObject cls, Attribute a |
cls.getPyClass() = l.getScope().getScope() and a = e |
exists(ClassValue cls, Attribute a |
cls.getScope() = l.getScope().getScope() and a = e |
((Name)a.getObject()).getId() = "self" and
cls.hasAttribute(a.getName())
)

View File

@@ -10,8 +10,8 @@
*/
import python
private import semmle.python.types.Builtins
from CallNode call, ControlFlowNode func
where
major_version() = 2 and call.getFunction() = func and func.refersTo(Object::builtin("apply"))
where major_version() = 2 and call.getFunction() = func and func.pointsTo(Value::named("apply"))
select call, "Call to the obsolete builtin function 'apply'."

View File

@@ -3,20 +3,20 @@
"qhelp.dtd">
<qhelp>
<overview>
<p>A call to the input() function, <code>input(prompt)</code> is equivalent to <code>eval(raw_input(prompt))</code>. Evaluating user input without any checking can be a serious security flaw.</p>
<p>In Python 2, a call to the <code>input()</code> function, <code>input(prompt)</code> is equivalent to <code>eval(raw_input(prompt))</code>. Evaluating user input without any checking can be a serious security flaw.</p>
</overview>
<recommendation>
<p> Get user input with <code>raw_input(prompt)</code> and then validate that input before evaluating. If the expected input is a number or
<p>Get user input with <code>raw_input(prompt)</code> and then validate that input before evaluating. If the expected input is a number or
string, then <code>ast.literal_eval()</code> can always be used safely.</p>
</recommendation>
<references>
<li>Python Standard Library: <a href="http://docs.python.org/library/functions.html#input">input</a>,
<a href="http://docs.python.org/library/ast.html#ast.literal_eval">ast.literal_eval</a>.</li>
<li>Python Standard Library: <a href="http://docs.python.org/2/library/functions.html#input">input</a>,
<a href="http://docs.python.org/2/library/ast.html#ast.literal_eval">ast.literal_eval</a>.</li>
<li>Wikipedia: <a href="http://en.wikipedia.org/wiki/Data_validation">Data validation</a>.</li>
</references>

View File

@@ -1,6 +1,6 @@
/**
* @name 'input' function used
* @description The built-in function 'input' is used which can allow arbitrary code to be run.
* @name 'input' function used in Python 2
* @description The built-in function 'input' is used which, in Python 2, can allow arbitrary code to be run.
* @kind problem
* @tags security
* correctness
@@ -18,4 +18,4 @@ where
call.getFunction() = func and
func.pointsTo(context, Value::named("input"), _) and
not func.pointsTo(context, Value::named("raw_input"), _)
select call, "The unsafe built-in function 'input' is used."
select call, "The unsafe built-in function 'input' is used in Python 2."

View File

@@ -19,7 +19,7 @@ import Expressions.CallArgs
from Call call, FunctionObject func, string name
where
illegally_named_parameter(call, func, name) and
illegally_named_parameter_objectapi(call, func, name) and
not func.isAbstract() and
not exists(FunctionObject overridden | func.overrides(overridden) and overridden.getFunction().getAnArg().(Name).getId() = name)
select

View File

@@ -15,29 +15,30 @@
import python
import semmle.python.strings
predicate string_format(BinaryExpr operation, StrConst str, Object args, AstNode origin) {
exists(Object fmt, Context ctx | operation.getOp() instanceof Mod |
operation.getLeft().refersTo(ctx, fmt, _, str) and
operation.getRight().refersTo(ctx, args, _, origin)
predicate string_format(BinaryExpr operation, StrConst str, Value args, AstNode origin) {
operation.getOp() instanceof Mod and
exists(Value fmt, Context ctx |
operation.getLeft().pointsTo(ctx, fmt, str) and
operation.getRight().pointsTo(ctx, args, origin)
)
}
int sequence_length(Object args) {
int sequence_length(Value args) {
/* Guess length of sequence */
exists(Tuple seq |
seq = args.getOrigin() |
exists(Tuple seq, AstNode origin |
seq.pointsTo(args,origin) |
result = strictcount(seq.getAnElt()) and
not seq.getAnElt() instanceof Starred
)
or
exists(ImmutableLiteral i |
i.getLiteralObject() = args |
i.getLiteralValue() = args |
result = 1
)
}
from BinaryExpr operation, StrConst fmt, Object args, int slen, int alen, AstNode origin, string provided
from BinaryExpr operation, StrConst fmt, Value args, int slen, int alen, AstNode origin, string provided
where string_format(operation, fmt, args, origin) and slen = sequence_length(args) and alen = format_items(fmt) and slen != alen and
(if slen = 1 then provided = " is provided." else provided = " are provided.")
select operation, "Wrong number of $@ for string format. Format $@ takes " + alen.toString() + ", but " + slen.toString() + provided,

View File

@@ -17,12 +17,12 @@ import CallArgs
from Call call, FunctionObject func, string too, string should, int limit
where
(
too_many_args(call, func, limit) and too = "too many arguments" and should = "no more than "
too_many_args_objectapi(call, func, limit) and too = "too many arguments" and should = "no more than "
or
too_few_args(call, func, limit) and too = "too few arguments" and should = "no fewer than "
too_few_args_objectapi(call, func, limit) and too = "too few arguments" and should = "no fewer than "
) and
not func.isAbstract() and
not exists(FunctionObject overridden | func.overrides(overridden) and correct_args_if_called_as_method(call, overridden))
not exists(FunctionObject overridden | func.overrides(overridden) and correct_args_if_called_as_method_objectapi(call, overridden))
/* The semantics of `__new__` can be a bit subtle, so we simply exclude `__new__` methods */
and not func.getName() = "__new__"

View File

@@ -15,9 +15,9 @@ predicate slice_method_name(string name) {
name = "__getslice__" or name = "__setslice__" or name = "__delslice__"
}
from PyFunctionObject f, string meth
from PythonFunctionValue f, string meth
where f.getFunction().isMethod() and not f.isOverridingMethod() and
where f.getScope().isMethod() and not f.isOverridingMethod() and
slice_method_name(meth) and f.getName() = meth

View File

@@ -15,10 +15,10 @@ import Expressions.CallArgs
from Call call, FunctionObject func, FunctionObject overridden, string problem
where
func.overrides(overridden) and (
wrong_args(call, func, _, problem) and correct_args_if_called_as_method(call, overridden)
wrong_args_objectapi(call, func, _, problem) and correct_args_if_called_as_method_objectapi(call, overridden)
or
exists(string name |
illegally_named_parameter(call, func, name) and problem = "an argument named '" + name + "'" and
illegally_named_parameter_objectapi(call, func, name) and problem = "an argument named '" + name + "'" and
overridden.getFunction().getAnArg().(Name).getId() = name
)
)

View File

@@ -18,11 +18,11 @@ where
not func.getName() = "__init__" and
overriding.overrides(func) and
call = overriding.getAMethodCall().getNode() and
correct_args_if_called_as_method(call, overriding) and
correct_args_if_called_as_method_objectapi(call, overriding) and
(
arg_count(call)+1 < func.minParameters() and problem = "too few arguments"
arg_count_objectapi(call)+1 < func.minParameters() and problem = "too few arguments"
or
arg_count(call) >= func.maxParameters() and problem = "too many arguments"
arg_count_objectapi(call) >= func.maxParameters() and problem = "too many arguments"
or
exists(string name | call.getAKeyword().getArg() = name and
overriding.getFunction().getAnArg().(Name).getId() = name and

View File

@@ -112,7 +112,7 @@ predicate is_quad_op(string name) {
name = "__setslice__" or name = "__exit__"
}
int argument_count(PyFunctionObject f, string name, ClassObject cls) {
int argument_count(PythonFunctionValue f, string name, ClassValue cls) {
cls.declaredAttribute(name) = f and
(
is_unary_op(name) and result = 1
@@ -125,7 +125,7 @@ int argument_count(PyFunctionObject f, string name, ClassObject cls) {
)
}
predicate incorrect_special_method_defn(PyFunctionObject func, string message, boolean show_counts, string name, ClassObject owner) {
predicate incorrect_special_method_defn(PythonFunctionValue func, string message, boolean show_counts, string name, ClassValue owner) {
exists(int required |
required = argument_count(func, name, owner) |
/* actual_non_default <= actual */
@@ -133,14 +133,14 @@ predicate incorrect_special_method_defn(PyFunctionObject func, string message, b
(message = "Too few parameters" and show_counts = true)
else if required < func.minParameters() then
(message = "Too many parameters" and show_counts = true)
else if (func.minParameters() < required and not func.getFunction().hasVarArg()) then
else if (func.minParameters() < required and not func.getScope().hasVarArg()) then
(message = (required -func.minParameters()) + " default values(s) will never be used" and show_counts = false)
else
none()
)
}
predicate incorrect_pow(FunctionObject func, string message, boolean show_counts, ClassObject owner) {
predicate incorrect_pow(FunctionValue func, string message, boolean show_counts, ClassValue owner) {
owner.declaredAttribute("__pow__") = func and
(
func.maxParameters() < 2 and message = "Too few parameters" and show_counts = true
@@ -153,19 +153,19 @@ predicate incorrect_pow(FunctionObject func, string message, boolean show_counts
)
}
predicate incorrect_get(FunctionObject func, string message, boolean show_counts, ClassObject owner) {
predicate incorrect_get(FunctionValue func, string message, boolean show_counts, ClassValue owner) {
owner.declaredAttribute("__get__") = func and
(
func.maxParameters() < 3 and message = "Too few parameters" and show_counts = true
or
func.minParameters() > 3 and message = "Too many parameters" and show_counts = true
or
func.minParameters() < 2 and not func.getFunction().hasVarArg() and
func.minParameters() < 2 and not func.getScope().hasVarArg() and
message = (2 - func.minParameters()) + " default value(s) will never be used" and show_counts = false
)
}
string should_have_parameters(PyFunctionObject f, string name, ClassObject owner) {
string should_have_parameters(PythonFunctionValue f, string name, ClassValue owner) {
exists(int i | i = argument_count(f, name, owner) |
result = i.toString()
)
@@ -173,7 +173,7 @@ string should_have_parameters(PyFunctionObject f, string name, ClassObject owner
owner.declaredAttribute(name) = f and (name = "__get__" or name = "__pow__") and result = "2 or 3"
}
string has_parameters(PyFunctionObject f) {
string has_parameters(PythonFunctionValue f) {
exists(int i | i = f.minParameters() |
i = 0 and result = "no parameters"
or
@@ -183,7 +183,7 @@ string has_parameters(PyFunctionObject f) {
)
}
from PyFunctionObject f, string message, string sizes, boolean show_counts, string name, ClassObject owner
from PythonFunctionValue f, string message, string sizes, boolean show_counts, string name, ClassValue owner
where
(
incorrect_special_method_defn(f, message, show_counts, name, owner)

View File

@@ -23,12 +23,12 @@ predicate is_used(Call c) {
)
}
from Call c, FunctionObject func
from Call c, FunctionValue func
where
/* Call result is used, but callee is a procedure */
is_used(c) and c.getFunc().refersTo(func) and func.getFunction().isProcedure() and
is_used(c) and c.getFunc().pointsTo(func) and func.getScope().isProcedure() and
/* All callees are procedures */
forall(FunctionObject callee | c.getFunc().refersTo(callee) | callee.getFunction().isProcedure()) and
forall(FunctionValue callee | c.getFunc().pointsTo(callee) | callee.getScope().isProcedure()) and
/* Mox return objects have an `AndReturn` method */
not useOfMoxInModule(c.getEnclosingModule())
select c, "The result of '$@' is used even though it is always None.", func, func.getQualifiedName()

View File

@@ -26,7 +26,5 @@ where
value = test.(NameConstant).toString()
) and
/* Exclude asserts appearing at the end of a chain of `elif`s */
not exists(If i |
i.getElif().getAnOrelse() = a
)
not exists(If i | i.getElif().getAnOrelse() = a)
select a, "Assert of literal constant " + value + "."

View File

@@ -14,11 +14,14 @@
import python
from Assert a, string b, string non
where a.getTest() instanceof Tuple and
(if exists(((Tuple)a.getTest()).getAnElt()) then
(b = "True" and non = "non-")
else
(b = "False" and non = "")
)
where
a.getTest() instanceof Tuple and
(
if exists(a.getTest().(Tuple).getAnElt())
then (
b = "True" and non = "non-"
) else (
b = "False" and non = ""
)
)
select a, "Assertion of " + non + "empty tuple is always " + b + "."

View File

@@ -15,13 +15,13 @@
import python
from Stmt s, string kind
where
s instanceof Return and kind = "return" and exists(Try t | t.getFinalbody().contains(s))
or
s instanceof Break and kind = "break" and
exists(Try t | t.getFinalbody().contains(s) |
not exists(For loop | loop.contains(s) and t.getFinalbody().contains(loop))
and
not exists(While loop | loop.contains(s) and t.getFinalbody().contains(loop))
)
where
s instanceof Return and kind = "return" and exists(Try t | t.getFinalbody().contains(s))
or
s instanceof Break and
kind = "break" and
exists(Try t | t.getFinalbody().contains(s) |
not exists(For loop | loop.contains(s) and t.getFinalbody().contains(loop)) and
not exists(While loop | loop.contains(s) and t.getFinalbody().contains(loop))
)
select s, "'" + kind + "' in a finally block will swallow any exceptions raised."

View File

@@ -14,19 +14,22 @@
import python
from Expr e, Location l, string kind, string what
where e.isParenthesized() and
not e instanceof Tuple and
(
exists(If i | i.getTest() = e) and kind = "if" and what = "condition"
or
exists(While w | w.getTest() = e) and kind = "while" and what = "condition"
or
exists(Return r | r.getValue() = e) and kind = "return" and what = "value"
or
exists(Assert a | a.getTest() = e and not exists(a.getMsg())) and kind = "assert" and what = "test"
)
and
// These require parentheses
(not e instanceof Yield and not e instanceof YieldFrom and not e instanceof GeneratorExp) and
l = e.getLocation() and l.getStartLine() = l.getEndLine()
where
e.isParenthesized() and
not e instanceof Tuple and
(
exists(If i | i.getTest() = e) and kind = "if" and what = "condition"
or
exists(While w | w.getTest() = e) and kind = "while" and what = "condition"
or
exists(Return r | r.getValue() = e) and kind = "return" and what = "value"
or
exists(Assert a | a.getTest() = e and not exists(a.getMsg())) and
kind = "assert" and
what = "test"
) and
// These require parentheses
(not e instanceof Yield and not e instanceof YieldFrom and not e instanceof GeneratorExp) and
l = e.getLocation() and
l.getStartLine() = l.getEndLine()
select e, "Parenthesized " + what + " in '" + kind + "' statement."

View File

@@ -15,16 +15,15 @@
import python
predicate is_condition(Expr cond) {
exists(If i | i.getTest() = cond) or
exists(IfExp ie | ie.getTest() = cond)
exists(If i | i.getTest() = cond) or
exists(IfExp ie | ie.getTest() = cond)
}
/* Treat certain unmodified builtins as constants as well. */
predicate effective_constant(Name cond) {
exists(GlobalVariable var | var = cond.getVariable() and not exists(NameNode f | f.defines(var)) |
var.getId() = "True" or var.getId() = "False" or var.getId() = "NotImplemented"
var.getId() = "True" or var.getId() = "False" or var.getId() = "NotImplemented"
)
}
@@ -34,9 +33,10 @@ predicate test_makes_code_unreachable(Expr cond) {
exists(While w | w.getTest() = cond and w.getStmt(0).isUnreachable())
}
from Expr cond
where is_condition(cond) and (cond.isConstant() or effective_constant(cond)) and
/* Ignore cases where test makes code unreachable, as that is handled in different query */
not test_makes_code_unreachable(cond)
where
is_condition(cond) and
(cond.isConstant() or effective_constant(cond)) and
/* Ignore cases where test makes code unreachable, as that is handled in different query */
not test_makes_code_unreachable(cond)
select cond, "Testing a constant will always give the same result."

View File

@@ -9,7 +9,9 @@
* @precision medium
* @id py/missing-docstring
*/
/* NOTE: precision of 'medium' reflects the lack of precision in the underlying rule.
/*
* NOTE: precision of 'medium' reflects the lack of precision in the underlying rule.
* Do we care whether a function has a docstring? That often depends on the reader of that docstring.
*/
@@ -18,25 +20,26 @@ import python
predicate needs_docstring(Scope s) {
s.isPublic() and
(
not s instanceof Function
or
function_needs_docstring(s)
not s instanceof Function
or
function_needs_docstring(s)
)
}
predicate function_needs_docstring(Function f) {
not exists(FunctionObject fo, FunctionObject base | fo.overrides(base) and fo.getFunction() = f |
not function_needs_docstring(base.getFunction())) and
not exists(FunctionValue fo, FunctionValue base | fo.overrides(base) and fo.getScope() = f |
not function_needs_docstring(base.getScope())
) and
f.getName() != "lambda" and
(f.getMetrics().getNumberOfLinesOfCode() - count(f.getADecorator())) > 2
and not exists(PythonPropertyObject p |
(f.getMetrics().getNumberOfLinesOfCode() - count(f.getADecorator())) > 2 and
not exists(PythonPropertyObject p |
p.getGetter().getFunction() = f or
p.getSetter().getFunction() = f
)
}
string scope_type(Scope s) {
result = "Module" and s instanceof Module and not ((Module)s).isPackage()
result = "Module" and s instanceof Module and not s.(Module).isPackage()
or
result = "Class" and s instanceof Class
or
@@ -46,5 +49,3 @@ string scope_type(Scope s) {
from Scope s
where needs_docstring(s) and not exists(s.getDocString())
select s, scope_type(s) + " " + s.getName() + " does not have a docstring"

View File

@@ -19,9 +19,9 @@ string message() {
}
predicate exec_function_call(Call c) {
exists(GlobalVariable exec | exec = ((Name)c.getFunc()).getVariable() and exec.getId() = "exec")
exists(GlobalVariable exec | exec = c.getFunc().(Name).getVariable() and exec.getId() = "exec")
}
from AstNode exec
where exec_function_call(exec) or exec instanceof Exec
select exec, message()
select exec, message()

View File

@@ -13,18 +13,20 @@
import python
predicate is_a_string_type(ClassObject seqtype) {
seqtype = theBytesType() and major_version() = 2
or
seqtype = theUnicodeType()
predicate has_string_type(Value v) {
v.getClass() = ClassValue::str()
or
v.getClass() = ClassValue::unicode() and major_version() = 2
}
from For loop, ControlFlowNode iter, Object str, Object seq, ControlFlowNode seq_origin, ClassObject strtype, ClassObject seqtype, ControlFlowNode str_origin
where loop.getIter().getAFlowNode() = iter and
iter.refersTo(str, strtype, str_origin) and
iter.refersTo(seq, seqtype, seq_origin) and
is_a_string_type(strtype) and
seqtype.isIterable() and
not is_a_string_type(seqtype)
select loop, "Iteration over $@, of class " + seqtype.getName() + ", may also iterate over $@.", seq_origin, "sequence", str_origin, "string"
from
For loop, ControlFlowNode iter, Value str, Value seq, ControlFlowNode seq_origin, ControlFlowNode str_origin
where
loop.getIter().getAFlowNode() = iter and
iter.pointsTo(str, str_origin) and
iter.pointsTo(seq, seq_origin) and
has_string_type(str) and
seq.getClass().isIterable() and
not has_string_type(seq)
select loop, "Iteration over $@, of class " + seq.getClass().getName() + ", may also iterate over $@.",
seq_origin, "sequence", str_origin, "string"

View File

@@ -14,18 +14,19 @@
import python
private int len(ExprList el) {
result = count(el.getAnItem())
}
private int len(ExprList el) { result = count(el.getAnItem()) }
predicate mismatched(Assign a, int lcount, int rcount, Location loc, string sequenceType) {
exists(ExprList l, ExprList r |
(a.getATarget().(Tuple).getElts() = l or
a.getATarget().(List).getElts() = l)
and
((a.getValue().(Tuple).getElts() = r and sequenceType = "tuple") or
(a.getValue().(List).getElts() = r and sequenceType = "list"))
and
(
a.getATarget().(Tuple).getElts() = l or
a.getATarget().(List).getElts() = l
) and
(
a.getValue().(Tuple).getElts() = r and sequenceType = "tuple"
or
a.getValue().(List).getElts() = r and sequenceType = "list"
) and
loc = a.getValue().getLocation() and
lcount = len(l) and
rcount = len(r) and
@@ -35,24 +36,26 @@ predicate mismatched(Assign a, int lcount, int rcount, Location loc, string sequ
}
predicate mismatched_tuple_rhs(Assign a, int lcount, int rcount, Location loc) {
exists(ExprList l, TupleObject r, AstNode origin |
(a.getATarget().(Tuple).getElts() = l or
a.getATarget().(List).getElts() = l)
and
a.getValue().refersTo(r, origin) and
exists(ExprList l, TupleValue r, AstNode origin |
(
a.getATarget().(Tuple).getElts() = l or
a.getATarget().(List).getElts() = l
) and
a.getValue().pointsTo(r, origin) and
loc = origin.getLocation() and
lcount = len(l) and
rcount = r.getLength() and
rcount = r.length() and
lcount != rcount and
not exists(Starred s | l.getAnItem() = s)
)
}
from Assign a, int lcount, int rcount, Location loc, string sequenceType
where
mismatched(a, lcount, rcount, loc, sequenceType)
or
mismatched_tuple_rhs(a, lcount, rcount, loc) and
sequenceType = "tuple"
select a, "Left hand side of assignment contains " + lcount + " variables, but right hand side is a $@ of length " + rcount + "." , loc, sequenceType
select a,
"Left hand side of assignment contains " + lcount +
" variables, but right hand side is a $@ of length " + rcount + ".", loc, sequenceType

View File

@@ -12,24 +12,18 @@
import python
Object aFunctionLocalsObject() {
exists(Call c, Name n, GlobalVariable v |
c = result.getOrigin() and
n = c.getFunc() and
n.getVariable() = v and
v.getId() = "locals" and
c.getScope() instanceof FastLocalsFunction
)
predicate originIsLocals(ControlFlowNode n) {
n.pointsTo(_, _, Value::named("locals").getACall())
}
predicate modification_of_locals(ControlFlowNode f) {
f.(SubscriptNode).getObject().refersTo(aFunctionLocalsObject()) and (f.isStore() or f.isDelete())
originIsLocals(f.(SubscriptNode).getObject()) and
(f.isStore() or f.isDelete())
or
exists(string mname, AttrNode attr |
attr = f.(CallNode).getFunction() and
attr.getObject(mname).refersTo(aFunctionLocalsObject(), _) |
originIsLocals(attr.getObject(mname))
|
mname = "pop" or
mname = "popitem" or
mname = "update" or
@@ -39,5 +33,4 @@ predicate modification_of_locals(ControlFlowNode f) {
from AstNode a, ControlFlowNode f
where modification_of_locals(f) and a = f.getNode()
select a, "Modification of the locals() dictionary will have no effect on the local variables."

View File

@@ -10,20 +10,21 @@
* @precision very-high
* @id py/nested-loops-with-same-variable
*/
import python
predicate loop_variable(For f, Variable v) {
f.getTarget().defines(v)
}
predicate loop_variable(For f, Variable v) { f.getTarget().defines(v) }
predicate variableUsedInNestedLoops(For inner, For outer, Variable v) {
/* Only treat loops in body as inner loops. Loops in the else clause are ignored. */
outer.getBody().contains(inner) and loop_variable(inner, v) and loop_variable(outer, v)
outer.getBody().contains(inner) and
loop_variable(inner, v) and
loop_variable(outer, v) and
/* Ignore cases where there is no use of the variable or the only use is in the inner loop */
and exists(Name n | n.uses(v) and outer.contains(n) and not inner.contains(n))
exists(Name n | n.uses(v) and outer.contains(n) and not inner.contains(n))
}
from For inner, For outer, Variable v
where variableUsedInNestedLoops(inner, outer, v)
select inner, "Nested for statement uses loop variable '" + v.getId() + "' of enclosing $@.",
outer, "for statement"
select inner, "Nested for statement uses loop variable '" + v.getId() + "' of enclosing $@.", outer,
"for statement"

View File

@@ -19,18 +19,18 @@ predicate loop_variable_ssa(For f, Variable v, SsaVariable s) {
predicate variableUsedInNestedLoops(For inner, For outer, Variable v, Name n) {
/* Ignore cases where there is no use of the variable or the only use is in the inner loop. */
outer.contains(n)
and not inner.contains(n)
outer.contains(n) and
not inner.contains(n) and
/* Only treat loops in body as inner loops. Loops in the else clause are ignored. */
and outer.getBody().contains(inner)
and exists(SsaVariable s |
loop_variable_ssa(inner, v, s.getAnUltimateDefinition())
and loop_variable_ssa(outer, v, _)
and s.getAUse().getNode() = n
outer.getBody().contains(inner) and
exists(SsaVariable s |
loop_variable_ssa(inner, v, s.getAnUltimateDefinition()) and
loop_variable_ssa(outer, v, _) and
s.getAUse().getNode() = n
)
}
from For inner, For outer, Variable v, Name n
where variableUsedInNestedLoops(inner, outer, v, n)
select inner, "Nested for statement $@ loop variable '" + v.getId() + "' of enclosing $@.", n, "uses",
outer, "for statement"
select inner, "Nested for statement $@ loop variable '" + v.getId() + "' of enclosing $@.", n,
"uses", outer, "for statement"

View File

@@ -17,7 +17,7 @@ for addressing the defect. </p>
</recommendation>
<example>
<p>
In this example, the loop may attempt to iterate over <code>None</code>, which is not an iterator.
In this example, the loop may attempt to iterate over <code>None</code>, which is not an iterable.
It is likely that the programmer forgot to test for <code>None</code> before the loop.
</p>
<sample src="NonIteratorInForLoop.py" />

View File

@@ -14,10 +14,12 @@
import python
from For loop, ControlFlowNode iter, Value v, ClassValue t, ControlFlowNode origin
where loop.getIter().getAFlowNode() = iter and
iter.pointsTo(_, v, origin) and v.getClass() = t and
not t.isIterable() and not t.failedInference(_) and
not v = Value::named("None") and
not t.isDescriptorType()
select loop, "$@ of class '$@' may be used in for-loop.", origin, "Non-iterator", t, t.getName()
where
loop.getIter().getAFlowNode() = iter and
iter.pointsTo(_, v, origin) and
v.getClass() = t and
not t.isIterable() and
not t.failedInference(_) and
not v = Value::named("None") and
not t.isDescriptorType()
select loop, "$@ of class '$@' may be used in for-loop.", origin, "Non-iterable", t, t.getName()

View File

@@ -12,8 +12,8 @@
*/
import python
predicate assignment(AssignStmt a, Expr left, Expr right)
{
predicate assignment(AssignStmt a, Expr left, Expr right) {
a.getATarget() = left and a.getValue() = right
}
@@ -23,7 +23,8 @@ predicate corresponding(Expr left, Expr right) {
exists(Attribute la, Attribute ra |
corresponding(la, ra) and
left = la.getObject() and
right = ra.getObject())
right = ra.getObject()
)
}
predicate same_value(Expr left, Expr right) {
@@ -33,34 +34,41 @@ predicate same_value(Expr left, Expr right) {
}
predicate maybe_defined_in_outer_scope(Name n) {
exists(SsaVariable v | v.getAUse().getNode() = n |
v.maybeUndefined()
)
exists(SsaVariable v | v.getAUse().getNode() = n | v.maybeUndefined())
}
Variable relevant_var(Name n) {
n.getVariable() = result and
(corresponding(n, _) or corresponding(_, n))
/* Protection against FPs in projects that offer compatibility between Python 2 and 3,
* since many of them make assignments such as
*
* if PY2:
* bytes = str
* else:
* bytes = bytes
*
*/
predicate isBuiltin(string name) {
exists(Value v | v = Value::named(name) and v.isBuiltin())
}
predicate same_name(Name n1, Name n2) {
corresponding(n1, n2) and
relevant_var(n1) = relevant_var(n2) and
not exists(Object::builtin(n1.getId())) and
n1.getVariable() = n2.getVariable() and
not isBuiltin(n1.getId()) and
not maybe_defined_in_outer_scope(n2)
}
ClassObject value_type(Attribute a) {
a.getObject().refersTo(_, result, _)
}
ClassValue value_type(Attribute a) { a.getObject().pointsTo().getClass() = result }
predicate is_property_access(Attribute a) {
value_type(a).lookupAttribute(a.getName()) instanceof PropertyObject
value_type(a).lookup(a.getName()) instanceof PropertyValue
}
predicate same_attribute(Attribute a1, Attribute a2) {
corresponding(a1, a2) and a1.getName() = a2.getName() and same_value(a1.getObject(), a2.getObject()) and
exists(value_type(a1)) and not is_property_access(a1)
corresponding(a1, a2) and
a1.getName() = a2.getName() and
same_value(a1.getObject(), a2.getObject()) and
exists(value_type(a1)) and
not is_property_access(a1)
}
int pyflakes_commented_line(File file) {
@@ -72,21 +80,25 @@ int pyflakes_commented_line(File file) {
predicate pyflakes_commented(AssignStmt assignment) {
exists(Location loc |
assignment.getLocation() = loc and
loc.getStartLine() = pyflakes_commented_line(loc.getFile()))
loc.getStartLine() = pyflakes_commented_line(loc.getFile())
)
}
predicate side_effecting_lhs(Attribute lhs) {
exists(ClassObject cls, ClassObject decl |
lhs.getObject().refersTo(_, cls, _) and
decl = cls.getAnImproperSuperType() and
not decl.isBuiltin() |
exists(ClassValue cls, ClassValue decl |
lhs.getObject().pointsTo().getClass() = cls and
decl = cls.getASuperType() and
not decl.isBuiltin()
|
decl.declaresAttribute("__setattr__")
)
}
from AssignStmt a, Expr left, Expr right
where assignment(a, left, right)
and same_value(left, right)
and not pyflakes_commented(a) and
not side_effecting_lhs(left)
where
assignment(a, left, right) and
same_value(left, right) and
// some people use self-assignment to shut Pyflakes up, such as `ok = ok # Pyflakes`
not pyflakes_commented(a) and
not side_effecting_lhs(left)
select a, "This assignment assigns a variable to itself."

View File

@@ -14,12 +14,12 @@ import python
from AstNode node, string kind
where
not node.getScope() instanceof Function and
(
node instanceof Return and kind = "return"
or
node instanceof Yield and kind = "yield"
or
node instanceof YieldFrom and kind = "yield from"
)
not node.getScope() instanceof Function and
(
node instanceof Return and kind = "return"
or
node instanceof Yield and kind = "yield"
or
node instanceof YieldFrom and kind = "yield from"
)
select node, "'" + kind + "' is used outside a function."

View File

@@ -14,24 +14,26 @@
import python
predicate calls_close(Call c) { exists(Attribute a | c.getFunc() = a and a.getName() = "close") }
predicate calls_close(Call c) {
exists (Attribute a | c.getFunc() = a and a.getName() = "close")
predicate only_stmt_in_finally(Try t, Call c) {
exists(ExprStmt s |
t.getAFinalstmt() = s and s.getValue() = c and strictcount(t.getAFinalstmt()) = 1
)
}
predicate
only_stmt_in_finally(Try t, Call c) {
exists(ExprStmt s | t.getAFinalstmt() = s and s.getValue() = c and strictcount(t.getAFinalstmt()) = 1)
predicate points_to_context_manager(ControlFlowNode f, ClassValue cls) {
forex(Value v | f.pointsTo(v) | v.getClass() = cls) and
cls.isContextManager()
}
predicate points_to_context_manager(ControlFlowNode f, ClassObject cls) {
cls.isContextManager() and
forex(Object obj | f.refersTo(obj) | f.refersTo(obj, cls, _))
}
from Call close, Try t, ClassObject cls
where only_stmt_in_finally(t, close) and calls_close(close) and
exists(ControlFlowNode f | f = close.getFunc().getAFlowNode().(AttrNode).getObject() |
points_to_context_manager(f, cls))
select close, "Instance of context-manager class $@ is closed in a finally block. Consider using 'with' statement.", cls, cls.getName()
from Call close, Try t, ClassValue cls
where
only_stmt_in_finally(t, close) and
calls_close(close) and
exists(ControlFlowNode f | f = close.getFunc().getAFlowNode().(AttrNode).getObject() |
points_to_context_manager(f, cls)
)
select close,
"Instance of context-manager class $@ is closed in a finally block. Consider using 'with' statement.",
cls, cls.getName()

View File

@@ -1 +1 @@
assert(subprocess.call(['run-backup']) == 0)
assert subprocess.call(['run-backup']) == 0

View File

@@ -14,22 +14,37 @@
import python
predicate func_with_side_effects(Expr e) {
exists(string name |
name = ((Attribute)e).getName() or name = ((Name)e).getId() |
name = "print" or name = "write" or name = "append" or
name = "pop" or name = "remove" or name = "discard" or
name = "delete" or name = "close" or name = "open" or
exists(string name | name = e.(Attribute).getName() or name = e.(Name).getId() |
name = "print" or
name = "write" or
name = "append" or
name = "pop" or
name = "remove" or
name = "discard" or
name = "delete" or
name = "close" or
name = "open" or
name = "exit"
)
}
predicate call_with_side_effect(Call e) {
e.getAFlowNode() = Value::named("subprocess.call").getACall()
or
e.getAFlowNode() = Value::named("subprocess.check_call").getACall()
or
e.getAFlowNode() = Value::named("subprocess.check_output").getACall()
}
predicate probable_side_effect(Expr e) {
// Only consider explicit yields, not artificial ones in comprehensions
e instanceof Yield and not exists(Comp c | c.contains(e))
or
e instanceof YieldFrom
or
e instanceof Call and func_with_side_effects(((Call)e).getFunc())
e instanceof Call and func_with_side_effects(e.(Call).getFunc())
or
e instanceof Call and call_with_side_effect(e)
}
from Assert a, Expr e

View File

@@ -13,73 +13,75 @@
import python
predicate understood_attribute(Attribute attr, ClassObject cls, ClassObject attr_cls) {
exists(string name |
attr.getName() = name |
attr.getObject().refersTo(_, cls, _) and
cls.attributeRefersTo(name, _, attr_cls, _)
predicate understood_attribute(Attribute attr, ClassValue cls, ClassValue attr_cls) {
exists(string name | attr.getName() = name |
attr.getObject().pointsTo().getClass() = cls and
cls.attr(name).getClass() = attr_cls
)
}
/* Conservative estimate of whether attribute lookup has a side effect */
predicate side_effecting_attribute(Attribute attr) {
exists(ClassObject cls, ClassObject attr_cls |
exists(ClassValue cls, ClassValue attr_cls |
understood_attribute(attr, cls, attr_cls) and
side_effecting_descriptor_type(attr_cls)
)
}
predicate maybe_side_effecting_attribute(Attribute attr) {
not understood_attribute(attr, _, _) and not attr.refersTo(_)
not understood_attribute(attr, _, _) and not attr.pointsTo(_)
or
side_effecting_attribute(attr)
}
predicate side_effecting_descriptor_type(ClassObject descriptor) {
predicate side_effecting_descriptor_type(ClassValue descriptor) {
descriptor.isDescriptorType() and
/* Technically all descriptor gets have side effects,
* but some are indicative of a missing call and
* we want to treat them as having no effect. */
not descriptor = thePyFunctionType() and
not descriptor = theStaticMethodType() and
not descriptor = theClassMethodType()
// Technically all descriptor gets have side effects,
// but some are indicative of a missing call and
// we want to treat them as having no effect.
not descriptor = ClassValue::functionType() and
not descriptor = ClassValue::staticmethod() and
not descriptor = ClassValue::classmethod()
}
/** Side effecting binary operators are rare, so we assume they are not
/**
* Side effecting binary operators are rare, so we assume they are not
* side-effecting unless we know otherwise.
*/
predicate side_effecting_binary(Expr b) {
exists(Expr sub, ClassObject cls, string method_name |
exists(Expr sub, ClassValue cls, string method_name |
binary_operator_special_method(b, sub, cls, method_name)
or
comparison_special_method(b, sub, cls, method_name)
|
|
method_name = special_method() and
cls.hasAttribute(method_name)
and
not exists(ClassObject declaring |
declaring.declaresAttribute(method_name)
and declaring = cls.getAnImproperSuperType() and
declaring.isBuiltin() and not declaring = theObjectType()
cls.hasAttribute(method_name) and
not exists(ClassValue declaring |
declaring.declaresAttribute(method_name) and
declaring = cls.getASuperType() and
declaring.isBuiltin() and
not declaring = ClassValue::object()
)
)
}
pragma[nomagic]
private predicate binary_operator_special_method(BinaryExpr b, Expr sub, ClassObject cls, string method_name) {
private predicate binary_operator_special_method(
BinaryExpr b, Expr sub, ClassValue cls, string method_name
) {
method_name = special_method() and
sub = b.getLeft() and
method_name = b.getOp().getSpecialMethodName() and
sub.refersTo(_, cls, _)
sub.pointsTo().getClass() = cls
}
pragma[nomagic]
private predicate comparison_special_method(Compare b, Expr sub, ClassObject cls, string method_name) {
private predicate comparison_special_method(Compare b, Expr sub, ClassValue cls, string method_name) {
exists(Cmpop op |
b.compares(sub, op, _) and
method_name = op.getSpecialMethodName()
) and
sub.refersTo(_, cls, _)
sub.pointsTo().getClass() = cls
}
private string special_method() {
@@ -89,19 +91,16 @@ private string special_method() {
}
predicate is_notebook(File f) {
exists(Comment c |
c.getLocation().getFile() = f |
exists(Comment c | c.getLocation().getFile() = f |
c.getText().regexpMatch("#\\s*<nbformat>.+</nbformat>\\s*")
)
}
/** Expression (statement) in a jupyter/ipython notebook */
predicate in_notebook(Expr e) {
is_notebook(e.getScope().(Module).getFile())
}
predicate in_notebook(Expr e) { is_notebook(e.getScope().(Module).getFile()) }
FunctionObject assertRaises() {
result = ModuleObject::named("unittest").attr("TestCase").(ClassObject).lookupAttribute("assertRaises")
FunctionValue assertRaises() {
result = Value::named("unittest.TestCase").(ClassValue).lookup("assertRaises")
}
/** Holds if expression `e` is in a `with` block that tests for exceptions being raised. */
@@ -121,14 +120,11 @@ predicate python2_print(Expr e) {
}
predicate no_effect(Expr e) {
// strings can be used as comments
not e instanceof StrConst and
not ((StrConst)e).isDocString() and
not e.hasSideEffects() and
forall(Expr sub |
sub = e.getASubExpression*()
|
not side_effecting_binary(sub)
and
forall(Expr sub | sub = e.getASubExpression*() |
not side_effecting_binary(sub) and
not maybe_side_effecting_attribute(sub)
) and
not in_notebook(e) and
@@ -139,4 +135,3 @@ predicate no_effect(Expr e) {
from ExprStmt stmt
where no_effect(stmt.getValue())
select stmt, "This statement has no effect."

View File

@@ -3,13 +3,13 @@
"qhelp.dtd">
<qhelp>
<overview>
<p>If you concatenate strings in a loop then the time taken by the loop is quadratic in the number
<p>If you concatenate strings in a loop then the time taken by the loop is quadratic in the number
of iterations.</p>
</overview>
<recommendation>
<p>Initialize an empty list before the start of the list.
<p>Initialize an empty list before the start of the loop.
During the loop append the substrings to the list.
At the end of the loop, convert the list to a string by using <code>''.join(list)</code>.</p>

View File

@@ -13,17 +13,16 @@
import python
predicate string_concat_in_loop(BinaryExpr b) {
b.getOp() instanceof Add
and
exists(SsaVariable d, SsaVariable u, BinaryExprNode add, ClassObject str_type |
add.getNode() = b and d = u.getAnUltimateDefinition() |
d.getDefinition().(DefinitionNode).getValue() = add and u.getAUse() = add.getAnOperand() and
add.getAnOperand().refersTo(_, str_type, _) and
(str_type = theBytesType() or str_type = theUnicodeType())
b.getOp() instanceof Add and
exists(SsaVariable d, SsaVariable u, BinaryExprNode add |
add.getNode() = b and d = u.getAnUltimateDefinition()
|
d.getDefinition().(DefinitionNode).getValue() = add and
u.getAUse() = add.getAnOperand() and
add.getAnOperand().pointsTo().getClass() = ClassValue::str()
)
}
from BinaryExpr b, Stmt s
where string_concat_in_loop(b) and s.getASubExpression() = b
select s, "String concatenation in a loop is quadratic in the number of iterations."

View File

@@ -13,10 +13,10 @@
import python
predicate main_eq_name(If i) {
exists(Name n, StrConst m, Compare c |
i.getTest() = c and c.getLeft() = n and
i.getTest() = c and
c.getLeft() = n and
c.getAComparator() = m and
n.getId() = "__name__" and
m.getText() = "__main__"
@@ -24,12 +24,17 @@ predicate main_eq_name(If i) {
}
predicate is_print_stmt(Stmt s) {
s instanceof Print or
exists(ExprStmt e, Call c, Name n | e = s and c = e.getValue() and n = c.getFunc() and n.getId() = "print")
s instanceof Print
or
exists(ExprStmt e, Call c, Name n |
e = s and c = e.getValue() and n = c.getFunc() and n.getId() = "print"
)
}
from Stmt p
where is_print_stmt(p) and
exists(ModuleObject m | m.getModule() = p.getScope() and m.getKind() = "module") and
not exists(If i | main_eq_name(i) and i.getASubStatement().getASubStatement*() = p)
where
is_print_stmt(p) and
// TODO: Need to discuss how we would like to handle ModuleObject.getKind in the glorious future
exists(ModuleValue m | m.getScope() = p.getScope() and m.isUsedAsModule()) and
not exists(If i | main_eq_name(i) and i.getASubStatement().getASubStatement*() = p)
select p, "Print statement may execute during import."

View File

@@ -12,7 +12,6 @@
* @id py/unnecessary-delete
*/
import python
from Delete del, Expr e, Function f
@@ -23,11 +22,11 @@ where
not e instanceof Subscript and
not e instanceof Attribute and
not exists(Stmt s | s.(While).contains(del) or s.(For).contains(del)) and
/* False positive: calling `sys.exc_info` within a function results in a
reference cycle,and an explicit call to `del` helps break this cycle. */
not exists(FunctionObject ex |
ex.hasLongName("sys.exc_info") and
// False positive: calling `sys.exc_info` within a function results in a
// reference cycle, and an explicit call to `del` helps break this cycle.
not exists(FunctionValue ex |
ex = Value::named("sys.exc_info") and
ex.getACall().getScope() = f
)
select del, "Unnecessary deletion of local variable $@ in function $@.",
e.getLocation(), e.toString(), f.getLocation(), f.getName()
select del, "Unnecessary deletion of local variable $@ in function $@.", e.getLocation(),
e.toString(), f.getLocation(), f.getName()

View File

@@ -14,9 +14,11 @@ import python
from Stmt loop, StmtList body, StmtList clause, string kind
where
(exists(For f | f = loop | clause = f.getOrelse() and body = f.getBody() and kind = "for")
or
exists(While w | w = loop | clause = w.getOrelse() and body = w.getBody() and kind = "while")
)
and not exists(Break b | body.contains(b))
select loop, "This '" + kind + "' statement has a redundant 'else' as no 'break' is present in the body."
(
exists(For f | f = loop | clause = f.getOrelse() and body = f.getBody() and kind = "for")
or
exists(While w | w = loop | clause = w.getOrelse() and body = w.getBody() and kind = "while")
) and
not exists(Break b | body.contains(b))
select loop,
"This '" + kind + "' statement has a redundant 'else' as no 'break' is present in the body."

View File

@@ -13,21 +13,20 @@
import python
predicate is_doc_string(ExprStmt s) {
s.getValue() instanceof Unicode or s.getValue() instanceof Bytes
s.getValue() instanceof Unicode or s.getValue() instanceof Bytes
}
predicate has_doc_string(StmtList stmts) {
stmts.getParent() instanceof Scope
and
stmts.getParent() instanceof Scope and
is_doc_string(stmts.getItem(0))
}
from Pass p, StmtList list
where list.getAnItem() = p and
(
strictcount(list.getAnItem()) = 2 and not has_doc_string(list)
or
strictcount(list.getAnItem()) > 2
)
where
list.getAnItem() = p and
(
strictcount(list.getAnItem()) = 2 and not has_doc_string(list)
or
strictcount(list.getAnItem()) > 2
)
select p, "Unnecessary 'pass' statement."

View File

@@ -12,8 +12,9 @@
import python
from Call call, ClassObject ex
where call.getFunc().refersTo(ex) and ex.getAnImproperSuperType() = theExceptionType()
and exists(ExprStmt s | s.getValue() = call)
from Call call, ClassValue ex
where
call.getFunc().pointsTo(ex) and
ex.getASuperType() = ClassValue::exception() and
exists(ExprStmt s | s.getValue() = call)
select call, "Instantiating an exception, but not raising it, has no effect"

View File

@@ -12,5 +12,7 @@
import python
from CallNode call, string name
where call.getFunction().refersTo(Object::quitter(name))
select call, "The '" + name + "' site.Quitter object may not exist if the 'site' module is not loaded or is modified."
where call.getFunction().pointsTo(Value::siteQuitter(name))
select call,
"The '" + name +
"' site.Quitter object may not exist if the 'site' module is not loaded or is modified."

View File

@@ -5,7 +5,7 @@ predicate monkey_patched_builtin(string name) {
subscr.isStore() and
subscr.getIndex().getNode() = s and
s.getText() = name and
subscr.getValue() = attr and
subscr.getObject() = attr and
attr.getObject("__dict__").pointsTo(Module::builtinModule())
)
or

View File

@@ -0,0 +1 @@
This directory contains [experimental](../../../../docs/experimental.md) CodeQL queries and libraries.

View File

@@ -910,6 +910,10 @@ private AstNode assigned_value(Expr lhs) {
predicate nested_sequence_assign(Expr left_parent, Expr right_parent,
Expr left_result, Expr right_result) {
exists(Assign a |
a.getATarget().getASubExpression*() = left_parent and
a.getValue().getASubExpression*() = right_parent
) and
exists(int i, Expr left_elem, Expr right_elem
|
(

View File

@@ -726,6 +726,8 @@ private class EssaTaintTracking extends string {
private TaintKind iterable_unpacking_descent(
SequenceNode left_parent, ControlFlowNode left_defn, CollectionKind parent_kind
) {
//TODO: Fix the cartesian product in this predicate
none() and
left_parent.getAnElement() = left_defn and
// Handle `a, *b = some_iterable`
if left_defn instanceof StarredNode

View File

@@ -286,14 +286,14 @@ module DictKind {
pragma[noinline]
private predicate subscript_index(ControlFlowNode obj, SubscriptNode sub) {
sub.isLoad() and
sub.getValue() = obj and
sub.getObject() = obj and
not sub.getNode().getIndex() instanceof Slice
}
pragma[noinline]
private predicate subscript_slice(ControlFlowNode obj, SubscriptNode sub) {
sub.isLoad() and
sub.getValue() = obj and
sub.getObject() = obj and
sub.getNode().getIndex() instanceof Slice
}

View File

@@ -20,13 +20,39 @@ class PropertyInternal extends ObjectInternal, TProperty {
this = TProperty(_, _, result)
}
private CallNode getCallNode() { this = TProperty(result, _, _) }
/** Gets the setter function of this property */
CallableObjectInternal getSetter() {
// @x.setter
exists(CallNode call, AttrNode setter |
call.getFunction() = setter and
call.getFunction() = setter and
PointsToInternal::pointsTo(setter.getObject("setter"), this.getContext(), this, _) and
PointsToInternal::pointsTo(call.getArg(0), this.getContext(), result, _)
)
or
// x = property(getter, setter, deleter)
exists(ControlFlowNode setter_arg |
setter_arg = getCallNode().getArg(1) or setter_arg = getCallNode().getArgByName("fset")
|
PointsToInternal::pointsTo(setter_arg, this.getContext(), result, _)
)
}
/** Gets the setter function of this property */
CallableObjectInternal getDeleter() {
exists(CallNode call, AttrNode setter |
call.getFunction() = setter and
PointsToInternal::pointsTo(setter.getObject("deleter"), this.getContext(), this, _) and
PointsToInternal::pointsTo(call.getArg(0), this.getContext(), result, _)
)
or
// x = property(getter, setter, deleter)
exists(ControlFlowNode deleter_arg |
deleter_arg = getCallNode().getArg(2) or deleter_arg = getCallNode().getArgByName("fdel")
|
PointsToInternal::pointsTo(deleter_arg, this.getContext(), result, _)
)
}
private Context getContext() { this = TProperty(_,result, _) }

View File

@@ -187,6 +187,37 @@ class ModuleValue extends Value {
result.importedAs(this.getScope().getAnImportedModuleName())
}
/** When used as a normal module (for example, imported and used by other modules) */
predicate isUsedAsModule() {
this.isBuiltin()
or
this.isPackage()
or
exists(ImportingStmt i | this.importedAs(i.getAnImportedModuleName()))
or
this.getPath().getBaseName() = "__init__.py"
}
/** When used (exclusively) as a script (will not include normal modules that can also be run as a script) */
predicate isUsedAsScript() {
not isUsedAsModule() and
(
not this.getPath().getExtension() = "py"
or
exists(If i, Name name, StrConst main, Cmpop op |
i.getScope() = this.getScope() and
op instanceof Eq and
i.getTest().(Compare).compares(name, op, main) and
name.getId() = "__name__" and main.getText() = "__main__"
)
or
exists(Comment c |
c.getLocation().getFile() = this.getPath() and
c.getLocation().getStartLine() = 1 and
c.getText().regexpMatch("^#!/.*python(2|3)?[ \\\\t]*$")
)
)
}
}
module Module {
@@ -301,6 +332,19 @@ module Value {
result = ObjectInternal::none_()
}
/**
* Shorcuts added by the `site` module to exit your interactive session.
*
* see https://docs.python.org/3/library/constants.html#constants-added-by-the-site-module
*/
Value siteQuitter(string name) {
(
name = "exit"
or
name = "quit"
) and
result = Value::named(name)
}
}
/** Class representing callables in the Python program
@@ -412,12 +456,57 @@ class ClassValue extends Value {
or
this.hasAttribute("__getitem__")
}
/** Holds if this class is a container(). That is, does it have a __getitem__ method.*/
predicate isContainer() {
exists(this.lookup("__getitem__"))
}
/** Holds if this class is probably a sequence. */
predicate isSequence() {
/* To determine whether something is a sequence or a mapping is not entirely clear,
* so we need to guess a bit.
*/
this.getASuperType() = ClassValue::tuple()
or
this.getASuperType() = ClassValue::list()
or
this.getASuperType() = ClassValue::range()
or
this.getASuperType() = ClassValue::bytes()
or
this.getASuperType() = ClassValue::unicode()
or
major_version() = 2 and this.getASuperType() = Value::named("collections.Sequence")
or
major_version() = 3 and this.getASuperType() = Value::named("collections.abc.Sequence")
or
/* Does it have an index or __reversed__ method? */
this.isContainer() and
(
this.hasAttribute("index") or
this.hasAttribute("__reversed__")
)
}
/** Holds if this class is a mapping. */
predicate isMapping() {
this.hasAttribute("__getitem__")
and
not this.isSequence()
}
/** Holds if this class is a descriptor. */
predicate isDescriptorType() {
this.hasAttribute("__get__")
}
/** Holds if this class is a context manager. */
predicate isContextManager() {
this.hasAttribute("__enter__") and
this.hasAttribute("__exit__")
}
/** Gets the qualified name for this class.
* Should return the same name as the `__qualname__` attribute on classes in Python 3.
*/
@@ -482,6 +571,16 @@ class ClassValue extends Value {
this.(ClassObjectInternal).getClassDeclaration().declaresAttribute(name)
}
/** Whether this class is a legal exception class.
* What constitutes a legal exception class differs between major versions */
predicate isLegalExceptionType() {
not this.isNewStyle()
or
this.getASuperType() = ClassValue::baseException()
or
major_version() = 2 and this = ClassValue::tuple()
}
}
@@ -505,6 +604,28 @@ abstract class FunctionValue extends CallableValue {
predicate isOverriddenMethod() {
exists(Value f | f.overrides(this))
}
/** Whether `name` is a legal argument name for this function */
bindingset[name]
predicate isLegalArgumentName(string name) {
this.getScope().getAnArg().asName().getId() = name
or
this.getScope().getAKeywordOnlyArg().getId() = name
or
this.getScope().hasKwArg()
}
/** Whether this is a "normal" method, that is, it is exists as a class attribute
* which is not a lambda and not the __new__ method. */
predicate isNormalMethod() {
exists(ClassValue cls, string name |
cls.declaredAttribute(name) = this and
name != "__new__" and
exists(Expr expr, AstNode origin | expr.pointsTo(this, origin) |
not origin instanceof Lambda
)
)
}
}
/** Class representing Python functions */
@@ -650,6 +771,34 @@ class NumericValue extends Value {
}
}
/** A Python property:
* @property
* def f():
* ....
*
* https://docs.python.org/3/howto/descriptor.html#properties
* https://docs.python.org/3/library/functions.html#property
*/
class PropertyValue extends Value {
PropertyValue() {
this instanceof PropertyInternal
}
CallableValue getGetter(){
result = this.(PropertyInternal).getGetter()
}
CallableValue getSetter(){
result = this.(PropertyInternal).getSetter()
}
CallableValue getDeleter(){
result = this.(PropertyInternal).getDeleter()
}
}
/** A method-resolution-order sequence of classes */
class MRO extends TClassList {
@@ -698,34 +847,34 @@ module ClassValue {
ClassValue bool() {
result = TBuiltinClassObject(Builtin::special("bool"))
}
/** Get the `ClassValue` for the `tuple` class. */
ClassValue tuple() {
result = TBuiltinClassObject(Builtin::special("tuple"))
}
/** Get the `ClassValue` for the `list` class. */
ClassValue list() {
result = TBuiltinClassObject(Builtin::special("list"))
}
/** Get the `ClassValue` for `xrange` (Python 2), or `range` (only Python 3) */
ClassValue range() {
major_version() = 2 and result = TBuiltinClassObject(Builtin::special("xrange"))
or
major_version() = 3 and result = TBuiltinClassObject(Builtin::special("range"))
}
/** Get the `ClassValue` for the `dict` class. */
ClassValue dict() {
result = TBuiltinClassObject(Builtin::special("dict"))
}
/** Get the `ClassValue` for the `set` class. */
ClassValue set() {
result = TBuiltinClassObject(Builtin::special("set"))
}
/** Get the `ClassValue` for the `object` class. */
ClassValue object() {
result = TBuiltinClassObject(Builtin::special("object"))
@@ -735,7 +884,7 @@ module ClassValue {
ClassValue int_() {
result = TBuiltinClassObject(Builtin::special("int"))
}
/** Get the `ClassValue` for the `long` class. */
ClassValue long() {
result = TBuiltinClassObject(Builtin::special("long"))
@@ -745,12 +894,12 @@ module ClassValue {
ClassValue float_() {
result = TBuiltinClassObject(Builtin::special("float"))
}
/** Get the `ClassValue` for the `complex` class. */
ClassValue complex() {
result = TBuiltinClassObject(Builtin::special("complex"))
}
/** Get the `ClassValue` for the `bytes` class (also called `str` in Python 2). */
ClassValue bytes() {
result = TBuiltinClassObject(Builtin::special("bytes"))
@@ -770,12 +919,12 @@ module ClassValue {
else
result = unicode()
}
/** Get the `ClassValue` for the `property` class. */
ClassValue property() {
result = TBuiltinClassObject(Builtin::special("property"))
}
/** Get the `ClassValue` for the class of Python functions. */
ClassValue functionType() {
result = TBuiltinClassObject(Builtin::special("FunctionType"))
@@ -785,32 +934,32 @@ module ClassValue {
ClassValue builtinFunction() {
result = Value::named("len").getClass()
}
/** Get the `ClassValue` for the `generatorType` class. */
ClassValue generator() {
result = TBuiltinClassObject(Builtin::special("generator"))
}
/** Get the `ClassValue` for the `type` class. */
ClassValue type() {
result = TType()
}
/** Get the `ClassValue` for `ClassType`. */
ClassValue classType() {
result = TBuiltinClassObject(Builtin::special("ClassType"))
}
/** Get the `ClassValue` for `InstanceType`. */
ClassValue instanceType() {
result = TBuiltinClassObject(Builtin::special("InstanceType"))
}
/** Get the `ClassValue` for `super`. */
ClassValue super_() {
result = TBuiltinClassObject(Builtin::special("super"))
}
/** Get the `ClassValue` for the `classmethod` class. */
ClassValue classmethod() {
result = TBuiltinClassObject(Builtin::special("ClassMethod"))
@@ -820,26 +969,26 @@ module ClassValue {
ClassValue staticmethod() {
result = TBuiltinClassObject(Builtin::special("StaticMethod"))
}
/** Get the `ClassValue` for the `MethodType` class. */
pragma [noinline]
ClassValue methodType() {
result = TBuiltinClassObject(Builtin::special("MethodType"))
}
/** Get the `ClassValue` for the `MethodDescriptorType` class. */
ClassValue methodDescriptorType() {
result = TBuiltinClassObject(Builtin::special("MethodDescriptorType"))
}
/** Get the `ClassValue` for the `GetSetDescriptorType` class. */
ClassValue getSetDescriptorType() {
result = TBuiltinClassObject(Builtin::special("GetSetDescriptorType"))
}
/** Get the `ClassValue` for the `StopIteration` class. */
ClassValue stopIteration() {
result = TBuiltinClassObject(Builtin::special("StopIteration"))
result = TBuiltinClassObject(Builtin::builtin("StopIteration"))
}
/** Get the `ClassValue` for the class of modules. */
@@ -851,17 +1000,17 @@ module ClassValue {
ClassValue exception() {
result = TBuiltinClassObject(Builtin::special("Exception"))
}
/** Get the `ClassValue` for the `BaseException` class. */
ClassValue baseException() {
result = TBuiltinClassObject(Builtin::special("BaseException"))
}
/** Get the `ClassValue` for the `NoneType` class. */
ClassValue nonetype() {
result = TBuiltinClassObject(Builtin::special("NoneType"))
}
/** Get the `ClassValue` for the `TypeError` class */
ClassValue typeError() {
result = TBuiltinClassObject(Builtin::special("TypeError"))
@@ -871,22 +1020,27 @@ module ClassValue {
ClassValue nameError() {
result = TBuiltinClassObject(Builtin::builtin("NameError"))
}
/** Get the `ClassValue` for the `AttributeError` class. */
ClassValue attributeError() {
result = TBuiltinClassObject(Builtin::builtin("AttributeError"))
}
/** Get the `ClassValue` for the `KeyError` class. */
ClassValue keyError() {
result = TBuiltinClassObject(Builtin::builtin("KeyError"))
}
/** Get the `ClassValue` for the `LookupError` class. */
ClassValue lookupError() {
result = TBuiltinClassObject(Builtin::builtin("LookupError"))
}
/** Get the `ClassValue` for the `IOError` class. */
ClassValue ioError() {
result = TBuiltinClassObject(Builtin::builtin("IOError"))
}
/** Get the `ClassValue` for the `NotImplementedError` class. */
ClassValue notImplementedError() {
result = TBuiltinClassObject(Builtin::builtin("NotImplementedError"))
@@ -897,4 +1051,14 @@ module ClassValue {
result = TBuiltinClassObject(Builtin::builtin("ImportError"))
}
/** Get the `ClassValue` for the `UnicodeEncodeError` class. */
ClassValue unicodeEncodeError() {
result = TBuiltinClassObject(Builtin::builtin("UnicodeEncodeError"))
}
/** Get the `ClassValue` for the `UnicodeDecodeError` class. */
ClassValue unicodeDecodeError() {
result = TBuiltinClassObject(Builtin::builtin("UnicodeDecodeError"))
}
}

View File

@@ -12,25 +12,27 @@ private predicate re_module_function(string name, int flags) {
name = "subn" and flags = 4
}
/**
* Holds if `s` is used as a regex with the `re` module, with the regex-mode `mode` (if known).
* If regex mode is not known, `mode` will be `"None"`.
*/
predicate used_as_regex(Expr s, string mode) {
(s instanceof Bytes or s instanceof Unicode)
and
exists(ModuleValue re | re.getName() = "re" |
/* Call to re.xxx(regex, ... [mode]) */
exists(CallNode call, string name |
call.getArg(0).refersTo(_, _, s.getAFlowNode()) and
call.getFunction().pointsTo(re.attr(name)) |
mode = "None"
or
exists(Value obj |
mode = mode_from_mode_object(obj) |
exists(int flags_arg |
re_module_function(name, flags_arg) and
call.getArg(flags_arg).pointsTo(obj)
)
or
call.getArgByName("flags").pointsTo(obj)
/* Call to re.xxx(regex, ... [mode]) */
exists(CallNode call, string name |
call.getArg(0).refersTo(_, _, s.getAFlowNode()) and
call.getFunction().pointsTo(Module::named("re").attr(name)) |
mode = "None"
or
exists(Value obj |
mode = mode_from_mode_object(obj) |
exists(int flags_arg |
re_module_function(name, flags_arg) and
call.getArg(flags_arg).pointsTo(obj)
)
or
call.getArgByName("flags").pointsTo(obj)
)
)
}

View File

@@ -43,7 +43,7 @@ module ClearTextLogging {
PrintSink() {
exists(CallNode call |
call.getAnArg() = this and
thePrintFunction().(FunctionObject).getACall() = call
call = Value::named("print").getACall()
)
}
}

View File

@@ -12,13 +12,11 @@ abstract class WeakCryptoSink extends TaintSink {
}
}
/** Modeling the 'pycrypto' package https://github.com/dlitz/pycrypto (latest release 2013) */
module Pycrypto {
ModuleObject cipher(string name) {
exists(PackageObject crypto |
crypto.getName() = "Crypto.Cipher" |
crypto.submodule(name) = result
)
ModuleValue cipher(string name) {
result = Module::named("Crypto.Cipher").attr(name)
}
class CipherInstance extends TaintKind {
@@ -51,7 +49,7 @@ module Pycrypto {
CipherInstanceSource() {
exists(AttrNode attr |
this.(CallNode).getFunction() = attr and
attr.getObject("new").refersTo(cipher(instance.getName()))
attr.getObject("new").pointsTo(cipher(instance.getName()))
)
}
@@ -59,7 +57,7 @@ module Pycrypto {
result = "Source of " + instance
}
override predicate isSourceOf(TaintKind kind) {
override predicate isSourceOf(TaintKind kind) {
kind = instance
}
@@ -70,12 +68,12 @@ module Pycrypto {
string name;
PycryptoWeakCryptoSink() {
exists(CallNode call, AttrNode method, CipherInstance Cipher |
exists(CallNode call, AttrNode method, CipherInstance cipher |
call.getAnArg() = this and
call.getFunction() = method and
Cipher.taints(method.getObject("encrypt")) and
Cipher.isWeak() and
Cipher.getName() = name
cipher.taints(method.getObject("encrypt")) and
cipher.isWeak() and
cipher.getName() = name
)
}
@@ -89,25 +87,25 @@ module Pycrypto {
module Cryptography {
PackageObject ciphers() {
result.getName() = "cryptography.hazmat.primitives.ciphers"
ModuleValue ciphers() {
result = Module::named("cryptography.hazmat.primitives.ciphers") and
result.isPackage()
}
class CipherClass extends ClassObject {
class CipherClass extends ClassValue {
CipherClass() {
ciphers().attr("Cipher") = this
}
}
class AlgorithmClass extends ClassObject {
class AlgorithmClass extends ClassValue {
AlgorithmClass() {
ciphers().submodule("algorithms").attr(_) = this
ciphers().attr("algorithms").attr(_) = this
}
string getAlgorithmName() {
result = this.declaredAttribute("name").(StringObject).getText()
result = this.declaredAttribute("name").(StringValue).getText()
}
predicate isWeak() {
@@ -134,7 +132,7 @@ module Cryptography {
cls.isWeak()
}
override TaintKind getTaintOfMethodResult(string name) {
override TaintKind getTaintOfMethodResult(string name) {
name = "encryptor" and
result.(Encryptor).getAlgorithm() = this.getAlgorithm()
}
@@ -144,11 +142,11 @@ module Cryptography {
class CipherSource extends TaintSource {
CipherSource() {
this.(CallNode).getFunction().refersTo(any(CipherClass cls))
this.(CallNode).getFunction().pointsTo(any(CipherClass cls))
}
override predicate isSourceOf(TaintKind kind) {
this.(CallNode).getArg(0).refersTo(_, kind.(CipherInstance).getAlgorithm(), _)
this.(CallNode).getArg(0).pointsTo().getClass() = kind.(CipherInstance).getAlgorithm()
}
override string toString() {
@@ -203,5 +201,3 @@ private class CipherConfig extends TaintTracking::Configuration {
}
}

View File

@@ -27,7 +27,7 @@ class ExceptionInfo extends StringKind {
}
/** A class representing sources of information about
/** A class representing sources of information about
* execution state exposed in tracebacks and the like.
*/
abstract class ErrorInfoSource extends TaintSource {}
@@ -59,9 +59,9 @@ class ExceptionKind extends TaintKind {
class ExceptionSource extends ErrorInfoSource {
ExceptionSource() {
exists(ClassObject cls |
cls.isSubclassOf(theExceptionType()) and
this.(ControlFlowNode).refersTo(_, cls, _)
exists(ClassValue cls |
cls.getASuperType() = ClassValue::baseException() and
this.(ControlFlowNode).pointsTo().getClass() = cls
)
or
this = any(ExceptStmt s).getName().getAFlowNode()
@@ -116,7 +116,7 @@ class CallToTracebackFunction extends ErrorInfoSource {
}
}
/**
/**
* Represents calls to functions in the `traceback` module that return a single
* string of information about an exception.
*/

View File

@@ -166,9 +166,9 @@ module SensitiveData {
SensitiveRequestParameter() {
this.(CallNode).getFunction().(AttrNode).getName() = "get" and
exists(string sensitive |
this.(CallNode).getAnArg().refersTo(any(StringObject s | s.getText() = sensitive)) and
data = HeuristicNames::getSensitiveDataForName(sensitive)
exists(StringValue sensitive |
this.(CallNode).getAnArg().pointsTo(sensitive) and
data = HeuristicNames::getSensitiveDataForName(sensitive.getText())
)
}

View File

@@ -71,11 +71,7 @@ private predicate str_method_call(ControlFlowNode fromnode, CallNode tonode) {
/* tonode = ....format(fromnode) */
private predicate str_format(ControlFlowNode fromnode, CallNode tonode) {
tonode.getFunction().(AttrNode).getName() = "format" and
(
tonode.getAnArg() = fromnode
or
tonode.getNode().getAKeyword().getValue() = fromnode.getNode()
)
tonode.getAnArg() = fromnode
}
/* tonode = codec.[en|de]code(fromnode)*/
@@ -93,9 +89,10 @@ private predicate encode_decode(ControlFlowNode fromnode, CallNode tonode) {
/* tonode = str(fromnode)*/
private predicate to_str(ControlFlowNode fromnode, CallNode tonode) {
tonode.getAnArg() = fromnode and
exists(ClassObject str |
tonode.getFunction().refersTo(str) |
str = theUnicodeType() or str = theBytesType()
(
tonode = ClassValue::bytes().getACall()
or
tonode = ClassValue::unicode().getACall()
)
}
@@ -104,17 +101,14 @@ private predicate slice(ControlFlowNode fromnode, SubscriptNode tonode) {
exists(Slice all |
all = tonode.getIndex().getNode() and
not exists(all.getStart()) and not exists(all.getStop()) and
tonode.getValue() = fromnode
tonode.getObject() = fromnode
)
}
/* tonode = os.path.join(..., fromnode, ...) */
private predicate os_path_join(ControlFlowNode fromnode, CallNode tonode) {
exists(FunctionObject path_join |
path_join = ModuleObject::named("os").attr("path").(ModuleObject).attr("join")
and
tonode = path_join.getACall() and tonode.getAnArg() = fromnode
)
tonode = Value::named("os.path.join").getACall()
and tonode.getAnArg() = fromnode
}
/** A kind of "taint", representing a dictionary mapping str->"taint" */
@@ -125,5 +119,3 @@ class StringDictKind extends DictKind {
}
}

View File

@@ -5,12 +5,12 @@ import python
predicate copy_call(ControlFlowNode fromnode, CallNode tonode) {
tonode.getFunction().(AttrNode).getObject("copy") = fromnode
or
exists(ModuleObject copy, string name |
exists(ModuleValue copy, string name |
name = "copy" or name = "deepcopy" |
copy.attr(name).(FunctionObject).getACall() = tonode and
copy.attr(name).(FunctionValue).getACall() = tonode and
tonode.getArg(0) = fromnode
)
or
tonode.getFunction().refersTo(Object::builtin("reversed")) and
tonode.getFunction().pointsTo(Value::named("reversed")) and
tonode.getArg(0) = fromnode
}

View File

@@ -2,45 +2,40 @@ import python
import Basic
private import Common
/** An extensible kind of taint representing an externally controlled string.
/**
* An extensible kind of taint representing an externally controlled string.
*/
abstract class ExternalStringKind extends StringKind {
bindingset[this]
ExternalStringKind() {
this = this
}
ExternalStringKind() { this = this }
override TaintKind getTaintForFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode) {
result = StringKind.super.getTaintForFlowStep(fromnode, tonode)
or
tonode.(SequenceNode).getElement(_) = fromnode and result.(ExternalStringSequenceKind).getItem() = this
tonode.(SequenceNode).getElement(_) = fromnode and
result.(ExternalStringSequenceKind).getItem() = this
or
json_load(fromnode, tonode) and result.(ExternalJsonKind).getValue() = this
or
tonode.(DictNode).getAValue() = fromnode and result.(ExternalStringDictKind).getValue() = this
or
urlsplit(fromnode, tonode) and result.(ExternalUrlSplitResult).getItem() = this
or
urlparse(fromnode, tonode) and result.(ExternalUrlParseResult).getItem() = this
}
}
/** A kind of "taint", representing a sequence, with a "taint" member */
class ExternalStringSequenceKind extends SequenceKind {
ExternalStringSequenceKind() {
this.getItem() instanceof ExternalStringKind
}
ExternalStringSequenceKind() { this.getItem() instanceof ExternalStringKind }
}
/** An hierachical dictionary or list where the entire structure is externally controlled
/**
* An hierachical dictionary or list where the entire structure is externally controlled
* This is typically a parsed JSON object.
*/
class ExternalJsonKind extends TaintKind {
ExternalJsonKind() {
this = "json[" + any(ExternalStringKind key) + "]"
}
ExternalJsonKind() { this = "json[" + any(ExternalStringKind key) + "]" }
/** Gets the taint kind for item in this sequence */
TaintKind getValue() {
@@ -54,65 +49,222 @@ class ExternalJsonKind extends TaintKind {
json_subscript_taint(tonode, fromnode, this, result)
or
result = this and copy_call(fromnode, tonode)
}
}
override TaintKind getTaintOfMethodResult(string name) {
name = "get" and result = this.getValue()
}
}
}
/** A kind of "taint", representing a dictionary mapping str->"taint" */
class ExternalStringDictKind extends DictKind {
ExternalStringDictKind() {
this.getValue() instanceof ExternalStringKind
}
ExternalStringDictKind() { this.getValue() instanceof ExternalStringKind }
}
/** A kind of "taint", representing a dictionary mapping strings to sequences of
* tainted strings */
/**
* A kind of "taint", representing a dictionary mapping strings to sequences of
* tainted strings
*/
class ExternalStringSequenceDictKind extends DictKind {
ExternalStringSequenceDictKind() {
this.getValue() instanceof ExternalStringSequenceKind
ExternalStringSequenceDictKind() { this.getValue() instanceof ExternalStringSequenceKind }
}
/** TaintKind for the result of `urlsplit(tainted_string)` */
class ExternalUrlSplitResult extends ExternalStringSequenceKind {
// https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlsplit
override TaintKind getTaintOfAttribute(string name) {
result = super.getTaintOfAttribute(name)
or
(
// namedtuple field names
name = "scheme" or
name = "netloc" or
name = "path" or
name = "query" or
name = "fragment" or
// class methods
name = "username" or
name = "password" or
name = "hostname"
) and
result instanceof ExternalStringKind
}
override TaintKind getTaintOfMethodResult(string name) {
result = super.getTaintOfMethodResult(name)
or
name = "geturl" and
result instanceof ExternalStringKind
}
}
/** TaintKind for the result of `urlparse(tainted_string)` */
class ExternalUrlParseResult extends ExternalStringSequenceKind {
// https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlparse
override TaintKind getTaintOfAttribute(string name) {
result = super.getTaintOfAttribute(name)
or
(
// namedtuple field names
name = "scheme" or
name = "netloc" or
name = "path" or
name = "params" or
name = "query" or
name = "fragment" or
// class methods
name = "username" or
name = "password" or
name = "hostname"
) and
result instanceof ExternalStringKind
}
override TaintKind getTaintOfMethodResult(string name) {
result = super.getTaintOfMethodResult(name)
or
name = "geturl" and
result instanceof ExternalStringKind
}
}
/* Helper for getTaintForStep() */
pragma [noinline]
private predicate json_subscript_taint(SubscriptNode sub, ControlFlowNode obj, ExternalJsonKind seq, TaintKind key) {
pragma[noinline]
private predicate json_subscript_taint(
SubscriptNode sub, ControlFlowNode obj, ExternalJsonKind seq, TaintKind key
) {
sub.isLoad() and
sub.getValue() = obj and
sub.getObject() = obj and
key = seq.getValue()
}
private predicate json_load(ControlFlowNode fromnode, CallNode tonode) {
exists(FunctionObject json_loads |
ModuleObject::named("json").attr("loads") = json_loads and
json_loads.getACall() = tonode and tonode.getArg(0) = fromnode
tonode = Value::named("json.loads").getACall() and
tonode.getArg(0) = fromnode
}
private predicate urlsplit(ControlFlowNode fromnode, CallNode tonode) {
// This could be implemented as `exists(FunctionValue` without the explicit six part,
// but then our tests will need to import +100 modules, so for now this slightly
// altered version gets to live on.
exists(Value urlsplit |
(
urlsplit = Value::named("six.moves.urllib.parse.urlsplit")
or
// Python 2
urlsplit = Value::named("urlparse.urlsplit")
or
// Python 3
urlsplit = Value::named("urllib.parse.urlsplit")
) and
tonode = urlsplit.getACall() and
tonode.getArg(0) = fromnode
)
}
private predicate urlparse(ControlFlowNode fromnode, CallNode tonode) {
// This could be implemented as `exists(FunctionValue` without the explicit six part,
// but then our tests will need to import +100 modules, so for now this slightly
// altered version gets to live on.
exists(Value urlparse |
(
urlparse = Value::named("six.moves.urllib.parse.urlparse")
or
// Python 2
urlparse = Value::named("urlparse.urlparse")
or
// Python 3
urlparse = Value::named("urllib.parse.urlparse")
) and
tonode = urlparse.getACall() and
tonode.getArg(0) = fromnode
)
}
/** A kind of "taint", representing an open file-like object from an external source. */
class ExternalFileObject extends TaintKind {
ExternalFileObject() {
this = "file[" + any(ExternalStringKind key) + "]"
}
ExternalFileObject() { this = "file[" + any(ExternalStringKind key) + "]" }
/** Gets the taint kind for the contents of this file */
TaintKind getValue() {
this = "file[" + result + "]"
}
TaintKind getValue() { this = "file[" + result + "]" }
override TaintKind getTaintOfMethodResult(string name) {
name = "read" and result = this.getValue()
}
}
/**
* Temporary sanitizer for the tainted result from `urlsplit` and `urlparse`. Can be used to reduce FPs until
* we have better support for namedtuples.
*
* Will clear **all** taint on a test of the kind. That is, on the true edge of any matching test,
* all fields/indexes will be cleared of taint.
*
* Handles:
* - `if splitres.netloc == "KNOWN_VALUE"`
* - `if splitres[0] == "KNOWN_VALUE"`
*/
class UrlsplitUrlparseTempSanitizer extends Sanitizer {
// TODO: remove this once we have better support for named tuples
UrlsplitUrlparseTempSanitizer() { this = "UrlsplitUrlparseTempSanitizer" }
override predicate sanitizingEdge(TaintKind taint, PyEdgeRefinement test) {
(
taint instanceof ExternalUrlSplitResult
or
taint instanceof ExternalUrlParseResult
) and
exists(ControlFlowNode full_use |
full_use.(SubscriptNode).getObject() = test.getInput().getAUse()
or
full_use.(AttrNode).getObject() = test.getInput().getAUse()
|
clears_taint(full_use, test.getTest(), test.getSense())
)
}
private predicate clears_taint(ControlFlowNode tainted, ControlFlowNode test, boolean sense) {
test_equality_with_const(test, tainted, sense)
or
test_in_const_seq(test, tainted, sense)
or
test.(UnaryExprNode).getNode().getOp() instanceof Not and
exists(ControlFlowNode nested_test |
nested_test = test.(UnaryExprNode).getOperand() and
clears_taint(tainted, nested_test, sense.booleanNot())
)
}
/** holds for `== "KNOWN_VALUE"` on `true` edge, and `!= "KNOWN_VALUE"` on `false` edge */
private predicate test_equality_with_const(CompareNode cmp, ControlFlowNode tainted, boolean sense) {
exists(ControlFlowNode const, Cmpop op |
const.getNode() instanceof StrConst
|
(
cmp.operands(const, op, tainted)
or
cmp.operands(tainted, op, const)
) and
(
op instanceof Eq and sense = true
or
op instanceof NotEq and sense = false
)
)
}
/** holds for `in ["KNOWN_VALUE", ...]` on `true` edge, and `not in ["KNOWN_VALUE", ...]` on `false` edge */
private predicate test_in_const_seq(CompareNode cmp, ControlFlowNode tainted, boolean sense) {
exists(SequenceNode const_seq, Cmpop op |
forall(ControlFlowNode elem | elem = const_seq.getAnElement() | elem.getNode() instanceof StrConst)
|
cmp.operands(tainted, op, const_seq) and
(
op instanceof In and sense = true
or
op instanceof NotIn and sense = false
)
)
}
}

View File

@@ -82,7 +82,7 @@ private predicate tracking_step(ControlFlowNode src, ControlFlowNode dest) {
or
src = dest.(AttrNode).getObject()
or
src = dest.(SubscriptNode).getValue()
src = dest.(SubscriptNode).getObject()
or
tracked_call_step(src, dest)
or

View File

@@ -0,0 +1,2 @@
import semmle.python.web.client.StdLib
import semmle.python.web.client.Requests

View File

@@ -26,7 +26,7 @@ class WsgiEnvironment extends TaintKind {
tonode.(CallNode).getFunction().(AttrNode).getObject("get") = fromnode and
tonode.(CallNode).getArg(0).pointsTo(key)
or
tonode.(SubscriptNode).getValue() = fromnode and tonode.isLoad() and
tonode.(SubscriptNode).getObject() = fromnode and tonode.isLoad() and
tonode.(SubscriptNode).getIndex().pointsTo(key)
|
key = Value::forString(text) and result instanceof ExternalStringKind and
@@ -66,7 +66,7 @@ class UntrustedCookie extends TaintKind {
}
override TaintKind getTaintForFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode) {
tonode.(SubscriptNode).getValue() = fromnode and
tonode.(SubscriptNode).getObject() = fromnode and
result instanceof UntrustedMorsel
}
@@ -89,7 +89,7 @@ abstract class CookieSet extends CookieOperation {}
/** Generic taint sink in a http response */
abstract class HttpResponseTaintSink extends TaintSink {
override predicate sinks(TaintKind kind) {
override predicate sinks(TaintKind kind) {
kind instanceof ExternalStringKind
}
@@ -97,9 +97,51 @@ abstract class HttpResponseTaintSink extends TaintSink {
abstract class HttpRedirectTaintSink extends TaintSink {
override predicate sinks(TaintKind kind) {
override predicate sinks(TaintKind kind) {
kind instanceof ExternalStringKind
}
}
module Client {
// TODO: user-input in other than URL:
// - `data`, `json` for `requests.post`
// - `body` for `HTTPConnection.request`
// - headers?
// TODO: Add more library support
// - urllib3 https://github.com/urllib3/urllib3
// - httpx https://github.com/encode/httpx
/**
* An outgoing http request
*
* For example:
* conn = HTTPConnection('example.com')
conn.request('GET', '/path')
*/
abstract class HttpRequest extends ControlFlowNode {
/** Get any ControlFlowNode that is used to construct the final URL.
*
* In the HTTPConnection example, there is a result for both `'example.com'` and for `'/path'`.
*/
abstract ControlFlowNode getAUrlPart();
abstract string getMethodUpper();
}
/** Taint sink for the URL-part of an outgoing http request */
class HttpRequestUrlTaintSink extends TaintSink {
HttpRequestUrlTaintSink() {
this = any(HttpRequest r).getAUrlPart()
}
override predicate sinks(TaintKind kind) {
kind instanceof ExternalStringKind
}
}
}

View File

@@ -0,0 +1,22 @@
/**
* Modeling outgoing HTTP requests using the `requests` package
* https://pypi.org/project/requests/
*/
import python
private import semmle.python.web.Http
class RequestsHttpRequest extends Client::HttpRequest, CallNode {
CallableValue func;
string method;
RequestsHttpRequest() {
method = httpVerbLower() and
func = Module::named("requests").attr(method) and
this = func.getACall()
}
override ControlFlowNode getAUrlPart() { result = func.getNamedArgumentForCall(this, "url") }
override string getMethodUpper() { result = method.toUpperCase() }
}

View File

@@ -0,0 +1,55 @@
import python
private import semmle.python.web.Http
ClassValue httpConnectionClass() {
// Python 2
result = Value::named("httplib.HTTPConnection")
or
result = Value::named("httplib.HTTPSConnection")
or
// Python 3
result = Value::named("http.client.HTTPConnection")
or
result = Value::named("http.client.HTTPSConnection")
or
// six
result = Value::named("six.moves.http_client.HTTPConnection")
or
result = Value::named("six.moves.http_client.HTTPSConnection")
}
class HttpConnectionHttpRequest extends Client::HttpRequest, CallNode {
CallNode constructor_call;
CallableValue func;
HttpConnectionHttpRequest() {
exists(ClassValue cls, AttrNode call_origin, Value constructor_call_value |
cls = httpConnectionClass() and
func = cls.lookup("request") and
this = func.getACall() and
// since you can do `r = conn.request; r('GET', path)`, we need to find the origin
this.getFunction().pointsTo(_, _, call_origin) and
// Since HTTPSConnection is a subtype of HTTPConnection, up until this point, `cls` could be either class,
// because `HTTPSConnection.request == HTTPConnection.request`. To avoid generating 2 results, we filter
// on the actual class used as the constructor
call_origin.getObject().pointsTo(_, constructor_call_value, constructor_call) and
cls = constructor_call_value.getClass() and
constructor_call = cls.getACall()
)
}
override ControlFlowNode getAUrlPart() {
result = func.getNamedArgumentForCall(this, "url")
or
result = constructor_call.getArg(0)
or
result = constructor_call.getArgByName("host")
}
override string getMethodUpper() {
exists(string method |
result = method.toUpperCase() and
func.getNamedArgumentForCall(this, "method").pointsTo(Value::forString(method))
)
}
}

View File

@@ -21,7 +21,7 @@ class DjangoRequest extends TaintKind {
/* Helper for getTaintForStep() */
pragma[noinline]
private predicate subscript_taint(SubscriptNode sub, ControlFlowNode obj, TaintKind kind) {
sub.getValue() = obj and
sub.getObject() = obj and
kind instanceof ExternalStringKind
}