Merge branch 'main' into tausbn/python-fix-match-pruning-logic

This commit is contained in:
Taus
2025-03-06 14:37:58 +01:00
committed by GitHub
1401 changed files with 44716 additions and 15745 deletions

View File

@@ -1,3 +1,10 @@
## 4.0.1
### Bug Fixes
- Fixed a bug in the extractor where a comment inside a subscript could sometimes cause the AST to be missing nodes.
- Using the `break` and `continue` keywords outside of a loop, which is a syntax error but is accepted by our parser, would cause the control-flow construction to fail. This is now no longer the case.
## 4.0.0
### Breaking Changes

View File

@@ -1,5 +0,0 @@
---
category: fix
---
- Fixed a bug in the extractor where a comment inside a subscript could sometimes cause the AST to be missing nodes.

View File

@@ -1,5 +1,6 @@
---
category: fix
---
## 4.0.1
### Bug Fixes
- Fixed a bug in the extractor where a comment inside a subscript could sometimes cause the AST to be missing nodes.
- Using the `break` and `continue` keywords outside of a loop, which is a syntax error but is accepted by our parser, would cause the control-flow construction to fail. This is now no longer the case.

View File

@@ -1,2 +1,2 @@
---
lastReleaseVersion: 4.0.0
lastReleaseVersion: 4.0.1

View File

@@ -1,5 +1,5 @@
name: codeql/python-all
version: 4.0.1-dev
version: 4.0.2-dev
groups: python
dbscheme: semmlecode.python.dbscheme
extractor: python

View File

@@ -1,3 +1,7 @@
## 1.4.3
No user-facing changes.
## 1.4.2
No user-facing changes.

View File

@@ -0,0 +1,108 @@
/** Definitions for reasoning about the expected first argument names for methods. */
import python
import semmle.python.ApiGraphs
import semmle.python.dataflow.new.internal.DataFlowDispatch
import DataFlow
/** Holds if `f` is a method of the class `c`. */
private predicate methodOfClass(Function f, Class c) {
exists(FunctionDef d | d.getDefinedFunction() = f and d.getScope() = c)
}
/** Holds if `c` is a metaclass. */
private predicate isMetaclass(Class c) {
c = API::builtin("type").getASubclass*().asSource().asExpr().(ClassExpr).getInnerScope()
}
/** Holds if `c` is a Zope interface. */
private predicate isZopeInterface(Class c) {
c =
API::moduleImport("zope")
.getMember("interface")
.getMember("Interface")
.getASubclass*()
.asSource()
.asExpr()
.(ClassExpr)
.getInnerScope()
}
/**
* Holds if `f` is used in the initialisation of `c`.
* This means `f` isn't being used as a normal method.
* Ideally it should be a `@staticmethod`; however this wasn't possible prior to Python 3.10.
* We exclude this case from the `not-named-self` query.
* However there is potential for a new query that specifically covers and alerts for this case.
*/
private predicate usedInInit(Function f, Class c) {
methodOfClass(f, c) and
exists(Call call |
call.getScope() = c and
DataFlow::localFlow(DataFlow::exprNode(f.getDefinition()), DataFlow::exprNode(call.getFunc()))
)
}
/**
* Holds if `f` has no arguments, and also has a decorator.
* We assume that the decorator affect the method in such a way that a `self` parameter is unneeded.
*/
private predicate noArgsWithDecorator(Function f) {
not exists(f.getAnArg()) and
exists(f.getADecorator())
}
/** Holds if the first parameter of `f` should be named `self`. */
predicate shouldBeSelf(Function f, Class c) {
methodOfClass(f, c) and
not isStaticmethod(f) and
not isClassmethod(f) and
not isMetaclass(c) and
not isZopeInterface(c) and
not usedInInit(f, c) and
not noArgsWithDecorator(f)
}
/** Holds if the first parameter of `f` should be named `cls`. */
predicate shouldBeCls(Function f, Class c) {
methodOfClass(f, c) and
not isStaticmethod(f) and
(
isClassmethod(f) and not isMetaclass(c)
or
isMetaclass(c) and not isClassmethod(f)
)
}
/** Holds if the first parameter of `f` is named `self`. */
predicate firstArgNamedSelf(Function f) { f.getArgName(0) = "self" }
/** Holds if the first parameter of `f` refers to the class - it is either named `cls`, or it is named `self` and is a method of a metaclass. */
predicate firstArgRefersToCls(Function f, Class c) {
methodOfClass(f, c) and
exists(string argname | argname = f.getArgName(0) |
argname = "cls"
or
/* Not PEP8, but relatively common */
argname = "mcls"
or
/* If c is a metaclass, allow arguments named `self`. */
argname = "self" and
isMetaclass(c)
)
}
/** Holds if the first parameter of `f` should be named `self`, but isn't. */
predicate firstArgShouldBeNamedSelfAndIsnt(Function f) {
shouldBeSelf(f, _) and
not firstArgNamedSelf(f)
}
/** Holds if the first parameter of `f` should be named `cls`, but isn't. */
predicate firstArgShouldReferToClsAndDoesnt(Function f) {
exists(Class c |
methodOfClass(f, c) and
shouldBeCls(f, c) and
not firstArgRefersToCls(f, c)
)
}

View File

@@ -1,4 +1,4 @@
class Entry(object):
@classmethod
def make(klass):
def make(self):
return Entry()

View File

@@ -5,20 +5,19 @@
<overview>
<p> The first parameter of a class method, a new method or any metaclass method
should be called <code>cls</code>. This makes the purpose of the parameter clear to other developers.
<p> The first parameter of a class method (including certain special methods such as <code>__new__</code>), or a method of a metaclass,
should be named <code>cls</code>.
</p>
</overview>
<recommendation>
<p>Change the name of the first parameter to <code>cls</code> as recommended by the style guidelines
<p>Ensure that the first parameter of class methods is named <code>cls</code>, as recommended by the style guidelines
in PEP 8.</p>
</recommendation>
<example>
<p>In the example, the first parameter to <code>make()</code> is <code>klass</code> which should be changed to <code>cls</code>
for ease of comprehension.
<p>In the following example, the first parameter of the class method <code>make</code> is named <code>self</code> instead of <code>cls</code>.
</p>
<sample src="NonCls.py" />
@@ -29,6 +28,7 @@ for ease of comprehension.
<li>Python PEP 8: <a href="http://www.python.org/dev/peps/pep-0008/#function-and-method-arguments">Function and method arguments</a>.</li>
<li>Python Tutorial: <a href="http://docs.python.org/2/tutorial/classes.html">Classes</a>.</li>
<li>Python Docs: <a href="https://docs.python.org/3/library/functions.html#classmethod">classmethod</a>.</li>
</references>

View File

@@ -1,7 +1,6 @@
/**
* @name First parameter of a class method is not named 'cls'
* @description Using an alternative name for the first parameter of a class method makes code more
* difficult to read; PEP8 states that the first parameter to class methods should be 'cls'.
* @description By the PEP8 style guide, the first parameter of a class method should be named `cls`.
* @kind problem
* @tags maintainability
* readability
@@ -13,30 +12,11 @@
*/
import python
predicate first_arg_cls(Function f) {
exists(string argname | argname = f.getArgName(0) |
argname = "cls"
or
/* Not PEP8, but relatively common */
argname = "mcls"
)
}
predicate is_type_method(Function f) {
exists(ClassValue c | c.getScope() = f.getScope() and c.getASuperType() = ClassValue::type())
}
predicate classmethod_decorators_only(Function f) {
forall(Expr decorator | decorator = f.getADecorator() | decorator.(Name).getId() = "classmethod")
}
import MethodArgNames
from Function f, string message
where
(f.getADecorator().(Name).getId() = "classmethod" or is_type_method(f)) and
not first_arg_cls(f) and
classmethod_decorators_only(f) and
not f.getName() = "__new__" and
firstArgShouldReferToClsAndDoesnt(f) and
(
if exists(f.getArgName(0))
then

View File

@@ -1,9 +1,9 @@
class Point:
def __init__(val, x, y): # first parameter is mis-named 'val'
def __init__(val, x, y): # BAD: first parameter is mis-named 'val'
val._x = x
val._y = y
class Point2:
def __init__(self, x, y): # first parameter is correctly named 'self'
def __init__(self, x, y): # GOOD: first parameter is correctly named 'self'
self._x = x
self._y = y

View File

@@ -6,22 +6,18 @@
<overview>
<p> Normal methods should have at least one parameter and the first parameter should be called <code>self</code>.
This makes the purpose of the parameter clear to other developers.
</p>
</overview>
<recommendation>
<p>If there is at least one parameter, then change the name of the first parameter to <code>self</code> as recommended by the style guidelines
<p>Ensure that the first parameter of a normal method is named <code>self</code>, as recommended by the style guidelines
in PEP 8.</p>
<p>If there are no parameters, then it cannot be a normal method. It may need to be marked as a <code>staticmethod</code>
or it could be moved out of the class as a normal function.
<p>If a <code>self</code> parameter is unneeded, the method should be decorated with <code>staticmethod</code>, or moved out of the class as a regular function.
</p>
</recommendation>
<example>
<p>The following methods can both be used to assign values to variables in a <code>point</code>
object. The second method makes the association clearer because the <code>self</code> parameter is
used.</p>
<p>In the following cases, the first argument of <code>Point.__init__</code> is named <code>val</code> instead; whereas in <code>Point2.__init__</code> it is correctly named <code>self</code>.</p>
<sample src="NonSelf.py" />
@@ -31,7 +27,7 @@ used.</p>
<li>Python PEP 8: <a href="http://www.python.org/dev/peps/pep-0008/#function-and-method-arguments">Function and
method arguments</a>.</li>
<li>Python Tutorial: <a href="http://docs.python.org/2/tutorial/classes.html">Classes</a>.</li>
<li>Python Docs: <a href="https://docs.python.org/3/library/functions.html#staticmethod">staticmethod</a>.</li>
</references>

View File

@@ -1,8 +1,6 @@
/**
* @name First parameter of a method is not named 'self'
* @description Using an alternative name for the first parameter of an instance method makes
* code more difficult to read; PEP8 states that the first parameter to instance
* methods should be 'self'.
* @description By the PEP8 style guide, the first parameter of a normal method should be named `self`.
* @kind problem
* @tags maintainability
* readability
@@ -14,45 +12,19 @@
*/
import python
import semmle.python.libraries.Zope
import MethodArgNames
predicate is_type_method(FunctionValue fv) {
exists(ClassValue c | c.declaredAttribute(_) = fv and c.getASuperType() = ClassValue::type())
}
predicate used_in_defining_scope(FunctionValue fv) {
exists(Call c | c.getScope() = fv.getScope().getScope() and c.getFunc().pointsTo(fv))
}
from Function f, FunctionValue fv, string message
from Function f, string message
where
exists(ClassValue cls, string name |
cls.declaredAttribute(name) = fv and
cls.isNewStyle() and
not name = "__new__" and
not name = "__metaclass__" and
not name = "__init_subclass__" and
not name = "__class_getitem__" and
/* declared in scope */
f.getScope() = cls.getScope()
) and
not f.getArgName(0) = "self" and
not is_type_method(fv) and
fv.getScope() = f and
not f.getName() = "lambda" and
not used_in_defining_scope(fv) and
firstArgShouldBeNamedSelfAndIsnt(f) and
(
(
if exists(f.getArgName(0))
then
message =
"Normal methods should have 'self', rather than '" + f.getArgName(0) +
"', as their first parameter."
else
message =
"Normal methods should have at least one parameter (the first of which should be 'self')."
) and
not f.hasVarArg()
) and
not fv instanceof ZopeInterfaceMethodValue
if exists(f.getArgName(0))
then
message =
"Normal methods should have 'self', rather than '" + f.getArgName(0) +
"', as their first parameter."
else
message =
"Normal methods should have at least one parameter (the first of which should be 'self')."
)
select f, message

View File

@@ -0,0 +1,108 @@
/**
* @name Type metrics
* @description Counts of various kinds of type annotations in Python code.
* @kind table
* @id py/type-metrics
*/
import python
class BuiltinType extends Name {
BuiltinType() { this.getId() in ["int", "float", "str", "bool", "bytes", "None"] }
}
newtype TAnnotatable =
TAnnotatedFunction(FunctionExpr f) { exists(f.getReturns()) } or
TAnnotatedParameter(Parameter p) { exists(p.getAnnotation()) } or
TAnnotatedAssignment(AnnAssign a) { exists(a.getAnnotation()) }
abstract class Annotatable extends TAnnotatable {
string toString() { result = "Annotatable" }
abstract Expr getAnnotation();
}
class AnnotatedFunction extends TAnnotatedFunction, Annotatable {
FunctionExpr function;
AnnotatedFunction() { this = TAnnotatedFunction(function) }
override Expr getAnnotation() { result = function.getReturns() }
}
class AnnotatedParameter extends TAnnotatedParameter, Annotatable {
Parameter parameter;
AnnotatedParameter() { this = TAnnotatedParameter(parameter) }
override Expr getAnnotation() { result = parameter.getAnnotation() }
}
class AnnotatedAssignment extends TAnnotatedAssignment, Annotatable {
AnnAssign assignment;
AnnotatedAssignment() { this = TAnnotatedAssignment(assignment) }
override Expr getAnnotation() { result = assignment.getAnnotation() }
}
/** Holds if `e` is a forward declaration of a type. */
predicate is_forward_declaration(Expr e) { e instanceof StringLiteral }
/** Holds if `e` is a type that may be difficult to analyze. */
predicate is_complex_type(Expr e) {
e instanceof Subscript and not is_optional_type(e)
or
e instanceof Tuple
or
e instanceof List
}
/** Holds if `e` is a type of the form `Optional[...]`. */
predicate is_optional_type(Subscript e) { e.getObject().(Name).getId() = "Optional" }
/** Holds if `e` is a simple type, that is either an identifier (excluding built-in types) or an attribute of a simple type. */
predicate is_simple_type(Expr e) {
e instanceof Name and not e instanceof BuiltinType
or
is_simple_type(e.(Attribute).getObject())
}
/** Holds if `e` is a built-in type. */
predicate is_builtin_type(Expr e) { e instanceof BuiltinType }
predicate type_count(
string kind, int total, int built_in_count, int forward_declaration_count, int simple_type_count,
int complex_type_count, int optional_type_count
) {
kind = "Parameter annotation" and
total = count(AnnotatedParameter p) and
built_in_count = count(AnnotatedParameter p | is_builtin_type(p.getAnnotation())) and
forward_declaration_count =
count(AnnotatedParameter p | is_forward_declaration(p.getAnnotation())) and
simple_type_count = count(AnnotatedParameter p | is_simple_type(p.getAnnotation())) and
complex_type_count = count(AnnotatedParameter p | is_complex_type(p.getAnnotation())) and
optional_type_count = count(AnnotatedParameter p | is_optional_type(p.getAnnotation()))
or
kind = "Return type annotation" and
total = count(AnnotatedFunction f) and
built_in_count = count(AnnotatedFunction f | is_builtin_type(f.getAnnotation())) and
forward_declaration_count = count(AnnotatedFunction f | is_forward_declaration(f.getAnnotation())) and
simple_type_count = count(AnnotatedFunction f | is_simple_type(f.getAnnotation())) and
complex_type_count = count(AnnotatedFunction f | is_complex_type(f.getAnnotation())) and
optional_type_count = count(AnnotatedFunction f | is_optional_type(f.getAnnotation()))
or
kind = "Annotated assignment" and
total = count(AnnotatedAssignment a) and
built_in_count = count(AnnotatedAssignment a | is_builtin_type(a.getAnnotation())) and
forward_declaration_count =
count(AnnotatedAssignment a | is_forward_declaration(a.getAnnotation())) and
simple_type_count = count(AnnotatedAssignment a | is_simple_type(a.getAnnotation())) and
complex_type_count = count(AnnotatedAssignment a | is_complex_type(a.getAnnotation())) and
optional_type_count = count(AnnotatedAssignment a | is_optional_type(a.getAnnotation()))
}
from
string message, int total, int built_in, int forward_decl, int simple, int complex, int optional
where type_count(message, total, built_in, forward_decl, simple, complex, optional)
select message, total, built_in, forward_decl, simple, complex, optional

View File

@@ -0,0 +1,3 @@
## 1.4.3
No user-facing changes.

View File

@@ -1,2 +1,2 @@
---
lastReleaseVersion: 1.4.2
lastReleaseVersion: 1.4.3

View File

@@ -1 +1,5 @@
[]
- queries: .
- include:
id:
- py/not-named-self
- py/not-named-cls

View File

@@ -1,5 +1,5 @@
name: codeql/python-queries
version: 1.4.3-dev
version: 1.4.4-dev
groups:
- python
- queries

View File

@@ -0,0 +1,67 @@
# All functions starting with "test_" should run and execute `print("OK")` exactly once.
# This can be checked by running validTest.py.
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname((__file__))))
from testlib import expects
# These are defined so that we can evaluate the test code.
NONSOURCE = "not a source"
SOURCE = "source"
def is_source(x):
return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j
def SINK(x):
if is_source(x):
print("OK")
else:
print("Unexpected flow", x)
def SINK_F(x):
if is_source(x):
print("Unexpected flow", x)
else:
print("OK")
# ------------------------------------------------------------------------------
# Actual tests
# ------------------------------------------------------------------------------
def read_sql(sql):
SINK(sql) # $ flow="SOURCE, l:+5 -> sql"
def process(func, arg):
func(arg)
process(func=read_sql, arg=SOURCE)
def read_sql_star(sql):
SINK(sql) # $ MISSING: flow="SOURCE, l:+5 -> sql"
def process_star(func, *args):
func(*args)
process_star(read_sql_star, SOURCE)
def read_sql_dict(sql):
SINK(sql) # $ flow="SOURCE, l:+5 -> sql"
def process_dict(func, **args):
func(**args)
process_dict(func=read_sql_dict, sql=SOURCE)
# TODO:
# Consider adding tests for
# threading.Thread, multiprocess.Process,
# concurrent.futures.ThreadPoolExecutor,
# and concurrent.futures.ProcessPoolExecutor.

View File

@@ -66,6 +66,7 @@ if __name__ == "__main__":
check_tests_valid("coverage.datamodel")
check_tests_valid("coverage.test_builtins")
check_tests_valid("coverage.loops")
check_tests_valid("coverage.functional")
check_tests_valid("coverage-py2.classes")
check_tests_valid("coverage-py3.classes")
check_tests_valid("variable-capture.in")

View File

@@ -1,3 +0,0 @@
| parameter_names.py:17:5:17:24 | Function n_cmethod | Class methods or methods of a type deriving from type should have 'cls', rather than 'self', as their first parameter. |
| parameter_names.py:22:5:22:21 | Function n_cmethod2 | Class methods or methods of a type deriving from type should have 'cls' as their first parameter. |
| parameter_names.py:37:5:37:20 | Function c_method | Class methods or methods of a type deriving from type should have 'cls', rather than 'y', as their first parameter. |

View File

@@ -1 +0,0 @@
Functions/NonCls.ql

View File

@@ -1,4 +0,0 @@
| om_test.py:71:5:71:19 | Function __repr__ | Normal methods should have at least one parameter (the first of which should be 'self'). |
| parameter_names.py:50:5:50:20 | Function __init__ | Normal methods should have 'self', rather than 'x', as their first parameter. |
| parameter_names.py:53:5:53:20 | Function s_method | Normal methods should have 'self', rather than 'y', as their first parameter. |
| parameter_names.py:56:5:56:20 | Function s_method2 | Normal methods should have at least one parameter (the first of which should be 'self'). |

View File

@@ -1 +0,0 @@
Functions/NonSelf.ql

View File

@@ -14,46 +14,47 @@ class Normal(object):
# not ok
@classmethod
def n_cmethod(self):
def n_cmethod(self): # $shouldBeCls
pass
# not ok
@classmethod
def n_cmethod2():
def n_cmethod2(): # $shouldBeCls
pass
# this is allowed because it has a decorator other than @classmethod
@classmethod
@id
def n_suppress(any_name):
def n_dec(any_name): # $shouldBeCls
pass
# Metaclass - normal methods should be named cls, though self is also accepted
class Class(type):
def __init__(cls):
pass
def c_method(y):
def c_method(y): # $shouldBeCls
pass
def c_ok(cls):
pass
@id
def c_suppress(any_name):
# technically we could alert on mixing self for metaclasses with cls for metaclasses in the same codebase,
# but it's probably not too significant.
def c_self_ok(self):
pass
class NonSelf(object):
def __init__(x):
def __init__(x): # $shouldBeSelf
pass
def s_method(y):
def s_method(y): # $shouldBeSelf
pass
def s_method2():
def s_method2(): # $shouldBeSelf
pass
def s_ok(self):
@@ -67,11 +68,12 @@ class NonSelf(object):
def s_cmethod(cls):
pass
def s_smethod2(ok):
# we allow methods that are used in class initialization, but only detect this case when they are called.
def s_smethod2(ok): # $ SPURIOUS: shouldBeSelf
pass
s_smethod2 = staticmethod(s_smethod2)
def s_cmethod2(cls):
def s_cmethod2(cls): # $ SPURIOUS: shouldBeSelf
pass
s_cmethod2 = classmethod(s_cmethod2)
@@ -118,7 +120,15 @@ class Z(zope.interface.Interface):
Z().meth(0)
def weird_decorator(f):
def g(self):
return f()
return g
class B:
@weird_decorator
def f(): # allow no-arg functions with a decorator
pass
# The `__init_subclass__` (introduced in Python 3.6)
# and `__class_getitem__` (introduced in Python 3.7) methods are methods
@@ -139,3 +149,10 @@ class SpecialMethodNames(object):
def __class_getitem__(cls):
pass
from dataclasses import dataclass, field
@dataclass
class A:
# Lambdas used in initilisation aren't methods.
x: int = field(default_factory = lambda: 2)

View File

@@ -0,0 +1,24 @@
import python
import Functions.MethodArgNames
import utils.test.InlineExpectationsTest
module MethodArgTest implements TestSig {
string getARelevantTag() { result = ["shouldBeSelf", "shouldBeCls"] }
predicate hasActualResult(Location location, string element, string tag, string value) {
exists(Function f |
element = f.toString() and
location = f.getLocation() and
value = "" and
(
firstArgShouldBeNamedSelfAndIsnt(f) and
tag = "shouldBeSelf"
or
firstArgShouldReferToClsAndDoesnt(f) and
tag = "shouldBeCls"
)
)
}
}
import MakeTest<MethodArgTest>