diff --git a/python/ql/src/Expressions/HashedButNoHash.ql b/python/ql/src/Expressions/HashedButNoHash.ql index 93edc7c9c6c..6f4dab12570 100644 --- a/python/ql/src/Expressions/HashedButNoHash.ql +++ b/python/ql/src/Expressions/HashedButNoHash.ql @@ -23,17 +23,57 @@ predicate setsHashToNone(Class cls) { DuckTyping::getAnAttributeValue(cls, "__hash__") instanceof None } +/** + * Holds if `cls` has a `@dataclass` decorator with `frozen=True` or `unsafe_hash=True`, + * which generates a `__hash__` method at runtime. + */ +predicate hasDataclassHash(Class cls) { + exists(DataFlow::CallCfgNode decorator | + decorator = API::moduleImport("dataclasses").getMember("dataclass").getACall() and + decorator.asExpr() = cls.getADecorator() and + decorator.getArgByName(["frozen", "unsafe_hash"]).asExpr().(ImmutableLiteral).booleanValue() = + true + ) +} + +/** + * Holds if `cls` defines `__eq__` directly (not inherited from `object`), + * or has a `@dataclass` decorator that generates `__eq__` (the default). + */ +predicate definesEq(Class cls) { + DuckTyping::hasMethod(cls, "__eq__") + or + // @dataclass(...) call generates __eq__ unless eq=False + exists(DataFlow::CallCfgNode decorator | + decorator = API::moduleImport("dataclasses").getMember("dataclass").getACall() and + decorator.asExpr() = cls.getADecorator() and + not decorator.getArgByName("eq").asExpr().(ImmutableLiteral).booleanValue() = false + ) + or + // bare @dataclass without arguments also generates __eq__ + cls.getADecorator() = + API::moduleImport("dataclasses").getMember("dataclass").getAValueReachableFromSource().asExpr() +} + /** * Holds if `cls` is a user-defined class whose instances are unhashable. - * A new-style class without `__hash__` is unhashable, as is one that explicitly - * sets `__hash__ = None`. + * + * In Python, a class is unhashable if: + * - It explicitly sets `__hash__ = None`, or + * - It defines `__eq__` (directly or via decorator like `@dataclass`) without + * defining `__hash__`, and doesn't have a decorator that generates `__hash__`. */ predicate isUnhashableUserClass(Class cls) { - DuckTyping::isNewStyle(cls) and - not DuckTyping::hasMethod(cls, "__hash__") and - not DuckTyping::hasUnresolvedBase(getADirectSuperclass*(cls)) - or setsHashToNone(cls) + or + // In Python 3, defining __eq__ without __hash__ makes a class unhashable. + // This rule does not apply in Python 2. + major_version() = 3 and + DuckTyping::isNewStyle(cls) and + definesEq(cls) and + not DuckTyping::hasMethod(cls, "__hash__") and + not hasDataclassHash(cls) and + not DuckTyping::hasUnresolvedBase(getADirectSuperclass*(cls)) } /** diff --git a/python/ql/test/3/query-tests/Expressions/general/HashedButNoHash.expected b/python/ql/test/3/query-tests/Expressions/general/HashedButNoHash.expected new file mode 100644 index 00000000000..c9354099a12 --- /dev/null +++ b/python/ql/test/3/query-tests/Expressions/general/HashedButNoHash.expected @@ -0,0 +1,2 @@ +| test.py:17:17:17:19 | ControlFlowNode for obj | This $@ of $@ is unhashable. | test.py:16:11:16:23 | ControlFlowNode for HasEqNoHash() | instance | test.py:16:11:16:23 | ControlFlowNode for HasEqNoHash() | HasEqNoHash | +| test.py:48:17:48:17 | ControlFlowNode for p | This $@ of $@ is unhashable. | test.py:47:9:47:27 | ControlFlowNode for MutableDataclass() | instance | test.py:47:9:47:27 | ControlFlowNode for MutableDataclass() | MutableDataclass | diff --git a/python/ql/test/3/query-tests/Expressions/general/HashedButNoHash.qlref b/python/ql/test/3/query-tests/Expressions/general/HashedButNoHash.qlref new file mode 100644 index 00000000000..5842132bd43 --- /dev/null +++ b/python/ql/test/3/query-tests/Expressions/general/HashedButNoHash.qlref @@ -0,0 +1 @@ +Expressions/HashedButNoHash.ql diff --git a/python/ql/test/3/query-tests/Expressions/general/test.py b/python/ql/test/3/query-tests/Expressions/general/test.py new file mode 100644 index 00000000000..3c900c19f0b --- /dev/null +++ b/python/ql/test/3/query-tests/Expressions/general/test.py @@ -0,0 +1,48 @@ +# Classes without __eq__ are hashable (inherit __hash__ from object) +class NoEqClass: + def __init__(self, x): + self.x = x + +def hash_no_eq(): + obj = NoEqClass(1) + return hash(obj) # OK: no __eq__, so __hash__ is inherited from object + +# Classes with __eq__ but no __hash__ are unhashable +class HasEqNoHash: + def __eq__(self, other): + return self.x == other.x + +def hash_eq_no_hash(): + obj = HasEqNoHash() + return hash(obj) # should be flagged + +# @dataclass(frozen=True) generates __hash__, so instances are hashable +from dataclasses import dataclass + +@dataclass(frozen=True) +class FrozenPoint: + x: int + y: int + +def hash_frozen_dataclass(): + p = FrozenPoint(1, 2) + return hash(p) # OK: frozen dataclass has __hash__ + +# @dataclass(unsafe_hash=True) also generates __hash__ +@dataclass(unsafe_hash=True) +class UnsafeHashPoint: + x: int + y: int + +def hash_unsafe_hash_dataclass(): + p = UnsafeHashPoint(1, 2) + return hash(p) # OK: unsafe_hash=True generates __hash__ + +# Plain @dataclass without frozen/unsafe_hash and with __eq__ is unhashable +@dataclass +class MutableDataclass: + x: int + +def hash_mutable_dataclass(): + p = MutableDataclass(1) + return hash(p) # should be flagged: @dataclass generates __eq__ but not __hash__ diff --git a/python/ql/test/query-tests/Expressions/general/expressions_test.py b/python/ql/test/query-tests/Expressions/general/expressions_test.py index 5e07b58e204..90a3c635748 100644 --- a/python/ql/test/query-tests/Expressions/general/expressions_test.py +++ b/python/ql/test/query-tests/Expressions/general/expressions_test.py @@ -279,3 +279,12 @@ def useofapply(): def apply(f): pass apply(foo)([1]) + +# NoEqClass is hashable (no __eq__ => inherits __hash__ from object). +class NoEqClass: + def __init__(self, x): + self.x = x + +def hash_no_eq(): + obj = NoEqClass(1) + return hash(obj) # OK: no __eq__, so __hash__ is inherited from object