Files
codeql/python/ql/src/Expressions/HashedButNoHash.ql
Taus c4a4e20be0 Python: Port HashedButNoHash.ql
This one is a bit more involved. Of note is the fact that it at present
only uses local flow when determining the origin of some value (whereas
the points-to version used global flow). It may be desirable to rewrite
this query to use global data-flow, but this should be done with some
care (as using "all unhashable objects" as the set of sources is
somewhat iffy with respect to performance). For that reason, I'm
sticking to mostly local flow (except for well behaved things like types
and built-ins).
2026-03-09 17:13:03 +00:00

109 lines
3.2 KiB
Plaintext

/**
* @name Unhashable object hashed
* @description Hashing an object which is not hashable will result in a TypeError at runtime.
* @kind problem
* @tags quality
* reliability
* correctness
* @problem.severity error
* @sub-severity low
* @precision very-high
* @id py/hash-unhashable-value
*/
import python
import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.internal.DataFlowDispatch
private import semmle.python.ApiGraphs
/**
* Holds if `cls` explicitly sets `__hash__` to `None`, making instances unhashable.
*/
predicate setsHashToNone(Class cls) {
DuckTyping::getAnAttributeValue(cls, "__hash__") instanceof None
}
/**
* Holds if `cls` is a user-defined class whose instances are unhashable.
* A new-style class without `__hash__` is unhashable, as is one that explicitly
* sets `__hash__ = None`.
*/
predicate isUnhashableUserClass(Class cls) {
DuckTyping::isNewStyle(cls) and
not DuckTyping::hasMethod(cls, "__hash__") and
not DuckTyping::hasUnresolvedBase(getADirectSuperclass*(cls))
or
setsHashToNone(cls)
}
/**
* Gets the name of a builtin type whose instances are unhashable.
*/
string getUnhashableBuiltinName() { result = ["list", "set", "dict", "bytearray"] }
/**
* Holds if `origin` is a local source node tracking an unhashable instance that
* flows to `node`, with `clsName` describing the class for the alert.
*/
predicate isUnhashable(DataFlow::LocalSourceNode origin, DataFlow::Node node, string clsName) {
exists(Class c |
isUnhashableUserClass(c) and
origin = classInstanceTracker(c) and
origin.flowsTo(node) and
clsName = c.getName()
)
or
clsName = getUnhashableBuiltinName() and
origin = API::builtin(clsName).getAnInstance().asSource() and
origin.flowsTo(node)
}
predicate explicitly_hashed(DataFlow::Node node) {
node = API::builtin("hash").getACall().getArg(0)
}
/**
* Holds if the subscript object in `sub[...]` is known to use hashing for indexing,
* i.e. it does not have a custom `__getitem__` that could accept unhashable indices.
*/
predicate subscriptUsesHashing(Subscript sub) {
DataFlow::exprNode(sub.getObject()) =
API::builtin("dict").getAnInstance().getAValueReachableFromSource()
or
exists(Class cls |
classInstanceTracker(cls)
.(DataFlow::LocalSourceNode)
.flowsTo(DataFlow::exprNode(sub.getObject())) and
not DuckTyping::hasMethod(cls, "__getitem__")
)
}
predicate unhashable_subscript(DataFlow::LocalSourceNode origin, DataFlow::Node node, string clsName) {
exists(Subscript sub |
node = DataFlow::exprNode(sub.getIndex()) and
subscriptUsesHashing(sub)
|
isUnhashable(origin, node, clsName)
)
}
/**
* Holds if `e` is inside a `try` that catches `TypeError`.
*/
predicate typeerror_is_caught(Expr e) {
exists(Try try |
try.getBody().contains(e) and
try.getAHandler().getType() = API::builtin("TypeError").getAValueReachableFromSource().asExpr()
)
}
from DataFlow::LocalSourceNode origin, DataFlow::Node node, string clsName
where
not typeerror_is_caught(node.asExpr()) and
(
explicitly_hashed(node) and isUnhashable(origin, node, clsName)
or
unhashable_subscript(origin, node, clsName)
)
select node, "This $@ of $@ is unhashable.", origin, "instance", origin, clsName