mirror of
https://github.com/github/codeql.git
synced 2025-12-26 13:46:31 +01:00
Will need subsequent PRs fixing up test failures (due to deprecated methods moving around), but other than that everything should be straight-forward.
86 lines
2.6 KiB
Plaintext
86 lines
2.6 KiB
Plaintext
/**
|
|
* @name Unhashable object hashed
|
|
* @description Hashing an object which is not hashable will result in a TypeError at runtime.
|
|
* @kind problem
|
|
* @tags reliability
|
|
* correctness
|
|
* @problem.severity error
|
|
* @sub-severity low
|
|
* @precision very-high
|
|
* @id py/hash-unhashable-value
|
|
*/
|
|
|
|
import python
|
|
|
|
/*
 * This assumes that any indexing operation where the value is not a sequence or numpy array involves hashing.
 * For sequences, the index must be an int, which is hashable, so we don't need to treat them specially.
 * For numpy arrays, the index may be a list, which is not hashable, so they need to be treated specially.
 */
|
|
|
|
/**
 * Holds if one of the supertypes of `na` is the `ndarray` attribute of a
 * module named `numpy` or `numpy.core`.
 */
predicate numpy_array_type(ClassValue na) {
  exists(ModuleValue numpyModule, string moduleName |
    moduleName = numpyModule.getName() and
    (moduleName = "numpy" or moduleName = "numpy.core") and
    numpyModule.attr("ndarray") = na.getASuperType()
  )
}
|
|
|
|
/**
 * Holds if the class of `v` is a numpy array type, or if looking up
 * `__getitem__` on that class yields a `PythonFunctionValue`.
 */
predicate has_custom_getitem(Value v) {
  exists(ClassValue cls | cls = v.getClass() |
    numpy_array_type(cls)
    or
    cls.lookup("__getitem__") instanceof PythonFunctionValue
  )
}
|
|
|
|
/**
 * Holds if `f` is the argument at index 0 of a call whose callee is a name
 * node that uses a global variable called `hash`.
 */
predicate explicitly_hashed(ControlFlowNode f) {
  exists(CallNode hashCall, NameNode callee, GlobalVariable hashVar |
    hashVar.getId() = "hash" and
    callee = hashCall.getFunction() and
    callee.uses(hashVar) and
    f = hashCall.getArg(0)
  )
}
|
|
|
|
/**
 * Holds if `f` is the index of a subscript operation, `f` is unhashable
 * (see `is_unhashable`, which also relates `c` and `origin`), and the
 * subscripted object points to some value for which `has_custom_getitem`
 * does not hold.
 */
predicate unhashable_subscript(ControlFlowNode f, ClassValue c, ControlFlowNode origin) {
  exists(SubscriptNode subscript, Value container |
    f = subscript.getIndex() and
    subscript.getObject().pointsTo(container) and
    not has_custom_getitem(container)
  ) and
  is_unhashable(f, c, origin)
}
|
|
|
|
/**
 * Holds if `f` points to a value of class `cls`, with `origin` as the origin
 * of that value, and `cls` is considered unhashable. A class is considered
 * unhashable when either:
 *
 *  - looking up `__hash__` on it gives `None`, or
 *  - it is a new-style class whose inference did not fail and which has no
 *    `__hash__` attribute.
 */
predicate is_unhashable(ControlFlowNode f, ClassValue cls, ControlFlowNode origin) {
  exists(Value pointee |
    cls = pointee.getClass() and
    f.pointsTo(pointee, origin)
  ) and
  (
    cls.lookup("__hash__") = Value::named("None")
    or
    cls.isNewStyle() and
    not cls.failedInference(_) and
    not cls.hasAttribute("__hash__")
  )
}
|
|
|
|
/**
 * Holds if the AST node of `f` is contained in the body of a `try` statement
 * one of whose handlers has a type expression that points to `TypeError`.
 * For example:
 *
 *     try:
 *         ... f ...
 *     except TypeError:
 *         ...
 *
 * The query uses this predicate to suppress false positives: hashing an
 * unhashable object raises a `TypeError`, but that is not a bug when the
 * surrounding code catches the `TypeError` and handles it.
 */
predicate typeerror_is_caught(ControlFlowNode f) {
  exists(Try tryStmt, Expr caughtType |
    caughtType = tryStmt.getAHandler().getType() and
    caughtType.pointsTo(ClassValue::typeError()) and
    tryStmt.getBody().contains(f.getNode())
  )
}
|
|
|
|
// Report `f` when it is unhashable and is either used as the index of a
// subscript on an object without a custom `__getitem__`, or passed directly
// to `hash`; skip it when an enclosing `try` catches `TypeError`.
from ControlFlowNode f, ClassValue c, ControlFlowNode origin
where
  (
    unhashable_subscript(f, c, origin)
    or
    is_unhashable(f, c, origin) and explicitly_hashed(f)
  ) and
  not typeerror_is_caught(f)
select f.getNode(), "This $@ of $@ is unhashable.", origin, "instance", c, c.getQualifiedName()
|