Files
codeql/python/ql/src/Expressions/HashedButNoHash.ql
Taus Brock-Nannestad f07a7bf8cf Python: Autoformat everything using qlformat.
Will need subsequent PRs fixing up test failures (due to deprecated
methods moving around), but other than that everything should be
straight-forward.
2020-07-07 15:43:52 +02:00

86 lines
2.6 KiB
Plaintext

/**
* @name Unhashable object hashed
* @description Hashing an object which is not hashable will result in a TypeError at runtime.
* @kind problem
* @tags reliability
* correctness
* @problem.severity error
* @sub-severity low
* @precision very-high
* @id py/hash-unhashable-value
*/
import python
/*
* This assumes that any indexing operation where the value is not a sequence or numpy array involves hashing.
* For sequences, the index must be an int, which are hashable, so we don't need to treat them specially.
* For numpy arrays, the index may be a list, which are not hashable and needs to be treated specially.
*/
predicate numpy_array_type(ClassValue na) {
exists(ModuleValue np | np.getName() = "numpy" or np.getName() = "numpy.core" |
na.getASuperType() = np.attr("ndarray")
)
}
predicate has_custom_getitem(Value v) {
v.getClass().lookup("__getitem__") instanceof PythonFunctionValue
or
numpy_array_type(v.getClass())
}
predicate explicitly_hashed(ControlFlowNode f) {
exists(CallNode c, GlobalVariable hash |
c.getArg(0) = f and c.getFunction().(NameNode).uses(hash) and hash.getId() = "hash"
)
}
predicate unhashable_subscript(ControlFlowNode f, ClassValue c, ControlFlowNode origin) {
is_unhashable(f, c, origin) and
exists(SubscriptNode sub | sub.getIndex() = f |
exists(Value custom_getitem |
sub.getObject().pointsTo(custom_getitem) and
not has_custom_getitem(custom_getitem)
)
)
}
predicate is_unhashable(ControlFlowNode f, ClassValue cls, ControlFlowNode origin) {
exists(Value v | f.pointsTo(v, origin) and v.getClass() = cls |
not cls.hasAttribute("__hash__") and not cls.failedInference(_) and cls.isNewStyle()
or
cls.lookup("__hash__") = Value::named("None")
)
}
/**
* Holds if `f` is inside a `try` that catches `TypeError`. For example:
*
* try:
* ... f ...
* except TypeError:
* ...
*
* This predicate is used to eliminate false positive results. If `hash`
* is called on an unhashable object then a `TypeError` will be thrown.
* But this is not a bug if the code catches the `TypeError` and handles
* it.
*/
predicate typeerror_is_caught(ControlFlowNode f) {
exists(Try try |
try.getBody().contains(f.getNode()) and
try.getAHandler().getType().pointsTo(ClassValue::typeError())
)
}
from ControlFlowNode f, ClassValue c, ControlFlowNode origin
where
not typeerror_is_caught(f) and
(
explicitly_hashed(f) and is_unhashable(f, c, origin)
or
unhashable_subscript(f, c, origin)
)
select f.getNode(), "This $@ of $@ is unhashable.", origin, "instance", c, c.getQualifiedName()