mirror of
https://github.com/github/codeql.git
synced 2025-12-17 17:23:36 +01:00
238 lines
8.9 KiB
Plaintext
238 lines
8.9 KiB
Plaintext
import python
|
|
import semmle.python.ApiGraphs
|
|
import experimental.cryptography.CryptoArtifact
|
|
private import experimental.cryptography.utils.Utils as Utils
|
|
private import experimental.cryptography.CryptoAlgorithmNames
|
|
|
|
/**
|
|
* `hashlib` is a Python standard library module for hashing algorithms.
|
|
* https://docs.python.org/3/library/hashlib.html
|
|
* This file models `hashlib` artifacts: hash algorithms (module `Hashes`) and key derivation functions (module `KDF`).
|
|
*/
|
|
// -----------------------------------------------
|
|
// Hash Artifacts
|
|
// -----------------------------------------------
|
|
module Hashes {
|
|
/**
 * The ultimate source of the algorithm-name argument of a `hashlib.new` call
 * (positional index 0, or keyword `name`).
 *
 * https://docs.python.org/3/library/hashlib.html#hashlib.new
 */
class HashlibNewHashAlgorithm extends HashAlgorithm {
  HashlibNewHashAlgorithm() {
    exists(API::Node nameArg |
      nameArg = API::moduleImport("hashlib").getMember("new").getACall().getParameter(0, "name")
    |
      this = Utils::getUltimateSrcFromApiNode(nameArg)
    )
  }

  /**
   * Gets the normalized algorithm name when the source is a string literal;
   * any other kind of source yields `unknownAlgorithm()`.
   */
  override string getName() {
    if this.asExpr() instanceof StringLiteral
    then result = super.normalizeName(this.asExpr().(StringLiteral).getText())
    else
      // Not a static string: treat the algorithm as coming from outside and therefore unknown.
      result = unknownAlgorithm()
  }
}
|
|
|
|
/**
 * The ultimate source of the hash-name argument of a `hashlib.pbkdf2_hmac` call
 * (positional index 0, or keyword `hash_name`), i.e. the hash algorithm used by the HMAC.
 * The key derivation call itself is modeled separately by `HashlibPbkdf2HMACOperation`.
 *
 * https://docs.python.org/3/library/hashlib.html#hashlib.pbkdf2_hmac
 */
class HashlibPbkdf2HMACHashAlgorithm extends HashAlgorithm {
  HashlibPbkdf2HMACHashAlgorithm() {
    exists(API::CallNode pbkdf2Call |
      pbkdf2Call = API::moduleImport("hashlib").getMember("pbkdf2_hmac").getACall()
    |
      this = Utils::getUltimateSrcFromApiNode(pbkdf2Call.getParameter(0, "hash_name"))
    )
  }

  /**
   * Gets the normalized algorithm name when the source is a string literal;
   * any other kind of source yields `unknownAlgorithm()`.
   */
  override string getName() {
    exists(StringLiteral literal | literal = this.asExpr() |
      result = super.normalizeName(literal.getText())
    )
    or
    // Not a static string: treat the algorithm as coming from outside and therefore unknown.
    not this.asExpr() instanceof StringLiteral and
    result = unknownAlgorithm()
  }
}
|
|
|
|
/**
 * The ultimate source of the `digest` argument of a `hashlib.file_digest` call
 * (positional index 1, or keyword `digest`).
 *
 * https://docs.python.org/3/library/hashlib.html#hashlib.file_digest
 *
 * NOTE: besides a string, the digest argument can be a hash constructor or a
 * callable that returns a hash object. Those cases are not modeled here because
 * they are detected separately: e.g., a reference to `hashlib.sha512` is found by
 * `HashlibMemberAlgorithm`, and `hmac.new`/`hmac.HMAC` calls are left to the HMAC
 * modeling. Any remaining source that is not a string literal is reported as
 * an 'UNKNOWN' algorithm.
 */
class HashlibFileDigestAlgorithm extends HashAlgorithm {
  HashlibFileDigestAlgorithm() {
    this =
      Utils::getUltimateSrcFromApiNode(API::moduleImport("hashlib")
            .getMember("file_digest")
            .getACall()
            .getParameter(1, "digest")) and
    // The exclusions below must be written as `not this = X`. With a multi-valued
    // right-hand side, `this != X` only requires that *some* value of X differs from
    // `this`: it fails for every `this` when X has no values at all, and it excludes
    // nothing as soon as X has two or more distinct values.
    //
    // Ignore sources that are hash constructors, allow `HashlibMemberAlgorithm` to detect these
    not this = hashlibMemberHashAlgorithm(_) and
    // Ignore sources that are HMAC objects, to be handled by the HMAC modeling
    not this = API::moduleImport("hmac").getMember(["new", "HMAC"]).getACall()
  }

  /**
   * Gets the normalized algorithm name when the source is a string literal;
   * any other kind of source yields `unknownAlgorithm()`.
   */
  override string getName() {
    // Hash constructors and hmac.new / hmac.HMAC calls are excluded by the characteristic
    // predicate, so the source is expected to be a string or some outside configuration.
    result = super.normalizeName(this.asExpr().(StringLiteral).getText())
    or
    not this.asExpr() instanceof StringLiteral and
    result = unknownAlgorithm()
  }
}
|
|
|
|
/**
 * Gets a source node for a member access of `hashlib` that is a hash algorithm,
 * e.g. `hashlib.sha512`. `hashName` is the name of the accessed member.
 *
 * Members of `hashlib` that are known not to be hash algorithms, and members
 * whose name starts with an underscore, are excluded.
 *
 * Note: a variant of this logic originally existed in the main CodeQL libraries;
 * it is kept as a separate predicate to avoid a bad join order.
 */
// Copying use of nomagic from similar predicate in codeql/main
pragma[nomagic]
DataFlow::Node hashlibMemberHashAlgorithm(string hashName) {
  result = API::moduleImport("hashlib").getMember(hashName).asSource() and
  // Don't match known non-hash members. `scrypt` is a key derivation function and is
  // modeled by `HashlibScryptAlgorithm`, so it must not be reported as a hash algorithm.
  not hashName in [
      "new", "pbkdf2_hmac", "scrypt", "algorithms_available", "algorithms_guaranteed",
      "file_digest"
    ] and
  // Don't match things like __file__
  not hashName.regexpMatch("_.*")
}
|
|
|
|
/**
 * A hash algorithm referenced as a member (i.e., a function) of the `hashlib`
 * module, e.g., `hashlib.sha512`.
 */
class HashlibMemberAlgorithm extends HashAlgorithm {
  HashlibMemberAlgorithm() {
    exists(string memberName | this = hashlibMemberHashAlgorithm(memberName))
  }

  /** Gets the normalized form of the `hashlib` member name this node refers to. */
  override string getName() {
    result =
      super.normalizeName(any(string memberName | this = hashlibMemberHashAlgorithm(memberName)))
  }
}
|
|
}
|
|
|
|
// -----------------------------------------------
|
|
// Key Derivation Functions
|
|
// -----------------------------------------------
|
|
module KDF {
|
|
// NOTE: Relates a `pbkdf2_hmac` call to its non-optional parameters only.
// `dklen` is optional (it may be omitted or None); requiring it in this same
// conjunction would make the predicate unsatisfiable for calls that omit it,
// so it is handled by a separate predicate.
predicate hashlibPBDKF2HMACKDFRequiredParams(
  HashlibPbkdf2HMACOperation kdf, API::Node hashParam, API::Node saltParam,
  API::Node iterationParam
) {
  hashParam = kdf.getParameter(0, "hash_name") and
  saltParam = kdf.getParameter(2, "salt") and
  iterationParam = kdf.getParameter(3, "iterations")
}
|
|
|
|
// Relates a `pbkdf2_hmac` call to its optional derived-key-length parameter
// (positional index 4, or keyword `dklen`).
predicate hashlibPBDKF2HMACKDFOptionalParams(HashlibPbkdf2HMACOperation kdf, API::Node keylenParam) {
  keylenParam = kdf.getParameter(4, "dklen")
}
|
|
|
|
/**
 * A call to the key derivation function `hashlib.pbkdf2_hmac`.
 * The call is modeled both as the algorithm and as the operation.
 *
 * https://docs.python.org/3/library/hashlib.html#hashlib.pbkdf2_hmac
 */
class HashlibPbkdf2HMACOperation extends KeyDerivationAlgorithm, KeyDerivationOperation {
  HashlibPbkdf2HMACOperation() {
    exists(API::Node pbkdf2 | pbkdf2 = API::moduleImport("hashlib").getMember("pbkdf2_hmac") |
      this = pbkdf2.getACall()
    )
  }

  override string getName() { result = super.normalizeName("pbkdf2_hmac") }

  /** Gets the ultimate source of the `iterations` argument. */
  override DataFlow::Node getIterationSizeSrc() {
    result =
      Utils::getUltimateSrcFromApiNode(any(API::Node iterations |
          hashlibPBDKF2HMACKDFRequiredParams(this, _, _, iterations)
        ))
  }

  /** Gets the ultimate source of the `salt` argument. */
  override DataFlow::Node getSaltConfigSrc() {
    result =
      Utils::getUltimateSrcFromApiNode(any(API::Node salt |
          hashlibPBDKF2HMACKDFRequiredParams(this, _, salt, _)
        ))
  }

  /** Gets the ultimate source of the `hash_name` argument. */
  override DataFlow::Node getHashConfigSrc() {
    result =
      Utils::getUltimateSrcFromApiNode(any(API::Node hashName |
          hashlibPBDKF2HMACKDFRequiredParams(this, hashName, _, _)
        ))
  }

  /** Gets the ultimate source of the optional `dklen` argument, if it is supplied. */
  override DataFlow::Node getDerivedKeySizeSrc() {
    result =
      Utils::getUltimateSrcFromApiNode(any(API::Node derivedKeyLen |
          hashlibPBDKF2HMACKDFOptionalParams(this, derivedKeyLen)
        ))
  }

  // TODO: if dklen is None, the derived key length depends on the hash type; if the hash
  // length is not known, the size should be reported as unknown.
  // The difficulty is that the source is what is modeled here, not the size.
  // For now this is not modeled, relying on the accepted hashes having an accepted length,
  // i.e., any query looking at the length will ignore cases where it is unknown.
  override KeyDerivationAlgorithm getAlgorithm() { result = this }

  override predicate requiresHash() { any() }

  override predicate requiresMode() { none() }

  override predicate requiresSalt() { any() }

  override predicate requiresIteration() { any() }
}
|
|
|
|
// TODO: better modeling of scrypt (e.g., the cost parameters `n`, `r`, `p` are not modeled)
/**
 * A call to the key derivation function `hashlib.scrypt`.
 * The call is modeled both as the algorithm and as the operation.
 *
 * The signature is `hashlib.scrypt(password, *, salt, n, r, p, maxmem=0, dklen=64)`:
 * every argument after `password` is keyword-only, so those arguments are looked up
 * by keyword name rather than by position.
 *
 * https://docs.python.org/3/library/hashlib.html#hashlib.scrypt
 */
class HashlibScryptAlgorithm extends KeyDerivationAlgorithm, KeyDerivationOperation {
  HashlibScryptAlgorithm() { this = API::moduleImport("hashlib").getMember("scrypt").getACall() }

  override string getName() { result = super.normalizeName("scrypt") }

  override DataFlow::Node getIterationSizeSrc() { none() }

  /** Gets the ultimate source of the keyword-only `salt` argument. */
  override DataFlow::Node getSaltConfigSrc() {
    // `salt` has no positional index (it follows `*` in the signature), so it is
    // resolved through its keyword name.
    result = Utils::getUltimateSrcFromApiNode(this.getKeywordParameter("salt"))
  }

  override DataFlow::Node getHashConfigSrc() { none() }

  /** Gets the ultimate source of the keyword-only `dklen` argument, if it is supplied. */
  override DataFlow::Node getDerivedKeySizeSrc() {
    // `dklen` is keyword-only as well; there is no result when the call relies on the default.
    result = Utils::getUltimateSrcFromApiNode(this.getKeywordParameter("dklen"))
  }

  override KeyDerivationAlgorithm getAlgorithm() { result = this }

  override predicate requiresHash() { none() }

  override predicate requiresMode() { none() }

  override predicate requiresSalt() { any() }

  override predicate requiresIteration() { none() }
}
|
|
}
|