import python
import semmle.python.ApiGraphs
import experimental.cryptography.CryptoArtifact
private import experimental.cryptography.utils.Utils as Utils
private import experimental.cryptography.CryptoAlgorithmNames

/**
 * `hashlib` is a Python standard library module for hashing algorithms.
 * https://docs.python.org/3/library/hashlib.html
 * The modules below model the hashlib hash and key derivation artifacts.
 */

// -----------------------------------------------
// Hash Artifacts
// -----------------------------------------------
module Hashes {
  /**
   * Represents a hash algorithm used by `hashlib.new`, where the hash algorithm
   * is a string in the first argument (positional index 0 or keyword `name`).
   */
  class HashlibNewHashAlgorithm extends HashAlgorithm {
    HashlibNewHashAlgorithm() {
      this =
        Utils::getUltimateSrcFromApiNode(API::moduleImport("hashlib")
              .getMember("new")
              .getACall()
              .getParameter(0, "name"))
    }

    /**
     * Gets the normalized algorithm name when the source is a string constant,
     * otherwise the unknown-algorithm marker.
     */
    override string getName() {
      result = super.normalizeName(this.asExpr().(StrConst).getText())
      or
      // if not a known/static string, assume from an outside source and the algorithm is UNKNOWN
      not this.asExpr() instanceof StrConst and
      result = unknownAlgorithm()
    }
  }

  /**
   * Identifies `hashlib.pbkdf2_hmac` calls, identifying the hash algorithm used
   * in the hmac (the matching kdf is handled separately by `KDF::HashlibPbkdf2HMACOperation`).
   *
   * https://docs.python.org/3/library/hashlib.html#hashlib.pbkdf2_hmac
   */
  class HashlibPbkdf2HMACHashAlgorithm extends HashAlgorithm {
    HashlibPbkdf2HMACHashAlgorithm() {
      this =
        Utils::getUltimateSrcFromApiNode(API::moduleImport("hashlib")
              .getMember("pbkdf2_hmac")
              .getACall()
              .getParameter(0, "hash_name"))
    }

    /**
     * Gets the normalized algorithm name when the source is a string constant,
     * otherwise the unknown-algorithm marker.
     */
    override string getName() {
      result = super.normalizeName(this.asExpr().(StrConst).getText())
      or
      // if not a known/static string, assume from an outside source and the algorithm is UNKNOWN
      not this.asExpr() instanceof StrConst and
      result = unknownAlgorithm()
    }
  }

  /**
   * Identifies the source of the `digest` argument (positional index 1 or keyword
   * `digest`) of a call to `hashlib.file_digest`.
   *
   * https://docs.python.org/3/library/hashlib.html#hashlib.file_digest
   *
   * NOTE: the digest argument can be, in addition to a string,
   * a callable that returns a hash object or a hash constructor.
   * These cases are not considered here since they would be detected separately.
   * Specifically, other non-string cases are themselves considered sources for alerts, e.g.,
   * references to hashlib.sha512 are found by `HashlibMemberAlgorithm`.
   * The only exception is if the source is not a string constant or HashlibMemberAlgorithm.
   * In these cases, the algorithm is considered 'UNKNOWN'.
   */
  class HashlibFileDigestAlgorithm extends HashAlgorithm {
    HashlibFileDigestAlgorithm() {
      this =
        Utils::getUltimateSrcFromApiNode(API::moduleImport("hashlib")
              .getMember("file_digest")
              .getACall()
              .getParameter(1, "digest")) and
      // The exclusions below must be written as `not this = ...`.
      // `this != f()` holds as soon as *some* value of `f()` differs from `this`
      // (and fails when `f()` has no values), so it neither excludes the intended
      // nodes nor keeps the class populated when no such nodes exist.
      //
      // Ignore sources that are hash constructors, allow `HashlibMemberAlgorithm` to detect these
      not this = hashlibMemberHashAlgorithm(_) and
      // Ignore sources that are HMAC objects, to be handled by HmacModule
      not this = API::moduleImport("hmac").getMember("new").getACall() and
      not this = API::moduleImport("hmac").getMember("HMAC").getACall()
    }

    /**
     * Gets the normalized algorithm name when the source is a string constant,
     * otherwise the unknown-algorithm marker.
     */
    override string getName() {
      // Name is a string constant or consider the name unknown
      // NOTE: we are excluding hmac.new and hmac.HMAC constructor calls so we are expecting
      // a string or an outside configuration only
      result = super.normalizeName(this.asExpr().(StrConst).getText())
      or
      not this.asExpr() instanceof StrConst and
      result = unknownAlgorithm()
    }
  }

  /**
   * Gets a member access of hashlib that is an algorithm invocation.
   * `hashName` is the name of the hash algorithm.
   *
   * Note: originally a variant of this predicate was in codeql/github/main;
   * it is kept as a separate predicate to avoid a bad join order.
   */
  pragma[nomagic] // Copying use of nomagic from similar predicate in codeql/main
  DataFlow::Node hashlibMemberHashAlgorithm(string hashName) {
    result = API::moduleImport("hashlib").getMember(hashName).asSource() and
    // Don't match known non-hash members.
    // `scrypt` is a key derivation function and is modeled by `KDF::HashlibScryptAlgorithm`.
    not hashName in [
        "new", "pbkdf2_hmac", "scrypt", "algorithms_available", "algorithms_guaranteed",
        "file_digest"
      ] and
    // Don't match things like __file__
    not hashName.regexpMatch("_.*")
  }

  /**
   * Identifies hashing algorithm members (i.e., functions) of the `hashlib` module,
   * e.g., `hashlib.sha512`.
   */
  class HashlibMemberAlgorithm extends HashAlgorithm {
    HashlibMemberAlgorithm() { this = hashlibMemberHashAlgorithm(_) }

    /** Gets the normalized form of the hashlib member name this node refers to. */
    override string getName() {
      exists(string rawName |
        result = super.normalizeName(rawName) and
        this = hashlibMemberHashAlgorithm(rawName)
      )
    }
  }
}

// -----------------------------------------------
// Key Derivation Functions
// -----------------------------------------------
module KDF {
  /**
   * Holds if `hashParam`, `saltParam` and `iterationParam` are the `hash_name`,
   * `salt` and `iterations` arguments of the `pbkdf2_hmac` call `kdf`.
   *
   * NOTE: Only finds the params of `pbkdf2_hmac` that are non-optional.
   * `dklen` is optional, i.e., can be None, and if addressed in this predicate
   * would result in an unsatisfiable predicate.
   */
  predicate hashlibPBDKF2HMACKDFRequiredParams(
    HashlibPbkdf2HMACOperation kdf, API::Node hashParam, API::Node saltParam,
    API::Node iterationParam
  ) {
    kdf.getParameter(0, "hash_name") = hashParam and
    kdf.getParameter(2, "salt") = saltParam and
    kdf.getParameter(3, "iterations") = iterationParam
  }

  /**
   * Holds if `keylenParam` is the optional `dklen` argument of the `pbkdf2_hmac` call `kdf`.
   */
  predicate hashlibPBDKF2HMACKDFOptionalParams(HashlibPbkdf2HMACOperation kdf, API::Node keylenParam) {
    kdf.getParameter(4, "dklen") = keylenParam
  }

  /**
   * Identifies key derivation function `hashlib.pbkdf2_hmac` accesses.
   * https://docs.python.org/3/library/hashlib.html#hashlib.pbkdf2_hmac
   */
  class HashlibPbkdf2HMACOperation extends KeyDerivationAlgorithm, KeyDerivationOperation {
    HashlibPbkdf2HMACOperation() {
      this = API::moduleImport("hashlib").getMember("pbkdf2_hmac").getACall()
    }

    override string getName() { result = super.normalizeName("pbkdf2_hmac") }

    /** Gets the source of the `iterations` argument. */
    override DataFlow::Node getIterationSizeSrc() {
      exists(API::Node it | hashlibPBDKF2HMACKDFRequiredParams(this, _, _, it) |
        result = Utils::getUltimateSrcFromApiNode(it)
      )
    }

    /** Gets the source of the `salt` argument. */
    override DataFlow::Node getSaltConfigSrc() {
      exists(API::Node s | hashlibPBDKF2HMACKDFRequiredParams(this, _, s, _) |
        result = Utils::getUltimateSrcFromApiNode(s)
      )
    }

    /** Gets the source of the `hash_name` argument. */
    override DataFlow::Node getHashConfigSrc() {
      exists(API::Node h | hashlibPBDKF2HMACKDFRequiredParams(this, h, _, _) |
        result = Utils::getUltimateSrcFromApiNode(h)
      )
    }

    /** Gets the source of the optional `dklen` argument, if it is supplied. */
    override DataFlow::Node getDerivedKeySizeSrc() {
      exists(API::Node dk | hashlibPBDKF2HMACKDFOptionalParams(this, dk) |
        result = Utils::getUltimateSrcFromApiNode(dk)
      )
    }

    // TODO: if DK is none, then the length is based on the hash type, if hash length not known, must call this unknown
    // The issue is the src is what we model not the size
    // For now, we are not modeling this and are relying on the fact that the accepted hashes are of accepted length.
    // I.e., any query looking at length will ignore cases where it is unknown
    override KeyDerivationAlgorithm getAlgorithm() { result = this }

    override predicate requiresHash() { any() }

    override predicate requiresMode() { none() }

    override predicate requiresSalt() { any() }

    override predicate requiresIteration() { any() }
  }

  // TODO: better modeling of scrypt
  /**
   * Identifies key derivation function `hashlib.scrypt` accesses.
   */
  class HashlibScryptAlgorithm extends KeyDerivationAlgorithm, KeyDerivationOperation {
    HashlibScryptAlgorithm() { this = API::moduleImport("hashlib").getMember("scrypt").getACall() }

    override string getName() { result = super.normalizeName("scrypt") }

    override DataFlow::Node getIterationSizeSrc() { none() }

    override DataFlow::Node getSaltConfigSrc() {
      // TODO: need to address getting salt from params, unsure how this works in CodeQL
      // since the signature is defined as hashlib.scrypt(password, *, salt, n, r, p, maxmem=0, dklen=64)
      // What position is 'salt' then such that we can reliably extract it?
      none()
    }

    override DataFlow::Node getHashConfigSrc() { none() }

    override DataFlow::Node getDerivedKeySizeSrc() {
      // TODO: see comment for getSaltConfigSrc above
      none()
    }

    override KeyDerivationAlgorithm getAlgorithm() { result = this }

    override predicate requiresHash() { none() }

    override predicate requiresMode() { none() }

    override predicate requiresSalt() { any() }

    override predicate requiresIteration() { none() }
  }
}