Moved Cpp into sub directory 'cryptography' instead of crypto. Added python models, inventory, and example alerts.

This commit is contained in:
Benjamin Rodes
2023-09-15 12:12:01 -04:00
committed by Josh Brown
parent 7560db66fa
commit 50db4fd63e
73 changed files with 2763 additions and 44 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,258 @@
import python
import semmle.python.ApiGraphs
import experimental.cryptography.CryptoArtifact
private import experimental.cryptography.utils.Utils as Utils
private import experimental.cryptography.CryptoAlgorithmNames
/**
* `hashlib` is a ptyhon standard library module for hashing algorithms.
* https://docs.python.org/3/library/hashlib.html
* This is an abstract class to reference all hashlib artifacts.
*/
// -----------------------------------------------
// Hash Artifacts
// -----------------------------------------------
module Hashes{
/**
* Represents a hash algorithm used by `hashlib.new`, where the hash algorithm is a string in the first argument.
*/
class HashlibNewHashAlgorithm extends HashAlgorithm
{
HashlibNewHashAlgorithm(){
this = Utils::getUltimateSrcFromApiNode(API::moduleImport("hashlib").getMember("new").getACall().getParameter(0, "name"))
}
override string getName(){
result = super.normalizeName(this.asExpr().(StrConst).getText())
or
// if not a known/static string, assume from an outside source and the algorithm is UNKNOWN
(not this.asExpr() instanceof StrConst and result = unknownAlgorithm())
}
}
/**
* Identifies hashlib.pbdkf2_hmac calls, identifying the hash algorithm used
* in the hmac (matching kdf is handled separately by `HashlibPbkdf2HMACArtifact`).
*
* https://docs.python.org/3/library/hashlib.html#hashlib.pbkdf2_hmac
*/
class HashlibPbkdf2HMACHashAlgorithm extends HashAlgorithm
{
HashlibPbkdf2HMACHashAlgorithm(){
this = Utils::getUltimateSrcFromApiNode(API::moduleImport("hashlib").getMember("pbkdf2_hmac").getACall().getParameter(0, "hash_name"))
}
override string getName(){
result = super.normalizeName(this.asExpr().(StrConst).getText())
or
// if not a known/static string, assume from an outside source and the algorithm is UNKNOWN
(not this.asExpr() instanceof StrConst and result = unknownAlgorithm())
}
}
/**
* Gets a call to `hashlib.file_digest` where the hash algorithm is the first argument in `digest`
* `nameSrc` is the source of the first argument.
*
* https://docs.python.org/3/library/hashlib.html#hashlib.file_digest
*
* NOTE: the digest argument can be, in addition to a string,
* a callable that returns a hash object or a hash constructor.
* These cases are not considered here since they would be detected separately.
* Specifically, other non-string cases are themselves considered sources for alerts, e.g.,
* references to hashlib.sha512 is found by `HashlibMemberAlgorithm`.
* The only exception is if the source is not a string constant or HashlibMemberAlgorithm.
* In these cases, the algorithm is considered 'UNKNOWN'.
*
*/
class HashlibFileDigestAlgorithm extends HashAlgorithm
{
HashlibFileDigestAlgorithm(){
this = Utils::getUltimateSrcFromApiNode(API::moduleImport("hashlib").getMember("file_digest").getACall().getParameter(1, "digest"))
and
// Ignore sources that are hash constructors, allow `HashlibMemberAlgorithm` to detect these
this != hashlibMemberHashAlgorithm(_)
// Ignore sources that are HMAC objects, to be handled by HmacModule
and
this != API::moduleImport("hmac").getMember("new").getACall()
and
this != API::moduleImport("hmac").getMember("HMAC").getACall()
}
override string getName(){
// Name is a string constant or consider the name unknown
// NOTE: we are excluding hmac.new and hmac.HMAC constructor calls so we are expecting
// a string or an outside configuration only
result = super.normalizeName(this.asExpr().(StrConst).getText())
or
(
not this.asExpr() instanceof StrConst
and
result = unknownAlgorithm()
)
}
}
/**
* Gets a member access of hashlib that is an algorithm invocation.
* `hashName` is the name of the hash algorithm.
*
* Note: oringally a variant of this predicate was in codeql/github/main
* to a predicate to avoid a bad join order.
*/
pragma[nomagic] // Copying use of nomagic from similar predicate in codeql/main
DataFlow::Node hashlibMemberHashAlgorithm(string hashName) {
result = API::moduleImport("hashlib").getMember(hashName).asSource() and
// Don't matches known non-hash members
not hashName in ["new", "pbkdf2_hmac", "algorithms_available",
"algorithms_guaranteed", "file_digest"]
// Don't match things like __file__
and not hashName.regexpMatch("_.*")
}
/**
* Identifies hashing algorithm members (i.e., functions) of the `hashlib` module,
* e.g., `hashlib.sha512`.
*/
class HashlibMemberAlgorithm extends HashAlgorithm
{
HashlibMemberAlgorithm(){
this = hashlibMemberHashAlgorithm(_)
}
override string getName(){
exists(string rawName |
result = super.normalizeName(rawName) and this = hashlibMemberHashAlgorithm(rawName)
)
}
}
}
// -----------------------------------------------
// Key Derivation Functions
// -----------------------------------------------
module KDF{
// NOTE: Only finds the params of `pbkdf2_hmac` that are non-optional
// dk_len is optional, i.e., can be None, and if addressed in this predicate
// would result in an unsatisfiable predicate.
predicate hashlibPBDKF2HMACKDFRequiredParams(HashlibPbkdf2HMACOperation kdf,
API::Node hashParam, API::Node saltParam, API::Node iterationParam){
kdf.getParameter(0, "hash_name") = hashParam and
kdf.getParameter(2, "salt") = saltParam and
kdf.getParameter(3, "iterations") = iterationParam
}
predicate hashlibPBDKF2HMACKDFOptionalParams(HashlibPbkdf2HMACOperation kdf, API::Node keylenParam){
kdf.getParameter(4, "dklen") = keylenParam
}
/**
* Identifies kery derivation function hashlib.pbdkf2_hmac accesses.
* https://docs.python.org/3/library/hashlib.html#hashlib.pbkdf2_hmac
*/
class HashlibPbkdf2HMACOperation extends KeyDerivationAlgorithm, KeyDerivationOperation
{
HashlibPbkdf2HMACOperation(){
this = API::moduleImport("hashlib").getMember("pbkdf2_hmac").getACall()
}
override string getName(){
result = super.normalizeName("pbkdf2_hmac")
}
override DataFlow::Node getIterationSizeSrc(){
exists(API::Node it | hashlibPBDKF2HMACKDFRequiredParams(this, _, _, it) |
result = Utils::getUltimateSrcFromApiNode(it)
)
}
override DataFlow::Node getSaltConfigSrc(){
exists(API::Node s | hashlibPBDKF2HMACKDFRequiredParams(this, _, s, _) |
result = Utils::getUltimateSrcFromApiNode(s)
)
}
override DataFlow::Node getHashConfigSrc(){
exists(API::Node h | hashlibPBDKF2HMACKDFRequiredParams(this,h,_,_) |
result = Utils::getUltimateSrcFromApiNode(h)
)
}
override DataFlow::Node getDerivedKeySizeSrc(){
exists(API::Node dk | hashlibPBDKF2HMACKDFOptionalParams(this,dk) |
result = Utils::getUltimateSrcFromApiNode(dk)
)
}
// TODO: if DK is none, then the length is based on the hash type, if hash length not known, must call this unknown
// The issue is the src is what we model not the size
// For now, we are not modeling this and are relying on the fact that the accepted hashes are of accepted length.
// I.e., any query looking at length will ignore cases where it is unknown
override KeyDerivationAlgorithm getAlgorithm(){
result = this
}
override predicate requiresHash(){ any() }
override predicate requiresMode(){none()}
override predicate requiresSalt(){any()}
override predicate requiresIteration(){any()}
}
// TODO: better modeling of scrypt
/**
* Identifies key derivation fucntion hashlib.scrypt accesses.
*/
class HashlibScryptAlgorithm extends KeyDerivationAlgorithm, KeyDerivationOperation
{
HashlibScryptAlgorithm(){
this = API::moduleImport("hashlib").getMember("scrypt").getACall()
}
override string getName(){
result = super.normalizeName("scrypt")
}
override DataFlow::Node getIterationSizeSrc(){
none()
}
override DataFlow::Node getSaltConfigSrc(){
// TODO: need to address getting salt from params, unsure how this works in CodeQL
// since the signature is defined as hashlib.scrypt(password, *, salt, n, r, p, maxmem=0, dklen=64)
// What position is 'salt' then such that we can reliably extract it?
none()
}
override DataFlow::Node getHashConfigSrc(){
none()
}
override DataFlow::Node getDerivedKeySizeSrc(){
//TODO: see comment for getSaltConfigSrc above
none()
}
override KeyDerivationAlgorithm getAlgorithm(){
result = this
}
override predicate requiresHash(){ none() }
override predicate requiresMode(){none()}
override predicate requiresSalt(){any()}
override predicate requiresIteration(){none()}
}
}

View File

@@ -0,0 +1,80 @@
import python
import semmle.python.ApiGraphs
import experimental.cryptography.CryptoArtifact
private import experimental.cryptography.utils.Utils as Utils
private import experimental.cryptography.CryptoAlgorithmNames
private import experimental.cryptography.modules.stdlib.HashlibModule as HashlibModule
/**
* `hmac` is a ptyhon standard library module for key-based hashing algorithms.
* https://docs.python.org/3/library/hmac.html
*/
// -----------------------------------------------
// Hash Artifacts
// -----------------------------------------------
module Hashes{
class GenericHmacHashCall extends API::CallNode
{
GenericHmacHashCall(){
this = API::moduleImport("hmac").getMember("new").getACall()
or this = API::moduleImport("hmac").getMember("HMAC").getACall()
or this = API::moduleImport("hmac").getMember("digest").getACall()
}
}
DataFlow::Node getDigestModParamSrc(GenericHmacHashCall call){
result = Utils::getUltimateSrcFromApiNode(call.(API::CallNode).getParameter(2, "digestmod"))
}
/**
* This class captures the common behavior for all HMAC operations:
* hmac.HMAC https://docs.python.org/3/library/hmac.html#hmac.HMAC
* hmac.new https://docs.python.org/3/library/hmac.html#hmac.new
* hmac.digest https://docs.python.org/3/library/hmac.html#hmac.digest
* These operations commonly set the algorithm as a string in the third argument (`digestmod`)
* of the operation itself.
*
* NOTE: `digestmod` is the digest name, digest constructor or module for the HMAC object to use, however
* this class only identifies string names. The other forms are found by CryptopgraphicArtifacts,
* modeled in `HmacHMACConsArtifact` and `Hashlib.qll`, specifically through hashlib.new and
* direct member accesses (e.g., hashlib.md5).
*
* Where no `digestmod` exists, the algorithm is assumed to be `md5` per the docs found here:
* https://docs.python.org/3/library/hmac.html#hmac.new
*
* Where `digestmod` exists but is not a string and not a hashlib algorithm, it is assumed
* to be `UNKNOWN`. Note this includes cases wheere the digest is provided as a `A module supporting PEP 247.`
* Such modules are currently not modeled.
*/
class HmacGenericAlgorithm extends HashAlgorithm {
HmacGenericAlgorithm(){
exists(GenericHmacHashCall c |
if not exists(getDigestModParamSrc(c))
then this = c
else this = getDigestModParamSrc(c)
)
and
// Ignore case where the algorithm is a hashlib algorithm, rely on `HashlibMemberAlgorithm` to catch these cases
not this instanceof HashlibModule::Hashes::HashlibMemberAlgorithm
// NOTE: the docs say the digest can be `A module supporting PEP 247.`, however, this is not modeled, and will be considered UNKNOWN
}
override string getName(){
// when the this is a generic hmac call
// it means the algorithm parameter was not identified, assume the default case of 'md5' (per the docs)
if this instanceof GenericHmacHashCall
then result = super.normalizeName("MD5")
else
(
// Else get the string name, if its a string constant, or UNKNOWN if otherwise
result = super.normalizeName(this.asExpr().(StrConst).getText())
or
(not this.asExpr() instanceof StrConst and result = unknownAlgorithm())
)
}
}
}