diff --git a/shared/concepts/codeql/concepts/internal/SensitiveDataHeuristics.qll b/shared/concepts/codeql/concepts/internal/SensitiveDataHeuristics.qll new file mode 100644 index 00000000000..ede88ebf814 --- /dev/null +++ b/shared/concepts/codeql/concepts/internal/SensitiveDataHeuristics.qll @@ -0,0 +1,188 @@ +/** + * INTERNAL: Do not use. + * + * Provides classes and predicates for identifying strings that may indicate the presence of sensitive data. + * Such that we can share this logic across our CodeQL analysis of different languages. + * + * 'Sensitive' data in general is anything that should not be sent around in unencrypted form. + */ + +/** + * A classification of different kinds of sensitive data: + * + * - secret: generic secret or trusted data; + * - id: a user name or other account information; + * - password: a password or authorization key; + * - certificate: a certificate. + * - private: private data such as credit card numbers + * + * While classifications are represented as strings, this should not be relied upon. + * Instead, use the predicates in `SensitiveDataClassification::` to work with + * classifications. + */ +class SensitiveDataClassification extends string { + SensitiveDataClassification() { this in ["secret", "id", "password", "certificate", "private"] } +} + +/** + * Provides predicates to select the different kinds of sensitive data we support. + */ +module SensitiveDataClassification { + /** Gets the classification for secret or trusted data. */ + SensitiveDataClassification secret() { result = "secret" } + + /** Gets the classification for user names or other account information. */ + SensitiveDataClassification id() { result = "id" } + + /** Gets the classification for passwords or authorization keys. */ + SensitiveDataClassification password() { result = "password" } + + /** Gets the classification for certificates. */ + SensitiveDataClassification certificate() { result = "certificate" } + + /** Gets the classification for private data. */ + SensitiveDataClassification private() { result = "private" } +} + +/** + * INTERNAL: Do not use. + * + * Provides heuristics for identifying names related to sensitive information. + */ +module HeuristicNames { + /** + * Gets a regular expression that identifies strings that may indicate the presence of secret + * or trusted data. + */ + string maybeSecret() { result = "(?is).*((?