diff --git a/python/ql/lib/semmle/python/security/internal/SensitiveDataHeuristics.qll b/python/ql/lib/semmle/python/security/internal/SensitiveDataHeuristics.qll index 96abac10da2..7a319f79303 100644 --- a/python/ql/lib/semmle/python/security/internal/SensitiveDataHeuristics.qll +++ b/python/ql/lib/semmle/python/security/internal/SensitiveDataHeuristics.qll @@ -96,10 +96,14 @@ module HeuristicNames { * Gets a regular expression that identifies strings that may indicate the presence of data * that is hashed or encrypted, and hence rendered non-sensitive, or contains special characters * suggesting nouns within the string do not represent the meaning of the whole string (e.g. a URL or a SQL query). + * + * We also filter out common words like `certain` and `concert`, since otherwise these could + * be matched by the certificate regular expressions. Same for `accountable` (account), or + * `secretarial` (secret). */ string notSensitiveRegexp() { result = - "(?is).*([^\\w$.-]|redact|censor|obfuscate|hash|md5|sha|random|((?