mirror of
https://github.com/github/codeql.git
synced 2025-12-24 12:46:34 +01:00
Merge pull request #9649 from RasmusWL/certificate-modeling
Python/JS/Ruby: Ignore common words (like certain) as sensitive data source
This commit is contained in:
@@ -0,0 +1,4 @@
|
|||||||
|
---
|
||||||
|
category: minorAnalysis
|
||||||
|
---
|
||||||
|
* Improved modeling of sensitive data sources, so common words like `certain` and `secretary` are no longer considered a certificate and a secret (respectively).
|
||||||
@@ -50,7 +50,7 @@ module HeuristicNames {
|
|||||||
* Gets a regular expression that identifies strings that may indicate the presence of secret
|
* Gets a regular expression that identifies strings that may indicate the presence of secret
|
||||||
* or trusted data.
|
* or trusted data.
|
||||||
*/
|
*/
|
||||||
string maybeSecret() { result = "(?is).*((?<!is)secret|(?<!un|is)trusted).*" }
|
string maybeSecret() { result = "(?is).*((?<!is|is_)secret|(?<!un|un_|is|is_)trusted).*" }
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets a regular expression that identifies strings that may indicate the presence of
|
* Gets a regular expression that identifies strings that may indicate the presence of
|
||||||
@@ -96,10 +96,14 @@ module HeuristicNames {
|
|||||||
* Gets a regular expression that identifies strings that may indicate the presence of data
|
* Gets a regular expression that identifies strings that may indicate the presence of data
|
||||||
* that is hashed or encrypted, and hence rendered non-sensitive, or contains special characters
|
* that is hashed or encrypted, and hence rendered non-sensitive, or contains special characters
|
||||||
* suggesting nouns within the string do not represent the meaning of the whole string (e.g. a URL or a SQL query).
|
* suggesting nouns within the string do not represent the meaning of the whole string (e.g. a URL or a SQL query).
|
||||||
|
*
|
||||||
|
* We also filter out common words like `certain` and `concert`, since otherwise these could
|
||||||
|
* be matched by the certificate regular expressions. Same for `accountable` (account), or
|
||||||
|
* `secretarial` (secret).
|
||||||
*/
|
*/
|
||||||
string notSensitiveRegexp() {
|
string notSensitiveRegexp() {
|
||||||
result =
|
result =
|
||||||
"(?is).*([^\\w$.-]|redact|censor|obfuscate|hash|md5|sha|random|((?<!un)(en))?(crypt|code)).*"
|
"(?is).*([^\\w$.-]|redact|censor|obfuscate|hash|md5|sha|random|((?<!un)(en))?(crypt|code)|certain|concert|secretar|accountant|accountab).*"
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -0,0 +1,4 @@
|
|||||||
|
---
|
||||||
|
category: minorAnalysis
|
||||||
|
---
|
||||||
|
* Improved modeling of sensitive data sources, so common words like `certain` and `secretary` are no longer considered a certificate and a secret (respectively).
|
||||||
@@ -50,7 +50,7 @@ module HeuristicNames {
|
|||||||
* Gets a regular expression that identifies strings that may indicate the presence of secret
|
* Gets a regular expression that identifies strings that may indicate the presence of secret
|
||||||
* or trusted data.
|
* or trusted data.
|
||||||
*/
|
*/
|
||||||
string maybeSecret() { result = "(?is).*((?<!is)secret|(?<!un|is)trusted).*" }
|
string maybeSecret() { result = "(?is).*((?<!is|is_)secret|(?<!un|un_|is|is_)trusted).*" }
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets a regular expression that identifies strings that may indicate the presence of
|
* Gets a regular expression that identifies strings that may indicate the presence of
|
||||||
@@ -96,10 +96,14 @@ module HeuristicNames {
|
|||||||
* Gets a regular expression that identifies strings that may indicate the presence of data
|
* Gets a regular expression that identifies strings that may indicate the presence of data
|
||||||
* that is hashed or encrypted, and hence rendered non-sensitive, or contains special characters
|
* that is hashed or encrypted, and hence rendered non-sensitive, or contains special characters
|
||||||
* suggesting nouns within the string do not represent the meaning of the whole string (e.g. a URL or a SQL query).
|
* suggesting nouns within the string do not represent the meaning of the whole string (e.g. a URL or a SQL query).
|
||||||
|
*
|
||||||
|
* We also filter out common words like `certain` and `concert`, since otherwise these could
|
||||||
|
* be matched by the certificate regular expressions. Same for `accountable` (account), or
|
||||||
|
* `secretarial` (secret).
|
||||||
*/
|
*/
|
||||||
string notSensitiveRegexp() {
|
string notSensitiveRegexp() {
|
||||||
result =
|
result =
|
||||||
"(?is).*([^\\w$.-]|redact|censor|obfuscate|hash|md5|sha|random|((?<!un)(en))?(crypt|code)).*"
|
"(?is).*([^\\w$.-]|redact|censor|obfuscate|hash|md5|sha|random|((?<!un)(en))?(crypt|code)|certain|concert|secretar|accountant|accountab).*"
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -37,6 +37,10 @@ f = not_found.get_passwd # $ SensitiveDataSource=password
|
|||||||
x = f()
|
x = f()
|
||||||
print(x) # $ SensitiveUse=password
|
print(x) # $ SensitiveUse=password
|
||||||
|
|
||||||
|
# some prefixes make us ignore it as a source
|
||||||
|
not_found.isSecret
|
||||||
|
not_found.is_secret
|
||||||
|
|
||||||
def my_func(non_sensitive_name):
|
def my_func(non_sensitive_name):
|
||||||
x = non_sensitive_name()
|
x = non_sensitive_name()
|
||||||
print(x) # $ SensitiveUse=password
|
print(x) # $ SensitiveUse=password
|
||||||
@@ -56,6 +60,11 @@ getattr(foo, x) # $ SensitiveDataSource=password
|
|||||||
def my_func(password): # $ SensitiveDataSource=password
|
def my_func(password): # $ SensitiveDataSource=password
|
||||||
print(password) # $ SensitiveUse=password
|
print(password) # $ SensitiveUse=password
|
||||||
|
|
||||||
|
# FP where the `cert` in `uncertainty` makes us treat it like a certificate
|
||||||
|
# https://github.com/github/codeql/issues/9632
|
||||||
|
def my_other_func(uncertainty):
|
||||||
|
print(uncertainty)
|
||||||
|
|
||||||
password = some_function() # $ SensitiveDataSource=password
|
password = some_function() # $ SensitiveDataSource=password
|
||||||
print(password) # $ SensitiveUse=password
|
print(password) # $ SensitiveUse=password
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,4 @@
|
|||||||
|
---
|
||||||
|
category: minorAnalysis
|
||||||
|
---
|
||||||
|
* Improved modeling of sensitive data sources, so common words like `certain` and `secretary` are no longer considered a certificate and a secret (respectively).
|
||||||
@@ -50,7 +50,7 @@ module HeuristicNames {
|
|||||||
* Gets a regular expression that identifies strings that may indicate the presence of secret
|
* Gets a regular expression that identifies strings that may indicate the presence of secret
|
||||||
* or trusted data.
|
* or trusted data.
|
||||||
*/
|
*/
|
||||||
string maybeSecret() { result = "(?is).*((?<!is)secret|(?<!un|is)trusted).*" }
|
string maybeSecret() { result = "(?is).*((?<!is|is_)secret|(?<!un|un_|is|is_)trusted).*" }
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets a regular expression that identifies strings that may indicate the presence of
|
* Gets a regular expression that identifies strings that may indicate the presence of
|
||||||
@@ -96,10 +96,14 @@ module HeuristicNames {
|
|||||||
* Gets a regular expression that identifies strings that may indicate the presence of data
|
* Gets a regular expression that identifies strings that may indicate the presence of data
|
||||||
* that is hashed or encrypted, and hence rendered non-sensitive, or contains special characters
|
* that is hashed or encrypted, and hence rendered non-sensitive, or contains special characters
|
||||||
* suggesting nouns within the string do not represent the meaning of the whole string (e.g. a URL or a SQL query).
|
* suggesting nouns within the string do not represent the meaning of the whole string (e.g. a URL or a SQL query).
|
||||||
|
*
|
||||||
|
* We also filter out common words like `certain` and `concert`, since otherwise these could
|
||||||
|
* be matched by the certificate regular expressions. Same for `accountable` (account), or
|
||||||
|
* `secretarial` (secret).
|
||||||
*/
|
*/
|
||||||
string notSensitiveRegexp() {
|
string notSensitiveRegexp() {
|
||||||
result =
|
result =
|
||||||
"(?is).*([^\\w$.-]|redact|censor|obfuscate|hash|md5|sha|random|((?<!un)(en))?(crypt|code)).*"
|
"(?is).*([^\\w$.-]|redact|censor|obfuscate|hash|md5|sha|random|((?<!un)(en))?(crypt|code)|certain|concert|secretar|accountant|accountab).*"
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
Reference in New Issue
Block a user