mirror of
https://github.com/github/codeql.git
synced 2025-12-16 16:53:25 +01:00
Merge pull request #16306 from github/nickrolfe/js-sensitive
JS: do fewer regexp matches in SensitiveActions
This commit is contained in:
@@ -86,39 +86,37 @@ private predicate writesProperty(DataFlow::Node node, string name) {
|
|||||||
|
|
||||||
/** A write to a variable or property that might contain sensitive data. */
|
/** A write to a variable or property that might contain sensitive data. */
|
||||||
private class BasicSensitiveWrite extends SensitiveWrite {
|
private class BasicSensitiveWrite extends SensitiveWrite {
|
||||||
SensitiveDataClassification classification;
|
string name;
|
||||||
|
|
||||||
BasicSensitiveWrite() {
|
BasicSensitiveWrite() {
|
||||||
exists(string name |
|
/*
|
||||||
/*
|
* PERFORMANCE OPTIMISATION:
|
||||||
* PERFORMANCE OPTIMISATION:
|
* `nameIndicatesSensitiveData` performs a `regexpMatch` on `name`.
|
||||||
* `nameIndicatesSensitiveData` performs a `regexpMatch` on `name`.
|
* To carry out a regex match, we must first compute the Cartesian product
|
||||||
* To carry out a regex match, we must first compute the Cartesian product
|
* of all possible `name`s and regexes, then match.
|
||||||
* of all possible `name`s and regexes, then match.
|
* To keep this product as small as possible,
|
||||||
* To keep this product as small as possible,
|
* we want to filter `name` as much as possible before the product.
|
||||||
* we want to filter `name` as much as possible before the product.
|
*
|
||||||
*
|
* Do this by factoring out a helper predicate containing the filtering
|
||||||
* Do this by factoring out a helper predicate containing the filtering
|
* logic that restricts `name`. This helper predicate will get picked first
|
||||||
* logic that restricts `name`. This helper predicate will get picked first
|
* in the join order, since it is the only call here that binds `name`.
|
||||||
* in the join order, since it is the only call here that binds `name`.
|
*/
|
||||||
*/
|
|
||||||
|
|
||||||
writesProperty(this, name) and
|
writesProperty(this, name) and
|
||||||
nameIndicatesSensitiveData(name, classification)
|
nameIndicatesSensitiveData(name)
|
||||||
)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Gets a classification of the kind of sensitive data the write might handle. */
|
/** Gets a classification of the kind of sensitive data the write might handle. */
|
||||||
SensitiveDataClassification getClassification() { result = classification }
|
SensitiveDataClassification getClassification() { nameIndicatesSensitiveData(name, result) }
|
||||||
}
|
}
|
||||||
|
|
||||||
/** An access to a variable or property that might contain sensitive data. */
|
/** An access to a variable or property that might contain sensitive data. */
|
||||||
private class BasicSensitiveVariableAccess extends SensitiveVariableAccess {
|
private class BasicSensitiveVariableAccess extends SensitiveVariableAccess {
|
||||||
SensitiveDataClassification classification;
|
BasicSensitiveVariableAccess() { nameIndicatesSensitiveData(name) }
|
||||||
|
|
||||||
BasicSensitiveVariableAccess() { nameIndicatesSensitiveData(name, classification) }
|
override SensitiveDataClassification getClassification() {
|
||||||
|
nameIndicatesSensitiveData(name, result)
|
||||||
override SensitiveDataClassification getClassification() { result = classification }
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** A function name that suggests it may be sensitive. */
|
/** A function name that suggests it may be sensitive. */
|
||||||
@@ -138,11 +136,11 @@ abstract class SensitiveDataFunctionName extends SensitiveFunctionName {
|
|||||||
|
|
||||||
/** A method that might return sensitive data, based on the name. */
|
/** A method that might return sensitive data, based on the name. */
|
||||||
class CredentialsFunctionName extends SensitiveDataFunctionName {
|
class CredentialsFunctionName extends SensitiveDataFunctionName {
|
||||||
SensitiveDataClassification classification;
|
CredentialsFunctionName() { nameIndicatesSensitiveData(this) }
|
||||||
|
|
||||||
CredentialsFunctionName() { nameIndicatesSensitiveData(this, classification) }
|
override SensitiveDataClassification getClassification() {
|
||||||
|
nameIndicatesSensitiveData(this, result)
|
||||||
override SensitiveDataClassification getClassification() { result = classification }
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -106,6 +106,25 @@ module HeuristicNames {
|
|||||||
"(?is).*([^\\w$.-]|redact|censor|obfuscate|hash|md5|sha|random|((?<!un)(en))?(crypt|(?<!pass)code)|certain|concert|secretar|accountant|accountab).*"
|
"(?is).*([^\\w$.-]|redact|censor|obfuscate|hash|md5|sha|random|((?<!un)(en))?(crypt|(?<!pass)code)|certain|concert|secretar|accountant|accountab).*"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Holds if `name` may indicate the presence of sensitive data, and `name` does not indicate that
|
||||||
|
* the data is in fact non-sensitive (for example since it is hashed or encrypted).
|
||||||
|
*
|
||||||
|
* That is, at least one of the regexps from `maybeSensitiveRegexp` matches `name` (for any
|
||||||
|
* classification), and none of the regexps from `notSensitiveRegexp` matches `name`.
|
||||||
|
*/
|
||||||
|
bindingset[name]
|
||||||
|
predicate nameIndicatesSensitiveData(string name) {
|
||||||
|
exists(string combinedRegexp |
|
||||||
|
// Combine all the maybe-sensitive regexps into one using non-capturing groups and |.
|
||||||
|
combinedRegexp =
|
||||||
|
"(?:" + strictconcat(string r | r = maybeSensitiveRegexp(_) | r, ")|(?:") + ")"
|
||||||
|
|
|
||||||
|
name.regexpMatch(combinedRegexp)
|
||||||
|
) and
|
||||||
|
not name.regexpMatch(notSensitiveRegexp())
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Holds if `name` may indicate the presence of sensitive data, and
|
* Holds if `name` may indicate the presence of sensitive data, and
|
||||||
* `name` does not indicate that the data is in fact non-sensitive (for example since
|
* `name` does not indicate that the data is in fact non-sensitive (for example since
|
||||||
@@ -115,6 +134,10 @@ module HeuristicNames {
|
|||||||
* That is, one of the regexps from `maybeSensitiveRegexp` matches `name` (with the
|
* That is, one of the regexps from `maybeSensitiveRegexp` matches `name` (with the
|
||||||
* given classification), and none of the regexps from `notSensitiveRegexp` matches
|
* given classification), and none of the regexps from `notSensitiveRegexp` matches
|
||||||
* `name`.
|
* `name`.
|
||||||
|
*
|
||||||
|
* When the set of names is large, it's worth using `nameIndicatesSensitiveData/1` as a first
|
||||||
|
* pass, since that combines all the regexps into one, and should be faster. Then call this
|
||||||
|
* predicate to get the classification(s).
|
||||||
*/
|
*/
|
||||||
bindingset[name]
|
bindingset[name]
|
||||||
predicate nameIndicatesSensitiveData(string name, SensitiveDataClassification classification) {
|
predicate nameIndicatesSensitiveData(string name, SensitiveDataClassification classification) {
|
||||||
|
|||||||
@@ -106,6 +106,25 @@ module HeuristicNames {
|
|||||||
"(?is).*([^\\w$.-]|redact|censor|obfuscate|hash|md5|sha|random|((?<!un)(en))?(crypt|(?<!pass)code)|certain|concert|secretar|accountant|accountab).*"
|
"(?is).*([^\\w$.-]|redact|censor|obfuscate|hash|md5|sha|random|((?<!un)(en))?(crypt|(?<!pass)code)|certain|concert|secretar|accountant|accountab).*"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Holds if `name` may indicate the presence of sensitive data, and `name` does not indicate that
|
||||||
|
* the data is in fact non-sensitive (for example since it is hashed or encrypted).
|
||||||
|
*
|
||||||
|
* That is, at least one of the regexps from `maybeSensitiveRegexp` matches `name` (for any
|
||||||
|
* classification), and none of the regexps from `notSensitiveRegexp` matches `name`.
|
||||||
|
*/
|
||||||
|
bindingset[name]
|
||||||
|
predicate nameIndicatesSensitiveData(string name) {
|
||||||
|
exists(string combinedRegexp |
|
||||||
|
// Combine all the maybe-sensitive regexps into one using non-capturing groups and |.
|
||||||
|
combinedRegexp =
|
||||||
|
"(?:" + strictconcat(string r | r = maybeSensitiveRegexp(_) | r, ")|(?:") + ")"
|
||||||
|
|
|
||||||
|
name.regexpMatch(combinedRegexp)
|
||||||
|
) and
|
||||||
|
not name.regexpMatch(notSensitiveRegexp())
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Holds if `name` may indicate the presence of sensitive data, and
|
* Holds if `name` may indicate the presence of sensitive data, and
|
||||||
* `name` does not indicate that the data is in fact non-sensitive (for example since
|
* `name` does not indicate that the data is in fact non-sensitive (for example since
|
||||||
@@ -115,6 +134,10 @@ module HeuristicNames {
|
|||||||
* That is, one of the regexps from `maybeSensitiveRegexp` matches `name` (with the
|
* That is, one of the regexps from `maybeSensitiveRegexp` matches `name` (with the
|
||||||
* given classification), and none of the regexps from `notSensitiveRegexp` matches
|
* given classification), and none of the regexps from `notSensitiveRegexp` matches
|
||||||
* `name`.
|
* `name`.
|
||||||
|
*
|
||||||
|
* When the set of names is large, it's worth using `nameIndicatesSensitiveData/1` as a first
|
||||||
|
* pass, since that combines all the regexps into one, and should be faster. Then call this
|
||||||
|
* predicate to get the classification(s).
|
||||||
*/
|
*/
|
||||||
bindingset[name]
|
bindingset[name]
|
||||||
predicate nameIndicatesSensitiveData(string name, SensitiveDataClassification classification) {
|
predicate nameIndicatesSensitiveData(string name, SensitiveDataClassification classification) {
|
||||||
|
|||||||
@@ -106,6 +106,25 @@ module HeuristicNames {
|
|||||||
"(?is).*([^\\w$.-]|redact|censor|obfuscate|hash|md5|sha|random|((?<!un)(en))?(crypt|(?<!pass)code)|certain|concert|secretar|accountant|accountab).*"
|
"(?is).*([^\\w$.-]|redact|censor|obfuscate|hash|md5|sha|random|((?<!un)(en))?(crypt|(?<!pass)code)|certain|concert|secretar|accountant|accountab).*"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Holds if `name` may indicate the presence of sensitive data, and `name` does not indicate that
|
||||||
|
* the data is in fact non-sensitive (for example since it is hashed or encrypted).
|
||||||
|
*
|
||||||
|
* That is, at least one of the regexps from `maybeSensitiveRegexp` matches `name` (for any
|
||||||
|
* classification), and none of the regexps from `notSensitiveRegexp` matches `name`.
|
||||||
|
*/
|
||||||
|
bindingset[name]
|
||||||
|
predicate nameIndicatesSensitiveData(string name) {
|
||||||
|
exists(string combinedRegexp |
|
||||||
|
// Combine all the maybe-sensitive regexps into one using non-capturing groups and |.
|
||||||
|
combinedRegexp =
|
||||||
|
"(?:" + strictconcat(string r | r = maybeSensitiveRegexp(_) | r, ")|(?:") + ")"
|
||||||
|
|
|
||||||
|
name.regexpMatch(combinedRegexp)
|
||||||
|
) and
|
||||||
|
not name.regexpMatch(notSensitiveRegexp())
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Holds if `name` may indicate the presence of sensitive data, and
|
* Holds if `name` may indicate the presence of sensitive data, and
|
||||||
* `name` does not indicate that the data is in fact non-sensitive (for example since
|
* `name` does not indicate that the data is in fact non-sensitive (for example since
|
||||||
@@ -115,6 +134,10 @@ module HeuristicNames {
|
|||||||
* That is, one of the regexps from `maybeSensitiveRegexp` matches `name` (with the
|
* That is, one of the regexps from `maybeSensitiveRegexp` matches `name` (with the
|
||||||
* given classification), and none of the regexps from `notSensitiveRegexp` matches
|
* given classification), and none of the regexps from `notSensitiveRegexp` matches
|
||||||
* `name`.
|
* `name`.
|
||||||
|
*
|
||||||
|
* When the set of names is large, it's worth using `nameIndicatesSensitiveData/1` as a first
|
||||||
|
* pass, since that combines all the regexps into one, and should be faster. Then call this
|
||||||
|
* predicate to get the classification(s).
|
||||||
*/
|
*/
|
||||||
bindingset[name]
|
bindingset[name]
|
||||||
predicate nameIndicatesSensitiveData(string name, SensitiveDataClassification classification) {
|
predicate nameIndicatesSensitiveData(string name, SensitiveDataClassification classification) {
|
||||||
|
|||||||
@@ -106,6 +106,25 @@ module HeuristicNames {
|
|||||||
"(?is).*([^\\w$.-]|redact|censor|obfuscate|hash|md5|sha|random|((?<!un)(en))?(crypt|(?<!pass)code)|certain|concert|secretar|accountant|accountab).*"
|
"(?is).*([^\\w$.-]|redact|censor|obfuscate|hash|md5|sha|random|((?<!un)(en))?(crypt|(?<!pass)code)|certain|concert|secretar|accountant|accountab).*"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Holds if `name` may indicate the presence of sensitive data, and `name` does not indicate that
|
||||||
|
* the data is in fact non-sensitive (for example since it is hashed or encrypted).
|
||||||
|
*
|
||||||
|
* That is, at least one of the regexps from `maybeSensitiveRegexp` matches `name` (for any
|
||||||
|
* classification), and none of the regexps from `notSensitiveRegexp` matches `name`.
|
||||||
|
*/
|
||||||
|
bindingset[name]
|
||||||
|
predicate nameIndicatesSensitiveData(string name) {
|
||||||
|
exists(string combinedRegexp |
|
||||||
|
// Combine all the maybe-sensitive regexps into one using non-capturing groups and |.
|
||||||
|
combinedRegexp =
|
||||||
|
"(?:" + strictconcat(string r | r = maybeSensitiveRegexp(_) | r, ")|(?:") + ")"
|
||||||
|
|
|
||||||
|
name.regexpMatch(combinedRegexp)
|
||||||
|
) and
|
||||||
|
not name.regexpMatch(notSensitiveRegexp())
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Holds if `name` may indicate the presence of sensitive data, and
|
* Holds if `name` may indicate the presence of sensitive data, and
|
||||||
* `name` does not indicate that the data is in fact non-sensitive (for example since
|
* `name` does not indicate that the data is in fact non-sensitive (for example since
|
||||||
@@ -115,6 +134,10 @@ module HeuristicNames {
|
|||||||
* That is, one of the regexps from `maybeSensitiveRegexp` matches `name` (with the
|
* That is, one of the regexps from `maybeSensitiveRegexp` matches `name` (with the
|
||||||
* given classification), and none of the regexps from `notSensitiveRegexp` matches
|
* given classification), and none of the regexps from `notSensitiveRegexp` matches
|
||||||
* `name`.
|
* `name`.
|
||||||
|
*
|
||||||
|
* When the set of names is large, it's worth using `nameIndicatesSensitiveData/1` as a first
|
||||||
|
* pass, since that combines all the regexps into one, and should be faster. Then call this
|
||||||
|
* predicate to get the classification(s).
|
||||||
*/
|
*/
|
||||||
bindingset[name]
|
bindingset[name]
|
||||||
predicate nameIndicatesSensitiveData(string name, SensitiveDataClassification classification) {
|
predicate nameIndicatesSensitiveData(string name, SensitiveDataClassification classification) {
|
||||||
|
|||||||
Reference in New Issue
Block a user