JS: do fewer regexp matches in SensitiveActions

This commit is contained in:
Nick Rolfe
2024-04-23 14:51:01 +01:00
parent bea7b94537
commit 003d208574
5 changed files with 116 additions and 26 deletions

View File

@@ -86,39 +86,37 @@ private predicate writesProperty(DataFlow::Node node, string name) {
/** A write to a variable or property that might contain sensitive data. */
private class BasicSensitiveWrite extends SensitiveWrite {
SensitiveDataClassification classification;
string name;
BasicSensitiveWrite() {
exists(string name |
/*
* PERFORMANCE OPTIMISATION:
* `nameIndicatesSensitiveData` performs a `regexpMatch` on `name`.
* To carry out a regex match, we must first compute the Cartesian product
* of all possible `name`s and regexes, then match.
* To keep this product as small as possible,
* we want to filter `name` as much as possible before the product.
*
* Do this by factoring out a helper predicate containing the filtering
* logic that restricts `name`. This helper predicate will get picked first
* in the join order, since it is the only call here that binds `name`.
*/
/*
* PERFORMANCE OPTIMISATION:
* `nameIndicatesSensitiveData` performs a `regexpMatch` on `name`.
* To carry out a regex match, we must first compute the Cartesian product
* of all possible `name`s and regexes, then match.
* To keep this product as small as possible,
* we want to filter `name` as much as possible before the product.
*
* Do this by factoring out a helper predicate containing the filtering
* logic that restricts `name`. This helper predicate will get picked first
* in the join order, since it is the only call here that binds `name`.
*/
writesProperty(this, name) and
nameIndicatesSensitiveData(name, classification)
)
writesProperty(this, name) and
nameIndicatesSensitiveData(name)
}
/** Gets a classification of the kind of sensitive data the write might handle. */
SensitiveDataClassification getClassification() { result = classification }
SensitiveDataClassification getClassification() { nameIndicatesSensitiveData(name, result) }
}
/** An access to a variable or property that might contain sensitive data. */
private class BasicSensitiveVariableAccess extends SensitiveVariableAccess {
SensitiveDataClassification classification;
BasicSensitiveVariableAccess() { nameIndicatesSensitiveData(name) }
BasicSensitiveVariableAccess() { nameIndicatesSensitiveData(name, classification) }
override SensitiveDataClassification getClassification() { result = classification }
override SensitiveDataClassification getClassification() {
nameIndicatesSensitiveData(name, result)
}
}
/** A function name that suggests it may be sensitive. */
@@ -138,11 +136,11 @@ abstract class SensitiveDataFunctionName extends SensitiveFunctionName {
/** A method that might return sensitive data, based on the name. */
class CredentialsFunctionName extends SensitiveDataFunctionName {
SensitiveDataClassification classification;
CredentialsFunctionName() { nameIndicatesSensitiveData(this) }
CredentialsFunctionName() { nameIndicatesSensitiveData(this, classification) }
override SensitiveDataClassification getClassification() { result = classification }
override SensitiveDataClassification getClassification() {
nameIndicatesSensitiveData(this, result)
}
}
/**

View File

@@ -106,6 +106,25 @@ module HeuristicNames {
"(?is).*([^\\w$.-]|redact|censor|obfuscate|hash|md5|sha|random|((?<!un)(en))?(crypt|(?<!pass)code)|certain|concert|secretar|accountant|accountab).*"
}
/**
* Holds if `name` may indicate the presence of sensitive data, and `name` does not indicate that
* the data is in fact non-sensitive (for example since it is hashed or encrypted).
*
* That is, one of the regexps from `maybeSensitiveRegexp` matches `name` (with the given
* classification), and none of the regexps from `notSensitiveRegexp` matches `name`.
*/
bindingset[name]
predicate nameIndicatesSensitiveData(string name) {
exists(string combinedRegexp |
// Combine all the maybe-sensitive regexps into one using non-capturing groups and |.
combinedRegexp =
"(?:" + strictconcat(string r | r = maybeSensitiveRegexp(_) | r, ")|(?:") + ")"
|
name.regexpMatch(combinedRegexp)
) and
not name.regexpMatch(notSensitiveRegexp())
}
/**
* Holds if `name` may indicate the presence of sensitive data, and
* `name` does not indicate that the data is in fact non-sensitive (for example since
@@ -115,6 +134,10 @@ module HeuristicNames {
* That is, one of the regexps from `maybeSensitiveRegexp` matches `name` (with the
* given classification), and none of the regexps from `notSensitiveRegexp` matches
* `name`.
*
* When the set of names is large, it's worth using `nameIndicatesSensitiveData/1` as a first
* pass, since that combines all the regexps into one, and should be faster. Then call this
* predicate to get the classification(s).
*/
bindingset[name]
predicate nameIndicatesSensitiveData(string name, SensitiveDataClassification classification) {

View File

@@ -106,6 +106,25 @@ module HeuristicNames {
"(?is).*([^\\w$.-]|redact|censor|obfuscate|hash|md5|sha|random|((?<!un)(en))?(crypt|(?<!pass)code)|certain|concert|secretar|accountant|accountab).*"
}
/**
* Holds if `name` may indicate the presence of sensitive data, and `name` does not indicate that
* the data is in fact non-sensitive (for example since it is hashed or encrypted).
*
* That is, one of the regexps from `maybeSensitiveRegexp` matches `name` (with the given
* classification), and none of the regexps from `notSensitiveRegexp` matches `name`.
*/
bindingset[name]
predicate nameIndicatesSensitiveData(string name) {
exists(string combinedRegexp |
// Combine all the maybe-sensitive regexps into one using non-capturing groups and |.
combinedRegexp =
"(?:" + strictconcat(string r | r = maybeSensitiveRegexp(_) | r, ")|(?:") + ")"
|
name.regexpMatch(combinedRegexp)
) and
not name.regexpMatch(notSensitiveRegexp())
}
/**
* Holds if `name` may indicate the presence of sensitive data, and
* `name` does not indicate that the data is in fact non-sensitive (for example since
@@ -115,6 +134,10 @@ module HeuristicNames {
* That is, one of the regexps from `maybeSensitiveRegexp` matches `name` (with the
* given classification), and none of the regexps from `notSensitiveRegexp` matches
* `name`.
*
* When the set of names is large, it's worth using `nameIndicatesSensitiveData/1` as a first
* pass, since that combines all the regexps into one, and should be faster. Then call this
* predicate to get the classification(s).
*/
bindingset[name]
predicate nameIndicatesSensitiveData(string name, SensitiveDataClassification classification) {

View File

@@ -106,6 +106,25 @@ module HeuristicNames {
"(?is).*([^\\w$.-]|redact|censor|obfuscate|hash|md5|sha|random|((?<!un)(en))?(crypt|(?<!pass)code)|certain|concert|secretar|accountant|accountab).*"
}
/**
* Holds if `name` may indicate the presence of sensitive data, and `name` does not indicate that
* the data is in fact non-sensitive (for example since it is hashed or encrypted).
*
* That is, one of the regexps from `maybeSensitiveRegexp` matches `name` (with the given
* classification), and none of the regexps from `notSensitiveRegexp` matches `name`.
*/
bindingset[name]
predicate nameIndicatesSensitiveData(string name) {
exists(string combinedRegexp |
// Combine all the maybe-sensitive regexps into one using non-capturing groups and |.
combinedRegexp =
"(?:" + strictconcat(string r | r = maybeSensitiveRegexp(_) | r, ")|(?:") + ")"
|
name.regexpMatch(combinedRegexp)
) and
not name.regexpMatch(notSensitiveRegexp())
}
/**
* Holds if `name` may indicate the presence of sensitive data, and
* `name` does not indicate that the data is in fact non-sensitive (for example since
@@ -115,6 +134,10 @@ module HeuristicNames {
* That is, one of the regexps from `maybeSensitiveRegexp` matches `name` (with the
* given classification), and none of the regexps from `notSensitiveRegexp` matches
* `name`.
*
* When the set of names is large, it's worth using `nameIndicatesSensitiveData/1` as a first
* pass, since that combines all the regexps into one, and should be faster. Then call this
* predicate to get the classification(s).
*/
bindingset[name]
predicate nameIndicatesSensitiveData(string name, SensitiveDataClassification classification) {

View File

@@ -106,6 +106,25 @@ module HeuristicNames {
"(?is).*([^\\w$.-]|redact|censor|obfuscate|hash|md5|sha|random|((?<!un)(en))?(crypt|(?<!pass)code)|certain|concert|secretar|accountant|accountab).*"
}
/**
* Holds if `name` may indicate the presence of sensitive data, and `name` does not indicate that
* the data is in fact non-sensitive (for example since it is hashed or encrypted).
*
* That is, one of the regexps from `maybeSensitiveRegexp` matches `name` (with the given
* classification), and none of the regexps from `notSensitiveRegexp` matches `name`.
*/
bindingset[name]
predicate nameIndicatesSensitiveData(string name) {
exists(string combinedRegexp |
// Combine all the maybe-sensitive regexps into one using non-capturing groups and |.
combinedRegexp =
"(?:" + strictconcat(string r | r = maybeSensitiveRegexp(_) | r, ")|(?:") + ")"
|
name.regexpMatch(combinedRegexp)
) and
not name.regexpMatch(notSensitiveRegexp())
}
/**
* Holds if `name` may indicate the presence of sensitive data, and
* `name` does not indicate that the data is in fact non-sensitive (for example since
@@ -115,6 +134,10 @@ module HeuristicNames {
* That is, one of the regexps from `maybeSensitiveRegexp` matches `name` (with the
* given classification), and none of the regexps from `notSensitiveRegexp` matches
* `name`.
*
* When the set of names is large, it's worth using `nameIndicatesSensitiveData/1` as a first
* pass, since that combines all the regexps into one, and should be faster. Then call this
* predicate to get the classification(s).
*/
bindingset[name]
predicate nameIndicatesSensitiveData(string name, SensitiveDataClassification classification) {