Merge pull request #7323 from adityasharad/atm/perf-debugging-std-lib

JS: Performance improvements to libraries using regex matching
This commit is contained in:
Aditya Sharad
2021-12-13 08:53:11 -08:00
committed by GitHub
3 changed files with 61 additions and 27 deletions

View File

@@ -49,14 +49,14 @@ private predicate codeGeneratorMarkerComment(Comment c, string tool) {
*/
private class GenericGeneratedCodeMarkerComment extends GeneratedCodeMarkerComment {
GenericGeneratedCodeMarkerComment() {
// NOTE(review): two versions of this body appear back to back here; this looks like
// a diff hunk rendered without +/- markers (older per-line form first) - confirm.
// Per-line form: test every `line` produced by `getLine(_)` on its own.
exists(string line | line = getLine(_) |
exists(string entity, string was, string automatically |
// Each variable holds a regex alternation that is spliced into the pattern below.
entity = "code|file|class|interface|art[ei]fact|module|script" and
was = "was|is|has been" and
automatically = "automatically |mechanically |auto[- ]?" and
// `(?i)` makes the match case-insensitive; `gener(e?)ated` also accepts the
// misspelling "genereated".
line.regexpMatch("(?i).*\\b(This|The following) (" + entity + ") (" + was + ") (" +
automatically + ")?gener(e?)ated\\b.*")
)
// Whole-text form: the same alternations, matched once against `this.getText()`.
exists(string entity, string was, string automatically |
entity = "code|file|class|interface|art[ei]fact|module|script" and
was = "was|is|has been" and
automatically = "automatically |mechanically |auto[- ]?" and
// Look for this pattern in each line of the comment.
// `(?im)` adds multi-line mode so that `^` and `$` can anchor at line boundaries.
// NOTE(review): presumably `regexpMatch` must match the entire string (the patterns
// are wrapped in `.*`), and without `(?s)` the `.` does not cross line breaks -
// confirm this still matches when the marker is one line of a multi-line comment.
this.getText()
.regexpMatch("(?im)^.*\\b(This|The following) (" + entity + ") (" + was + ") (" +
automatically + ")?gener(e?)ated\\b.*$")
)
}
}
@@ -66,9 +66,14 @@ private class GenericGeneratedCodeMarkerComment extends GeneratedCodeMarkerComme
*/
private class DontModifyMarkerComment extends GeneratedCodeMarkerComment {
DontModifyMarkerComment() {
// NOTE(review): two versions of this body appear back to back here; this looks like
// a diff hunk rendered without +/- markers (older per-line form first) - confirm.
// Per-line form: either pattern may match any single `line` from `getLine(_)`.
// `(?i)` makes both patterns case-insensitive.
exists(string line | line = getLine(_) |
line.regexpMatch("(?i).*\\bGenerated by\\b.*\\bDo not edit\\b.*") or
line.regexpMatch("(?i).*\\bAny modifications to this file will be lost\\b.*")
// Whole-text form: `pattern` ranges over the two alternatives in the set literal
// and is matched once against `this.getText()`.
exists(string pattern |
// Look for these patterns in each line of the comment.
// `(?im)` adds multi-line mode so that `^` and `$` can anchor at line boundaries.
// NOTE(review): presumably `regexpMatch` must match the entire string (the patterns
// are wrapped in `.*`), and without `(?s)` the `.` does not cross line breaks -
// confirm this still matches when the marker is one line of a multi-line comment.
this.getText().regexpMatch(pattern) and
pattern =
[
"(?im)^.*\\bGenerated by\\b.*\\bDo not edit\\b.*$",
"(?im)^.*\\bAny modifications to this file will be lost\\b.*$"
]
)
}
}

View File

@@ -299,12 +299,15 @@ private class JQuery extends FrameworkLibraryWithGenericURL {
private predicate jQueryMarkerComment(Comment c, TopLevel tl, string version) {
tl = c.getTopLevel() and
exists(string txt | txt = c.getText() |
// more recent versions use this format
// More recent versions use this format:
// "(?s).*jQuery (?:JavaScript Library )?v(" + versionRegex() + ").*",
// Earlier versions used this format:
// "(?s).*jQuery (" + versionRegex() + ") - New Wave Javascript.*"
// For efficiency, construct a single regex that matches both,
// at the cost of being slightly more permissive.
version =
txt.regexpCapture("(?s).*jQuery (?:JavaScript Library )?v(" + versionRegex() + ").*", 1)
or
// earlier versions used this format
version = txt.regexpCapture("(?s).*jQuery (" + versionRegex() + ") - New Wave Javascript.*", 1)
txt.regexpCapture("(?s).*jQuery (?:JavaScript Library )?v?(" + versionRegex() +
")(?: - New Wave Javascript)?.*", 1)
or
// 1.0.0 and 1.0.1 have the same marker comment; we call them both "1.0"
txt.matches("%jQuery - New Wave Javascript%") and version = "1.0"

View File

@@ -81,22 +81,48 @@ abstract class SensitiveVariableAccess extends SensitiveExpr {
/** A write to a location that might contain sensitive data. */
abstract class SensitiveWrite extends DataFlow::Node { }
/**
 * Holds if `node` is the value written to a property or variable called `name`.
 *
 * This logic lives in its own predicate for performance: it lets callers
 * narrow down `name` as far as possible before `name` is used in regex matching.
 */
pragma[nomagic]
private predicate writesProperty(DataFlow::Node node, string name) {
  // Property write: `node` is the right-hand side of a write to property `name`.
  exists(DataFlow::PropWrite write |
    write.getRhs() = node and
    write.getPropertyName() = name
  )
  or
  // Variable definition: use the source expression when the definition has one,
  // otherwise use the SSA definition node for it.
  exists(VarDef varDef |
    varDef.getAVariable().getName() = name and
    (
      exists(varDef.getSource()) and
      node.asExpr() = varDef.getSource()
      or
      not exists(varDef.getSource()) and
      node = DataFlow::ssaDefinitionNode(SSA::definition(varDef))
    )
  )
}
/** A write to a variable or property that might contain sensitive data. */
private class BasicSensitiveWrite extends SensitiveWrite {
SensitiveDataClassification classification;
BasicSensitiveWrite() {
exists(string name | nameIndicatesSensitiveData(name, classification) |
exists(DataFlow::PropWrite pwn |
pwn.getPropertyName() = name and
pwn.getRhs() = this
)
or
exists(VarDef v | v.getAVariable().getName() = name |
if exists(v.getSource())
then v.getSource() = this.asExpr()
else this = DataFlow::ssaDefinitionNode(SSA::definition(v))
)
exists(string name |
/*
* PERFORMANCE OPTIMISATION:
* `nameIndicatesSensitiveData` performs a `regexpMatch` on `name`.
* To carry out a regex match, we must first compute the Cartesian product
* of all possible `name`s and regexes, then match.
* To keep this product as small as possible,
* we want to filter `name` as much as possible before the product.
*
* Do this by factoring out a helper predicate containing the filtering
* logic that restricts `name`. This helper predicate will get picked first
* in the join order, since it is the only call here that binds `name`.
*/
writesProperty(this, name) and
nameIndicatesSensitiveData(name, classification)
)
}