mirror of
https://github.com/github/codeql.git
synced 2025-12-21 19:26:31 +01:00
Some subclasses of GeneratedCodeMarkerComment regex match against `getLine(_)`. When evaluated, this results in multiple scans (one per subclass that uses it) of all comment lines in the database, before regex matching against those lines. To make these scans smaller, match the regexes against the entire comment text without splitting it into lines. This is achieved using `?m` (multiline) and line boundaries in the regexes.
198 lines
5.9 KiB
Plaintext
198 lines
5.9 KiB
Plaintext
/**
|
|
* Provides classes for detecting generated code.
|
|
*/
|
|
|
|
import javascript
|
|
import semmle.javascript.frameworks.Bundling
|
|
import semmle.javascript.frameworks.Emscripten
|
|
import semmle.javascript.frameworks.GWT
|
|
import semmle.javascript.SourceMaps
|
|
|
|
/**
 * A comment whose presence indicates that the code it appears in was generated.
 *
 * This class is abstract: each concrete subclass contributes one way of
 * recognizing such a comment.
 */
abstract class GeneratedCodeMarkerComment extends Comment { }
|
|
|
|
/**
 * Treats every `SourceMappingComment` as a marker comment for generated code.
 */
private class SourceMappingCommentMarkerComment extends GeneratedCodeMarkerComment {
  SourceMappingCommentMarkerComment() {
    // Membership is exactly "this comment is also a `SourceMappingComment`".
    exists(SourceMappingComment mapping | this = mapping)
  }
}
|
|
|
|
/**
 * A marker comment that names one of the known code generators.
 */
class CodeGeneratorMarkerComment extends GeneratedCodeMarkerComment {
  CodeGeneratorMarkerComment() {
    // Any recognized generator name qualifies the comment; which one is irrelevant here.
    exists(string generator | codeGeneratorMarkerComment(this, generator))
  }

  /** Gets the name of the code generator recognized in this marker comment. */
  string getGeneratorName() { codeGeneratorMarkerComment(this, result) }
}
|
|
|
|
/**
 * Holds if the text of comment `c` is a "generated by <tool>" notice, where
 * `tool` is the generator name captured from that text.
 *
 * The regular expression accepts, in order: leading whitespace and `*`
 * characters, an optional `parser ` or `Code ` prefix, `generated` or
 * `Generated`, an optional `from ...` clause, `by`, and then one of the known
 * tool names followed by a word boundary. The `(?s)` flag lets `.` span line
 * terminators, so the notice may be followed by arbitrary further text.
 *
 * NOTE(review): the pattern also accepts a trailing `y` after `enerated`
 * (`eneratedy?`); it is kept unchanged here - confirm whether it is intentional.
 */
private predicate codeGeneratorMarkerComment(Comment c, string tool) {
  exists(string knownTools, string noticeRegex |
    knownTools =
      "js_of_ocaml|CoffeeScript|LiveScript|dart2js|ANTLR|PEG\\.js|Opal|JSX|jison(?:-lex)?|(?:Microsoft \\(R\\) AutoRest Code Generator)|purs" and
    noticeRegex =
      "(?s)[\\s*]*(?:parser |Code )?[gG]eneratedy? (?:from .*)?by (" + knownTools + ")\\b.*" and
    // Capture group 1 is the alternation of tool names.
    tool = c.getText().regexpCapture(noticeRegex, 1)
  )
}
|
|
|
|
/**
 * A generic generated code marker comment.
 *
 * A comment qualifies if any single line of its text contains a phrase of the
 * form "This|The following <entity> <was|is|has been> [automatically ]generated",
 * matched case-insensitively.
 */
private class GenericGeneratedCodeMarkerComment extends GeneratedCodeMarkerComment {
  GenericGeneratedCodeMarkerComment() {
    exists(string entity, string was, string automatically |
      entity = "code|file|class|interface|art[ei]fact|module|script" and
      was = "was|is|has been" and
      automatically = "automatically |mechanically |auto[- ]?" and
      // Look for this pattern in each line of the comment.
      //
      // `regexpFind` is used rather than `regexpMatch`: `regexpMatch` only holds when
      // the regex matches the entire receiver, and without `(?s)` the `.` never
      // consumes a line terminator, so a `^.*...$` pattern could never match the text
      // of a multi-line comment. With `regexpFind`, `(?m)` makes `^` and `$` anchor at
      // line boundaries and any one matching line is sufficient.
      exists(
        this.getText()
            .regexpFind("(?im)^.*\\b(This|The following) (" + entity + ") (" + was + ") (" +
                automatically + ")?gener(e?)ated\\b.*$", _, _)
      )
    )
  }
}
|
|
|
|
/**
 * A comment warning against modifications, viewed as a marker comment indicating generated code.
 *
 * A comment qualifies if any single line of its text contains either
 * "Generated by ... Do not edit" or "Any modifications to this file will be lost",
 * matched case-insensitively.
 */
private class DontModifyMarkerComment extends GeneratedCodeMarkerComment {
  DontModifyMarkerComment() {
    exists(string pattern |
      pattern =
        [
          "(?im)^.*\\bGenerated by\\b.*\\bDo not edit\\b.*$",
          "(?im)^.*\\bAny modifications to this file will be lost\\b.*$"
        ] and
      // Look for these patterns in each line of the comment.
      //
      // `regexpFind` is used rather than `regexpMatch`: `regexpMatch` only holds when
      // the regex matches the entire receiver, and without `(?s)` the `.` never
      // consumes a line terminator, so these `^.*...$` patterns could never match the
      // text of a multi-line comment. With `regexpFind`, `(?m)` makes `^` and `$`
      // anchor at line boundaries and any one matching line is sufficient.
      exists(this.getText().regexpFind(pattern, _, _))
    )
  }
}
|
|
|
|
/**
 * A top-level that accesses a variable named `$dart_deferred_initializers$` or
 * `$dart_deferred_initializers`, which is taken as a sign of dart2js output.
 */
private class DartGeneratedTopLevel extends TopLevel {
  DartGeneratedTopLevel() {
    exists(VarAccess access |
      access.getName() = ["$dart_deferred_initializers$", "$dart_deferred_initializers"] and
      this = access.getTopLevel()
    )
  }
}
|
|
|
|
/**
 * Holds if the invocations in `tl` carry, on average, more than 100 arguments
 * per line of `tl`; such density is often a sign of generated code.
 */
private predicate hasManyInvocations(TopLevel tl) {
  exists(int lineCount, int argCount |
    argCount = sum(InvokeExpr call | call.getTopLevel() = tl | call.getNumArgument()) and
    lineCount = tl.getNumberOfLines() and
    // Guard the division below against top-levels reporting no lines.
    lineCount > 0 and
    // heuristic: more than 100 arguments per line means it's probably generated;
    // the cast makes the ratio a floating-point value.
    argCount.(float) / lineCount > 100
  )
}
|
|
|
|
/**
 * Holds if `f` contains a large number of primitive literals and no impure
 * expressions other than variable declarators, which is often a sign of
 * generated data code.
 */
private predicate isData(File f) {
  // heuristic: more than 1000 primitive literal expressions in `f` ...
  1000 < count(SyntacticConstants::PrimitiveLiteralConstant literal | literal.getFile() = f) and
  // ... and every impure expression in `f` is a variable declarator
  forall(Expr impure | impure.getFile() = f and impure.isImpure() |
    impure instanceof VariableDeclarator
  )
}
|
|
|
|
/**
 * Holds if `f` consists of a single line holding more than 100 expressions, all
 * of which are object, array, number, string or boolean literals. Such a file
 * looks like a non-trivial amount of JSON data, which is often a sign of
 * generated data code.
 */
private predicate isJsonLine(File f) {
  f.getNumberOfLines() = 1 and
  100 < count(Expr e | e.getFile() = f) and
  // no expression in `f` falls outside the JSON-like expression kinds
  not exists(Expr other | other.getFile() = f |
    not other instanceof ObjectExpr and
    not other instanceof ArrayExpr and
    not other instanceof NumberLiteral and
    not other instanceof StringLiteral and
    not other instanceof BooleanLiteral
  )
}
|
|
|
|
/**
 * Holds if `f` looks like a generated HTML file, according to any of three heuristics:
 *
 * - it contains a `meta` element whose `name` attribute is `generator`;
 * - it contains an HTML comment whose entire text reads
 *   "Generated by <tool> <major>.<minor>.<patch>", surrounding whitespace aside;
 * - more than 20 HTML elements start on one of its lines.
 */
private predicate isGeneratedHtml(File f) {
  exists(HTML::Element meta |
    meta.getName() = "meta" and
    meta.getAttributeByName("name").getValue() = "generator" and
    meta.getFile() = f
  )
  or
  exists(HTML::CommentNode note |
    note.getFile() = f and
    note.getText().regexpMatch("\\s*Generated by [\\w-]+ \\d+\\.\\d+\\.\\d+\\s*")
  )
  or
  // some line of `f` has more than 20 elements starting on it
  countStartingHtmlElements(f, _) > 20
}
|
|
|
|
/**
 * Gets an HTML element in file `f` whose location begins on line `l`.
 */
private HTML::Element getAStartingElement(File f, int l) {
  l = result.getLocation().getStartLine() and
  f = result.getFile()
}
|
|
|
|
/**
 * Gets the number of HTML elements in file `f` that start at line `l`.
 *
 * Because `strictcount` is used, there is no result for a line on which no
 * element starts.
 */
private int countStartingHtmlElements(File f, int l) {
  result = strictcount(HTML::Element elem | elem = getAStartingElement(f, l))
}
|
|
|
|
/**
 * Holds if the stem of `f` (as reported by `getStem()`) consists solely of
 * decimal digits, and the extension of `f` is not `vue`.
 */
predicate isGeneratedFileName(File f) {
  // `not ... = ...` rather than `!=`, so that a file without an extension still qualifies
  not f.getExtension() = "vue" and
  f.getStem().regexpMatch("[0-9]+")
}
|
|
|
|
/**
 * Holds if `tl` looks like it contains generated code.
 *
 * This is the union of several heuristics: some inspect `tl` itself, and
 * others inspect the file that `tl` belongs to.
 */
predicate isGenerated(TopLevel tl) {
  // heuristics on the top-level itself
  tl.isMinified()
  or
  isBundle(tl)
  or
  tl instanceof GWTGeneratedTopLevel
  or
  tl instanceof DartGeneratedTopLevel
  or
  tl = any(GeneratedCodeMarkerComment marker).getTopLevel()
  or
  hasManyInvocations(tl)
  or
  // heuristics on the enclosing file
  exists(File f | f = tl.getFile() |
    isData(f) or
    isJsonLine(f) or
    isGeneratedHtml(f) or
    isGeneratedFileName(f)
  )
}
|
|
|
|
/**
 * Holds if `file` looks like it contains generated code, that is, if it is
 * recognized as generated HTML or one of its top-levels is recognized as
 * generated.
 */
predicate isGeneratedCode(File file) {
  isGeneratedHtml(file)
  or
  isGenerated(file.getATopLevel())
}
|