mirror of
https://github.com/github/codeql.git
synced 2025-12-24 12:46:34 +01:00
197 lines
5.9 KiB
Plaintext
197 lines
5.9 KiB
Plaintext
/**
|
|
* Provides classes for detecting generated code.
|
|
*/
|
|
|
|
import javascript
|
|
import semmle.javascript.frameworks.Bundling
|
|
import semmle.javascript.frameworks.Emscripten
|
|
import semmle.javascript.frameworks.GWT
|
|
import semmle.javascript.SourceMaps
|
|
|
|
/**
|
|
* A comment that marks generated code.
|
|
*/
|
|
abstract class GeneratedCodeMarkerComment extends Comment { }
|
|
|
|
/**
|
|
* A source mapping comment, viewed as a marker comment indicating generated code.
|
|
*/
|
|
private class SourceMappingCommentMarkerComment extends GeneratedCodeMarkerComment instanceof SourceMappingComment
|
|
{ }
|
|
|
|
/**
|
|
* A marker comment left by a known code generator.
|
|
*/
|
|
class CodeGeneratorMarkerComment extends GeneratedCodeMarkerComment {
|
|
CodeGeneratorMarkerComment() { codeGeneratorMarkerComment(this, _) }
|
|
|
|
/** Gets the name of the code generator that left this marker comment. */
|
|
string getGeneratorName() { codeGeneratorMarkerComment(this, result) }
|
|
}
|
|
|
|
/**
|
|
* Holds if `c` is a comment left by code generator `tool`.
|
|
*/
|
|
private predicate codeGeneratorMarkerComment(Comment c, string tool) {
|
|
exists(string toolPattern |
|
|
toolPattern =
|
|
"js_of_ocaml|CoffeeScript|LiveScript|dart2js|ANTLR|PEG\\.js|Opal|JSX|jison(?:-lex)?|(?:Microsoft \\(R\\) AutoRest Code Generator)|purs" and
|
|
tool =
|
|
c.getText()
|
|
.regexpCapture("(?s)[\\s*]*(?:parser |Code )?[gG]eneratedy? (?:from .*)?by (" +
|
|
toolPattern + ")\\b.*", 1)
|
|
)
|
|
}
|
|
|
|
/**
|
|
* A generic generated code marker comment.
|
|
*/
|
|
private class GenericGeneratedCodeMarkerComment extends GeneratedCodeMarkerComment {
|
|
GenericGeneratedCodeMarkerComment() {
|
|
exists(string entity, string was, string automatically |
|
|
entity = "code|file|class|interface|art[ei]fact|module|script" and
|
|
was = "was|is|has been" and
|
|
automatically = "automatically |mechanically |auto[- ]?" and
|
|
// Look for this pattern in each line of the comment.
|
|
this.getText()
|
|
.regexpMatch("(?im)^.*\\b(This|The following) (" + entity + ") (" + was + ") (" +
|
|
automatically + ")?gener(e?)ated\\b.*$")
|
|
)
|
|
}
|
|
}
|
|
|
|
/**
|
|
* A comment warning against modifications, viewed as a marker comment indicating generated code.
|
|
*/
|
|
private class DontModifyMarkerComment extends GeneratedCodeMarkerComment {
|
|
DontModifyMarkerComment() {
|
|
exists(string pattern |
|
|
// Look for these patterns in each line of the comment.
|
|
this.getText().regexpMatch(pattern) and
|
|
pattern =
|
|
[
|
|
"(?im)^.*\\bGenerated by\\b.*\\bDo not edit\\b.*$",
|
|
"(?im)^.*\\bAny modifications to this file will be lost\\b.*$"
|
|
]
|
|
)
|
|
}
|
|
}
|
|
|
|
/** A script that looks like it was generated by dart2js. */
|
|
private class DartGeneratedTopLevel extends TopLevel {
|
|
DartGeneratedTopLevel() {
|
|
exists(VarAccess deferredInit | deferredInit.getTopLevel() = this |
|
|
deferredInit.getName() = "$dart_deferred_initializers$" or
|
|
deferredInit.getName() = "$dart_deferred_initializers"
|
|
)
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Holds if `tl` has unusually many or unusually complicated function invocations, which is
|
|
* often a sign of generated code.
|
|
*/
|
|
private predicate hasManyInvocations(TopLevel tl) {
|
|
// heuristic: more than 100 arguments per line means it's probably generated
|
|
exists(int nl, int na |
|
|
nl = tl.getNumberOfLines() and
|
|
nl > 0 and
|
|
na = sum(InvokeExpr invk | tl = invk.getTopLevel() | invk.getNumArgument()) and
|
|
na.(float) / nl > 100
|
|
)
|
|
}
|
|
|
|
/**
|
|
* Holds if `f` is side effect free, and full of primitive literals, which is often a sign of generated data code.
|
|
*/
|
|
private predicate isData(File f) {
|
|
// heuristic: `f` has more than 1000 primitive literal expressions ...
|
|
count(SyntacticConstants::PrimitiveLiteralConstant e | e.getFile() = f) > 1000 and
|
|
// ... but no expressions with side effects ...
|
|
not exists(Expr e |
|
|
e.getFile() = f and
|
|
e.isImpure() and
|
|
// ... except for variable initializers
|
|
not e instanceof VariableDeclarator
|
|
)
|
|
}
|
|
|
|
/**
|
|
* Holds if `f` is a single line that looks like a non-trivial amount of JSON data, which is often a sign of generated data code.
|
|
*/
|
|
private predicate isJsonLine(File f) {
|
|
f.getNumberOfLines() = 1 and
|
|
count(Expr e | e.getFile() = f) > 100 and
|
|
forall(Expr e | e.getFile() = f |
|
|
e instanceof ObjectExpr or
|
|
e instanceof ArrayExpr or
|
|
e instanceof NumberLiteral or
|
|
e instanceof StringLiteral or
|
|
e instanceof BooleanLiteral
|
|
)
|
|
}
|
|
|
|
/**
|
|
* Holds if `f` is a generated HTML file.
|
|
*/
|
|
private predicate isGeneratedHtml(File f) {
|
|
exists(HTML::Element e |
|
|
e.getFile() = f and
|
|
e.getName() = "meta" and
|
|
e.getAttributeByName("name").getValue() = "generator"
|
|
)
|
|
or
|
|
exists(HTML::CommentNode comment |
|
|
comment.getText().regexpMatch("\\s*Generated by [\\w-]+ \\d+\\.\\d+\\.\\d+\\s*") and
|
|
comment.getFile() = f
|
|
)
|
|
or
|
|
20 < countStartingHtmlElements(f, _)
|
|
}
|
|
|
|
/**
|
|
* Gets an element that starts at line `l` in file `f`.
|
|
*/
|
|
private HTML::Element getAStartingElement(File f, int l) {
|
|
result.getFile() = f and result.getLocation().getStartLine() = l
|
|
}
|
|
|
|
/**
|
|
* Gets the number of HTML elements that start at line `l` in file `f`.
|
|
*/
|
|
private int countStartingHtmlElements(File f, int l) {
|
|
result = strictcount(getAStartingElement(f, l))
|
|
}
|
|
|
|
/**
|
|
* Holds if the base name of `f` is a number followed by a single extension.
|
|
*/
|
|
predicate isGeneratedFileName(File f) {
|
|
f.getStem().regexpMatch("[0-9]+") and
|
|
not f.getExtension() = "vue"
|
|
}
|
|
|
|
/**
|
|
* Holds if `tl` looks like it contains generated code.
|
|
*/
|
|
predicate isGenerated(TopLevel tl) {
|
|
tl.isMinified() or
|
|
isBundle(tl) or
|
|
tl instanceof GwtGeneratedTopLevel or
|
|
tl instanceof DartGeneratedTopLevel or
|
|
exists(GeneratedCodeMarkerComment gcmc | tl = gcmc.getTopLevel()) or
|
|
hasManyInvocations(tl) or
|
|
isData(tl.getFile()) or
|
|
isJsonLine(tl.getFile()) or
|
|
isGeneratedHtml(tl.getFile()) or
|
|
isGeneratedFileName(tl.getFile())
|
|
}
|
|
|
|
/**
|
|
* Holds if `file` look like it contains generated code.
|
|
*/
|
|
predicate isGeneratedCode(File file) {
|
|
isGenerated(file.getATopLevel()) or
|
|
isGeneratedHtml(file)
|
|
}
|