Files
codeql/cpp/ql/lib/semmle/code/cpp/AutogeneratedFile.qll
Geoffrey White 0aa1945f30 C++: Comments.
2022-04-19 10:04:15 +01:00

141 lines
4.7 KiB
Plaintext

/**
* Provides a class and predicate for recognizing files that are likely to have been generated
* automatically.
*/
import semmle.code.cpp.Comments
import semmle.code.cpp.File
import semmle.code.cpp.Preprocessor
/**
* Holds if the text `comment` indicates that it might come from an auto-generated
* file, for example because it contains the text "auto-generated by".
*
* `comment` must be bound by the caller (see the `bindingset` annotation). Within
* this file it is called with the concatenated contents of a file's leading comments.
*
* The phrases to look for are written as a simplified template (`cond`) that is
* expanded into the real regular expression by three textual substitutions, listed
* next to the `replaceAll` calls below.
*/
bindingset[comment]
private predicate autogeneratedComment(string comment) {
// ?s = include newlines in anything (`.`)
// ?i = ignore case
exists(string cond |
cond =
// Template syntax: `^` is the start of the text, `!` stands for a sentence
// separator and ` ` for a run of whitespace; both are expanded further down.
// generated by (not mid-sentence)
"(^ generated by[^a-z])|" + "(! generated by[^a-z])|" +
// generated file
"(generated file)|" +
// file [is/was/has been] generated
"(file( is| was| has been)? generated)|" +
// changes made in this file will be lost
"(changes made in this file will be lost)|" +
// do not edit/modify (not mid-sentence)
"(^ do(n't|nt| not) (hand-?)?(edit|modify))|" +
"(! do(n't|nt| not) (hand-?)?(edit|modify))|" +
// do not edit/modify + generated
"(do(n't|nt| not) (hand-?)?(edit|modify).*generated)|" +
"(generated.*do(n't|nt| not) (hand-?)?(edit|modify))" and
// The expanded alternatives are wrapped in `.*( ... ).*` so that a phrase may
// appear anywhere inside `comment`.
comment
.regexpMatch("(?si).*(" +
// replace `generated` with a regexp that also catches things like
// `auto-generated`.
cond.replaceAll("generated", "(auto[\\w-]*[\\s/\\*\\r\\n]*)?generated")
// replace `!` with a regexp for end-of-sentence / separator characters.
.replaceAll("!", "[\\.\\?\\!\\-\\;\\,]")
// replace ` ` with a regexp for one or more whitespace characters
// (including newlines and `/*`).
// This runs last; the replacement strings inserted above contain no literal
// space, so it only rewrites spaces that were written in `cond`.
.replaceAll(" ", "[\\s/\\*\\r\\n]+") + ").*")
)
}
/**
 * Holds if the file contains `#line` pragmas that refer to a different file.
 * For example, in `parser.c` a pragma `#line 1 "parser.rl"`.
 * Such pragmas usually indicate that the file was automatically generated.
 *
 * A `#line` directive without a file reference (for example `#line 42`) is
 * ignored. The referenced name is compared, including its surrounding quotes,
 * against the absolute path, the relative path and the base name of `f`.
 */
predicate hasPragmaDifferentFile(File f) {
  exists(PreprocessorLine pl, string s |
    pl.getFile() = f and
    // The head is expected to look like `<line> "<file>"`. Capture the second
    // token: either a complete quoted string (which may itself contain spaces)
    // or, failing that, a run of non-whitespace characters. Splitting on a
    // single space would truncate quoted names such as "my file.c" and would
    // yield an empty token when the separator is more than one space.
    s = pl.getHead().regexpCapture("\\s*\\S+\\s+(\"[^\"]*\"|\\S+).*", 1) and
    not "\"" + f.getAbsolutePath() + "\"" = s and
    not "\"" + f.getRelativePath() + "\"" = s and
    not "\"" + f.getBaseName() + "\"" = s
  )
}
/**
 * Gets the smallest start line of any comment in file `f` that begins before
 * line 5. Starting the header search from this line allows us to skip past any
 * preprocessor logic or similar code that precedes the first comment.
 *
 * NOTE(review): only start lines below 5 are considered, so the trailing
 * `.minimum(5)` cannot change the value, and the predicate has no result for a
 * file in which no comment starts on lines 1-4 - confirm that this is intended.
 */
private int fileFirstComment(File f) {
  result =
    min(Comment c, int startLine |
      c.getFile() = f and
      startLine = c.getLocation().getStartLine() and
      startLine < 5
    |
      startLine
    ).minimum(5)
}
/**
 * Gets the line on which the initial comments of file `f` are taken to end.
 *
 * This is the smallest of:
 *  - the line just before the first declaration entry, preprocessor directive
 *    or namespace declaration entry that lies more than one line below
 *    `fileFirstComment(f)`;
 *  - the number of lines in the file;
 *  - 1000, a hard cap.
 */
private int fileHeaderLimit(File f) {
  exists(int firstComment |
    firstComment = fileFirstComment(f) and
    result =
      min(int candidate |
        // code ending the initial comments: the line preceding any declaration,
        // preprocessor directive or namespace declaration after the first comment
        exists(Location loc |
          (
            loc = any(DeclarationEntry de).getLocation()
            or
            loc = any(PreprocessorDirective pd).getLocation()
            or
            loc = any(NamespaceDeclarationEntry nde).getLocation()
          ) and
          loc.getFile() = f and
          candidate = loc.getStartLine() - 1 and
          candidate > firstComment
        )
        or
        // end of the file
        candidate = f.getMetrics().getNumberOfLines()
        or
        // rarely, extremely long sequences of initial comments (and/or
        // limitations in the constraints above) have caused an overflow of the
        // maximum string length, so never look past 1000 lines.
        candidate = 1000
      )
  )
}
/**
 * A file that is probably autogenerated.
 *
 * A file is considered probably autogenerated if at least one of these
 * heuristics holds:
 *  1. It contains a `#line` directive referring to a different file.
 *  2. The comments starting at or before the header limit of the file, joined
 *     in order of their start line, match 'autogenerated', 'generated by', or a
 *     similar phrase.
 */
class AutogeneratedFile extends File {
  cached
  AutogeneratedFile() {
    hasPragmaDifferentFile(this)
    or
    exists(string header |
      // `strictconcat` has no result when there are no qualifying comments, so
      // a file without header comments cannot satisfy this branch.
      header =
        strictconcat(Comment c |
          c.getFile() = this and
          c.getLocation().getStartLine() <= fileHeaderLimit(this)
        |
          c.getContents() order by c.getLocation().getStartLine()
        ) and
      autogeneratedComment(header)
    )
  }
}