CPP: Improve accuracy further.

This commit is contained in:
Geoffrey White
2019-01-15 16:51:37 +00:00
parent 105f8dddd0
commit 49cfa43fd8

View File

@@ -6,30 +6,43 @@ import semmle.code.cpp.Preprocessor
* Holds if comment `c` indicates that it might be in an auto-generated file, for
* example because it contains the text "auto-generated by".
*/
private predicate autogeneratedComment(Comment c) {
private bindingset[comment] predicate autogeneratedComment(string comment) {
// ?s = include newlines in anything (`.`)
// ?i = ignore case
c.getContents().regexpMatch("(?si).*(" +
exists(string cond |
cond =
// generated by (not mid-sentence)
//"([^a-z\\s/\\*\\r\\n] generated by )|" +
"(^ generated by[^a-z])|" +
"(! generated by[^a-z])|" +
// auto-generated, automatically generated etc.
"(auto[\\w-]*\\s*?generated)|" +
// generated file
"(generated file)|" +
// generated by (at beginning of sentence)
"([^a-z\\s\\*\\r\\n][\\s\\*\\r\\n]*(generated by)[^a-z])|" +
// file [is/was/has been] generated
"(file( is| was| has been)? generated)|" +
// generated file
"(generated file)|" +
// file [is] generated
"(file( is)? generated)|" +
// changes made in this file will be lost
"(changes made in this file will be lost)|" +
// changes made in this file will be lost
"(changes made in this file will be lost)|" +
// do not edit/modify
"(do(n't|nt| not) (edit|modify))" +
// do not edit/modify
"(^ do(n't|nt| not) (hand-?)?(edit|modify))|" +
"(! do(n't|nt| not) (hand-?)?(edit|modify))" and
comment.regexpMatch("(?si).*(" +
// replace `generated` with a regexp that also catches things like
// `auto-generated`.
cond.replaceAll("generated", "(auto[\\w-]*[\\s/\\*\\r\\n]*)?generated")
").*")
// replace `!` with a regexp for end-of-sentence / separator characters.
.replaceAll("!", "[\\.\\?\\!\\-\\;\\,]")
// replace ` ` with a regexp for one or more whitespace characters
// (including newlines and `/*`).
.replaceAll(" ", "[\\s/\\*\\r\\n]+") +
").*"
)
)
}
/**