/**
 * Provides a class and predicate for recognizing files that are likely to have been generated
 * automatically.
 */

import semmle.code.cpp.Comments
import semmle.code.cpp.File
import semmle.code.cpp.Preprocessor

/**
 * Holds if the text `comment` suggests that it comes from an auto-generated file,
 * for example because it contains the text "auto-generated by".
 *
 * The check is a single case-insensitive regular expression match in which `.`
 * also matches newlines, so `comment` may span several lines.
 */
bindingset[comment]
private predicate autogeneratedComment(string comment) {
  exists(string phrases, string expanded |
    // Human-readable alternatives. The placeholders `generated`, `!` and ` ` are
    // rewritten into real regular expressions below.
    phrases =
      // "generated by", but not in the middle of a sentence
      "(^ generated by[^a-z])|" + "(! generated by[^a-z])|" +
        // "generated file"
        "(generated file)|" +
        // "file [is/was/has been] generated"
        "(file( is| was| has been)? generated)|" +
        // "changes made in this file will be lost"
        "(changes made in this file will be lost)|" +
        // "do not edit/modify", but not in the middle of a sentence
        "(^ do(n't|nt| not) (hand-?)?(edit|modify))|" +
        "(! do(n't|nt| not) (hand-?)?(edit|modify))|" +
        // "do not edit/modify" together with "generated", in either order
        "(do(n't|nt| not) (hand-?)?(edit|modify).*generated)|" +
        "(generated.*do(n't|nt| not) (hand-?)?(edit|modify))" and
    expanded =
      phrases
          // let `generated` also cover variants such as `auto-generated`
          .replaceAll("generated", "(auto[\\w-]*[\\s/\\*\\r\\n]*)?generated")
          // `!` stands for an end-of-sentence / separator character
          .replaceAll("!", "[\\.\\?\\!\\-\\;\\,]")
          // ` ` stands for one or more whitespace characters (including newlines
          // and the characters of `/*`)
          .replaceAll(" ", "[\\s/\\*\\r\\n]+") and
    // (?s): `.` also matches newlines; (?i): ignore case
    comment.regexpMatch("(?si).*(" + expanded + ").*")
  )
}

/**
 * Holds if the file contains `#line` pragmas that refer to a different file.
 * For example, in `parser.c` a pragma `#line 1 "parser.rl"`.
 * Such pragmas usually indicate that the file was automatically generated.
 */
predicate hasPragmaDifferentFile(File f) {
  exists(PreprocessorLine directive, string target |
    directive.getFile() = f and
    // Splitting the head on spaces: index 0 is the line number, index 1 is the
    // (quoted) file reference.
    target = directive.getHead().splitAt(" ", 1) and
    // The reference names some other file if it is none of the ways of spelling `f`.
    not (
      target = "\"" + f.getAbsolutePath() + "\"" or
      target = "\"" + f.getRelativePath() + "\"" or
      target = "\"" + f.getBaseName() + "\""
    )
  )
}

/**
 * Gets the smallest start line of a comment in file `f` that begins before line 5,
 * additionally capped at 5. This allows us to skip past any preprocessor logic or
 * similar code before the first comment.
 *
 * Has no result if no comment in `f` starts before line 5.
 */
private int fileFirstComment(File f) {
  result =
    min(Comment c, int line |
      c.getFile() = f and
      line = c.getLocation().getStartLine() and
      line < 5
    |
      line
    ).minimum(5)
}

/**
 * Gets the line where the initial comments of file `f` end. This is the line just
 * before the first bit of code that follows the line given by `fileFirstComment`,
 * bounded by the length of the file and by a hard limit of 1000 lines.
 */
private int fileHeaderLimit(File f) {
  exists(int firstComment |
    firstComment = fileFirstComment(f) and
    result =
      min(int line |
        // the line before a declaration, preprocessor directive or namespace
        // declaration that comes after the first comment
        exists(Location loc |
          loc.getFile() = f and
          line = loc.getStartLine() - 1 and
          line > firstComment
        |
          loc = any(DeclarationEntry de).getLocation() or
          loc = any(PreprocessorDirective pd).getLocation() or
          loc = any(NamespaceDeclarationEntry nde).getLocation()
        )
        or
        // the end of the file
        line = f.getMetrics().getNumberOfLines()
        or
        // Rarely, extremely long sequences of initial comments (and/or limitations
        // in the constraints above) have caused an overflow of the maximum string
        // length, so never look past 1000 lines.
        line = 1000
      )
  )
}

/**
 * Holds if the file is probably an autogenerated file.
 *
 * A file is probably autogenerated if either of the following heuristics
 * hold:
 *   1. There is a comment in the start of the file that matches
 *      'autogenerated', 'generated by', or a similar phrase.
 *   2. There is a `#line` directive referring to a different file.
 */
class AutogeneratedFile extends File {
  cached
  AutogeneratedFile() {
    // Heuristic 2: a `#line` directive points at another file.
    hasPragmaDifferentFile(this)
    or
    // Heuristic 1: the header comments, concatenated in order of their start line,
    // contain a tell-tale phrase.
    exists(string header |
      header =
        strictconcat(Comment c |
          c.getFile() = this and
          c.getLocation().getStartLine() <= fileHeaderLimit(this)
        |
          c.getContents() order by c.getLocation().getStartLine()
        ) and
      autogeneratedComment(header)
    )
  }
}