mirror of
https://github.com/github/codeql.git
synced 2025-12-24 04:36:35 +01:00
217 lines
7.4 KiB
Plaintext
217 lines
7.4 KiB
Plaintext
/**
|
|
* Provides classes and predicates for identifying C/C++ comments that look like code.
|
|
*/
|
|
|
|
import cpp
|
|
|
|
/**
 * Holds if the comment text `line` resembles a line of source code.
 *
 * A line qualifies when, after stripping, it either looks like a preprocessor
 * directive or ends in `{`, `}` or `;` (subject to an extra check for closing
 * braces), and it does not match any of the known benign patterns.
 */
bindingset[line]
private predicate looksLikeCode(string line) {
  exists(string stripped |
    // Remove leading and trailing whitespace, and every HTML entity:
    //  * named entities (e.g. `&gt;` and `&eacute;`)
    //  * decimal entities (e.g. `&#224;`)
    //  * hexadecimal entities (e.g. `&#x705F;`)
    stripped = line.regexpReplaceAll("(?i)(^\\s+|&#?[a-z0-9]{1,31};|\\s+$)", "")
  |
    // Benign patterns that are never reported:
    //  * lines starting with '>' (quoted text);
    //  * lines containing '@{', '@}', '\{' or '\}';
    //  * lines starting with `optional ` or `repeated ` and ending in ';', which in
    //    protobuf-generated code are normally a quoted piece of the `.proto`
    //    specification placed above the associated declaration;
    //  * emacs folding markers `{{{` and `}}}`;
    //  * a GUID in braces making up the whole line.
    not stripped
        .regexpMatch("(>.*|.*[\\\\@][{}].*|(optional|repeated) .*;|.*(\\{\\{\\{|\\}\\}\\}).*|\\{[-0-9a-zA-Z]+\\})") and
    (
      // The line looks like a preprocessor directive ...
      stripped
          .regexpMatch("#\\s*(include|define|undef|if|ifdef|ifndef|elif|else|endif|error|pragma)\\b.*")
      or
      // ... or it ends with '{', '}' or ';'.
      stripped.regexpMatch(".*[{};]") and
      (
        // A closing brace alone on the line is accepted as is.
        not stripped.regexpMatch(".*.\\}")
        or
        // If anything precedes the final closing brace, the line must also contain
        // ") {" (or a variation such as "const {"), suggesting a statement like
        // `if` or a function definition. Otherwise the braces are most likely
        // benign, for example a JSON sample or explanatory pseudocode.
        stripped.regexpMatch(".*(\\)|const|volatile|override|final|noexcept|&)\\s*\\{.*")
      )
    )
  )
}
|
|
|
|
/**
 * Holds if a `PreprocessorElse`, `PreprocessorElif` or `PreprocessorEndif`
 * directive starts on line `line` of file `f`. Code-like comments next to
 * such a directive are tolerated, as in:
 * ```
 * #ifdef MYMACRO
 * ...
 * #endif // #ifdef MYMACRO
 * ```
 */
private predicate preprocLine(File f, int line) {
  exists(PreprocessorDirective directive, Location loc |
    loc = directive.getLocation() and
    loc.getStartLine() = line and
    loc.getFile() = f
  |
    directive instanceof PreprocessorEndif
    or
    directive instanceof PreprocessorElif
    or
    directive instanceof PreprocessorElse
  )
}
|
|
|
|
/**
 * Gets the start line of the C++-style comment `c` in its file `f`.
 * Has no result when that line also holds a directive accepted by
 * `preprocLine`, so such comments are ignored.
 */
private int lineInFile(CppStyleComment c, File f) {
  result = c.getLocation().getStartLine() and
  f = c.getFile() and
  // Skip comments that share their line with a preprocessor directive.
  not preprocLine(f, result)
}
|
|
|
|
/**
 * Gets the "comment block ID" of comment line `line` in file `f`.
 *
 * The ID is the line number minus the line's (1-based) rank among all lines
 * of `f` that hold a C++-style comment. Moving from one comment line to the
 * comment line directly below raises both the line number and the rank by
 * one, so consecutive comment lines share an ID. A gap of lines without line
 * comments raises only the line number, which forces a new ID.
 */
pragma[nomagic]
private int commentLineBlockID(File f, int line) {
  exists(int ordinal |
    result = line - ordinal and
    line = rank[ordinal](lineInFile(_, f))
  )
}
|
|
|
|
/**
 * Gets the run ID of comment `c`, which is located on line `line` of file `f`.
 * The value carries no meaning of its own: it is equal for all comments in
 * one run of consecutive comment lines and differs between separate runs.
 */
private int commentId(CppStyleComment c, File f, int line) {
  line = lineInFile(c, f) and
  result = commentLineBlockID(f, line)
}
|
|
|
|
/**
 * A contiguous block of comments, represented by its first comment.
 *
 * A C++-style comment represents a block only when no C++-style comment sits
 * on the line directly above it; any other comment represents itself.
 */
class CommentBlock extends Comment {
  CommentBlock() {
    // Comments sharing a line with an accepted preprocessor directive are ignored.
    not exists(Location loc |
      loc = this.getLocation() and
      preprocLine(loc.getFile(), loc.getStartLine())
    ) and
    (
      not this instanceof CppStyleComment
      or
      // Only the first comment of a run of line comments stands for the block.
      not exists(File f | lineInFile(this, f) = lineInFile(_, f) + 1)
    )
  }

  /**
   * Gets the `i`th comment belonging to this block, where index 0 is the
   * block's own comment and later indices are line offsets within the same
   * run of C++-style comments.
   */
  Comment getComment(int i) {
    result = this and i = 0
    or
    exists(File f, int firstLine, int memberLine |
      commentId(result, f, memberLine) = commentId(this, f, firstLine) and
      i = memberLine - firstLine
    )
  }

  /**
   * Gets the comment of this block with the highest index.
   */
  Comment lastComment() {
    exists(int lastIndex |
      lastIndex = max(int i | exists(this.getComment(i))) and
      result = this.getComment(lastIndex)
    )
  }

  /**
   * Gets the `i`th line of text in this block.
   *
   * For a C++-style block this is the content of the `i`th comment with its
   * first two characters (the leading `//`) removed. For a C-style comment it
   * is the `i`th newline-separated piece of the text between the opening and
   * closing delimiters.
   */
  string getLine(int i) {
    result = this.getComment(i).getContents().suffix(2) and
    this instanceof CppStyleComment
    or
    exists(string body |
      body = this.(CStyleComment).getContents().regexpCapture("(?s)/\\*+(.*)\\*+/", 1)
    |
      result = body.splitAt("\n", i)
    )
  }

  /**
   * Gets the number of lines in this block that are not blank after trimming.
   * There is no result if the block has no such line.
   */
  int numLines() {
    result = strictcount(int i, string text | text = this.getLine(i) and not text.trim() = "")
  }

  /**
   * Gets the number of lines in this block that look like code.
   * There is no result if the block has no such line.
   */
  int numCodeLines() {
    result = strictcount(int i, string text | looksLikeCode(text) and text = this.getLine(i))
  }

  /**
   * Holds if this block is a C-style comment in which every line in the
   * checked range (from index 1 onwards) starts with a `*` after trimming.
   */
  predicate isDocumentation() {
    // A C-style comment whose continuation lines all begin with '*' is most
    // likely documentation, not code.
    forex(int lineIndex | lineIndex = [1 .. this.numLines() - 1] |
      this.getLine(lineIndex).trim().matches("*%")
    ) and
    this instanceof CStyleComment
  }

  /**
   * Holds if this block appears to be commented-out code, which requires all of:
   * 1. More than half of its lines look like code.
   * 2. It does not look like documentation (see `isDocumentation`).
   * 3. It is not in a header file without any declaration entries or top level declarations.
   */
  predicate isCommentedOutCode() {
    exists(float codeLines, float totalLines |
      codeLines = this.numCodeLines().(float) and
      totalLines = this.numLines().(float)
    |
      codeLines / totalLines > 0.5
    ) and
    not this.isDocumentation() and
    not this.getFile().(HeaderFile).noTopLevelCode()
  }

  /**
   * Holds if this element is at the specified location.
   * The location runs from column `startcolumn` of line `startline` to
   * column `endcolumn` of line `endline` in file `filepath`. The start is
   * taken from this comment and the end from the last comment of the block.
   * For more information, see
   * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
   */
  predicate hasLocationInfo(
    string filepath, int startline, int startcolumn, int endline, int endcolumn
  ) {
    exists(Location first, Location last |
      first = this.getLocation() and
      last = this.lastComment().getLocation()
    |
      last.hasLocationInfo(_, _, _, endline, endcolumn) and
      first.hasLocationInfo(filepath, startline, startcolumn, _, _)
    )
  }
}
|
|
|
|
/**
 * A comment block that the heuristics of `isCommentedOutCode` classify as
 * commented-out code.
 */
class CommentedOutCode extends CommentBlock {
  CommentedOutCode() {
    // Membership is decided entirely by the heuristic on the parent class.
    this.isCommentedOutCode()
  }
}
|