mirror of
https://github.com/github/codeql.git
synced 2025-12-24 12:46:34 +01:00
Will need subsequent PRs to fix up test failures (due to deprecated methods moving around), but other than that everything should be straightforward.
340 lines
9.4 KiB
Plaintext
340 lines
9.4 KiB
Plaintext
import python
|
|
|
|
/**
 * Holds if the text of comment `c` has the shape of a `def` header:
 * `#`, optionally a run of non-whitespace followed by whitespace, then `def`,
 * a parenthesised section, a colon, and optionally a trailing `#` comment.
 */
private predicate def_statement(Comment c) {
  exists(string text | text = c.getText() |
    text.regexpMatch("#(\\S*\\s+)?def\\s.*\\(.*\\).*:\\s*(#.*)?")
  )
}
|
|
|
|
/**
 * Holds if the text of comment `c` has the shape of an `if`/`elif` header
 * (keyword, condition, colon) or of a bare `else:`, each optionally followed
 * by a trailing `#` comment.
 */
private predicate if_statement(Comment c) {
  exists(string text | text = c.getText() |
    // `else:` takes no condition, so it needs its own pattern.
    text.regexpMatch("#(\\S*\\s+)?else:\\s*(#.*)?")
    or
    text.regexpMatch("#(\\S*\\s+)?(el)?if\\s.*:\\s*(#.*)?")
  )
}
|
|
|
|
/**
 * Holds if the text of comment `c` has the shape of a `for ... in ...:` header,
 * optionally followed by a trailing `#` comment.
 */
private predicate for_statement(Comment c) {
  exists(string text | text = c.getText() |
    text.regexpMatch("#(\\S*\\s+)?for\\s.*\\sin\\s.*:\\s*(#.*)?")
  )
}
|
|
|
|
/**
 * Holds if the text of comment `c` has the shape of a `with ...:` header,
 * optionally followed by a trailing `#` comment.
 */
private predicate with_statement(Comment c) {
  exists(string text | text = c.getText() |
    text.regexpMatch("#(\\S*\\s+)?with\\s+.*:\\s*(#.*)?")
  )
}
|
|
|
|
/**
 * Holds if the text of comment `c` has the shape of one of the headers of a
 * `try` statement: `try:`, `finally:`, or an `except` clause with an optional
 * single-word exception name and an optional `as name` part. Each form may be
 * followed by a trailing `#` comment.
 */
private predicate try_statement(Comment c) {
  exists(string text | text = c.getText() |
    text.regexpMatch("#(\\S*\\s+)?finally:\\s*(#.*)?")
    or
    text.regexpMatch("#(\\S*\\s+)?except\\s*(\\w+\\s*(\\sas\\s+\\w+\\s*)?)?:\\s*(#.*)?")
    or
    text.regexpMatch("#(\\S*\\s+)?try:\\s*(#.*)?")
  )
}
|
|
|
|
/**
 * Gets an indentation measure for comment `c`: the start column of the comment
 * plus the offset, within its text, of a character that is neither whitespace
 * nor `#`.
 *
 * Only defined for comments satisfying `maybe_code`.
 *
 * NOTE(review): the match is selected with occurrence index `1`; confirm
 * against the `regexpFind` documentation whether that is the first or the
 * second such character. Either way the value is computed the same way for
 * every comment, so comparisons between comments remain meaningful.
 */
private int indentation(Comment c) {
  maybe_code(c) and
  exists(int offset | exists(c.getText().regexpFind("[^\\s#]", 1, offset)) |
    result = c.getLocation().getStartColumn() + offset
  )
}
|
|
|
|
/**
 * Holds if the text of comment `c` has the shape of a `class Name ...:` header,
 * optionally followed by a trailing `#` comment.
 */
private predicate class_statement(Comment c) {
  exists(string text | text = c.getText() |
    text.regexpMatch("#(\\S*\\s+)?class\\s+\\w+.*:\\s*(#.*)?")
  )
}
|
|
|
|
/** Holds if the text of comment `c` contains a triple quote, either double- or single-quoted. */
private predicate triple_quote(Comment c) {
  exists(string text | text = c.getText() | text.regexpMatch("#.*(\"\"\"|''').*"))
}
|
|
|
|
/**
 * Holds if `start` is a comment containing a triple quote and `end` is either
 * `start` itself or a comment reached from it by repeatedly stepping to the
 * next non-empty comment, where none of the comments stepped to contains a
 * triple quote.
 */
private predicate triple_quoted_string_part(Comment start, Comment end) {
  // Base case: the opening comment on its own.
  end = start and triple_quote(start)
  or
  // Step: extend by one comment, stopping before the next triple quote.
  exists(Comment mid | triple_quoted_string_part(start, mid) |
    not triple_quote(end) and
    end = non_empty_following(mid)
  )
}
|
|
|
|
/**
 * Holds if comment `c` is a candidate for being commented-out code: either it
 * is itself a commented-out comment, or none of the exclusions
 * (`non_code`, `filler`, `endline_comment`, `file_or_url`) applies to it.
 */
private predicate maybe_code(Comment c) {
  commented_out_comment(c)
  or
  not (
    non_code(c) or
    filler(c) or
    endline_comment(c) or
    file_or_url(c)
  )
}
|
|
|
|
/**
 * Holds if the text of comment `c` is one or more `#`, then whitespace, then
 * another `#` followed by anything, i.e. a comment that itself looks commented out.
 */
private predicate commented_out_comment(Comment c) {
  exists(string text | text = c.getText() | text.regexpMatch("#+\\s+#.*"))
}
|
|
|
|
/**
 * Gets the indentation of comment `start`, provided that `start` looks like a
 * `def` or `class` header and is not ruled out by `non_code`.
 */
private int scope_start(Comment start) {
  not non_code(start) and
  result = indentation(start) and
  (
    class_statement(start)
    or
    def_statement(start)
  )
}
|
|
|
|
/**
 * Gets the indentation of comment `start`, provided that `start` looks like the
 * header of an `if`, `for`, `try` or `with` statement (including their
 * `elif`/`else`/`except`/`finally` clauses) and is not ruled out by `non_code`.
 */
private int block_start(Comment start) {
  not non_code(start) and
  result = indentation(start) and
  (
    with_statement(start)
    or
    try_statement(start)
    or
    for_statement(start)
    or
    if_statement(start)
  )
}
|
|
|
|
/**
 * Gets the indentation of the scope header `start`, where `end` lies in what
 * looks like the docstring directly after that header.
 *
 * `end` is either the first non-empty comment after `start`, which must contain
 * a triple quote, or a later comment reached by stepping through non-empty
 * comments that contain no triple quote.
 */
private int scope_doc_string_part(Comment start, Comment end) {
  // Base case: the comment right after the header opens the docstring.
  end = non_empty_following(start) and
  triple_quote(end) and
  result = scope_start(start)
  or
  // Step: continue through the docstring until the next triple quote.
  exists(Comment mid | result = scope_doc_string_part(start, mid) |
    end = non_empty_following(mid) and
    not triple_quote(end)
  )
}
|
|
|
|
/**
 * Gets the indentation of the scope header `start`, where `end` belongs to the
 * commented-out scope beginning at `start`.
 *
 * `end` is one of:
 *  - `start` itself;
 *  - the triple-quote comment that follows a docstring part of the scope;
 *  - a non-empty comment following an earlier part of the scope and indented
 *    more deeply than the header.
 */
private int scope_part(Comment start, Comment end) {
  end = start and result = scope_start(start)
  or
  exists(Comment prev | end = non_empty_following(prev) |
    // Closing line of the docstring.
    triple_quote(end) and result = scope_doc_string_part(start, prev)
    or
    // Body line: must be indented further than the header.
    result = scope_part(start, prev) and indentation(end) > result
  )
}
|
|
|
|
/**
 * Gets the indentation of the block header `start`, where `end` is a later
 * comment belonging to the commented-out block beginning at `start`.
 *
 * The first comment after the header must be indented more deeply than the
 * header. Subsequent comments either are indented more deeply than the header
 * or are themselves block headers at the same indentation as `start`.
 */
private int block_part(Comment start, Comment end) {
  exists(Comment prev | end = non_empty_following(prev) |
    // Base case: first line of the block body.
    prev = start and
    result = block_start(start) and
    indentation(end) > result
    or
    // Step: another body line, or a sibling header at the same indentation.
    result = block_part(start, prev) and
    (
      indentation(end) > result
      or
      result = block_start(end)
    )
  )
}
|
|
|
|
/**
 * Holds if `end` is part of the commented-out scope starting at `start`, either
 * as part of the scope's docstring or as part of the scope proper.
 */
private predicate commented_out_scope_part(Comment start, Comment end) {
  exists(int indent |
    indent = scope_part(start, end)
    or
    indent = scope_doc_string_part(start, end)
  )
}
|
|
|
|
/**
 * Holds if comment `c` takes part in a commented-out scope or block, either as
 * its starting comment or as one of the comments belonging to it.
 */
private predicate commented_out_code(Comment c) {
  exists(Comment other |
    commented_out_scope_part(c, other)
    or
    commented_out_scope_part(other, c)
    or
    exists(block_part(c, other))
    or
    exists(block_part(other, c))
  )
}
|
|
|
|
/**
 * Holds if `start` begins a maximal run of commented-out code comments and
 * `end` is a comment in that run.
 *
 * `start` is commented-out code that is not the non-empty follower of another
 * commented-out code comment. `end` is reached from `start` through
 * consecutive non-empty comments that are all commented-out code.
 */
private predicate commented_out_code_part(Comment start, Comment end) {
  // Base case: a run starts where no preceding comment is commented-out code.
  end = start and
  commented_out_code(start) and
  forall(Comment prev | non_empty_following(prev) = start | not commented_out_code(prev))
  or
  // Step: extend the run by the next non-empty comment if it is code too.
  exists(Comment mid | commented_out_code_part(start, mid) |
    end = non_empty_following(mid) and
    commented_out_code(end)
  )
}
|
|
|
|
/**
 * Holds if the comments from `start` to `end` form a complete block of
 * commented-out code: a run starting at `start` whose last comment is `end`,
 * i.e. no commented-out code comment follows `end`.
 */
private predicate commented_out_code_block(Comment start, Comment end) {
  commented_out_code_part(start, end) and
  /* A block must be at least 2 comments long. */
  end != start and
  not exists(Comment next | next = non_empty_following(end) | commented_out_code(next))
}
|
|
|
|
/** A single-line comment that appears to be commented-out code. */
class CommentedOutCodeLine extends Comment {
  CommentedOutCodeLine() { any(CommentedOutCodeBlock b).contains(this) }

  /**
   * Holds if this commented-out code line is likely to be example code embedded
   * in a larger comment, that is, if a commented-out code block containing it is.
   */
  predicate maybeExampleCode() {
    exists(CommentedOutCodeBlock block | block.maybeExampleCode() | block.contains(this))
  }
}
|
|
|
|
/**
 * A block of comments that appears to be commented-out code.
 *
 * The block is represented by its first comment.
 */
class CommentedOutCodeBlock extends @py_comment {
  CommentedOutCodeBlock() { exists(Comment last | commented_out_code_block(this, last)) }

  /** Gets a textual representation of this element. */
  string toString() { result = "Commented out code" }

  /**
   * Holds if this commented-out code block contains the comment `c`.
   *
   * The first comment is contained; each further comment is the next non-empty
   * comment after a contained one that is not the last comment of the block.
   */
  predicate contains(Comment c) {
    c = this
    or
    exists(Comment prev |
      this.contains(prev) and
      // Stop stepping once the final comment of the block has been reached.
      not commented_out_code_block(this, prev)
    |
      c = non_empty_following(prev)
    )
  }

  /** Gets the length of this comment block, measured in comments. */
  int length() { result = count(Comment member | this.contains(member)) }

  /**
   * Holds if this element is at the specified location.
   * The location spans column `startcolumn` of line `startline` to
   * column `endcolumn` of line `endline` in file `filepath`.
   * For more information, see
   * [Providing locations in CodeQL queries](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
   */
  predicate hasLocationInfo(
    string filepath, int startline, int startcolumn, int endline, int endcolumn
  ) {
    // The end position comes from the last comment of the block.
    exists(Comment last | commented_out_code_block(this, last) |
      last.getLocation().hasLocationInfo(_, _, _, endline, endcolumn)
    ) and
    // The file and start position come from the first comment.
    this.(Comment).getLocation().hasLocationInfo(filepath, startline, startcolumn, _, _)
  }

  /**
   * Holds if this commented-out code block is likely to be example code embedded
   * in a larger comment: some enclosing `CommentBlock` is more than twice as
   * long as all the commented-out code blocks it contains taken together.
   */
  predicate maybeExampleCode() {
    exists(CommentBlock block, int all_code |
      block.contains(this.(Comment)) and
      all_code =
        sum(CommentedOutCodeBlock code | block.contains(code.(Comment)) | code.length())
    |
      /* This ratio may need fine tuning */
      block.length() > all_code * 2
    )
  }
}
|
|
|
|
/**
 * Holds if the text of comment `c` contains the consecutive words `s1` and
 * `s2` with only whitespace between them, and no single or double quote
 * character occurs in the text before `s1`.
 */
private predicate word_pair(Comment c, string s1, string s2) {
  exists(string text, int index, int start1, int start2 |
    text = c.getText() and
    s1 = text.regexpFind("\\w+", index, start1) and
    // `s2` is the very next word match after `s1`.
    s2 = text.regexpFind("\\w+", index + 1, start2)
  |
    text.substring(start1 + s1.length(), start2).regexpMatch("\\s+") and
    text.prefix(start1).regexpMatch("[^'\"]*")
  )
}
|
|
|
|
/**
 * Holds if the comment `c` cannot be code.
 *
 * That is the case when `c` looks like part of a doctest (its text contains
 * `>>>` or `...`), or when it contains a word pair "word1 word2" where word2
 * is not an operator keyword and either:
 *  1. word1 is not a keyword:
 *     "x is" could be code, "return y" could be code, but "isnt code" cannot be code.
 * or
 *  2. word1 is a keyword requiring a colon and there is no colon:
 *     "with spam" can only be code if the comment contains a colon.
 *
 * The word-pair rule does not apply to comments of the form
 * `# (maybe code) # some comment`.
 */
private predicate non_code(Comment c) {
  exists(string text | text = c.getText() |
    /* Don't count doctests as code */
    text.matches(["%>>>%", "%...%"])
    or
    /* Except comments of the form: # (maybe code) # some comment */
    not text.regexpMatch("#\\S+\\s.*#.*") and
    exists(string word1, string word2 |
      word_pair(c, word1, word2) and
      not word2 = operator_keyword() and
      (
        not word1 = a_keyword()
        or
        word1 = keyword_requiring_colon() and not text.matches("%:%")
      )
    )
  )
}
|
|
|
|
/**
 * Holds if comment `c` is pure filler: one or more `#` followed only by
 * whitespace and the decoration characters `*`, `#`, `-`, `_`, `=` and `+`
 * (for example `#-----` or `# ====== #`).
 *
 * The hyphen is placed last in the character class so that it is a literal
 * `-`. Written as `#-_` it forms a range that also admits digits, uppercase
 * letters and most punctuation, which would classify comments such as
 * `# MAX = 10` as filler.
 */
private predicate filler(Comment c) { c.getText().regexpMatch("#+[\\s*#_=+-]*") }
|
|
|
|
/**
 * Gets the first non-empty comment following `c`, skipping any run of empty
 * comments in between. Only defined when `c` itself is not empty.
 */
private Comment non_empty_following(Comment c) {
  exists(Comment prev | result = prev.getFollowing() and not empty(result) |
    // Reached through one or more empty comments after `c`.
    prev = empty_following(c)
    or
    // Directly after `c`.
    prev = c and not empty(c)
  )
}
|
|
|
|
/**
 * Helper for `non_empty_following()`.
 *
 * Gets an empty comment in the unbroken run of empty comments that directly
 * follows the non-empty comment `c`.
 */
private Comment empty_following(Comment c) {
  empty(result) and
  (
    // First empty comment right after `c`.
    not empty(c) and result = c.getFollowing()
    or
    // A further empty comment following one already in the run.
    result = empty_following(c).getFollowing()
  )
}
|
|
|
|
/** Holds if the text of comment `c` consists only of `#` characters optionally followed by whitespace. */
private predicate empty(Comment c) {
  exists(string text | text = c.getText() | text.regexpMatch("#+\\s*"))
}
|
|
|
|
/**
 * Holds if comment `c` shares its line with code: some expression starts on
 * the same line of the same file as `c` does.
 */
private predicate endline_comment(Comment c) {
  exists(string path, int row | c.getLocation().hasLocationInfo(path, row, _, _, _) |
    any(Expr e).getLocation().hasLocationInfo(path, row, _, _, _)
  )
}
|
|
|
|
/**
 * Holds if comment `c` appears to mention a URL or a file path, in which case
 * it is not treated as code. Only text with no quote character before the
 * URL or path is considered.
 *
 * Three shapes are recognised:
 *  - an `http://`, `https://` or `file://` URL;
 *  - a `/`-separated path ending in a file extension;
 *  - a `\`-separated path ending in a file extension.
 */
private predicate file_or_url(Comment c) {
  exists(string text | text = c.getText() |
    text.regexpMatch("#[^'\"]+(https?|file)://.*")
    or
    text.regexpMatch("#[^'\"]+(/[a-zA-Z]\\w*)+\\.[a-zA-Z]+.*")
    or
    // The backslash must be escaped twice (once for the QL string, once for the
    // regex); with a single level of escaping the pattern matches the literal
    // text `[a-zA-Z]` instead of a path separator followed by a letter.
    text.regexpMatch("#[^'\"]+(\\\\[a-zA-Z]\\w*)+\\.[a-zA-Z]+.*")
  )
}
|
|
|
|
/**
 * Gets a keyword that can appear between two operands or names
 * (`import`, `and`, `is`, `or`, `in`, `not`, `as`).
 */
private string operator_keyword() { result = ["import", "and", "is", "or", "in", "not", "as"] }
|
|
|
|
/**
 * Gets a keyword that introduces a statement header which must contain a colon
 * (`try`, `while`, `elif`, `else`, `if`, `except`, `def`, `class`).
 */
private string keyword_requiring_colon() {
  result = ["try", "while", "elif", "else", "if", "except", "def", "class"]
}
|
|
|
|
/**
 * Gets a keyword that is neither an operator keyword nor a keyword requiring a
 * colon, as classified by the other keyword predicates in this file.
 */
private string other_keyword() {
  result =
    [
      "del", "lambda", "from", "global", "with", "assert", "yield", "finally", "print", "exec",
      "raise", "return", "for"
    ]
}
|
|
|
|
/**
 * Gets any keyword known to this library: the union of `operator_keyword()`,
 * `keyword_requiring_colon()` and `other_keyword()`.
 */
private string a_keyword() {
  result = [operator_keyword(), keyword_requiring_colon(), other_keyword()]
}
|