mirror of
https://github.com/github/codeql.git
synced 2026-01-27 21:33:03 +01:00
282 lines
11 KiB
Plaintext
282 lines
11 KiB
Plaintext
/** Provides classes for detecting duplicate or similar code. */
|
|
|
|
import python
|
|
|
|
/**
 * Gets the relative path of `file`, normalized so that every backslash is
 * replaced by a forward slash.
 */
private string relativePath(File file) {
  exists(string raw | raw = file.getRelativePath() | result = raw.replaceAll("\\", "/"))
}
|
|
|
|
/**
 * Holds if the `index`-th token of block `copy` is in file `file`, spanning
 * column `sc` of line `sl` to column `ec` of line `el`.
 *
 * The positional order is start line, start column, end line, end column, which is
 * the order in which the values are handed to the `tokens` relation.
 *
 * For more information, see
 * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
 */
pragma[noinline, nomagic]
private predicate tokenLocation(File file, int sl, int sc, int el, int ec, Copy copy, int index) {
  // The block must belong to `file` ...
  file = copy.sourceFile() and
  // ... and its `index`-th token must have exactly these coordinates.
  tokens(copy, index, sl, sc, el, ec)
}
|
|
|
|
/**
 * A token block used for detection of duplicate and similar code.
 *
 * Token coordinates are read from the `tokens` relation; the owning file and the
 * equivalence class are read from the `duplicateCode` and `similarCode` relations.
 */
class Copy extends @duplication_or_similarity {
  /** Gets the largest token index recorded for this block. */
  private int lastToken() { result = max(int i | tokens(this, i, _, _, _, _) | i) }

  /** Gets the index of the token in this block starting at the location `loc`, if any. */
  int tokenStartingAt(Location loc) {
    tokenLocation(loc.getFile(), loc.getStartLine(), loc.getStartColumn(), _, _, this, result)
  }

  /** Gets the index of the token in this block ending at the location `loc`, if any. */
  int tokenEndingAt(Location loc) {
    tokenLocation(loc.getFile(), _, _, loc.getEndLine(), loc.getEndColumn(), this, result)
  }

  /** Gets the line on which the first token in this block starts. */
  int sourceStartLine() { tokens(this, 0, result, _, _, _) }

  /** Gets the column on which the first token in this block starts. */
  int sourceStartColumn() { tokens(this, 0, _, result, _, _) }

  /** Gets the line on which the last token in this block ends. */
  int sourceEndLine() { tokens(this, this.lastToken(), _, _, result, _) }

  /** Gets the column on which the last token in this block ends. */
  int sourceEndColumn() { tokens(this, this.lastToken(), _, _, _, result) }

  /**
   * Gets the number of lines spanned by this block, counted from the line on which
   * its first token starts up to and including the line on which its last token ends.
   */
  int sourceLines() { result = this.sourceEndLine() + 1 - this.sourceStartLine() }

  /** Gets an opaque identifier for the equivalence class of this block. */
  int getEquivalenceClass() { duplicateCode(this, _, result) or similarCode(this, _, result) }

  /**
   * Gets the source file in which this block appears.
   *
   * The file is found by comparing the path recorded for this block with the relative
   * path of each file, after replacing backslashes by forward slashes on both sides.
   */
  File sourceFile() {
    exists(string name | duplicateCode(this, name, _) or similarCode(this, name, _) |
      name.replaceAll("\\", "/") = relativePath(result)
    )
  }

  /**
   * Holds if this element is at the specified location.
   * The location spans column `startcolumn` of line `startline` to
   * column `endcolumn` of line `endline` in file `filepath`.
   * For more information, see
   * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
   */
  predicate hasLocationInfo(
    string filepath, int startline, int startcolumn, int endline, int endcolumn
  ) {
    // NOTE(review): assumes `File.getName()` yields the path form expected for `filepath` - confirm.
    this.sourceFile().getName() = filepath and
    startline = this.sourceStartLine() and
    startcolumn = this.sourceStartColumn() and
    endline = this.sourceEndLine() and
    endcolumn = this.sourceEndColumn()
  }

  /**
   * Gets a textual representation of this element.
   *
   * This base implementation has no result.
   */
  string toString() { none() }

  /**
   * Gets a block that extends this one, that is, its first token is also
   * covered by this block, but they are not the same block.
   */
  Copy extendingBlock() {
    exists(File file, int sl, int sc, int el, int ec |
      // Some token of this block ...
      tokenLocation(file, sl, sc, el, ec, this, _) and
      // ... has the same file and coordinates as token 0 of the result.
      tokenLocation(file, sl, sc, el, ec, result, 0)
    ) and
    this != result
  }
}
|
|
|
|
/**
 * Holds if there is a sequence of `SimilarBlock`s `start1, ..., ext1` and another sequence
 * `start2, ..., ext2` such that each block extends the previous one and corresponding blocks
 * have the same equivalence class, with `start` being the equivalence class of `start1` and
 * `start2`, and `ext` the equivalence class of `ext1` and `ext2`.
 *
 * `start1` and `start2` are always distinct blocks.
 */
predicate similar_extension(
  SimilarBlock start1, SimilarBlock start2, SimilarBlock ext1, SimilarBlock ext2, int start, int ext
) {
  start1 != start2 and
  start = start1.getEquivalenceClass() and
  start = start2.getEquivalenceClass() and
  ext = ext1.getEquivalenceClass() and
  ext = ext2.getEquivalenceClass() and
  (
    // Base case: both sequences consist of a single block.
    start1 = ext1 and start2 = ext2
    or
    // Recursive case: step both sequences to an extending block and continue from there.
    exists(SimilarBlock next1, SimilarBlock next2 |
      next1 = start1.extendingBlock() and
      next2 = start2.extendingBlock() and
      similar_extension(next1, next2, ext1, ext2, _, ext)
    )
  )
}
|
|
|
|
/**
 * Holds if there is a sequence of `DuplicateBlock`s `start1, ..., ext1` and another sequence
 * `start2, ..., ext2` such that each block extends the previous one and corresponding blocks
 * have the same equivalence class, with `start` being the equivalence class of `start1` and
 * `start2`, and `ext` the equivalence class of `ext1` and `ext2`.
 *
 * `start1` and `start2` are always distinct blocks.
 */
predicate duplicate_extension(
  DuplicateBlock start1, DuplicateBlock start2, DuplicateBlock ext1, DuplicateBlock ext2,
  int start, int ext
) {
  start1 != start2 and
  start = start1.getEquivalenceClass() and
  start = start2.getEquivalenceClass() and
  ext = ext1.getEquivalenceClass() and
  ext = ext2.getEquivalenceClass() and
  (
    // Base case: both sequences consist of a single block.
    start1 = ext1 and start2 = ext2
    or
    // Recursive case: step both sequences to an extending block and continue from there.
    exists(DuplicateBlock next1, DuplicateBlock next2 |
      next1 = start1.extendingBlock() and
      next2 = start2.extendingBlock() and
      duplicate_extension(next1, next2, ext1, ext2, _, ext)
    )
  )
}
|
|
|
|
/** A block of duplicated code. */
class DuplicateBlock extends Copy, @duplication {
  /** Gets a message stating how many lines this duplicated block spans. */
  override string toString() {
    exists(int lines | lines = this.sourceLines() |
      result = "Duplicate code: " + lines + " duplicated lines."
    )
  }
}
|
|
|
|
/** A block of similar code. */
class SimilarBlock extends Copy, @similarity {
  /** Gets a message stating how many lines this similar block spans. */
  override string toString() {
    exists(int lines | lines = this.sourceLines() |
      result = "Similar code: " + lines + " almost duplicated lines."
    )
  }
}
|
|
|
|
/**
 * Holds if `stmt1` and `stmt2` are distinct duplicate statements contained in scopes
 * `scope1` and `scope2`, respectively, where `scope1` and `scope2` are not the same.
 *
 * Two statements count as duplicates when both are covered by duplicate blocks with the
 * same start and end equivalence classes and the same first and last token indices.
 */
predicate duplicateStatement(Scope scope1, Scope scope2, Stmt stmt1, Stmt stmt2) {
  scope1 != scope2 and
  stmt1 != stmt2 and
  scope1.contains(stmt1) and
  scope2.contains(stmt2) and
  exists(int startClass, int endClass, int firstToken, int lastToken |
    duplicateCoversStatement(startClass, endClass, firstToken, lastToken, stmt1) and
    duplicateCoversStatement(startClass, endClass, firstToken, lastToken, stmt2)
  )
}
|
|
|
|
/**
 * Holds if statement `stmt` is covered by a sequence of `DuplicateBlock`s, where `first`
 * is the index of the token in the first block that starts at the beginning of `stmt`,
 * while `last` is the index of the token in the last block that ends at the end of the
 * last statement of `stmt`, and `equivstart` and `equivend` are the equivalence classes
 * of the first and the last block, respectively.
 */
private predicate duplicateCoversStatement(
  int equivstart, int equivend, int first, int last, Stmt stmt
) {
  exists(DuplicateBlock head, DuplicateBlock tail |
    // `head` .. `tail` form a chain of extending duplicate blocks.
    duplicate_extension(head, _, tail, _, equivstart, equivend) and
    equivstart = head.getEquivalenceClass() and
    equivend = tail.getEquivalenceClass() and
    // The chain starts where `stmt` starts and ends where its last statement ends.
    first = head.tokenStartingAt(stmt.getLocation()) and
    last = tail.tokenEndingAt(stmt.getLastStatement().getLocation())
  )
}
|
|
|
|
/**
 * Holds if scope `scope1` contains `total` statements, of which `duplicate` are
 * duplicates of some statement in the different scope `scope2`.
 *
 * Both counts are strict, so the predicate does not hold when either would be zero.
 */
predicate duplicateStatements(Scope scope1, Scope scope2, int duplicate, int total) {
  total = strictcount(Stmt contained | scope1.contains(contained)) and
  duplicate = strictcount(Stmt dup | duplicateStatement(scope1, scope2, dup, _))
}
|
|
|
|
/**
 * Holds if at least 80% of the statements in scope `s` are duplicated in scope `other`.
 *
 * `percent` is the share of duplicated statements, and `message` is an alert message
 * containing a `$@` placeholder.
 */
predicate duplicateScopes(Scope s, Scope other, float percent, string message) {
  exists(int total, int duplicate | duplicateStatements(s, other, duplicate, total) |
    percent = 100.0 * duplicate / total and
    percent >= 80.0 and
    (
      // Every statement is duplicated.
      duplicate = total and
      message = "All " + total + " statements in " + s.getName() + " are identical in $@."
      or
      // Only some of the statements are duplicated.
      not duplicate = total and
      message =
        duplicate + " out of " + total + " statements in " + s.getName() + " are duplicated in $@."
    )
  )
}
|
|
|
|
/**
 * Holds if `stmt1` and `stmt2` are distinct similar statements contained in scopes
 * `scope1` and `scope2`, respectively, where `scope1` and `scope2` are not the same.
 *
 * Two statements count as similar when both are covered by similar blocks with the
 * same start and end equivalence classes and the same first and last token indices.
 */
private predicate similarStatement(Scope scope1, Scope scope2, Stmt stmt1, Stmt stmt2) {
  scope1 != scope2 and
  stmt1 != stmt2 and
  scope1.contains(stmt1) and
  scope2.contains(stmt2) and
  exists(int startClass, int endClass, int firstToken, int lastToken |
    similarCoversStatement(startClass, endClass, firstToken, lastToken, stmt1) and
    similarCoversStatement(startClass, endClass, firstToken, lastToken, stmt2)
  )
}
|
|
|
|
/**
 * Holds if statement `stmt` is covered by a sequence of `SimilarBlock`s, where `first`
 * is the index of the token in the first block that starts at the beginning of `stmt`,
 * while `last` is the index of the token in the last block that ends at the end of the
 * last statement of `stmt`, and `equivstart` and `equivend` are the equivalence classes
 * of the first and the last block, respectively.
 */
private predicate similarCoversStatement(
  int equivstart, int equivend, int first, int last, Stmt stmt
) {
  exists(SimilarBlock head, SimilarBlock tail |
    // `head` .. `tail` form a chain of extending similar blocks.
    similar_extension(head, _, tail, _, equivstart, equivend) and
    equivstart = head.getEquivalenceClass() and
    equivend = tail.getEquivalenceClass() and
    // The chain starts where `stmt` starts and ends where its last statement ends.
    first = head.tokenStartingAt(stmt.getLocation()) and
    last = tail.tokenEndingAt(stmt.getLastStatement().getLocation())
  )
}
|
|
|
|
/**
 * Holds if scope `scope1` contains `total` statements, of which `similar` are
 * similar to some statement in the different scope `scope2`.
 *
 * Both counts are strict, so the predicate does not hold when either would be zero.
 */
private predicate similarStatements(Scope scope1, Scope scope2, int similar, int total) {
  total = strictcount(Stmt contained | scope1.contains(contained)) and
  similar = strictcount(Stmt sim | similarStatement(scope1, scope2, sim, _))
}
|
|
|
|
/**
 * Holds if at least 80% of the statements in scope `s` are similar to statements in
 * scope `other`.
 *
 * `percent` is the share of similar statements, and `message` is an alert message
 * containing a `$@` placeholder.
 */
predicate similarScopes(Scope s, Scope other, float percent, string message) {
  exists(int total, int similar | similarStatements(s, other, similar, total) |
    percent = 100.0 * similar / total and
    percent >= 80.0 and
    (
      // Every statement is similar.
      similar = total and
      message = "All statements in " + s.getName() + " are similar in $@."
      or
      // Only some of the statements are similar.
      not similar = total and
      message =
        similar + " out of " + total + " statements in " + s.getName() + " are similar in $@."
    )
  )
}
|
|
|
|
/**
 * Holds if line `line` of file `f` is acceptable as a duplicate.
 * This is the case when an import statement starts on that line.
 */
predicate whitelistedLineForDuplication(File f, int line) {
  exists(ImportingStmt imp |
    f = imp.getLocation().getFile() and
    line = imp.getLocation().getStartLine()
  )
}
|