Merge pull request #5532 from RasmusWL/python-cleanup

Python: Delete filter queries, code duplication library, and precision tag from metric queries
This commit is contained in:
Calum Grant
2021-03-29 17:16:43 +01:00
committed by GitHub
47 changed files with 35 additions and 1248 deletions

View File

@@ -1,14 +0,0 @@
/**
* @name Filter: non-generated files
* @description Only keep results that aren't (or don't appear to be) generated.
* @kind problem
* @id py/not-generated-file-filter
*/
import python
import external.DefectFilter
import semmle.python.filters.GeneratedCode
// Keep a defect result only when the file it is reported in is not a `GeneratedFile`.
from DefectResult res
where not res.getFile() instanceof GeneratedFile
select res, res.getMessage()

View File

@@ -1,14 +0,0 @@
/**
* @name Filter: non-test files
* @description Only keep results that aren't in tests
* @kind problem
* @id py/not-test-file-filter
*/
import python
import external.DefectFilter
import semmle.python.filters.Tests
// Keep a defect result only when there is no `TestScope` whose location satisfies
// `contains(location, res)`.
// NOTE(review): `contains` is not defined in the visible part of this file; it is
// presumably a location-containment check supplied by one of the imports - confirm.
from DefectResult res
where not exists(TestScope s | contains(s.getLocation(), res))
select res, res.getMessage()

View File

@@ -4,7 +4,6 @@
* @kind treemap
* @treemap.warnOn highValues
* @metricType file
* @precision high
* @tags maintainability
* @id py/lines-of-commented-out-code-in-files
*/

View File

@@ -5,7 +5,6 @@
* @kind treemap
* @treemap.warnOn highValues
* @metricType externalDependency
* @precision medium
* @id py/external-dependencies
*/

View File

@@ -6,7 +6,6 @@
* @treemap.warnOn highValues
* @metricType file
* @metricAggregate avg sum max
* @precision very-high
* @tags maintainability
* @id py/lines-of-code-in-files
*/

View File

@@ -6,7 +6,6 @@
* @treemap.warnOn lowValues
* @metricType file
* @metricAggregate avg sum max
* @precision very-high
* @id py/lines-of-comments-in-files
*/

View File

@@ -7,21 +7,12 @@
* @treemap.warnOn highValues
* @metricType file
* @metricAggregate avg sum max
* @precision high
* @tags testability
* @id py/duplicated-lines-in-files
*/
import python
import external.CodeDuplication
from File f, int n
where
n =
count(int line |
exists(DuplicateBlock d | d.sourceFile() = f |
line in [d.sourceStartLine() .. d.sourceEndLine()] and
not allowlistedLineForDuplication(f, line)
)
)
where none()
select f, n order by n desc

View File

@@ -7,21 +7,12 @@
* @treemap.warnOn highValues
* @metricType file
* @metricAggregate avg sum max
* @precision high
* @tags testability
* @id py/similar-lines-in-files
*/
import python
import external.CodeDuplication
from File f, int n
where
n =
count(int line |
exists(SimilarBlock d | d.sourceFile() = f |
line in [d.sourceStartLine() .. d.sourceEndLine()] and
not allowlistedLineForDuplication(f, line)
)
)
where none()
select f, n order by n desc

View File

@@ -5,7 +5,6 @@
* @treemap.warnOn lowValues
* @metricType file
* @metricAggregate avg sum max
* @precision medium
* @id py/tests-in-files
*/

View File

@@ -3,6 +3,8 @@
* @description Binding a socket to all interfaces opens it up to traffic from any IPv4 address
* and is therefore associated with security risks.
* @kind problem
* @id py/old/bind-socket-all-network-interfaces
* @problem.severity error
*/
import python

View File

@@ -2,6 +2,8 @@
* @name OLD QUERY: Uncontrolled data used in path expression
* @description Accessing paths influenced by users can allow an attacker to access unexpected resources.
* @kind path-problem
* @problem.severity error
* @id py/old/path-injection
*/
import python

View File

@@ -3,6 +3,8 @@
* @description Using externally controlled strings in a command line may allow a malicious
* user to change the meaning of the command.
* @kind path-problem
* @problem.severity error
* @id py/old/command-line-injection
*/
import python

View File

@@ -3,6 +3,8 @@
* @description Writing user input directly to a web page
* allows for a cross-site scripting vulnerability.
* @kind path-problem
* @problem.severity error
* @id py/old/reflective-xss
*/
import python

View File

@@ -3,6 +3,8 @@
* @description Building a SQL query from user-controlled sources is vulnerable to insertion of
* malicious SQL code by the user.
* @kind path-problem
* @problem.severity error
* @id py/old/sql-injection
*/
import python

View File

@@ -1,8 +1,10 @@
/**
* @name Code injection
* @description Interpreting unsanitized user input as code allows a malicious user arbitrary
* @description OLD QUERY: Interpreting unsanitized user input as code allows a malicious user arbitrary
* code execution.
* @kind path-problem
* @problem.severity error
* @id py/old/code-injection
*/
import python

View File

@@ -1,12 +1,9 @@
/**
* @name Use of weak cryptographic key
* @name OLD QUERY: Use of weak cryptographic key
* @description Use of a cryptographic key that is too small may allow the encryption to be broken.
* @kind problem
* @problem.severity error
* @precision high
* @id py/weak-crypto-key
* @tags security
* external/cwe/cwe-326
* @id py/old/weak-crypto-key
*/
import python

View File

@@ -2,6 +2,8 @@
* @name OLD QUERY: Deserializing untrusted input
* @description Deserializing user-controlled data may allow attackers to execute arbitrary code.
* @kind path-problem
* @id py/old/unsafe-deserialization
* @problem.severity error
*/
import python

View File

@@ -3,6 +3,8 @@
* @description URL redirection based on unvalidated user input
* may cause redirection to malicious web sites.
* @kind path-problem
* @problem.severity error
* @id py/old/url-redirection
*/
import python

View File

@@ -1,273 +0,0 @@
/** Provides classes for detecting duplicate or similar code. */
import python
/**
 * Gets the relative path of `file`, as reported by `getRelativePath()`, with every
 * backslash replaced by a forward slash.
 */
private string relativePath(File file) {
  exists(string rawPath |
    rawPath = file.getRelativePath() and
    result = rawPath.replaceAll("\\", "/")
  )
}
/**
 * Holds if the `index`-th token of block `copy` is in file `file`, spanning
 * column `sc` of line `sl` to column `ec` of line `el`.
 *
 * The end coordinates are declared in line-then-column order (`el`, `ec`) because
 * that is the order in which `tokens` stores them: `Copy.sourceEndLine()` reads the
 * fifth column of `tokens` and `Copy.sourceEndColumn()` reads the sixth. Callers bind
 * arguments by position, so they are unaffected by the parameter names.
 *
 * For more information, see [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
 */
pragma[noinline, nomagic]
private predicate tokenLocation(File file, int sl, int sc, int el, int ec, Copy copy, int index) {
  file = copy.sourceFile() and
  tokens(copy, index, sl, sc, el, ec)
}
/**
 * A token block used for detection of duplicate and similar code.
 *
 * A block is a run of tokens stored in the `tokens` relation, indexed from 0.
 * All positional information exposed by this class is derived from that relation.
 */
class Copy extends @duplication_or_similarity {
/** Gets the largest token index recorded for this block in `tokens`. */
private int lastToken() { result = max(int i | tokens(this, i, _, _, _, _) | i) }
/**
 * Gets the index of the token in this block starting at the location `loc`, if any.
 * Only the file, start line and start column of `loc` are compared.
 */
int tokenStartingAt(Location loc) {
tokenLocation(loc.getFile(), loc.getStartLine(), loc.getStartColumn(), _, _, this, result)
}
/**
 * Gets the index of the token in this block ending at the location `loc`, if any.
 * Only the file, end line and end column of `loc` are compared.
 */
int tokenEndingAt(Location loc) {
tokenLocation(loc.getFile(), _, _, loc.getEndLine(), loc.getEndColumn(), this, result)
}
/** Gets the line on which the first token (index 0) in this block starts. */
int sourceStartLine() { tokens(this, 0, result, _, _, _) }
/** Gets the column on which the first token (index 0) in this block starts. */
int sourceStartColumn() { tokens(this, 0, _, result, _, _) }
/** Gets the line on which the last token in this block ends. */
int sourceEndLine() { tokens(this, this.lastToken(), _, _, result, _) }
/** Gets the column on which the last token in this block ends. */
int sourceEndColumn() { tokens(this, this.lastToken(), _, _, _, result) }
/**
 * Gets the number of lines containing at least (part of) one token in this block,
 * computed as the inclusive distance between the start line and the end line.
 */
int sourceLines() { result = this.sourceEndLine() + 1 - this.sourceStartLine() }
/**
 * Gets an opaque identifier for the equivalence class of this block, taken from
 * whichever of `duplicateCode` or `similarCode` has a row for this block.
 */
int getEquivalenceClass() { duplicateCode(this, _, result) or similarCode(this, _, result) }
/**
 * Gets the source file in which this block appears.
 *
 * The file is found by comparing the name recorded for this block in `duplicateCode`
 * or `similarCode` against each file's relative path, after replacing backslashes
 * with forward slashes on both sides.
 */
File sourceFile() {
exists(string name | duplicateCode(this, name, _) or similarCode(this, name, _) |
name.replaceAll("\\", "/") = relativePath(result)
)
}
/**
 * Holds if this element is at the specified location.
 * The location spans column `startcolumn` of line `startline` to
 * column `endcolumn` of line `endline` in file `filepath`.
 * For more information, see
 * [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
 */
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
sourceFile().getAbsolutePath() = filepath and
startline = sourceStartLine() and
startcolumn = sourceStartColumn() and
endline = sourceEndLine() and
endcolumn = sourceEndColumn()
}
/** Gets a textual representation of this element; subclasses override this. */
string toString() { result = "Copy" }
/**
 * Gets a block that extends this one, that is, its first token is also
 * covered by this block, but they are not the same block.
 */
Copy extendingBlock() {
// Some token of this block and token 0 of `result` occupy exactly the same
// position (same file and same four coordinates).
exists(File file, int sl, int sc, int ec, int el |
tokenLocation(file, sl, sc, ec, el, this, _) and
tokenLocation(file, sl, sc, ec, el, result, 0)
) and
this != result
}
}
/**
 * Holds if there is a sequence of `SimilarBlock`s `start1, ..., ext1` and another sequence
 * `start2, ..., ext2` such that each block extends the previous one and corresponding blocks
 * have the same equivalence class, with `start` being the equivalence class of `start1` and
 * `start2`, and `ext` the equivalence class of `ext1` and `ext2`.
 *
 * `start1` and `start2` are required to be different blocks.
 */
predicate similar_extension(
SimilarBlock start1, SimilarBlock start2, SimilarBlock ext1, SimilarBlock ext2, int start, int ext
) {
start1.getEquivalenceClass() = start and
start2.getEquivalenceClass() = start and
ext1.getEquivalenceClass() = ext and
ext2.getEquivalenceClass() = ext and
start1 != start2 and
(
// Base case: the sequences consist of the start blocks only.
ext1 = start1 and ext2 = start2
or
// Recursive case: advance both sequences by one extending block. The equivalence
// class shared by the intermediate pair is left unconstrained (`_`) here.
similar_extension(start1.extendingBlock(), start2.extendingBlock(), ext1, ext2, _, ext)
)
}
/**
 * Holds if there is a sequence of `DuplicateBlock`s `start1, ..., ext1` and another sequence
 * `start2, ..., ext2` such that each block extends the previous one and corresponding blocks
 * have the same equivalence class, with `start` being the equivalence class of `start1` and
 * `start2`, and `ext` the equivalence class of `ext1` and `ext2`.
 *
 * `start1` and `start2` are required to be different blocks.
 */
predicate duplicate_extension(
DuplicateBlock start1, DuplicateBlock start2, DuplicateBlock ext1, DuplicateBlock ext2, int start,
int ext
) {
start1.getEquivalenceClass() = start and
start2.getEquivalenceClass() = start and
ext1.getEquivalenceClass() = ext and
ext2.getEquivalenceClass() = ext and
start1 != start2 and
(
// Base case: the sequences consist of the start blocks only.
ext1 = start1 and ext2 = start2
or
// Recursive case: advance both sequences by one extending block. The equivalence
// class shared by the intermediate pair is left unconstrained (`_`) here.
duplicate_extension(start1.extendingBlock(), start2.extendingBlock(), ext1, ext2, _, ext)
)
}
/** A `Copy` that is recorded as a block of duplicated code. */
class DuplicateBlock extends Copy, @duplication {
  /** Gets a description of this block that states how many lines it spans. */
  override string toString() {
    exists(int lineCount | lineCount = this.sourceLines() |
      result = "Duplicate code: " + lineCount + " duplicated lines."
    )
  }
}
/** A `Copy` that is recorded as a block of similar code. */
class SimilarBlock extends Copy, @similarity {
  /** Gets a description of this block that states how many lines it spans. */
  override string toString() {
    exists(int lineCount | lineCount = this.sourceLines() |
      result = "Similar code: " + lineCount + " almost duplicated lines."
    )
  }
}
/**
 * Holds if `stmt1`, contained in `scope1`, and `stmt2`, contained in `scope2`, are
 * duplicates of each other: both are covered by duplicate blocks with the same start
 * and end equivalence classes and the same first and last token indices.
 * The two statements must differ, and so must the two scopes.
 */
predicate duplicateStatement(Scope scope1, Scope scope2, Stmt stmt1, Stmt stmt2) {
  scope1 != scope2 and
  stmt1 != stmt2 and
  scope1.contains(stmt1) and
  scope2.contains(stmt2) and
  exists(int startClass, int endClass, int firstToken, int lastToken |
    duplicateCoversStatement(startClass, endClass, firstToken, lastToken, stmt1) and
    duplicateCoversStatement(startClass, endClass, firstToken, lastToken, stmt2)
  )
}
/**
 * Holds if statement `stmt` is covered by a sequence of `DuplicateBlock`s.
 *
 * `first` is the index, within the first block of the sequence, of the token that
 * starts where `stmt` starts; `last` is the index, within the last block, of the token
 * that ends where the last statement of `stmt` ends. `equivstart` and `equivend` are
 * the equivalence classes of the first and the last block, respectively.
 */
private predicate duplicateCoversStatement(
  int equivstart, int equivend, int first, int last, Stmt stmt
) {
  exists(DuplicateBlock head, DuplicateBlock tail |
    duplicate_extension(head, _, tail, _, equivstart, equivend) and
    head.getEquivalenceClass() = equivstart and
    tail.getEquivalenceClass() = equivend and
    first = head.tokenStartingAt(stmt.getLocation()) and
    last = tail.tokenEndingAt(stmt.getLastStatement().getLocation())
  )
}
/**
 * Holds if `scope1` contains `total` statements, `duplicate` of which are duplicates
 * of some statement in `scope2`.
 *
 * Both numbers come from `strictcount`, so the predicate does not hold when either
 * count would be zero.
 */
predicate duplicateStatements(Scope scope1, Scope scope2, int duplicate, int total) {
  total = strictcount(Stmt contained | scope1.contains(contained)) and
  duplicate = strictcount(Stmt dup | duplicateStatement(scope1, scope2, dup, _))
}
/**
 * Holds if at least 80 percent of the statements in scope `s` are duplicated in scope
 * `other`. `percent` is the duplicated share of the statements of `s`, and `message`
 * is an alert message describing it, with a `$@` placeholder for `other`.
 */
predicate duplicateScopes(Scope s, Scope other, float percent, string message) {
  exists(int duplicate, int total |
    duplicateStatements(s, other, duplicate, total) and
    percent = 100.0 * duplicate / total and
    percent >= 80.0
  |
    (
      duplicate = total and
      message = "All " + total + " statements in " + s.getName() + " are identical in $@."
    )
    or
    (
      duplicate != total and
      message =
        duplicate + " out of " + total + " statements in " + s.getName() + " are duplicated in $@."
    )
  )
}
/**
 * Holds if `stmt1`, contained in `scope1`, and `stmt2`, contained in `scope2`, are
 * similar to each other: both are covered by similar blocks with the same start and
 * end equivalence classes and the same first and last token indices.
 * The two statements must differ, and so must the two scopes.
 */
private predicate similarStatement(Scope scope1, Scope scope2, Stmt stmt1, Stmt stmt2) {
  scope1 != scope2 and
  stmt1 != stmt2 and
  scope1.contains(stmt1) and
  scope2.contains(stmt2) and
  exists(int startClass, int endClass, int firstToken, int lastToken |
    similarCoversStatement(startClass, endClass, firstToken, lastToken, stmt1) and
    similarCoversStatement(startClass, endClass, firstToken, lastToken, stmt2)
  )
}
/**
 * Holds if statement `stmt` is covered by a sequence of `SimilarBlock`s.
 *
 * `first` is the index, within the first block of the sequence, of the token that
 * starts where `stmt` starts; `last` is the index, within the last block, of the token
 * that ends where the last statement of `stmt` ends. `equivstart` and `equivend` are
 * the equivalence classes of the first and the last block, respectively.
 */
private predicate similarCoversStatement(
  int equivstart, int equivend, int first, int last, Stmt stmt
) {
  exists(SimilarBlock head, SimilarBlock tail |
    similar_extension(head, _, tail, _, equivstart, equivend) and
    head.getEquivalenceClass() = equivstart and
    tail.getEquivalenceClass() = equivend and
    first = head.tokenStartingAt(stmt.getLocation()) and
    last = tail.tokenEndingAt(stmt.getLastStatement().getLocation())
  )
}
/**
 * Holds if `scope1` contains `total` statements, `similar` of which are similar to
 * some statement in `scope2`.
 *
 * Both numbers come from `strictcount`, so the predicate does not hold when either
 * count would be zero.
 */
private predicate similarStatements(Scope scope1, Scope scope2, int similar, int total) {
  total = strictcount(Stmt contained | scope1.contains(contained)) and
  similar = strictcount(Stmt sim | similarStatement(scope1, scope2, sim, _))
}
/**
 * Holds if at least 80 percent of the statements in scope `s` are similar to statements
 * in scope `other`. `percent` is the similar share of the statements of `s`, and
 * `message` is an alert message describing it, with a `$@` placeholder for `other`.
 */
predicate similarScopes(Scope s, Scope other, float percent, string message) {
  exists(int similar, int total |
    similarStatements(s, other, similar, total) and
    percent = 100.0 * similar / total and
    percent >= 80.0
  |
    (
      similar = total and
      message = "All statements in " + s.getName() + " are similar in $@."
    )
    or
    (
      similar != total and
      message =
        similar + " out of " + total + " statements in " + s.getName() + " are similar in $@."
    )
  )
}
/**
 * Holds if `line` of file `f` is acceptable as a duplicate, which is the case
 * when an `ImportingStmt` located in `f` starts on that line.
 */
predicate allowlistedLineForDuplication(File f, int line) {
  exists(ImportingStmt imp |
    line = imp.getLocation().getStartLine() and
    f = imp.getLocation().getFile()
  )
}

View File

@@ -16,19 +16,7 @@
*/
import python
import CodeDuplication
predicate sorted_by_location(DuplicateBlock x, DuplicateBlock y) {
if x.sourceFile() = y.sourceFile()
then x.sourceStartLine() < y.sourceStartLine()
else x.sourceFile().getAbsolutePath() < y.sourceFile().getAbsolutePath()
}
from DuplicateBlock d, DuplicateBlock other
where
d.sourceLines() > 10 and
other.getEquivalenceClass() = d.getEquivalenceClass() and
sorted_by_location(other, d)
select d,
"Duplicate code: " + d.sourceLines() + " lines are duplicated at " +
other.sourceFile().getShortName() + ":" + other.sourceStartLine().toString()
from BasicBlock d
where none()
select d, "Duplicate code: " + "-1" + " lines are duplicated at " + "<file>" + ":" + "-1"

View File

@@ -16,15 +16,7 @@
*/
import python
import CodeDuplication
predicate relevant(Function m) { m.getMetrics().getNumberOfLinesOfCode() > 5 }
from Function m, Function other, string message, int percent
where
duplicateScopes(m, other, percent, message) and
relevant(m) and
percent > 95.0 and
not duplicateScopes(m.getEnclosingModule(), other.getEnclosingModule(), _, _) and
not duplicateScopes(m.getScope(), other.getScope(), _, _)
from Function m, Function other, string message
where none()
select m, message, other, other.getName()

View File

@@ -16,11 +16,7 @@
*/
import python
import CodeDuplication
from Class c, Class other, string message
where
duplicateScopes(c, other, _, message) and
count(c.getAStmt()) > 3 and
not duplicateScopes(c.getEnclosingModule(), _, _, _)
where none()
select c, message, other, other.getName()

View File

@@ -16,8 +16,7 @@
*/
import python
import CodeDuplication
from Module m, Module other, int percent, string message
where duplicateScopes(m, other, percent, message)
from Module m, Module other, string message
where none()
select m, message, other, other.getName()

View File

@@ -16,8 +16,7 @@
*/
import python
import CodeDuplication
from Module m, Module other, string message
where similarScopes(m, other, _, message)
where none()
select m, message, other, other.getName()

View File

@@ -16,16 +16,7 @@
*/
import python
import CodeDuplication
predicate relevant(Function m) { m.getMetrics().getNumberOfLinesOfCode() > 10 }
from Function m, Function other, string message, int percent
where
similarScopes(m, other, percent, message) and
relevant(m) and
percent > 95.0 and
not duplicateScopes(m, other, _, _) and
not duplicateScopes(m.getEnclosingModule(), other.getEnclosingModule(), _, _) and
not duplicateScopes(m.getScope(), other.getScope(), _, _)
from Function m, Function other, string message
where none()
select m, message, other, other.getName()