mirror of
https://github.com/github/codeql.git
synced 2026-04-29 10:45:15 +02:00
Merge branch 'main' of github.com:github/codeql into python-api-enhancements
This commit is contained in:
@@ -1,14 +0,0 @@
|
||||
/**
|
||||
* @name Filter: non-generated files
|
||||
* @description Only keep results that aren't (or don't appear to be) generated.
|
||||
* @kind problem
|
||||
* @id py/not-generated-file-filter
|
||||
*/
|
||||
|
||||
import python
|
||||
import external.DefectFilter
|
||||
import semmle.python.filters.GeneratedCode
|
||||
|
||||
from DefectResult res
|
||||
where not exists(GeneratedFile f | res.getFile() = f)
|
||||
select res, res.getMessage()
|
||||
@@ -1,14 +0,0 @@
|
||||
/**
|
||||
* @name Filter: non-test files
|
||||
* @description Only keep results that aren't in tests
|
||||
* @kind problem
|
||||
* @id py/not-test-file-filter
|
||||
*/
|
||||
|
||||
import python
|
||||
import external.DefectFilter
|
||||
import semmle.python.filters.Tests
|
||||
|
||||
from DefectResult res
|
||||
where not exists(TestScope s | contains(s.getLocation(), res))
|
||||
select res, res.getMessage()
|
||||
@@ -4,7 +4,6 @@
|
||||
* @kind treemap
|
||||
* @treemap.warnOn highValues
|
||||
* @metricType file
|
||||
* @precision high
|
||||
* @tags maintainability
|
||||
* @id py/lines-of-commented-out-code-in-files
|
||||
*/
|
||||
|
||||
@@ -5,7 +5,6 @@
|
||||
* @kind treemap
|
||||
* @treemap.warnOn highValues
|
||||
* @metricType externalDependency
|
||||
* @precision medium
|
||||
* @id py/external-dependencies
|
||||
*/
|
||||
|
||||
|
||||
@@ -6,7 +6,6 @@
|
||||
* @treemap.warnOn highValues
|
||||
* @metricType file
|
||||
* @metricAggregate avg sum max
|
||||
* @precision very-high
|
||||
* @tags maintainability
|
||||
* @id py/lines-of-code-in-files
|
||||
*/
|
||||
|
||||
@@ -6,7 +6,6 @@
|
||||
* @treemap.warnOn lowValues
|
||||
* @metricType file
|
||||
* @metricAggregate avg sum max
|
||||
* @precision very-high
|
||||
* @id py/lines-of-comments-in-files
|
||||
*/
|
||||
|
||||
|
||||
@@ -7,21 +7,12 @@
|
||||
* @treemap.warnOn highValues
|
||||
* @metricType file
|
||||
* @metricAggregate avg sum max
|
||||
* @precision high
|
||||
* @tags testability
|
||||
* @id py/duplicated-lines-in-files
|
||||
*/
|
||||
|
||||
import python
|
||||
import external.CodeDuplication
|
||||
|
||||
from File f, int n
|
||||
where
|
||||
n =
|
||||
count(int line |
|
||||
exists(DuplicateBlock d | d.sourceFile() = f |
|
||||
line in [d.sourceStartLine() .. d.sourceEndLine()] and
|
||||
not allowlistedLineForDuplication(f, line)
|
||||
)
|
||||
)
|
||||
where none()
|
||||
select f, n order by n desc
|
||||
|
||||
@@ -7,21 +7,12 @@
|
||||
* @treemap.warnOn highValues
|
||||
* @metricType file
|
||||
* @metricAggregate avg sum max
|
||||
* @precision high
|
||||
* @tags testability
|
||||
* @id py/similar-lines-in-files
|
||||
*/
|
||||
|
||||
import python
|
||||
import external.CodeDuplication
|
||||
|
||||
from File f, int n
|
||||
where
|
||||
n =
|
||||
count(int line |
|
||||
exists(SimilarBlock d | d.sourceFile() = f |
|
||||
line in [d.sourceStartLine() .. d.sourceEndLine()] and
|
||||
not allowlistedLineForDuplication(f, line)
|
||||
)
|
||||
)
|
||||
where none()
|
||||
select f, n order by n desc
|
||||
|
||||
@@ -5,7 +5,6 @@
|
||||
* @treemap.warnOn lowValues
|
||||
* @metricType file
|
||||
* @metricAggregate avg sum max
|
||||
* @precision medium
|
||||
* @id py/tests-in-files
|
||||
*/
|
||||
|
||||
|
||||
24
python/ql/src/Security/CWE-326/WeakCryptoKey.ql
Normal file
24
python/ql/src/Security/CWE-326/WeakCryptoKey.ql
Normal file
@@ -0,0 +1,24 @@
|
||||
/**
|
||||
* @name Use of weak cryptographic key
|
||||
* @description Use of a cryptographic key that is too small may allow the encryption to be broken.
|
||||
* @kind problem
|
||||
* @problem.severity error
|
||||
* @precision high
|
||||
* @id py/weak-crypto-key
|
||||
* @tags security
|
||||
* external/cwe/cwe-326
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.Concepts
|
||||
import semmle.python.dataflow.new.DataFlow
|
||||
import semmle.python.filters.Tests
|
||||
|
||||
from Cryptography::PublicKey::KeyGeneration keyGen, int keySize, DataFlow::Node origin
|
||||
where
|
||||
keySize = keyGen.getKeySizeWithOrigin(origin) and
|
||||
keySize < keyGen.minimumSecureKeySize() and
|
||||
not origin.getScope().getScope*() instanceof TestScope
|
||||
select keyGen,
|
||||
"Creation of an " + keyGen.getName() + " key uses $@ bits, which is below " +
|
||||
keyGen.minimumSecureKeySize() + " and considered breakable.", origin, keySize.toString()
|
||||
@@ -3,6 +3,8 @@
|
||||
* @description Binding a socket to all interfaces opens it up to traffic from any IPv4 address
|
||||
* and is therefore associated with security risks.
|
||||
* @kind problem
|
||||
* @id py/old/bind-socket-all-network-interfaces
|
||||
* @problem.severity error
|
||||
*/
|
||||
|
||||
import python
|
||||
|
||||
@@ -2,6 +2,8 @@
|
||||
* @name OLD QUERY: Uncontrolled data used in path expression
|
||||
* @description Accessing paths influenced by users can allow an attacker to access unexpected resources.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @id py/old/path-injection
|
||||
*/
|
||||
|
||||
import python
|
||||
|
||||
@@ -3,6 +3,8 @@
|
||||
* @description Using externally controlled strings in a command line may allow a malicious
|
||||
* user to change the meaning of the command.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @id py/old/command-line-injection
|
||||
*/
|
||||
|
||||
import python
|
||||
|
||||
@@ -3,6 +3,8 @@
|
||||
* @description Writing user input directly to a web page
|
||||
* allows for a cross-site scripting vulnerability.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @id py/old/reflective-xss
|
||||
*/
|
||||
|
||||
import python
|
||||
|
||||
@@ -3,6 +3,8 @@
|
||||
* @description Building a SQL query from user-controlled sources is vulnerable to insertion of
|
||||
* malicious SQL code by the user.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @id py/old/sql-injection
|
||||
*/
|
||||
|
||||
import python
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
/**
|
||||
* @name Code injection
|
||||
* @description Interpreting unsanitized user input as code allows a malicious user arbitrary
|
||||
* @description OLD QUERY: Interpreting unsanitized user input as code allows a malicious user arbitrary
|
||||
* code execution.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @id py/old/code-injection
|
||||
*/
|
||||
|
||||
import python
|
||||
|
||||
@@ -1,12 +1,9 @@
|
||||
/**
|
||||
* @name Use of weak cryptographic key
|
||||
* @name OLD QUERY: Use of weak cryptographic key
|
||||
* @description Use of a cryptographic key that is too small may allow the encryption to be broken.
|
||||
* @kind problem
|
||||
* @problem.severity error
|
||||
* @precision high
|
||||
* @id py/weak-crypto-key
|
||||
* @tags security
|
||||
* external/cwe/cwe-326
|
||||
* @id py/old/weak-crypto-key
|
||||
*/
|
||||
|
||||
import python
|
||||
@@ -2,6 +2,8 @@
|
||||
* @name OLD QUERY: Deserializing untrusted input
|
||||
* @description Deserializing user-controlled data may allow attackers to execute arbitrary code.
|
||||
* @kind path-problem
|
||||
* @id py/old/unsafe-deserialization
|
||||
* @problem.severity error
|
||||
*/
|
||||
|
||||
import python
|
||||
|
||||
@@ -3,6 +3,8 @@
|
||||
* @description URL redirection based on unvalidated user input
|
||||
* may cause redirection to malicious web sites.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @id py/old/url-redirection
|
||||
*/
|
||||
|
||||
import python
|
||||
|
||||
15
python/ql/src/experimental/semmle/python/Concepts.qll
Normal file
15
python/ql/src/experimental/semmle/python/Concepts.qll
Normal file
@@ -0,0 +1,15 @@
|
||||
/**
|
||||
* This version resides in the experimental area and provides a space for
|
||||
* external contributors to place new concepts, keeping to our preferred
|
||||
* structure while remaining in the experimental area.
|
||||
*
|
||||
* Provides abstract classes representing generic concepts such as file system
|
||||
* access or system command execution, for which individual framework libraries
|
||||
* provide concrete subclasses.
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import semmle.python.dataflow.new.TaintTracking
|
||||
private import experimental.semmle.python.Frameworks
|
||||
5
python/ql/src/experimental/semmle/python/Frameworks.qll
Normal file
5
python/ql/src/experimental/semmle/python/Frameworks.qll
Normal file
@@ -0,0 +1,5 @@
|
||||
/**
|
||||
* Helper file that imports all framework modeling.
|
||||
*/
|
||||
|
||||
private import experimental.semmle.python.frameworks.Stdlib
|
||||
@@ -0,0 +1,11 @@
|
||||
/**
|
||||
* Provides classes modeling security-relevant aspects of the standard libraries.
|
||||
* Note: some modeling is done internally in the dataflow/taint tracking implementation.
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.dataflow.new.TaintTracking
|
||||
private import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import experimental.semmle.python.Concepts
|
||||
private import semmle.python.ApiGraphs
|
||||
273
python/ql/src/external/CodeDuplication.qll
vendored
273
python/ql/src/external/CodeDuplication.qll
vendored
@@ -1,273 +0,0 @@
|
||||
/** Provides classes for detecting duplicate or similar code. */
|
||||
|
||||
import python
|
||||
|
||||
/** Gets the relative path of `file`, with backslashes replaced by forward slashes. */
|
||||
private string relativePath(File file) { result = file.getRelativePath().replaceAll("\\", "/") }
|
||||
|
||||
/**
|
||||
* Holds if the `index`-th token of block `copy` is in file `file`, spanning
|
||||
* column `sc` of line `sl` to column `ec` of line `el`.
|
||||
*
|
||||
* For more information, see [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
|
||||
*/
|
||||
pragma[noinline, nomagic]
|
||||
private predicate tokenLocation(File file, int sl, int sc, int ec, int el, Copy copy, int index) {
|
||||
file = copy.sourceFile() and
|
||||
tokens(copy, index, sl, sc, ec, el)
|
||||
}
|
||||
|
||||
/** A token block used for detection of duplicate and similar code. */
|
||||
class Copy extends @duplication_or_similarity {
|
||||
private int lastToken() { result = max(int i | tokens(this, i, _, _, _, _) | i) }
|
||||
|
||||
/** Gets the index of the token in this block starting at the location `loc`, if any. */
|
||||
int tokenStartingAt(Location loc) {
|
||||
tokenLocation(loc.getFile(), loc.getStartLine(), loc.getStartColumn(), _, _, this, result)
|
||||
}
|
||||
|
||||
/** Gets the index of the token in this block ending at the location `loc`, if any. */
|
||||
int tokenEndingAt(Location loc) {
|
||||
tokenLocation(loc.getFile(), _, _, loc.getEndLine(), loc.getEndColumn(), this, result)
|
||||
}
|
||||
|
||||
/** Gets the line on which the first token in this block starts. */
|
||||
int sourceStartLine() { tokens(this, 0, result, _, _, _) }
|
||||
|
||||
/** Gets the column on which the first token in this block starts. */
|
||||
int sourceStartColumn() { tokens(this, 0, _, result, _, _) }
|
||||
|
||||
/** Gets the line on which the last token in this block ends. */
|
||||
int sourceEndLine() { tokens(this, this.lastToken(), _, _, result, _) }
|
||||
|
||||
/** Gets the column on which the last token in this block ends. */
|
||||
int sourceEndColumn() { tokens(this, this.lastToken(), _, _, _, result) }
|
||||
|
||||
/** Gets the number of lines containing at least (part of) one token in this block. */
|
||||
int sourceLines() { result = this.sourceEndLine() + 1 - this.sourceStartLine() }
|
||||
|
||||
/** Gets an opaque identifier for the equivalence class of this block. */
|
||||
int getEquivalenceClass() { duplicateCode(this, _, result) or similarCode(this, _, result) }
|
||||
|
||||
/** Gets the source file in which this block appears. */
|
||||
File sourceFile() {
|
||||
exists(string name | duplicateCode(this, name, _) or similarCode(this, name, _) |
|
||||
name.replaceAll("\\", "/") = relativePath(result)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if this element is at the specified location.
|
||||
* The location spans column `startcolumn` of line `startline` to
|
||||
* column `endcolumn` of line `endline` in file `filepath`.
|
||||
* For more information, see
|
||||
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
|
||||
*/
|
||||
predicate hasLocationInfo(
|
||||
string filepath, int startline, int startcolumn, int endline, int endcolumn
|
||||
) {
|
||||
sourceFile().getAbsolutePath() = filepath and
|
||||
startline = sourceStartLine() and
|
||||
startcolumn = sourceStartColumn() and
|
||||
endline = sourceEndLine() and
|
||||
endcolumn = sourceEndColumn()
|
||||
}
|
||||
|
||||
/** Gets a textual representation of this element. */
|
||||
string toString() { result = "Copy" }
|
||||
|
||||
/**
|
||||
* Gets a block that extends this one, that is, its first token is also
|
||||
* covered by this block, but they are not the same block.
|
||||
*/
|
||||
Copy extendingBlock() {
|
||||
exists(File file, int sl, int sc, int ec, int el |
|
||||
tokenLocation(file, sl, sc, ec, el, this, _) and
|
||||
tokenLocation(file, sl, sc, ec, el, result, 0)
|
||||
) and
|
||||
this != result
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if there is a sequence of `SimilarBlock`s `start1, ..., ext1` and another sequence
|
||||
* `start2, ..., ext2` such that each block extends the previous one and corresponding blocks
|
||||
* have the same equivalence class, with `start` being the equivalence class of `start1` and
|
||||
* `start2`, and `ext` the equivalence class of `ext1` and `ext2`.
|
||||
*/
|
||||
predicate similar_extension(
|
||||
SimilarBlock start1, SimilarBlock start2, SimilarBlock ext1, SimilarBlock ext2, int start, int ext
|
||||
) {
|
||||
start1.getEquivalenceClass() = start and
|
||||
start2.getEquivalenceClass() = start and
|
||||
ext1.getEquivalenceClass() = ext and
|
||||
ext2.getEquivalenceClass() = ext and
|
||||
start1 != start2 and
|
||||
(
|
||||
ext1 = start1 and ext2 = start2
|
||||
or
|
||||
similar_extension(start1.extendingBlock(), start2.extendingBlock(), ext1, ext2, _, ext)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if there is a sequence of `DuplicateBlock`s `start1, ..., ext1` and another sequence
|
||||
* `start2, ..., ext2` such that each block extends the previous one and corresponding blocks
|
||||
* have the same equivalence class, with `start` being the equivalence class of `start1` and
|
||||
* `start2`, and `ext` the equivalence class of `ext1` and `ext2`.
|
||||
*/
|
||||
predicate duplicate_extension(
|
||||
DuplicateBlock start1, DuplicateBlock start2, DuplicateBlock ext1, DuplicateBlock ext2, int start,
|
||||
int ext
|
||||
) {
|
||||
start1.getEquivalenceClass() = start and
|
||||
start2.getEquivalenceClass() = start and
|
||||
ext1.getEquivalenceClass() = ext and
|
||||
ext2.getEquivalenceClass() = ext and
|
||||
start1 != start2 and
|
||||
(
|
||||
ext1 = start1 and ext2 = start2
|
||||
or
|
||||
duplicate_extension(start1.extendingBlock(), start2.extendingBlock(), ext1, ext2, _, ext)
|
||||
)
|
||||
}
|
||||
|
||||
/** A block of duplicated code. */
|
||||
class DuplicateBlock extends Copy, @duplication {
|
||||
override string toString() { result = "Duplicate code: " + sourceLines() + " duplicated lines." }
|
||||
}
|
||||
|
||||
/** A block of similar code. */
|
||||
class SimilarBlock extends Copy, @similarity {
|
||||
override string toString() {
|
||||
result = "Similar code: " + sourceLines() + " almost duplicated lines."
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `stmt1` and `stmt2` are duplicate statements in function or toplevel `scope1` and `scope2`,
|
||||
* respectively, where `scope1` and `scope2` are not the same.
|
||||
*/
|
||||
predicate duplicateStatement(Scope scope1, Scope scope2, Stmt stmt1, Stmt stmt2) {
|
||||
exists(int equivstart, int equivend, int first, int last |
|
||||
scope1.contains(stmt1) and
|
||||
scope2.contains(stmt2) and
|
||||
duplicateCoversStatement(equivstart, equivend, first, last, stmt1) and
|
||||
duplicateCoversStatement(equivstart, equivend, first, last, stmt2) and
|
||||
stmt1 != stmt2 and
|
||||
scope1 != scope2
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if statement `stmt` is covered by a sequence of `DuplicateBlock`s, where `first`
|
||||
* is the index of the token in the first block that starts at the beginning of `stmt`,
|
||||
* while `last` is the index of the token in the last block that ends at the end of `stmt`,
|
||||
* and `equivstart` and `equivend` are the equivalence classes of the first and the last
|
||||
* block, respectively.
|
||||
*/
|
||||
private predicate duplicateCoversStatement(
|
||||
int equivstart, int equivend, int first, int last, Stmt stmt
|
||||
) {
|
||||
exists(DuplicateBlock b1, DuplicateBlock b2, Location startloc, Location endloc |
|
||||
stmt.getLocation() = startloc and
|
||||
stmt.getLastStatement().getLocation() = endloc and
|
||||
first = b1.tokenStartingAt(startloc) and
|
||||
last = b2.tokenEndingAt(endloc) and
|
||||
b1.getEquivalenceClass() = equivstart and
|
||||
b2.getEquivalenceClass() = equivend and
|
||||
duplicate_extension(b1, _, b2, _, equivstart, equivend)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `scope1` is a function or toplevel with `total` statements, and `scope2` is a function or
|
||||
* toplevel that has `duplicate` statements in common with `scope1`.
|
||||
*/
|
||||
predicate duplicateStatements(Scope scope1, Scope scope2, int duplicate, int total) {
|
||||
duplicate = strictcount(Stmt stmt | duplicateStatement(scope1, scope2, stmt, _)) and
|
||||
total = strictcount(Stmt stmt | scope1.contains(stmt))
|
||||
}
|
||||
|
||||
/**
|
||||
* Find pairs of scopes that are identical or almost identical
|
||||
*/
|
||||
predicate duplicateScopes(Scope s, Scope other, float percent, string message) {
|
||||
exists(int total, int duplicate | duplicateStatements(s, other, duplicate, total) |
|
||||
percent = 100.0 * duplicate / total and
|
||||
percent >= 80.0 and
|
||||
if duplicate = total
|
||||
then message = "All " + total + " statements in " + s.getName() + " are identical in $@."
|
||||
else
|
||||
message =
|
||||
duplicate + " out of " + total + " statements in " + s.getName() + " are duplicated in $@."
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `stmt1` and `stmt2` are similar statements in function or toplevel `scope1` and `scope2`,
|
||||
* respectively, where `scope1` and `scope2` are not the same.
|
||||
*/
|
||||
private predicate similarStatement(Scope scope1, Scope scope2, Stmt stmt1, Stmt stmt2) {
|
||||
exists(int start, int end, int first, int last |
|
||||
scope1.contains(stmt1) and
|
||||
scope2.contains(stmt2) and
|
||||
similarCoversStatement(start, end, first, last, stmt1) and
|
||||
similarCoversStatement(start, end, first, last, stmt2) and
|
||||
stmt1 != stmt2 and
|
||||
scope1 != scope2
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if statement `stmt` is covered by a sequence of `SimilarBlock`s, where `first`
|
||||
* is the index of the token in the first block that starts at the beginning of `stmt`,
|
||||
* while `last` is the index of the token in the last block that ends at the end of `stmt`,
|
||||
* and `equivstart` and `equivend` are the equivalence classes of the first and the last
|
||||
* block, respectively.
|
||||
*/
|
||||
private predicate similarCoversStatement(
|
||||
int equivstart, int equivend, int first, int last, Stmt stmt
|
||||
) {
|
||||
exists(SimilarBlock b1, SimilarBlock b2, Location startloc, Location endloc |
|
||||
stmt.getLocation() = startloc and
|
||||
stmt.getLastStatement().getLocation() = endloc and
|
||||
first = b1.tokenStartingAt(startloc) and
|
||||
last = b2.tokenEndingAt(endloc) and
|
||||
b1.getEquivalenceClass() = equivstart and
|
||||
b2.getEquivalenceClass() = equivend and
|
||||
similar_extension(b1, _, b2, _, equivstart, equivend)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `scope1` is a function or toplevel with `total` statements, and `scope2` is a function or
|
||||
* toplevel that has `similar` statements similar to statements in `scope1`.
|
||||
*/
|
||||
private predicate similarStatements(Scope scope1, Scope scope2, int similar, int total) {
|
||||
similar = strictcount(Stmt stmt | similarStatement(scope1, scope2, stmt, _)) and
|
||||
total = strictcount(Stmt stmt | scope1.contains(stmt))
|
||||
}
|
||||
|
||||
/**
|
||||
* Find pairs of scopes that are similar
|
||||
*/
|
||||
predicate similarScopes(Scope s, Scope other, float percent, string message) {
|
||||
exists(int total, int similar | similarStatements(s, other, similar, total) |
|
||||
percent = 100.0 * similar / total and
|
||||
percent >= 80.0 and
|
||||
if similar = total
|
||||
then message = "All statements in " + s.getName() + " are similar in $@."
|
||||
else
|
||||
message =
|
||||
similar + " out of " + total + " statements in " + s.getName() + " are similar in $@."
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if the line is acceptable as a duplicate.
|
||||
* This is true for blocks of import statements.
|
||||
*/
|
||||
predicate allowlistedLineForDuplication(File f, int line) {
|
||||
exists(ImportingStmt i | i.getLocation().getFile() = f and i.getLocation().getStartLine() = line)
|
||||
}
|
||||
18
python/ql/src/external/DuplicateBlock.ql
vendored
18
python/ql/src/external/DuplicateBlock.ql
vendored
@@ -16,19 +16,7 @@
|
||||
*/
|
||||
|
||||
import python
|
||||
import CodeDuplication
|
||||
|
||||
predicate sorted_by_location(DuplicateBlock x, DuplicateBlock y) {
|
||||
if x.sourceFile() = y.sourceFile()
|
||||
then x.sourceStartLine() < y.sourceStartLine()
|
||||
else x.sourceFile().getAbsolutePath() < y.sourceFile().getAbsolutePath()
|
||||
}
|
||||
|
||||
from DuplicateBlock d, DuplicateBlock other
|
||||
where
|
||||
d.sourceLines() > 10 and
|
||||
other.getEquivalenceClass() = d.getEquivalenceClass() and
|
||||
sorted_by_location(other, d)
|
||||
select d,
|
||||
"Duplicate code: " + d.sourceLines() + " lines are duplicated at " +
|
||||
other.sourceFile().getShortName() + ":" + other.sourceStartLine().toString()
|
||||
from BasicBlock d
|
||||
where none()
|
||||
select d, "Duplicate code: " + "-1" + " lines are duplicated at " + "<file>" + ":" + "-1"
|
||||
|
||||
12
python/ql/src/external/DuplicateFunction.ql
vendored
12
python/ql/src/external/DuplicateFunction.ql
vendored
@@ -16,15 +16,7 @@
|
||||
*/
|
||||
|
||||
import python
|
||||
import CodeDuplication
|
||||
|
||||
predicate relevant(Function m) { m.getMetrics().getNumberOfLinesOfCode() > 5 }
|
||||
|
||||
from Function m, Function other, string message, int percent
|
||||
where
|
||||
duplicateScopes(m, other, percent, message) and
|
||||
relevant(m) and
|
||||
percent > 95.0 and
|
||||
not duplicateScopes(m.getEnclosingModule(), other.getEnclosingModule(), _, _) and
|
||||
not duplicateScopes(m.getScope(), other.getScope(), _, _)
|
||||
from Function m, Function other, string message
|
||||
where none()
|
||||
select m, message, other, other.getName()
|
||||
|
||||
@@ -16,11 +16,7 @@
|
||||
*/
|
||||
|
||||
import python
|
||||
import CodeDuplication
|
||||
|
||||
from Class c, Class other, string message
|
||||
where
|
||||
duplicateScopes(c, other, _, message) and
|
||||
count(c.getAStmt()) > 3 and
|
||||
not duplicateScopes(c.getEnclosingModule(), _, _, _)
|
||||
where none()
|
||||
select c, message, other, other.getName()
|
||||
|
||||
@@ -16,8 +16,7 @@
|
||||
*/
|
||||
|
||||
import python
|
||||
import CodeDuplication
|
||||
|
||||
from Module m, Module other, int percent, string message
|
||||
where duplicateScopes(m, other, percent, message)
|
||||
from Module m, Module other, string message
|
||||
where none()
|
||||
select m, message, other, other.getName()
|
||||
|
||||
3
python/ql/src/external/MostlySimilarFile.ql
vendored
3
python/ql/src/external/MostlySimilarFile.ql
vendored
@@ -16,8 +16,7 @@
|
||||
*/
|
||||
|
||||
import python
|
||||
import CodeDuplication
|
||||
|
||||
from Module m, Module other, string message
|
||||
where similarScopes(m, other, _, message)
|
||||
where none()
|
||||
select m, message, other, other.getName()
|
||||
|
||||
13
python/ql/src/external/SimilarFunction.ql
vendored
13
python/ql/src/external/SimilarFunction.ql
vendored
@@ -16,16 +16,7 @@
|
||||
*/
|
||||
|
||||
import python
|
||||
import CodeDuplication
|
||||
|
||||
predicate relevant(Function m) { m.getMetrics().getNumberOfLinesOfCode() > 10 }
|
||||
|
||||
from Function m, Function other, string message, int percent
|
||||
where
|
||||
similarScopes(m, other, percent, message) and
|
||||
relevant(m) and
|
||||
percent > 95.0 and
|
||||
not duplicateScopes(m, other, _, _) and
|
||||
not duplicateScopes(m.getEnclosingModule(), other.getEnclosingModule(), _, _) and
|
||||
not duplicateScopes(m.getScope(), other.getScope(), _, _)
|
||||
from Function m, Function other, string message
|
||||
where none()
|
||||
select m, message, other, other.getName()
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
* directed and labeled; they specify how the components represented by nodes relate to each other.
|
||||
*/
|
||||
|
||||
import python
|
||||
private import python
|
||||
import semmle.python.dataflow.new.DataFlow
|
||||
|
||||
/**
|
||||
@@ -55,7 +55,7 @@ module API {
|
||||
/**
|
||||
* Gets a call to the function represented by this API component.
|
||||
*/
|
||||
DataFlow::Node getACall() { result = getReturn().getAnImmediateUse() }
|
||||
DataFlow::CallCfgNode getACall() { result = getReturn().getAnImmediateUse() }
|
||||
|
||||
/**
|
||||
* Gets a node representing member `m` of this API component.
|
||||
|
||||
@@ -526,3 +526,118 @@ module HTTP {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Provides models for cryptographic things. */
|
||||
module Cryptography {
|
||||
/** Provides models for public-key cryptography, also called asymmetric cryptography. */
|
||||
module PublicKey {
|
||||
/**
|
||||
* A data-flow node that generates a new key-pair for use with public-key cryptography.
|
||||
*
|
||||
* Extend this class to refine existing API models. If you want to model new APIs,
|
||||
* extend `KeyGeneration::Range` instead.
|
||||
*/
|
||||
class KeyGeneration extends DataFlow::Node {
|
||||
KeyGeneration::Range range;
|
||||
|
||||
KeyGeneration() { this = range }
|
||||
|
||||
/** Gets the name of the cryptographic algorithm (for example `"RSA"` or `"DSA"`). */
|
||||
string getName() { result = range.getName() }
|
||||
|
||||
/** Gets the argument that specifies the size of the key in bits, if available. */
|
||||
DataFlow::Node getKeySizeArg() { result = range.getKeySizeArg() }
|
||||
|
||||
/**
|
||||
* Gets the size of the key generated (in bits), as well as the `origin` that
|
||||
* explains how we obtained this specific key size.
|
||||
*/
|
||||
int getKeySizeWithOrigin(DataFlow::Node origin) {
|
||||
result = range.getKeySizeWithOrigin(origin)
|
||||
}
|
||||
|
||||
/** Gets the minimum key size (in bits) for this algorithm to be considered secure. */
|
||||
int minimumSecureKeySize() { result = range.minimumSecureKeySize() }
|
||||
}
|
||||
|
||||
/** Provides classes for modeling new key-pair generation APIs. */
|
||||
module KeyGeneration {
|
||||
/** Gets a back-reference to the keysize argument `arg` that was used to generate a new key-pair. */
|
||||
private DataFlow::LocalSourceNode keysizeBacktracker(
|
||||
DataFlow::TypeBackTracker t, DataFlow::Node arg
|
||||
) {
|
||||
t.start() and
|
||||
arg = any(KeyGeneration::Range r).getKeySizeArg() and
|
||||
result = arg.getALocalSource()
|
||||
or
|
||||
// Due to bad performance when using the normal setup, we have inlined that code and forced a join
|
||||
exists(DataFlow::TypeBackTracker t2 |
|
||||
exists(DataFlow::StepSummary summary |
|
||||
keysizeBacktracker_first_join(t2, arg, result, summary) and
|
||||
t = t2.prepend(summary)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private predicate keysizeBacktracker_first_join(
|
||||
DataFlow::TypeBackTracker t2, DataFlow::Node arg, DataFlow::Node res,
|
||||
DataFlow::StepSummary summary
|
||||
) {
|
||||
DataFlow::StepSummary::step(res, keysizeBacktracker(t2, arg), summary)
|
||||
}
|
||||
|
||||
/** Gets a back-reference to the keysize argument `arg` that was used to generate a new key-pair. */
|
||||
DataFlow::LocalSourceNode keysizeBacktracker(DataFlow::Node arg) {
|
||||
result = keysizeBacktracker(DataFlow::TypeBackTracker::end(), arg)
|
||||
}
|
||||
|
||||
/**
|
||||
* A data-flow node that generates a new key-pair for use with public-key cryptography.
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `KeyGeneration` instead.
|
||||
*/
|
||||
abstract class Range extends DataFlow::Node {
|
||||
/** Gets the name of the cryptographic algorithm (for example `"RSA"`). */
|
||||
abstract string getName();
|
||||
|
||||
/** Gets the argument that specifies the size of the key in bits, if available. */
|
||||
abstract DataFlow::Node getKeySizeArg();
|
||||
|
||||
/**
|
||||
* Gets the size of the key generated (in bits), as well as the `origin` that
|
||||
* explains how we obtained this specific key size.
|
||||
*/
|
||||
int getKeySizeWithOrigin(DataFlow::Node origin) {
|
||||
origin = keysizeBacktracker(this.getKeySizeArg()) and
|
||||
result = origin.asExpr().(IntegerLiteral).getValue()
|
||||
}
|
||||
|
||||
/** Gets the minimum key size (in bits) for this algorithm to be considered secure. */
|
||||
abstract int minimumSecureKeySize();
|
||||
}
|
||||
|
||||
/** A data-flow node that generates a new RSA key-pair. */
|
||||
abstract class RsaRange extends Range {
|
||||
final override string getName() { result = "RSA" }
|
||||
|
||||
final override int minimumSecureKeySize() { result = 2048 }
|
||||
}
|
||||
|
||||
/** A data-flow node that generates a new DSA key-pair. */
|
||||
abstract class DsaRange extends Range {
|
||||
final override string getName() { result = "DSA" }
|
||||
|
||||
final override int minimumSecureKeySize() { result = 2048 }
|
||||
}
|
||||
|
||||
/** A data-flow node that generates a new ECC key-pair. */
|
||||
abstract class EccRange extends Range {
|
||||
final override string getName() { result = "ECC" }
|
||||
|
||||
final override int minimumSecureKeySize() { result = 224 }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,6 +2,8 @@
|
||||
* Helper file that imports all framework modeling.
|
||||
*/
|
||||
|
||||
private import semmle.python.frameworks.Cryptodome
|
||||
private import semmle.python.frameworks.Cryptography
|
||||
private import semmle.python.frameworks.Dill
|
||||
private import semmle.python.frameworks.Django
|
||||
private import semmle.python.frameworks.Fabric
|
||||
|
||||
@@ -180,7 +180,7 @@ private newtype TTypeTracker = MkTypeTracker(Boolean hasCall, OptionalAttributeN
|
||||
* It is recommended that all uses of this type are written in the following form,
|
||||
* for tracking some type `myType`:
|
||||
* ```
|
||||
* DataFlow::LocalSourceNode myType(DataFlow::TypeTracker t) {
|
||||
* private DataFlow::LocalSourceNode myType(DataFlow::TypeTracker t) {
|
||||
* t.start() and
|
||||
* result = < source of myType >
|
||||
* or
|
||||
@@ -341,7 +341,7 @@ private newtype TTypeBackTracker = MkTypeBackTracker(Boolean hasReturn, Optional
|
||||
* for back-tracking some callback type `myCallback`:
|
||||
*
|
||||
* ```
|
||||
* DataFlow::LocalSourceNode myCallback(DataFlow::TypeBackTracker t) {
|
||||
* private DataFlow::LocalSourceNode myCallback(DataFlow::TypeBackTracker t) {
|
||||
* t.start() and
|
||||
* result = (< some API call >).getArgument(< n >).getALocalSource()
|
||||
* or
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
import DataFlowUtil
|
||||
import DataFlowPublic
|
||||
private import DataFlowPrivate
|
||||
private import semmle.python.types.Builtins
|
||||
|
||||
/**
|
||||
* A data flow node that reads or writes an attribute of an object.
|
||||
@@ -84,8 +85,6 @@ private class AttributeAssignmentAsAttrWrite extends AttrWrite, CfgNode {
|
||||
override string getAttributeName() { result = node.getName() }
|
||||
}
|
||||
|
||||
import semmle.python.types.Builtins
|
||||
|
||||
/** Represents `CallNode`s that may refer to calls to built-in functions or classes. */
|
||||
private class BuiltInCallNode extends CallNode {
|
||||
string name;
|
||||
|
||||
@@ -26,15 +26,243 @@ predicate accessPathCostLimits(int apLimit, int tupleLimit) {
|
||||
tupleLimit = 1000
|
||||
}
|
||||
|
||||
/**
|
||||
* Provides a simple data-flow analysis for resolving lambda calls. The analysis
|
||||
* currently excludes read-steps, store-steps, and flow-through.
|
||||
*
|
||||
* The analysis uses non-linear recursion: When computing a flow path in or out
|
||||
* of a call, we use the results of the analysis recursively to resolve lambda
|
||||
* calls. For this reason, we cannot reuse the code from `DataFlowImpl.qll` directly.
|
||||
*/
|
||||
private module LambdaFlow {
|
||||
private predicate viableParamNonLambda(DataFlowCall call, int i, ParameterNode p) {
|
||||
p.isParameterOf(viableCallable(call), i)
|
||||
}
|
||||
|
||||
private predicate viableParamLambda(DataFlowCall call, int i, ParameterNode p) {
|
||||
p.isParameterOf(viableCallableLambda(call, _), i)
|
||||
}
|
||||
|
||||
private predicate viableParamArgNonLambda(DataFlowCall call, ParameterNode p, ArgumentNode arg) {
|
||||
exists(int i |
|
||||
viableParamNonLambda(call, i, p) and
|
||||
arg.argumentOf(call, i)
|
||||
)
|
||||
}
|
||||
|
||||
private predicate viableParamArgLambda(DataFlowCall call, ParameterNode p, ArgumentNode arg) {
|
||||
exists(int i |
|
||||
viableParamLambda(call, i, p) and
|
||||
arg.argumentOf(call, i)
|
||||
)
|
||||
}
|
||||
|
||||
private newtype TReturnPositionSimple =
|
||||
TReturnPositionSimple0(DataFlowCallable c, ReturnKind kind) {
|
||||
exists(ReturnNode ret |
|
||||
c = getNodeEnclosingCallable(ret) and
|
||||
kind = ret.getKind()
|
||||
)
|
||||
}
|
||||
|
||||
pragma[noinline]
|
||||
private TReturnPositionSimple getReturnPositionSimple(ReturnNode ret, ReturnKind kind) {
|
||||
result = TReturnPositionSimple0(getNodeEnclosingCallable(ret), kind)
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private TReturnPositionSimple viableReturnPosNonLambda(DataFlowCall call, ReturnKind kind) {
|
||||
result = TReturnPositionSimple0(viableCallable(call), kind)
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private TReturnPositionSimple viableReturnPosLambda(
|
||||
DataFlowCall call, DataFlowCallOption lastCall, ReturnKind kind
|
||||
) {
|
||||
result = TReturnPositionSimple0(viableCallableLambda(call, lastCall), kind)
|
||||
}
|
||||
|
||||
private predicate viableReturnPosOutNonLambda(
|
||||
DataFlowCall call, TReturnPositionSimple pos, OutNode out
|
||||
) {
|
||||
exists(ReturnKind kind |
|
||||
pos = viableReturnPosNonLambda(call, kind) and
|
||||
out = getAnOutNode(call, kind)
|
||||
)
|
||||
}
|
||||
|
||||
private predicate viableReturnPosOutLambda(
|
||||
DataFlowCall call, DataFlowCallOption lastCall, TReturnPositionSimple pos, OutNode out
|
||||
) {
|
||||
exists(ReturnKind kind |
|
||||
pos = viableReturnPosLambda(call, lastCall, kind) and
|
||||
out = getAnOutNode(call, kind)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if data can flow (inter-procedurally) from `node` (of type `t`) to
|
||||
* the lambda call `lambdaCall`.
|
||||
*
|
||||
* The parameter `toReturn` indicates whether the path from `node` to
|
||||
* `lambdaCall` goes through a return, and `toJump` whether the path goes
|
||||
* through a jump step.
|
||||
*
|
||||
* The call context `lastCall` records the last call on the path from `node`
|
||||
* to `lambdaCall`, if any. That is, `lastCall` is able to target the enclosing
|
||||
* callable of `lambdaCall`.
|
||||
*/
|
||||
pragma[nomagic]
|
||||
predicate revLambdaFlow(
|
||||
DataFlowCall lambdaCall, LambdaCallKind kind, Node node, DataFlowType t, boolean toReturn,
|
||||
boolean toJump, DataFlowCallOption lastCall
|
||||
) {
|
||||
revLambdaFlow0(lambdaCall, kind, node, t, toReturn, toJump, lastCall) and
|
||||
if node instanceof CastNode or node instanceof ArgumentNode or node instanceof ReturnNode
|
||||
then compatibleTypes(t, getNodeType(node))
|
||||
else any()
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
predicate revLambdaFlow0(
|
||||
DataFlowCall lambdaCall, LambdaCallKind kind, Node node, DataFlowType t, boolean toReturn,
|
||||
boolean toJump, DataFlowCallOption lastCall
|
||||
) {
|
||||
lambdaCall(lambdaCall, kind, node) and
|
||||
t = getNodeType(node) and
|
||||
toReturn = false and
|
||||
toJump = false and
|
||||
lastCall = TDataFlowCallNone()
|
||||
or
|
||||
// local flow
|
||||
exists(Node mid, DataFlowType t0 |
|
||||
revLambdaFlow(lambdaCall, kind, mid, t0, toReturn, toJump, lastCall)
|
||||
|
|
||||
simpleLocalFlowStep(node, mid) and
|
||||
t = t0
|
||||
or
|
||||
exists(boolean preservesValue |
|
||||
additionalLambdaFlowStep(node, mid, preservesValue) and
|
||||
getNodeEnclosingCallable(node) = getNodeEnclosingCallable(mid)
|
||||
|
|
||||
preservesValue = false and
|
||||
t = getNodeType(node)
|
||||
or
|
||||
preservesValue = true and
|
||||
t = t0
|
||||
)
|
||||
)
|
||||
or
|
||||
// jump step
|
||||
exists(Node mid, DataFlowType t0 |
|
||||
revLambdaFlow(lambdaCall, kind, mid, t0, _, _, _) and
|
||||
toReturn = false and
|
||||
toJump = true and
|
||||
lastCall = TDataFlowCallNone()
|
||||
|
|
||||
jumpStep(node, mid) and
|
||||
t = t0
|
||||
or
|
||||
exists(boolean preservesValue |
|
||||
additionalLambdaFlowStep(node, mid, preservesValue) and
|
||||
getNodeEnclosingCallable(node) != getNodeEnclosingCallable(mid)
|
||||
|
|
||||
preservesValue = false and
|
||||
t = getNodeType(node)
|
||||
or
|
||||
preservesValue = true and
|
||||
t = t0
|
||||
)
|
||||
)
|
||||
or
|
||||
// flow into a callable
|
||||
exists(ParameterNode p, DataFlowCallOption lastCall0, DataFlowCall call |
|
||||
revLambdaFlowIn(lambdaCall, kind, p, t, toJump, lastCall0) and
|
||||
(
|
||||
if lastCall0 = TDataFlowCallNone() and toJump = false
|
||||
then lastCall = TDataFlowCallSome(call)
|
||||
else lastCall = lastCall0
|
||||
) and
|
||||
toReturn = false
|
||||
|
|
||||
viableParamArgNonLambda(call, p, node)
|
||||
or
|
||||
viableParamArgLambda(call, p, node) // non-linear recursion
|
||||
)
|
||||
or
|
||||
// flow out of a callable
|
||||
exists(TReturnPositionSimple pos |
|
||||
revLambdaFlowOut(lambdaCall, kind, pos, t, toJump, lastCall) and
|
||||
getReturnPositionSimple(node, node.(ReturnNode).getKind()) = pos and
|
||||
toReturn = true
|
||||
)
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
predicate revLambdaFlowOutLambdaCall(
|
||||
DataFlowCall lambdaCall, LambdaCallKind kind, OutNode out, DataFlowType t, boolean toJump,
|
||||
DataFlowCall call, DataFlowCallOption lastCall
|
||||
) {
|
||||
revLambdaFlow(lambdaCall, kind, out, t, _, toJump, lastCall) and
|
||||
exists(ReturnKindExt rk |
|
||||
out = rk.getAnOutNode(call) and
|
||||
lambdaCall(call, _, _)
|
||||
)
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
predicate revLambdaFlowOut(
|
||||
DataFlowCall lambdaCall, LambdaCallKind kind, TReturnPositionSimple pos, DataFlowType t,
|
||||
boolean toJump, DataFlowCallOption lastCall
|
||||
) {
|
||||
exists(DataFlowCall call, OutNode out |
|
||||
revLambdaFlow(lambdaCall, kind, out, t, _, toJump, lastCall) and
|
||||
viableReturnPosOutNonLambda(call, pos, out)
|
||||
or
|
||||
// non-linear recursion
|
||||
revLambdaFlowOutLambdaCall(lambdaCall, kind, out, t, toJump, call, lastCall) and
|
||||
viableReturnPosOutLambda(call, _, pos, out)
|
||||
)
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
predicate revLambdaFlowIn(
|
||||
DataFlowCall lambdaCall, LambdaCallKind kind, ParameterNode p, DataFlowType t, boolean toJump,
|
||||
DataFlowCallOption lastCall
|
||||
) {
|
||||
revLambdaFlow(lambdaCall, kind, p, t, false, toJump, lastCall)
|
||||
}
|
||||
}
|
||||
|
||||
private DataFlowCallable viableCallableExt(DataFlowCall call) {
|
||||
result = viableCallable(call)
|
||||
or
|
||||
result = viableCallableLambda(call, _)
|
||||
}
|
||||
|
||||
cached
|
||||
private module Cached {
|
||||
/**
|
||||
* Gets a viable target for the lambda call `call`.
|
||||
*
|
||||
* `lastCall` records the call required to reach `call` in order for the result
|
||||
* to be a viable target, if any.
|
||||
*/
|
||||
cached
|
||||
DataFlowCallable viableCallableLambda(DataFlowCall call, DataFlowCallOption lastCall) {
|
||||
exists(Node creation, LambdaCallKind kind |
|
||||
LambdaFlow::revLambdaFlow(call, kind, creation, _, _, _, lastCall) and
|
||||
lambdaCreation(creation, kind, result)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `p` is the `i`th parameter of a viable dispatch target of `call`.
|
||||
* The instance parameter is considered to have index `-1`.
|
||||
*/
|
||||
pragma[nomagic]
|
||||
private predicate viableParam(DataFlowCall call, int i, ParameterNode p) {
|
||||
p.isParameterOf(viableCallable(call), i)
|
||||
p.isParameterOf(viableCallableExt(call), i)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -52,7 +280,7 @@ private module Cached {
|
||||
|
||||
pragma[nomagic]
|
||||
private ReturnPosition viableReturnPos(DataFlowCall call, ReturnKindExt kind) {
|
||||
viableCallable(call) = result.getCallable() and
|
||||
viableCallableExt(call) = result.getCallable() and
|
||||
kind = result.getKind()
|
||||
}
|
||||
|
||||
@@ -317,6 +545,35 @@ private module Cached {
|
||||
|
||||
cached
|
||||
private module DispatchWithCallContext {
|
||||
/**
|
||||
* Holds if the set of viable implementations that can be called by `call`
|
||||
* might be improved by knowing the call context.
|
||||
*/
|
||||
pragma[nomagic]
|
||||
private predicate mayBenefitFromCallContextExt(DataFlowCall call, DataFlowCallable callable) {
|
||||
mayBenefitFromCallContext(call, callable)
|
||||
or
|
||||
callable = call.getEnclosingCallable() and
|
||||
exists(viableCallableLambda(call, TDataFlowCallSome(_)))
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a viable dispatch target of `call` in the context `ctx`. This is
|
||||
* restricted to those `call`s for which a context might make a difference.
|
||||
*/
|
||||
pragma[nomagic]
|
||||
private DataFlowCallable viableImplInCallContextExt(DataFlowCall call, DataFlowCall ctx) {
|
||||
result = viableImplInCallContext(call, ctx)
|
||||
or
|
||||
result = viableCallableLambda(call, TDataFlowCallSome(ctx))
|
||||
or
|
||||
exists(DataFlowCallable enclosing |
|
||||
mayBenefitFromCallContextExt(call, enclosing) and
|
||||
enclosing = viableCallableExt(ctx) and
|
||||
result = viableCallableLambda(call, TDataFlowCallNone())
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if the call context `ctx` reduces the set of viable run-time
|
||||
* dispatch targets of call `call` in `c`.
|
||||
@@ -324,10 +581,10 @@ private module Cached {
|
||||
cached
|
||||
predicate reducedViableImplInCallContext(DataFlowCall call, DataFlowCallable c, DataFlowCall ctx) {
|
||||
exists(int tgts, int ctxtgts |
|
||||
mayBenefitFromCallContext(call, c) and
|
||||
c = viableCallable(ctx) and
|
||||
ctxtgts = count(viableImplInCallContext(call, ctx)) and
|
||||
tgts = strictcount(viableCallable(call)) and
|
||||
mayBenefitFromCallContextExt(call, c) and
|
||||
c = viableCallableExt(ctx) and
|
||||
ctxtgts = count(viableImplInCallContextExt(call, ctx)) and
|
||||
tgts = strictcount(viableCallableExt(call)) and
|
||||
ctxtgts < tgts
|
||||
)
|
||||
}
|
||||
@@ -339,7 +596,7 @@ private module Cached {
|
||||
*/
|
||||
cached
|
||||
DataFlowCallable prunedViableImplInCallContext(DataFlowCall call, DataFlowCall ctx) {
|
||||
result = viableImplInCallContext(call, ctx) and
|
||||
result = viableImplInCallContextExt(call, ctx) and
|
||||
reducedViableImplInCallContext(call, _, ctx)
|
||||
}
|
||||
|
||||
@@ -351,10 +608,10 @@ private module Cached {
|
||||
cached
|
||||
predicate reducedViableImplInReturn(DataFlowCallable c, DataFlowCall call) {
|
||||
exists(int tgts, int ctxtgts |
|
||||
mayBenefitFromCallContext(call, _) and
|
||||
c = viableCallable(call) and
|
||||
ctxtgts = count(DataFlowCall ctx | c = viableImplInCallContext(call, ctx)) and
|
||||
tgts = strictcount(DataFlowCall ctx | viableCallable(ctx) = call.getEnclosingCallable()) and
|
||||
mayBenefitFromCallContextExt(call, _) and
|
||||
c = viableCallableExt(call) and
|
||||
ctxtgts = count(DataFlowCall ctx | c = viableImplInCallContextExt(call, ctx)) and
|
||||
tgts = strictcount(DataFlowCall ctx | viableCallableExt(ctx) = call.getEnclosingCallable()) and
|
||||
ctxtgts < tgts
|
||||
)
|
||||
}
|
||||
@@ -367,7 +624,7 @@ private module Cached {
|
||||
*/
|
||||
cached
|
||||
DataFlowCallable prunedViableImplInCallContextReverse(DataFlowCall call, DataFlowCall ctx) {
|
||||
result = viableImplInCallContext(call, ctx) and
|
||||
result = viableImplInCallContextExt(call, ctx) and
|
||||
reducedViableImplInReturn(result, call)
|
||||
}
|
||||
}
|
||||
@@ -481,6 +738,11 @@ private module Cached {
|
||||
TBooleanNone() or
|
||||
TBooleanSome(boolean b) { b = true or b = false }
|
||||
|
||||
cached
|
||||
newtype TDataFlowCallOption =
|
||||
TDataFlowCallNone() or
|
||||
TDataFlowCallSome(DataFlowCall call)
|
||||
|
||||
cached
|
||||
newtype TTypedContent = MkTypedContent(Content c, DataFlowType t) { store(_, c, _, _, t) }
|
||||
|
||||
@@ -777,7 +1039,7 @@ ReturnPosition getReturnPosition(ReturnNodeExt ret) {
|
||||
|
||||
bindingset[cc, callable]
|
||||
predicate resolveReturn(CallContext cc, DataFlowCallable callable, DataFlowCall call) {
|
||||
cc instanceof CallContextAny and callable = viableCallable(call)
|
||||
cc instanceof CallContextAny and callable = viableCallableExt(call)
|
||||
or
|
||||
exists(DataFlowCallable c0, DataFlowCall call0 |
|
||||
call0.getEnclosingCallable() = callable and
|
||||
@@ -791,14 +1053,14 @@ DataFlowCallable resolveCall(DataFlowCall call, CallContext cc) {
|
||||
exists(DataFlowCall ctx | cc = TSpecificCall(ctx) |
|
||||
if reducedViableImplInCallContext(call, _, ctx)
|
||||
then result = prunedViableImplInCallContext(call, ctx)
|
||||
else result = viableCallable(call)
|
||||
else result = viableCallableExt(call)
|
||||
)
|
||||
or
|
||||
result = viableCallable(call) and cc instanceof CallContextSomeCall
|
||||
result = viableCallableExt(call) and cc instanceof CallContextSomeCall
|
||||
or
|
||||
result = viableCallable(call) and cc instanceof CallContextAny
|
||||
result = viableCallableExt(call) and cc instanceof CallContextAny
|
||||
or
|
||||
result = viableCallable(call) and cc instanceof CallContextReturn
|
||||
result = viableCallableExt(call) and cc instanceof CallContextReturn
|
||||
}
|
||||
|
||||
predicate read = readStep/3;
|
||||
@@ -812,6 +1074,19 @@ class BooleanOption extends TBooleanOption {
|
||||
}
|
||||
}
|
||||
|
||||
/** An optional `DataFlowCall`. */
|
||||
class DataFlowCallOption extends TDataFlowCallOption {
|
||||
string toString() {
|
||||
this = TDataFlowCallNone() and
|
||||
result = "(none)"
|
||||
or
|
||||
exists(DataFlowCall call |
|
||||
this = TDataFlowCallSome(call) and
|
||||
result = call.toString()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/** Content tagged with the type of a containing object. */
|
||||
class TypedContent extends MkTypedContent {
|
||||
private Content c;
|
||||
|
||||
@@ -1517,10 +1517,13 @@ predicate forReadStep(CfgNode nodeFrom, Content c, Node nodeTo) {
|
||||
or
|
||||
c instanceof SetElementContent
|
||||
or
|
||||
c instanceof TupleElementContent
|
||||
c = small_tuple()
|
||||
)
|
||||
}
|
||||
|
||||
pragma[noinline]
|
||||
TupleElementContent small_tuple() { result.getIndex() <= 7 }
|
||||
|
||||
/**
|
||||
* Holds if `nodeTo` is a read of an attribute (corresponding to `c`) of the object in `nodeFrom`.
|
||||
*
|
||||
@@ -1605,3 +1608,14 @@ int accessPathLimit() { result = 5 }
|
||||
|
||||
/** Holds if `n` should be hidden from path explanations. */
|
||||
predicate nodeIsHidden(Node n) { none() }
|
||||
|
||||
class LambdaCallKind = Unit;
|
||||
|
||||
/** Holds if `creation` is an expression that creates a lambda of kind `kind` for `c`. */
|
||||
predicate lambdaCreation(Node creation, LambdaCallKind kind, DataFlowCallable c) { none() }
|
||||
|
||||
/** Holds if `call` is a lambda call of kind `kind` where `receiver` is the lambda expression. */
|
||||
predicate lambdaCall(DataFlowCall call, LambdaCallKind kind, Node receiver) { none() }
|
||||
|
||||
/** Extra data-flow steps needed for lambda flow analysis. */
|
||||
predicate additionalLambdaFlowStep(Node nodeFrom, Node nodeTo, boolean preservesValue) { none() }
|
||||
|
||||
@@ -10,14 +10,22 @@ import python
|
||||
import DataFlowPublic
|
||||
private import DataFlowPrivate
|
||||
|
||||
private predicate comes_from_cfgnode(Node node) {
|
||||
exists(CfgNode first, Node second |
|
||||
simpleLocalFlowStep(first, second) and
|
||||
simpleLocalFlowStep*(second, node)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* A data flow node that is a source of local flow. This includes things like
|
||||
* - Expressions
|
||||
* - Function parameters
|
||||
*/
|
||||
class LocalSourceNode extends Node {
|
||||
cached
|
||||
LocalSourceNode() {
|
||||
not simpleLocalFlowStep+(any(CfgNode n), this) and
|
||||
not comes_from_cfgnode(this) and
|
||||
not this instanceof ModuleVariableNode
|
||||
or
|
||||
this = any(ModuleVariableNode mvn).getARead()
|
||||
@@ -65,15 +73,12 @@ private module Cached {
|
||||
* The slightly backwards parameter ordering is to force correct indexing.
|
||||
*/
|
||||
cached
|
||||
predicate hasLocalSource(Node sink, Node source) {
|
||||
// Declaring `source` to be a `LocalSourceNode` currently causes a redundant check in the
|
||||
// recursive case, so instead we check it explicitly here.
|
||||
source = sink and
|
||||
source instanceof LocalSourceNode
|
||||
predicate hasLocalSource(Node sink, LocalSourceNode source) {
|
||||
source = sink
|
||||
or
|
||||
exists(Node mid |
|
||||
hasLocalSource(mid, source) and
|
||||
simpleLocalFlowStep(mid, sink)
|
||||
exists(Node second |
|
||||
simpleLocalFlowStep(source, second) and
|
||||
simpleLocalFlowStep*(second, sink)
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
104
python/ql/src/semmle/python/frameworks/Cryptodome.qll
Normal file
104
python/ql/src/semmle/python/frameworks/Cryptodome.qll
Normal file
@@ -0,0 +1,104 @@
|
||||
/**
|
||||
* Provides classes modeling security-relevant aspects of
|
||||
* - the `pycryptodome` PyPI package (imported as `Crypto`)
|
||||
* - the `pycryptodomex` PyPI package (imported as `Cryptodome`)
|
||||
* See https://pycryptodome.readthedocs.io/en/latest/.
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.Concepts
|
||||
private import semmle.python.ApiGraphs
|
||||
|
||||
/**
|
||||
* Provides models for
|
||||
* - the `pycryptodome` PyPI package (imported as `Crypto`)
|
||||
* - the `pycryptodomex` PyPI package (imported as `Cryptodome`)
|
||||
* See https://pycryptodome.readthedocs.io/en/latest/
|
||||
*/
|
||||
private module CryptodomeModel {
|
||||
// ---------------------------------------------------------------------------
|
||||
/**
|
||||
* A call to `Cryptodome.PublicKey.RSA.generate`/`Crypto.PublicKey.RSA.generate`
|
||||
*
|
||||
* See https://pycryptodome.readthedocs.io/en/latest/src/public_key/rsa.html#Crypto.PublicKey.RSA.generate
|
||||
*/
|
||||
class CryptodomePublicKeyRsaGenerateCall extends Cryptography::PublicKey::KeyGeneration::RsaRange,
|
||||
DataFlow::CallCfgNode {
|
||||
CryptodomePublicKeyRsaGenerateCall() {
|
||||
this =
|
||||
API::moduleImport(["Crypto", "Cryptodome"])
|
||||
.getMember("PublicKey")
|
||||
.getMember("RSA")
|
||||
.getMember("generate")
|
||||
.getACall()
|
||||
}
|
||||
|
||||
override DataFlow::Node getKeySizeArg() {
|
||||
result in [this.getArg(0), this.getArgByName("bits")]
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A call to `Cryptodome.PublicKey.DSA.generate`/`Crypto.PublicKey.DSA.generate`
|
||||
*
|
||||
* See https://pycryptodome.readthedocs.io/en/latest/src/public_key/dsa.html#Crypto.PublicKey.DSA.generate
|
||||
*/
|
||||
class CryptodomePublicKeyDsaGenerateCall extends Cryptography::PublicKey::KeyGeneration::DsaRange,
|
||||
DataFlow::CallCfgNode {
|
||||
CryptodomePublicKeyDsaGenerateCall() {
|
||||
this =
|
||||
API::moduleImport(["Crypto", "Cryptodome"])
|
||||
.getMember("PublicKey")
|
||||
.getMember("DSA")
|
||||
.getMember("generate")
|
||||
.getACall()
|
||||
}
|
||||
|
||||
override DataFlow::Node getKeySizeArg() {
|
||||
result in [this.getArg(0), this.getArgByName("bits")]
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A call to `Cryptodome.PublicKey.ECC.generate`/`Crypto.PublicKey.ECC.generate`
|
||||
*
|
||||
* See https://pycryptodome.readthedocs.io/en/latest/src/public_key/ecc.html#Crypto.PublicKey.ECC.generate
|
||||
*/
|
||||
class CryptodomePublicKeyEccGenerateCall extends Cryptography::PublicKey::KeyGeneration::EccRange,
|
||||
DataFlow::CallCfgNode {
|
||||
CryptodomePublicKeyEccGenerateCall() {
|
||||
this =
|
||||
API::moduleImport(["Crypto", "Cryptodome"])
|
||||
.getMember("PublicKey")
|
||||
.getMember("ECC")
|
||||
.getMember("generate")
|
||||
.getACall()
|
||||
}
|
||||
|
||||
/** Gets the argument that specifies the curve to use (a string). */
|
||||
DataFlow::Node getCurveArg() { result = this.getArgByName("curve") }
|
||||
|
||||
/** Gets the name of the curve to use, as well as the origin that explains how we obtained this name. */
|
||||
string getCurveWithOrigin(DataFlow::Node origin) {
|
||||
exists(StrConst str | origin = DataFlow::exprNode(str) |
|
||||
origin = this.getCurveArg().getALocalSource() and
|
||||
result = str.getText()
|
||||
)
|
||||
}
|
||||
|
||||
override int getKeySizeWithOrigin(DataFlow::Node origin) {
|
||||
exists(string curve | curve = this.getCurveWithOrigin(origin) |
|
||||
// using list from https://pycryptodome.readthedocs.io/en/latest/src/public_key/ecc.html
|
||||
curve in ["NIST P-256", "p256", "P-256", "prime256v1", "secp256r1"] and result = 256
|
||||
or
|
||||
curve in ["NIST P-384", "p384", "P-384", "prime384v1", "secp384r1"] and result = 384
|
||||
or
|
||||
curve in ["NIST P-521", "p521", "P-521", "prime521v1", "secp521r1"] and result = 521
|
||||
)
|
||||
}
|
||||
|
||||
// Note: There is not really a key-size argument, since it's always specified by the curve.
|
||||
override DataFlow::Node getKeySizeArg() { none() }
|
||||
}
|
||||
}
|
||||
184
python/ql/src/semmle/python/frameworks/Cryptography.qll
Normal file
184
python/ql/src/semmle/python/frameworks/Cryptography.qll
Normal file
@@ -0,0 +1,184 @@
|
||||
/**
|
||||
* Provides classes modeling security-relevant aspects of the `cryptography` PyPI package.
|
||||
* See https://cryptography.io/en/latest/.
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.Concepts
|
||||
private import semmle.python.ApiGraphs
|
||||
|
||||
/**
|
||||
* Provides models for the `cryptography` PyPI package.
|
||||
* See https://cryptography.io/en/latest/.
|
||||
*/
|
||||
private module CryptographyModel {
|
||||
/**
|
||||
* Provides helper predicates for the elliptic curve cryptography parts in
|
||||
* `cryptography.hazmat.primitives.asymmetric.ec`.
|
||||
*/
|
||||
module Ecc {
|
||||
/**
|
||||
* Gets a predefined curve class from
|
||||
* `cryptography.hazmat.primitives.asymmetric.ec` with a specific key size (in bits).
|
||||
*/
|
||||
private DataFlow::Node curveClassWithKeySize(int keySize) {
|
||||
exists(string curveName |
|
||||
result =
|
||||
API::moduleImport("cryptography")
|
||||
.getMember("hazmat")
|
||||
.getMember("primitives")
|
||||
.getMember("asymmetric")
|
||||
.getMember("ec")
|
||||
.getMember(curveName)
|
||||
.getAUse()
|
||||
|
|
||||
// obtained by manually looking at source code in
|
||||
// https://github.com/pyca/cryptography/blob/cba69f1922803f4f29a3fde01741890d88b8e217/src/cryptography/hazmat/primitives/asymmetric/ec.py#L208-L300
|
||||
curveName = "SECT571R1" and keySize = 570 // Indeed the numbers do not match.
|
||||
or
|
||||
curveName = "SECT409R1" and keySize = 409
|
||||
or
|
||||
curveName = "SECT283R1" and keySize = 283
|
||||
or
|
||||
curveName = "SECT233R1" and keySize = 233
|
||||
or
|
||||
curveName = "SECT163R2" and keySize = 163
|
||||
or
|
||||
curveName = "SECT571K1" and keySize = 571
|
||||
or
|
||||
curveName = "SECT409K1" and keySize = 409
|
||||
or
|
||||
curveName = "SECT283K1" and keySize = 283
|
||||
or
|
||||
curveName = "SECT233K1" and keySize = 233
|
||||
or
|
||||
curveName = "SECT163K1" and keySize = 163
|
||||
or
|
||||
curveName = "SECP521R1" and keySize = 521
|
||||
or
|
||||
curveName = "SECP384R1" and keySize = 384
|
||||
or
|
||||
curveName = "SECP256R1" and keySize = 256
|
||||
or
|
||||
curveName = "SECP256K1" and keySize = 256
|
||||
or
|
||||
curveName = "SECP224R1" and keySize = 224
|
||||
or
|
||||
curveName = "SECP192R1" and keySize = 192
|
||||
or
|
||||
curveName = "BrainpoolP256R1" and keySize = 256
|
||||
or
|
||||
curveName = "BrainpoolP384R1" and keySize = 384
|
||||
or
|
||||
curveName = "BrainpoolP512R1" and keySize = 512
|
||||
)
|
||||
}
|
||||
|
||||
/** Gets a reference to a predefined curve class instance with a specific key size (in bits), as well as the origin of the class. */
|
||||
private DataFlow::LocalSourceNode curveClassInstanceWithKeySize(
|
||||
DataFlow::TypeTracker t, int keySize, DataFlow::Node origin
|
||||
) {
|
||||
t.start() and
|
||||
result.(DataFlow::CallCfgNode).getFunction() = curveClassWithKeySize(keySize) and
|
||||
origin = result
|
||||
or
|
||||
// Due to bad performance when using the normal setup, we have inlined that code and forced a join
|
||||
exists(DataFlow::TypeTracker t2 |
|
||||
exists(DataFlow::StepSummary summary |
|
||||
curveClassInstanceWithKeySize_first_join(t2, keySize, origin, result, summary) and
|
||||
t = t2.append(summary)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private predicate curveClassInstanceWithKeySize_first_join(
|
||||
DataFlow::TypeTracker t2, int keySize, DataFlow::Node origin, DataFlow::Node res,
|
||||
DataFlow::StepSummary summary
|
||||
) {
|
||||
DataFlow::StepSummary::step(curveClassInstanceWithKeySize(t2, keySize, origin), res, summary)
|
||||
}
|
||||
|
||||
/** Gets a reference to a predefined curve class instance with a specific key size (in bits), as well as the origin of the class. */
|
||||
DataFlow::Node curveClassInstanceWithKeySize(int keySize, DataFlow::Node origin) {
|
||||
curveClassInstanceWithKeySize(DataFlow::TypeTracker::end(), keySize, origin).flowsTo(result)
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
/**
|
||||
* A call to `cryptography.hazmat.primitives.asymmetric.rsa.generate_private_key`
|
||||
*
|
||||
* See https://cryptography.io/en/latest/hazmat/primitives/asymmetric/rsa.html#cryptography.hazmat.primitives.asymmetric.rsa.generate_private_key
|
||||
*/
|
||||
class CryptographyRsaGeneratePrivateKeyCall extends Cryptography::PublicKey::KeyGeneration::RsaRange,
|
||||
DataFlow::CallCfgNode {
|
||||
CryptographyRsaGeneratePrivateKeyCall() {
|
||||
this =
|
||||
API::moduleImport("cryptography")
|
||||
.getMember("hazmat")
|
||||
.getMember("primitives")
|
||||
.getMember("asymmetric")
|
||||
.getMember("rsa")
|
||||
.getMember("generate_private_key")
|
||||
.getACall()
|
||||
}
|
||||
|
||||
override DataFlow::Node getKeySizeArg() {
|
||||
result in [this.getArg(1), this.getArgByName("key_size")]
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A call to `cryptography.hazmat.primitives.asymmetric.dsa.generate_private_key`
|
||||
*
|
||||
* See https://cryptography.io/en/latest/hazmat/primitives/asymmetric/dsa.html#cryptography.hazmat.primitives.asymmetric.dsa.generate_private_key
|
||||
*/
|
||||
class CryptographyDsaGeneratePrivateKeyCall extends Cryptography::PublicKey::KeyGeneration::DsaRange,
|
||||
DataFlow::CallCfgNode {
|
||||
CryptographyDsaGeneratePrivateKeyCall() {
|
||||
this =
|
||||
API::moduleImport("cryptography")
|
||||
.getMember("hazmat")
|
||||
.getMember("primitives")
|
||||
.getMember("asymmetric")
|
||||
.getMember("dsa")
|
||||
.getMember("generate_private_key")
|
||||
.getACall()
|
||||
}
|
||||
|
||||
override DataFlow::Node getKeySizeArg() {
|
||||
result in [this.getArg(0), this.getArgByName("key_size")]
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A call to `cryptography.hazmat.primitives.asymmetric.ec.generate_private_key`
|
||||
*
|
||||
* See https://cryptography.io/en/latest/hazmat/primitives/asymmetric/ec.html#cryptography.hazmat.primitives.asymmetric.ec.generate_private_key
|
||||
*/
|
||||
class CryptographyEcGeneratePrivateKeyCall extends Cryptography::PublicKey::KeyGeneration::EccRange,
|
||||
DataFlow::CallCfgNode {
|
||||
CryptographyEcGeneratePrivateKeyCall() {
|
||||
this =
|
||||
API::moduleImport("cryptography")
|
||||
.getMember("hazmat")
|
||||
.getMember("primitives")
|
||||
.getMember("asymmetric")
|
||||
.getMember("ec")
|
||||
.getMember("generate_private_key")
|
||||
.getACall()
|
||||
}
|
||||
|
||||
/** Gets the argument that specifies the curve to use. */
|
||||
DataFlow::Node getCurveArg() { result in [this.getArg(0), this.getArgByName("curve")] }
|
||||
|
||||
override int getKeySizeWithOrigin(DataFlow::Node origin) {
|
||||
this.getCurveArg() = Ecc::curveClassInstanceWithKeySize(result, origin)
|
||||
}
|
||||
|
||||
// Note: There is not really a key-size argument, since it's always specified by the curve.
|
||||
override DataFlow::Node getKeySizeArg() { none() }
|
||||
}
|
||||
}
|
||||
@@ -8,6 +8,7 @@ private import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import semmle.python.dataflow.new.TaintTracking
|
||||
private import semmle.python.Concepts
|
||||
private import semmle.python.ApiGraphs
|
||||
private import semmle.python.frameworks.PEP249
|
||||
private import semmle.python.regex
|
||||
|
||||
@@ -1975,6 +1976,205 @@ private module Django {
|
||||
}
|
||||
}
|
||||
|
||||
/** Provides models for django forms (defined in the `django.forms` module) */
|
||||
module Forms {
|
||||
/**
 * Provides models for the `django.forms.forms.BaseForm` class and subclasses. This
 * is usually used by the `django.forms.forms.Form` class, which is also available
 * under the more commonly used alias `django.forms.Form`.
 *
 * See https://docs.djangoproject.com/en/3.1/ref/forms/api/
 */
module Form {
  /**
   * Gets a reference to the `django.forms.forms.BaseForm` class or any subclass.
   *
   * Besides the canonical definitions and their `django.forms` aliases, this also
   * covers the form classes that Django itself defines in `django.contrib.*` and
   * `django.forms.formsets`, so that user classes extending those are recognized too.
   */
  API::Node subclassRef() {
    exists(API::Node django, API::Node base |
      django = API::moduleImport("django") and
      // every known form class, and (transitively) anything that subclasses it
      result = base.getASubclass*()
    |
      // canonical definitions
      base = django.getMember("forms").getMember("forms").getMember(["BaseForm", "Form"])
      or
      base =
        django.getMember("forms").getMember("models").getMember(["BaseModelForm", "ModelForm"])
      or
      // aliases from `django.forms`
      base =
        django
            .getMember("forms")
            .getMember(["BaseForm", "Form", "BaseModelForm", "ModelForm"])
      or
      // other Form subclasses defined in Django
      base =
        django
            .getMember("contrib")
            .getMember("admin")
            .getMember("forms")
            .getMember(["AdminAuthenticationForm", "AdminPasswordChangeForm"])
      or
      base =
        django
            .getMember("contrib")
            .getMember("admin")
            .getMember("helpers")
            .getMember("ActionForm")
      or
      base =
        django
            .getMember("contrib")
            .getMember("admin")
            .getMember("views")
            .getMember("main")
            .getMember("ChangeListSearchForm")
      or
      base =
        django
            .getMember("contrib")
            .getMember("auth")
            .getMember("forms")
            .getMember([
                "PasswordResetForm", "UserChangeForm", "SetPasswordForm",
                "AdminPasswordChangeForm", "PasswordChangeForm", "AuthenticationForm",
                "UserCreationForm"
              ])
      or
      base =
        django
            .getMember("contrib")
            .getMember("flatpages")
            .getMember("forms")
            .getMember("FlatpageForm")
      or
      base = django.getMember("forms").getMember("formsets").getMember("ManagementForm")
    )
  }
}
|
||||
|
||||
/**
 * Provides models for the `django.forms.fields.Field` class and subclasses. This is
 * also available under the more commonly used alias `django.forms.Field`.
 *
 * See https://docs.djangoproject.com/en/3.1/ref/forms/fields/
 */
module Field {
  /**
   * Gets a reference to the `django.forms.fields.Field` class or any subclass.
   *
   * Besides `Field` itself, this covers the field classes Django defines in
   * `django.forms.fields`, `django.forms.models` and `django.contrib.*`, so that
   * user classes extending those are recognized too.
   */
  API::Node subclassRef() {
    exists(API::Node django, API::Node base |
      django = API::moduleImport("django") and
      // every known field class, and (transitively) anything that subclasses it
      result = base.getASubclass*()
    |
      exists(string modName, string clsName |
        // canonical definition: `django.forms.<modName>.<clsName>`
        base = django.getMember("forms").getMember(modName).getMember(clsName)
        or
        // alias from `django.forms`
        base = django.getMember("forms").getMember(clsName)
      |
        modName = "fields" and
        clsName in [
            "Field",
            // Known subclasses
            "BooleanField", "IntegerField", "CharField", "SlugField", "DateTimeField",
            "EmailField", "DateField", "TimeField", "DurationField", "DecimalField", "FloatField",
            "GenericIPAddressField", "UUIDField", "JSONField", "FilePathField",
            "NullBooleanField", "URLField", "TypedChoiceField", "FileField", "ImageField",
            "RegexField", "ChoiceField", "MultipleChoiceField", "ComboField", "MultiValueField",
            "SplitDateTimeField", "TypedMultipleChoiceField", "BaseTemporalField"
          ]
        or
        // Known subclasses from `django.forms.models`
        modName = "models" and
        clsName in ["ModelChoiceField", "ModelMultipleChoiceField", "InlineForeignKeyField"]
      )
      or
      // other Field subclasses defined in Django
      base =
        django
            .getMember("contrib")
            .getMember("auth")
            .getMember("forms")
            .getMember(["ReadOnlyPasswordHashField", "UsernameField"])
      or
      base =
        django
            .getMember("contrib")
            .getMember("gis")
            .getMember("forms")
            .getMember("fields")
            .getMember([
                "GeometryCollectionField", "GeometryField", "LineStringField",
                "MultiLineStringField", "MultiPointField", "MultiPolygonField", "PointField",
                "PolygonField"
              ])
      or
      base =
        django
            .getMember("contrib")
            .getMember("postgres")
            .getMember("forms")
            .getMember("array")
            .getMember(["SimpleArrayField", "SplitArrayField"])
      or
      base =
        django
            .getMember("contrib")
            .getMember("postgres")
            .getMember("forms")
            .getMember("hstore")
            .getMember("HStoreField")
      or
      base =
        django
            .getMember("contrib")
            .getMember("postgres")
            .getMember("forms")
            .getMember("ranges")
            .getMember([
                "BaseRangeField", "DateRangeField", "DateTimeRangeField", "DecimalRangeField",
                "IntegerRangeField"
              ])
    )
  }
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
/**
|
||||
* Gets the last decorator call for the function `func`, if `func` has decorators.
|
||||
*/
|
||||
@@ -1983,6 +2183,96 @@ private module Django {
|
||||
not exists(Call other_decorator | other_decorator.getArg(0) = result)
|
||||
}
|
||||
|
||||
/** A mixin that provides the `getASelfRef` member predicate to a modeled class. */
abstract private class SelfRefMixin extends Class {
  /**
   * Gets a node that, under type-tracker state `t`, refers to an instance of this
   * class. Tracking starts at the first parameter of any method defined on this class.
   *
   * Known limitations (TODO):
   * - the MRO is not taken into account
   * - `staticmethod`/`classmethod` are not taken into account
   */
  private DataFlow::Node getASelfRef(DataFlow::TypeTracker t) {
    t.start() and
    this.getAMethod().getArg(0) = result.(DataFlow::ParameterNode).getParameter()
    or
    exists(DataFlow::TypeTracker prev | this.getASelfRef(prev).track(prev, t) = result)
  }

  /**
   * Gets a node that refers to an instance of this class, where the reference
   * originates from the first (`self`) parameter of a method defined on this class.
   *
   * Known limitations (TODO):
   * - the MRO is not taken into account
   * - `staticmethod`/`classmethod` are not taken into account
   */
  DataFlow::Node getASelfRef() { this.getASelfRef(DataFlow::TypeTracker::end()) = result }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Form and form field modeling
|
||||
// ---------------------------------------------------------------------------
|
||||
/**
 * A class that has a base expression referring to one of the form classes given by
 * `Django::Forms::Form::subclassRef` (that is, `django.forms.Form` or a subclass),
 * and which thereby handles user input.
 */
class DjangoFormClass extends Class, SelfRefMixin {
  DjangoFormClass() {
    exists(Expr baseExpr | baseExpr = this.getABase() |
      Django::Forms::Form::subclassRef().getAUse().asExpr() = baseExpr
    )
  }
}
|
||||
|
||||
/**
 * A source of cleaned_data on a `DjangoFormClass`: either the return value from
 * `super().clean()` called inside a method of the form class, or a read of
 * `self.cleaned_data`.
 *
 * See https://docs.djangoproject.com/en/3.1/ref/forms/validation/#form-and-field-validation
 */
private class DjangoFormCleanedData extends RemoteFlowSource::Range, DataFlow::Node {
  DjangoFormCleanedData() {
    exists(DjangoFormClass cls, Function meth |
      cls.getAMethod() = meth and
      (
        // `super().clean()`, restricted to calls located in a method of the form class
        this = API::builtin("super").getReturn().getMember("clean").getACall() and
        this.getScope() = meth
        or
        // `<self>.cleaned_data`, where `<self>` is tracked from a method's first parameter
        this.(DataFlow::AttrRead).getAttributeName() = "cleaned_data" and
        this.(DataFlow::AttrRead).getObject() = cls.getASelfRef()
      )
    )
  }

  // This models form-level cleaned data, so the description names the Form subclass
  // (and not the field-level value parameter, which is a separate source).
  override string getSourceType() { result = "django.forms.Form subclass, cleaned_data" }
}
|
||||
|
||||
/**
 * A class that has a base expression referring to one of the field classes given by
 * `Django::Forms::Field::subclassRef` (that is, `django.forms.Field` or a subclass),
 * and which thereby handles user input.
 */
class DjangoFormFieldClass extends Class {
  DjangoFormFieldClass() {
    exists(Expr baseExpr | baseExpr = this.getABase() |
      Django::Forms::Field::subclassRef().getAUse().asExpr() = baseExpr
    )
  }
}
|
||||
|
||||
/**
 * The parameter at index 1 (the one after `self`) of a `to_python`, `validate`,
 * `run_validators` or `clean` method on a `DjangoFormFieldClass`; this parameter
 * receives the user-supplied value for the field.
 *
 * See https://docs.djangoproject.com/en/3.1/ref/forms/validation/#form-and-field-validation
 */
private class DjangoFormFieldValueParam extends RemoteFlowSource::Range, DataFlow::ParameterNode {
  DjangoFormFieldValueParam() {
    exists(Function valueMethod |
      valueMethod = any(DjangoFormFieldClass fieldCls).getAMethod() and
      valueMethod.getName() = ["to_python", "validate", "run_validators", "clean"] and
      valueMethod.getArg(1) = this.getParameter()
    )
  }

  override string getSourceType() {
    result = "django.forms.Field subclass, value parameter in method"
  }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// routing modeling
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -2068,7 +2358,7 @@ private module Django {
|
||||
}
|
||||
|
||||
/** A class that we consider a django View class. */
|
||||
abstract class DjangoViewClass extends DjangoViewClassHelper {
|
||||
abstract class DjangoViewClass extends DjangoViewClassHelper, SelfRefMixin {
|
||||
/** Gets a function that could handle incoming requests, if any. */
|
||||
Function getARequestHandler() {
|
||||
// TODO: This doesn't handle attribute assignment. Should be OK, but analysis is not as complete as with
|
||||
@@ -2080,29 +2370,6 @@ private module Django {
|
||||
result.getName() = "get_redirect_url"
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a reference to instances of this class, originating from a self parameter of
|
||||
* a method defined on this class.
|
||||
*
|
||||
* Note: TODO: This doesn't take MRO into account
|
||||
* Note: TODO: This doesn't take staticmethod/classmethod into account
|
||||
*/
|
||||
private DataFlow::Node getASelfRef(DataFlow::TypeTracker t) {
|
||||
t.start() and
|
||||
result.(DataFlow::ParameterNode).getParameter() = this.getAMethod().getArg(0)
|
||||
or
|
||||
exists(DataFlow::TypeTracker t2 | result = this.getASelfRef(t2).track(t2, t))
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a reference to instances of this class, originating from a self parameter of
|
||||
* a method defined on this class.
|
||||
*
|
||||
* Note: TODO: This doesn't take MRO into account
|
||||
* Note: TODO: This doesn't take staticmethod/classmethod into account
|
||||
*/
|
||||
DataFlow::Node getASelfRef() { result = this.getASelfRef(DataFlow::TypeTracker::end()) }
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -2393,7 +2660,7 @@ private module Django {
|
||||
}
|
||||
|
||||
override string getSourceType() {
|
||||
result = "django.http.request.HttpRequest (attribute on self in View class)"
|
||||
result = "django HttpRequest from self.request in View class"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2413,7 +2680,7 @@ private module Django {
|
||||
}
|
||||
|
||||
override string getSourceType() {
|
||||
result = "django routed param from attribute on self in View class"
|
||||
result = "django routed param from self.args/kwargs in View class"
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -8,6 +8,7 @@ private import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.dataflow.new.TaintTracking
|
||||
private import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import semmle.python.Concepts
|
||||
private import semmle.python.ApiGraphs
|
||||
private import PEP249
|
||||
|
||||
/** Provides models for the Python standard library. */
|
||||
@@ -684,96 +685,35 @@ private module Stdlib {
|
||||
// ---------------------------------------------------------------------------
|
||||
// builtins
|
||||
// ---------------------------------------------------------------------------
|
||||
/** Gets a reference to the `builtins` module (called `__builtin__` in Python 2). */
|
||||
private DataFlow::Node builtins(DataFlow::TypeTracker t) {
|
||||
t.start() and
|
||||
result = DataFlow::importNode(["builtins", "__builtin__"])
|
||||
or
|
||||
exists(DataFlow::TypeTracker t2 | result = builtins(t2).track(t2, t))
|
||||
}
|
||||
|
||||
/** Gets a reference to the `builtins` module. */
|
||||
DataFlow::Node builtins() { result = builtins(DataFlow::TypeTracker::end()) }
|
||||
|
||||
/**
|
||||
* Gets a reference to the attribute `attr_name` of the `builtins` module.
|
||||
* WARNING: Only holds for a few predefined attributes.
|
||||
*/
|
||||
private DataFlow::Node builtins_attr(DataFlow::TypeTracker t, string attr_name) {
|
||||
attr_name in ["exec", "eval", "compile", "open"] and
|
||||
(
|
||||
t.start() and
|
||||
result = DataFlow::importNode(["builtins", "__builtin__"] + "." + attr_name)
|
||||
or
|
||||
t.startInAttr(attr_name) and
|
||||
result = DataFlow::importNode(["builtins", "__builtin__"])
|
||||
or
|
||||
// special handling of builtins, that are in scope without any imports
|
||||
// TODO: Take care of overrides, either `def eval: ...`, `eval = ...`, or `builtins.eval = ...`
|
||||
t.start() and
|
||||
exists(NameNode ref | result.asCfgNode() = ref |
|
||||
ref.isGlobal() and
|
||||
ref.getId() = attr_name and
|
||||
ref.isLoad()
|
||||
)
|
||||
)
|
||||
or
|
||||
// Due to bad performance when using normal setup with `builtins_attr(t2, attr_name).track(t2, t)`
|
||||
// we have inlined that code and forced a join
|
||||
exists(DataFlow::TypeTracker t2 |
|
||||
exists(DataFlow::StepSummary summary |
|
||||
builtins_attr_first_join(t2, attr_name, result, summary) and
|
||||
t = t2.append(summary)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private predicate builtins_attr_first_join(
|
||||
DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary
|
||||
) {
|
||||
DataFlow::StepSummary::step(builtins_attr(t2, attr_name), res, summary)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a reference to the attribute `attr_name` of the `builtins` module.
|
||||
* WARNING: Only holds for a few predefined attributes.
|
||||
*/
|
||||
private DataFlow::Node builtins_attr(string attr_name) {
|
||||
result = builtins_attr(DataFlow::TypeTracker::end(), attr_name)
|
||||
}
|
||||
|
||||
/**
|
||||
* A call to the builtin `exec` function.
|
||||
* See https://docs.python.org/3/library/functions.html#exec
|
||||
*/
|
||||
private class BuiltinsExecCall extends CodeExecution::Range, DataFlow::CfgNode {
|
||||
override CallNode node;
|
||||
private class BuiltinsExecCall extends CodeExecution::Range, DataFlow::CallCfgNode {
|
||||
BuiltinsExecCall() { this = API::builtin("exec").getACall() }
|
||||
|
||||
BuiltinsExecCall() { node.getFunction() = builtins_attr("exec").asCfgNode() }
|
||||
|
||||
override DataFlow::Node getCode() { result.asCfgNode() = node.getArg(0) }
|
||||
override DataFlow::Node getCode() { result = this.getArg(0) }
|
||||
}
|
||||
|
||||
/**
|
||||
* A call to the builtin `eval` function.
|
||||
* See https://docs.python.org/3/library/functions.html#eval
|
||||
*/
|
||||
private class BuiltinsEvalCall extends CodeExecution::Range, DataFlow::CfgNode {
|
||||
private class BuiltinsEvalCall extends CodeExecution::Range, DataFlow::CallCfgNode {
|
||||
override CallNode node;
|
||||
|
||||
BuiltinsEvalCall() { node.getFunction() = builtins_attr("eval").asCfgNode() }
|
||||
BuiltinsEvalCall() { this = API::builtin("eval").getACall() }
|
||||
|
||||
override DataFlow::Node getCode() { result.asCfgNode() = node.getArg(0) }
|
||||
override DataFlow::Node getCode() { result = this.getArg(0) }
|
||||
}
|
||||
|
||||
/** An additional taint step for calls to the builtin function `compile` */
|
||||
private class BuiltinsCompileCallAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
|
||||
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
|
||||
exists(CallNode call |
|
||||
nodeTo.asCfgNode() = call and
|
||||
call.getFunction() = builtins_attr("compile").asCfgNode() and
|
||||
nodeFrom.asCfgNode() in [call.getArg(0), call.getArgByName("source")]
|
||||
exists(DataFlow::CallCfgNode call |
|
||||
nodeTo = call and
|
||||
call = API::builtin("compile").getACall() and
|
||||
nodeFrom in [call.getArg(0), call.getArgByName("source")]
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -782,23 +722,22 @@ private module Stdlib {
|
||||
* A call to the builtin `open` function.
|
||||
* See https://docs.python.org/3/library/functions.html#open
|
||||
*/
|
||||
private class OpenCall extends FileSystemAccess::Range, DataFlow::CfgNode {
|
||||
override CallNode node;
|
||||
|
||||
private class OpenCall extends FileSystemAccess::Range, DataFlow::CallCfgNode {
|
||||
OpenCall() {
|
||||
node.getFunction() = builtins_attr("open").asCfgNode()
|
||||
this = API::builtin("open").getACall()
|
||||
or
|
||||
node.getFunction() = io_attr("open").asCfgNode()
|
||||
// io.open is a special case, since it is an alias for the builtin `open`
|
||||
this = API::moduleImport("io").getMember("open").getACall()
|
||||
}
|
||||
|
||||
override DataFlow::Node getAPathArgument() {
|
||||
result.asCfgNode() in [node.getArg(0), node.getArgByName("file")]
|
||||
result in [this.getArg(0), this.getArgByName("file")]
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* An exec statement (only Python 2).
|
||||
* Se ehttps://docs.python.org/2/reference/simple_stmts.html#the-exec-statement.
|
||||
* See https://docs.python.org/2/reference/simple_stmts.html#the-exec-statement.
|
||||
*/
|
||||
private class ExecStatement extends CodeExecution::Range {
|
||||
ExecStatement() {
|
||||
@@ -942,59 +881,6 @@ private module Stdlib {
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// io
|
||||
// ---------------------------------------------------------------------------
|
||||
/** Gets a reference to the `io` module. */
|
||||
private DataFlow::Node io(DataFlow::TypeTracker t) {
|
||||
t.start() and
|
||||
result = DataFlow::importNode("io")
|
||||
or
|
||||
exists(DataFlow::TypeTracker t2 | result = io(t2).track(t2, t))
|
||||
}
|
||||
|
||||
/** Gets a reference to the `io` module. */
|
||||
DataFlow::Node io() { result = io(DataFlow::TypeTracker::end()) }
|
||||
|
||||
/**
|
||||
* Gets a reference to the attribute `attr_name` of the `io` module.
|
||||
* WARNING: Only holds for a few predefined attributes.
|
||||
*/
|
||||
private DataFlow::Node io_attr(DataFlow::TypeTracker t, string attr_name) {
|
||||
attr_name in ["open"] and
|
||||
(
|
||||
t.start() and
|
||||
result = DataFlow::importNode("io" + "." + attr_name)
|
||||
or
|
||||
t.startInAttr(attr_name) and
|
||||
result = io()
|
||||
)
|
||||
or
|
||||
// Due to bad performance when using normal setup with `io_attr(t2, attr_name).track(t2, t)`
|
||||
// we have inlined that code and forced a join
|
||||
exists(DataFlow::TypeTracker t2 |
|
||||
exists(DataFlow::StepSummary summary |
|
||||
io_attr_first_join(t2, attr_name, result, summary) and
|
||||
t = t2.append(summary)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private predicate io_attr_first_join(
|
||||
DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary
|
||||
) {
|
||||
DataFlow::StepSummary::step(io_attr(t2, attr_name), res, summary)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a reference to the attribute `attr_name` of the `io` module.
|
||||
* WARNING: Only holds for a few predefined attributes.
|
||||
*/
|
||||
private DataFlow::Node io_attr(string attr_name) {
|
||||
result = io_attr(DataFlow::TypeTracker::end(), attr_name)
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// json
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
@@ -1,119 +1,72 @@
|
||||
/**
|
||||
* Provides classes modeling security-relevant aspects of the PyYAML package
|
||||
* https://pyyaml.org/wiki/PyYAMLDocumentation (obtained via `import yaml`).
|
||||
* Provides classes modeling security-relevant aspects of the PyYAML package (obtained
|
||||
* via `import yaml`)
|
||||
*
|
||||
* See
|
||||
* - https://pyyaml.org/wiki/PyYAMLDocumentation
|
||||
* - https://pyyaml.docsforge.com/master/documentation/
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import semmle.python.Concepts
|
||||
|
||||
private module Yaml {
|
||||
/** Gets a reference to the `yaml` module. */
|
||||
private DataFlow::Node yaml(DataFlow::TypeTracker t) {
|
||||
t.start() and
|
||||
result = DataFlow::importNode("yaml")
|
||||
or
|
||||
exists(DataFlow::TypeTracker t2 | result = yaml(t2).track(t2, t))
|
||||
}
|
||||
|
||||
/** Gets a reference to the `yaml` module. */
|
||||
DataFlow::Node yaml() { result = yaml(DataFlow::TypeTracker::end()) }
|
||||
|
||||
/** Provides models for the `yaml` module. */
|
||||
module yaml {
|
||||
/**
|
||||
* Gets a reference to the attribute `attr_name` of the `yaml` module.
|
||||
* WARNING: Only holds for a few predefined attributes.
|
||||
*
|
||||
* For example, using `attr_name = "load"` will get all uses of `yaml.load`.
|
||||
*/
|
||||
private DataFlow::Node yaml_attr(DataFlow::TypeTracker t, string attr_name) {
|
||||
attr_name in [
|
||||
// functions
|
||||
"load", "load_all", "full_load", "full_load_all", "unsafe_load", "unsafe_load_all",
|
||||
"safe_load", "safe_load_all",
|
||||
// Classes
|
||||
"SafeLoader", "BaseLoader"
|
||||
] and
|
||||
(
|
||||
t.start() and
|
||||
result = DataFlow::importNode("yaml." + attr_name)
|
||||
or
|
||||
t.startInAttr(attr_name) and
|
||||
result = yaml()
|
||||
)
|
||||
or
|
||||
// Due to bad performance when using normal setup with `yaml_attr(t2, attr_name).track(t2, t)`
|
||||
// we have inlined that code and forced a join
|
||||
exists(DataFlow::TypeTracker t2 |
|
||||
exists(DataFlow::StepSummary summary |
|
||||
yaml_attr_first_join(t2, attr_name, result, summary) and
|
||||
t = t2.append(summary)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private predicate yaml_attr_first_join(
|
||||
DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary
|
||||
) {
|
||||
DataFlow::StepSummary::step(yaml_attr(t2, attr_name), res, summary)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a reference to the attribute `attr_name` of the `yaml` module.
|
||||
* WARNING: Only holds for a few predefined attributes.
|
||||
*
|
||||
* For example, using `attr_name = "load"` will get all uses of `yaml.load`.
|
||||
*/
|
||||
DataFlow::Node yaml_attr(string attr_name) {
|
||||
result = yaml_attr(DataFlow::TypeTracker::end(), attr_name)
|
||||
}
|
||||
}
|
||||
}
|
||||
private import semmle.python.ApiGraphs
|
||||
|
||||
/**
|
||||
* A call to any of the loading functions in `yaml` (`load`, `load_all`, `full_load`,
|
||||
* `full_load_all`, `unsafe_load`, `unsafe_load_all`, `safe_load`, `safe_load_all`)
|
||||
* Provides classes modeling security-relevant aspects of the PyYAML package (obtained
|
||||
* via `import yaml`)
|
||||
*
|
||||
* See https://pyyaml.org/wiki/PyYAMLDocumentation (you will have to scroll down).
|
||||
* See
|
||||
* - https://pyyaml.org/wiki/PyYAMLDocumentation
|
||||
* - https://pyyaml.docsforge.com/master/documentation/
|
||||
*/
|
||||
private class YamlLoadCall extends Decoding::Range, DataFlow::CfgNode {
|
||||
override CallNode node;
|
||||
string func_name;
|
||||
|
||||
YamlLoadCall() {
|
||||
func_name in [
|
||||
"load", "load_all", "full_load", "full_load_all", "unsafe_load", "unsafe_load_all",
|
||||
"safe_load", "safe_load_all"
|
||||
] and
|
||||
node.getFunction() = Yaml::yaml::yaml_attr(func_name).asCfgNode()
|
||||
}
|
||||
|
||||
private module Yaml {
|
||||
/**
|
||||
* This function was thought safe from the 5.1 release in 2017, when the default loader was changed to `FullLoader`.
|
||||
* In 2020 new exploits were found, meaning it's not safe. The Current plan is to change the default to `SafeLoader` in release 6.0
|
||||
* (as explained in https://github.com/yaml/pyyaml/issues/420#issuecomment-696752389).
|
||||
* Until 6.0 is released, we will mark `yaml.load` as possibly leading to arbitrary code execution.
|
||||
* See https://github.com/yaml/pyyaml/wiki/PyYAML-yaml.load(input)-Deprecation for more details.
|
||||
* A call to any of the loading functions in `yaml` (`load`, `load_all`, `full_load`,
|
||||
* `full_load_all`, `unsafe_load`, `unsafe_load_all`, `safe_load`, `safe_load_all`)
|
||||
*
|
||||
* See https://pyyaml.org/wiki/PyYAMLDocumentation (you will have to scroll down).
|
||||
*/
|
||||
override predicate mayExecuteInput() {
|
||||
func_name in ["full_load", "full_load_all", "unsafe_load", "unsafe_load_all"]
|
||||
or
|
||||
func_name in ["load", "load_all"] and
|
||||
// If the `Loader` is not set to either `SafeLoader` or `BaseLoader` or not set at all,
|
||||
// then the default loader will be used, which is not safe.
|
||||
not exists(DataFlow::Node loader_arg |
|
||||
loader_arg.asCfgNode() in [node.getArg(1), node.getArgByName("Loader")]
|
||||
|
|
||||
loader_arg = Yaml::yaml::yaml_attr(["SafeLoader", "BaseLoader"])
|
||||
)
|
||||
private class YamlLoadCall extends Decoding::Range, DataFlow::CallCfgNode {
|
||||
override CallNode node;
|
||||
string func_name;
|
||||
|
||||
YamlLoadCall() {
|
||||
func_name in [
|
||||
"load", "load_all", "full_load", "full_load_all", "unsafe_load", "unsafe_load_all",
|
||||
"safe_load", "safe_load_all"
|
||||
] and
|
||||
this = API::moduleImport("yaml").getMember(func_name).getACall()
|
||||
}
|
||||
|
||||
/**
|
||||
* This function was thought safe from the 5.1 release in 2017, when the default loader was changed to `FullLoader`.
|
||||
* In 2020 new exploits were found, meaning it's not safe. The Current plan is to change the default to `SafeLoader` in release 6.0
|
||||
* (as explained in https://github.com/yaml/pyyaml/issues/420#issuecomment-696752389).
|
||||
* Until 6.0 is released, we will mark `yaml.load` as possibly leading to arbitrary code execution.
|
||||
* See https://github.com/yaml/pyyaml/wiki/PyYAML-yaml.load(input)-Deprecation for more details.
|
||||
*/
|
||||
override predicate mayExecuteInput() {
|
||||
func_name in ["full_load", "full_load_all", "unsafe_load", "unsafe_load_all"]
|
||||
or
|
||||
func_name in ["load", "load_all"] and
|
||||
// If the `Loader` is not set to either `SafeLoader` or `BaseLoader` or not set at all,
|
||||
// then the default loader will be used, which is not safe.
|
||||
not exists(DataFlow::Node loader_arg |
|
||||
loader_arg in [this.getArg(1), this.getArgByName("Loader")]
|
||||
|
|
||||
loader_arg =
|
||||
API::moduleImport("yaml")
|
||||
.getMember(["SafeLoader", "BaseLoader", "CSafeLoader", "CBaseLoader"])
|
||||
.getAUse()
|
||||
)
|
||||
}
|
||||
|
||||
override DataFlow::Node getAnInput() { result = this.getArg(0) }
|
||||
|
||||
override DataFlow::Node getOutput() { result = this }
|
||||
|
||||
override string getFormat() { result = "YAML" }
|
||||
}
|
||||
|
||||
override DataFlow::Node getAnInput() { result.asCfgNode() = node.getArg(0) }
|
||||
|
||||
override DataFlow::Node getOutput() { result = this }
|
||||
|
||||
override string getFormat() { result = "YAML" }
|
||||
}
|
||||
|
||||
@@ -61,8 +61,8 @@ abstract class ClassObjectInternal extends ObjectInternal {
|
||||
pragma[noinline]
|
||||
override predicate binds(ObjectInternal instance, string name, ObjectInternal descriptor) {
|
||||
instance = this and
|
||||
PointsToInternal::attributeRequired(this, name) and
|
||||
this.lookup(name, descriptor, _) and
|
||||
PointsToInternal::attributeRequired(this, pragma[only_bind_into](name)) and
|
||||
this.lookup(pragma[only_bind_into](name), descriptor, _) and
|
||||
descriptor.isDescriptor() = true
|
||||
}
|
||||
|
||||
|
||||
@@ -34,9 +34,11 @@ abstract class ConstantObjectInternal extends ObjectInternal {
|
||||
|
||||
pragma[noinline]
|
||||
override predicate attribute(string name, ObjectInternal value, CfgOrigin origin) {
|
||||
PointsToInternal::attributeRequired(this, name) and
|
||||
PointsToInternal::attributeRequired(pragma[only_bind_into](this), pragma[only_bind_into](name)) and
|
||||
exists(ObjectInternal cls_attr, CfgOrigin attr_orig |
|
||||
this.getClass().(ClassObjectInternal).lookup(name, cls_attr, attr_orig) and
|
||||
this.getClass()
|
||||
.(ClassObjectInternal)
|
||||
.lookup(pragma[only_bind_into](name), cls_attr, attr_orig) and
|
||||
cls_attr.isDescriptor() = true and
|
||||
cls_attr.descriptorGetInstance(this, value, origin)
|
||||
)
|
||||
|
||||
@@ -30,18 +30,19 @@ abstract class InstanceObject extends ObjectInternal {
|
||||
|
||||
pragma[noinline]
|
||||
private predicate classAttribute(string name, ObjectInternal cls_attr) {
|
||||
PointsToInternal::attributeRequired(this, name) and
|
||||
this.getClass().(ClassObjectInternal).lookup(name, cls_attr, _)
|
||||
PointsToInternal::attributeRequired(this, pragma[only_bind_into](name)) and
|
||||
this.getClass().(ClassObjectInternal).lookup(pragma[only_bind_into](name), cls_attr, _)
|
||||
}
|
||||
|
||||
pragma[noinline]
|
||||
private predicate selfAttribute(string name, ObjectInternal value, CfgOrigin origin) {
|
||||
PointsToInternal::attributeRequired(this, name) and
|
||||
PointsToInternal::attributeRequired(this, pragma[only_bind_into](name)) and
|
||||
exists(EssaVariable self, PythonFunctionObjectInternal init, Context callee |
|
||||
this.initializer(init, callee) and
|
||||
self_variable_reaching_init_exit(self) and
|
||||
self.getScope() = init.getScope() and
|
||||
AttributePointsTo::variableAttributePointsTo(self, callee, name, value, origin)
|
||||
AttributePointsTo::variableAttributePointsTo(self, callee, pragma[only_bind_into](name),
|
||||
value, origin)
|
||||
)
|
||||
}
|
||||
|
||||
@@ -316,9 +317,11 @@ class UnknownInstanceInternal extends TUnknownInstance, ObjectInternal {
|
||||
|
||||
pragma[noinline]
|
||||
override predicate attribute(string name, ObjectInternal value, CfgOrigin origin) {
|
||||
PointsToInternal::attributeRequired(this, name) and
|
||||
PointsToInternal::attributeRequired(this, pragma[only_bind_into](name)) and
|
||||
exists(ObjectInternal cls_attr, CfgOrigin attr_orig |
|
||||
this.getClass().(ClassObjectInternal).lookup(name, cls_attr, attr_orig)
|
||||
this.getClass()
|
||||
.(ClassObjectInternal)
|
||||
.lookup(pragma[only_bind_into](name), cls_attr, attr_orig)
|
||||
|
|
||||
cls_attr.isDescriptor() = false and value = cls_attr and origin = attr_orig
|
||||
or
|
||||
@@ -456,8 +459,8 @@ class SuperInstance extends TSuperInstance, ObjectInternal {
|
||||
/* Helper for `attribute` */
|
||||
pragma[noinline]
|
||||
private predicate attribute_descriptor(string name, ObjectInternal cls_attr, CfgOrigin attr_orig) {
|
||||
PointsToInternal::attributeRequired(this, name) and
|
||||
this.lookup(name, cls_attr, attr_orig)
|
||||
PointsToInternal::attributeRequired(this, pragma[only_bind_into](name)) and
|
||||
this.lookup(pragma[only_bind_into](name), cls_attr, attr_orig)
|
||||
}
|
||||
|
||||
private predicate lookup(string name, ObjectInternal value, CfgOrigin origin) {
|
||||
|
||||
@@ -524,6 +524,7 @@ module PointsToInternal {
|
||||
)
|
||||
}
|
||||
|
||||
pragma[noinline]
|
||||
private boolean ssa_filter_definition_bool(
|
||||
PyEdgeRefinement def, PointsToContext context, ObjectInternal value, ControlFlowNode origin
|
||||
) {
|
||||
|
||||
@@ -184,7 +184,11 @@ class PointsToContext extends TPointsToContext {
|
||||
|
||||
/** Holds if this context can apply to the CFG node `n`. */
|
||||
pragma[inline]
|
||||
predicate appliesTo(ControlFlowNode n) { this.appliesToScope(n.getScope()) }
|
||||
predicate appliesTo(ControlFlowNode n) {
|
||||
exists(Scope s |
|
||||
this.appliesToScope(pragma[only_bind_into](s)) and pragma[only_bind_into](s) = n.getScope()
|
||||
)
|
||||
}
|
||||
|
||||
/** Holds if this context is a call context. */
|
||||
predicate isCall() { this = TCallContext(_, _, _) }
|
||||
|
||||
Reference in New Issue
Block a user