Merge branch 'main' of github.com:github/codeql into python-api-enhancements

This commit is contained in:
Rasmus Lerchedahl Petersen
2021-04-06 09:31:26 +02:00
827 changed files with 21638 additions and 13241 deletions

View File

@@ -1,14 +0,0 @@
/**
* @name Filter: non-generated files
* @description Only keep results that aren't (or don't appear to be) generated.
* @kind problem
* @id py/not-generated-file-filter
*/
import python
import external.DefectFilter
import semmle.python.filters.GeneratedCode
from DefectResult res
where not exists(GeneratedFile f | res.getFile() = f)
select res, res.getMessage()

View File

@@ -1,14 +0,0 @@
/**
* @name Filter: non-test files
* @description Only keep results that aren't in tests
* @kind problem
* @id py/not-test-file-filter
*/
import python
import external.DefectFilter
import semmle.python.filters.Tests
from DefectResult res
where not exists(TestScope s | contains(s.getLocation(), res))
select res, res.getMessage()

View File

@@ -4,7 +4,6 @@
* @kind treemap
* @treemap.warnOn highValues
* @metricType file
* @precision high
* @tags maintainability
* @id py/lines-of-commented-out-code-in-files
*/

View File

@@ -5,7 +5,6 @@
* @kind treemap
* @treemap.warnOn highValues
* @metricType externalDependency
* @precision medium
* @id py/external-dependencies
*/

View File

@@ -6,7 +6,6 @@
* @treemap.warnOn highValues
* @metricType file
* @metricAggregate avg sum max
* @precision very-high
* @tags maintainability
* @id py/lines-of-code-in-files
*/

View File

@@ -6,7 +6,6 @@
* @treemap.warnOn lowValues
* @metricType file
* @metricAggregate avg sum max
* @precision very-high
* @id py/lines-of-comments-in-files
*/

View File

@@ -7,21 +7,12 @@
* @treemap.warnOn highValues
* @metricType file
* @metricAggregate avg sum max
* @precision high
* @tags testability
* @id py/duplicated-lines-in-files
*/
import python
import external.CodeDuplication
from File f, int n
where
n =
count(int line |
exists(DuplicateBlock d | d.sourceFile() = f |
line in [d.sourceStartLine() .. d.sourceEndLine()] and
not allowlistedLineForDuplication(f, line)
)
)
where none()
select f, n order by n desc

View File

@@ -7,21 +7,12 @@
* @treemap.warnOn highValues
* @metricType file
* @metricAggregate avg sum max
* @precision high
* @tags testability
* @id py/similar-lines-in-files
*/
import python
import external.CodeDuplication
from File f, int n
where
n =
count(int line |
exists(SimilarBlock d | d.sourceFile() = f |
line in [d.sourceStartLine() .. d.sourceEndLine()] and
not allowlistedLineForDuplication(f, line)
)
)
where none()
select f, n order by n desc

View File

@@ -5,7 +5,6 @@
* @treemap.warnOn lowValues
* @metricType file
* @metricAggregate avg sum max
* @precision medium
* @id py/tests-in-files
*/

View File

@@ -0,0 +1,24 @@
/**
* @name Use of weak cryptographic key
* @description Use of a cryptographic key that is too small may allow the encryption to be broken.
* @kind problem
* @problem.severity error
* @precision high
* @id py/weak-crypto-key
* @tags security
* external/cwe/cwe-326
*/
import python
import semmle.python.Concepts
import semmle.python.dataflow.new.DataFlow
import semmle.python.filters.Tests
from Cryptography::PublicKey::KeyGeneration keyGen, int keySize, DataFlow::Node origin
where
keySize = keyGen.getKeySizeWithOrigin(origin) and
keySize < keyGen.minimumSecureKeySize() and
not origin.getScope().getScope*() instanceof TestScope
select keyGen,
"Creation of an " + keyGen.getName() + " key uses $@ bits, which is below " +
keyGen.minimumSecureKeySize() + " and considered breakable.", origin, keySize.toString()

View File

@@ -3,6 +3,8 @@
* @description Binding a socket to all interfaces opens it up to traffic from any IPv4 address
* and is therefore associated with security risks.
* @kind problem
* @id py/old/bind-socket-all-network-interfaces
* @problem.severity error
*/
import python

View File

@@ -2,6 +2,8 @@
* @name OLD QUERY: Uncontrolled data used in path expression
* @description Accessing paths influenced by users can allow an attacker to access unexpected resources.
* @kind path-problem
* @problem.severity error
* @id py/old/path-injection
*/
import python

View File

@@ -3,6 +3,8 @@
* @description Using externally controlled strings in a command line may allow a malicious
* user to change the meaning of the command.
* @kind path-problem
* @problem.severity error
* @id py/old/command-line-injection
*/
import python

View File

@@ -3,6 +3,8 @@
* @description Writing user input directly to a web page
* allows for a cross-site scripting vulnerability.
* @kind path-problem
* @problem.severity error
* @id py/old/reflective-xss
*/
import python

View File

@@ -3,6 +3,8 @@
* @description Building a SQL query from user-controlled sources is vulnerable to insertion of
* malicious SQL code by the user.
* @kind path-problem
* @problem.severity error
* @id py/old/sql-injection
*/
import python

View File

@@ -1,8 +1,10 @@
/**
* @name Code injection
* @description Interpreting unsanitized user input as code allows a malicious user arbitrary
* @description OLD QUERY: Interpreting unsanitized user input as code allows a malicious user arbitrary
* code execution.
* @kind path-problem
* @problem.severity error
* @id py/old/code-injection
*/
import python

View File

@@ -1,12 +1,9 @@
/**
* @name Use of weak cryptographic key
* @name OLD QUERY: Use of weak cryptographic key
* @description Use of a cryptographic key that is too small may allow the encryption to be broken.
* @kind problem
* @problem.severity error
* @precision high
* @id py/weak-crypto-key
* @tags security
* external/cwe/cwe-326
* @id py/old/weak-crypto-key
*/
import python

View File

@@ -2,6 +2,8 @@
* @name OLD QUERY: Deserializing untrusted input
* @description Deserializing user-controlled data may allow attackers to execute arbitrary code.
* @kind path-problem
* @id py/old/unsafe-deserialization
* @problem.severity error
*/
import python

View File

@@ -3,6 +3,8 @@
* @description URL redirection based on unvalidated user input
* may cause redirection to malicious web sites.
* @kind path-problem
* @problem.severity error
* @id py/old/url-redirection
*/
import python

View File

@@ -0,0 +1,15 @@
/**
* This version resides in the experimental area and provides a space for
* external contributors to place new concepts, keeping to our preferred
* structure while remaining in the experimental area.
*
* Provides abstract classes representing generic concepts such as file system
* access or system command execution, for which individual framework libraries
* provide concrete subclasses.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.TaintTracking
private import experimental.semmle.python.Frameworks

View File

@@ -0,0 +1,5 @@
/**
* Helper file that imports all framework modeling.
*/
private import experimental.semmle.python.frameworks.Stdlib

View File

@@ -0,0 +1,11 @@
/**
* Provides classes modeling security-relevant aspects of the standard libraries.
* Note: some modeling is done internally in the dataflow/taint tracking implementation.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.dataflow.new.RemoteFlowSources
private import experimental.semmle.python.Concepts
private import semmle.python.ApiGraphs

View File

@@ -1,273 +0,0 @@
/** Provides classes for detecting duplicate or similar code. */
import python
/**
 * Gets the relative path of `file`, normalised so that every backslash is
 * replaced by a forward slash.
 */
private string relativePath(File file) {
  exists(string rawPath | rawPath = file.getRelativePath() |
    result = rawPath.replaceAll("\\", "/")
  )
}
/**
 * Holds if the `index`-th token of block `copy` is in file `file`, starting at
 * column `sc` of line `sl`.
 *
 * NOTE(review): the last two position parameters appear to be named the wrong way
 * round. They are forwarded to the fifth and sixth columns of `tokens`, which
 * `Copy.sourceEndLine()` and `Copy.sourceEndColumn()` read as the end line and the
 * end column respectively, and `Copy.tokenEndingAt` passes `loc.getEndLine()` for
 * `ec` and `loc.getEndColumn()` for `el`. So `ec` carries the end line and `el` the
 * end column. All callers are positional, so behavior is unaffected.
 *
 * For more information, see [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
 */
pragma[noinline, nomagic]
private predicate tokenLocation(File file, int sl, int sc, int ec, int el, Copy copy, int index) {
// Restrict to the file that `copy` itself reports as its source file.
file = copy.sourceFile() and
tokens(copy, index, sl, sc, ec, el)
}
/** A block of tokens that is the unit of duplicate- and similar-code detection. */
class Copy extends @duplication_or_similarity {
  /** Gets the highest token index recorded for this block. */
  private int lastToken() { result = max(int idx | tokens(this, idx, _, _, _, _) | idx) }

  /** Gets the index of the token of this block that starts where `loc` starts, if any. */
  int tokenStartingAt(Location loc) {
    exists(File f, int line, int column |
      f = loc.getFile() and
      line = loc.getStartLine() and
      column = loc.getStartColumn()
    |
      tokenLocation(f, line, column, _, _, this, result)
    )
  }

  /** Gets the index of the token of this block that ends where `loc` ends, if any. */
  int tokenEndingAt(Location loc) {
    exists(File f, int line, int column |
      f = loc.getFile() and
      line = loc.getEndLine() and
      column = loc.getEndColumn()
    |
      tokenLocation(f, _, _, line, column, this, result)
    )
  }

  /** Gets the start line of the first token (index 0) of this block. */
  int sourceStartLine() { tokens(this, 0, result, _, _, _) }

  /** Gets the start column of the first token (index 0) of this block. */
  int sourceStartColumn() { tokens(this, 0, _, result, _, _) }

  /** Gets the end line of the last token of this block. */
  int sourceEndLine() {
    exists(int last | last = this.lastToken() | tokens(this, last, _, _, result, _))
  }

  /** Gets the end column of the last token of this block. */
  int sourceEndColumn() {
    exists(int last | last = this.lastToken() | tokens(this, last, _, _, _, result))
  }

  /** Gets the number of lines spanned by this block, counting both its first and last line. */
  int sourceLines() { result = this.sourceEndLine() + 1 - this.sourceStartLine() }

  /** Gets an opaque identifier for the equivalence class this block belongs to. */
  int getEquivalenceClass() {
    similarCode(this, _, result)
    or
    duplicateCode(this, _, result)
  }

  /** Gets the source file this block was found in. */
  File sourceFile() {
    exists(string name |
      (duplicateCode(this, name, _) or similarCode(this, name, _)) and
      // Compare paths with backslashes normalised to forward slashes on both sides.
      relativePath(result) = name.replaceAll("\\", "/")
    )
  }

  /**
   * Holds if this element is at the specified location.
   * The location spans column `startcolumn` of line `startline` to
   * column `endcolumn` of line `endline` in file `filepath`.
   * For more information, see
   * [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
   */
  predicate hasLocationInfo(
    string filepath, int startline, int startcolumn, int endline, int endcolumn
  ) {
    filepath = this.sourceFile().getAbsolutePath() and
    startline = this.sourceStartLine() and
    startcolumn = this.sourceStartColumn() and
    endline = this.sourceEndLine() and
    endcolumn = this.sourceEndColumn()
  }

  /** Gets a textual representation of this element. */
  string toString() { result = "Copy" }

  /**
   * Gets a different block whose first token (index 0) has the same location
   * as some token of this block, that is, a block that extends this one.
   */
  Copy extendingBlock() {
    result != this and
    exists(File f, int p1, int p2, int p3, int p4 |
      tokenLocation(f, p1, p2, p3, p4, result, 0) and
      tokenLocation(f, p1, p2, p3, p4, this, _)
    )
  }
}
/**
 * Holds if there is a sequence of `SimilarBlock`s `start1, ..., ext1` and another sequence
 * `start2, ..., ext2` such that each block extends the previous one and corresponding blocks
 * have the same equivalence class, with `start` being the equivalence class of `start1` and
 * `start2`, and `ext` the equivalence class of `ext1` and `ext2`.
 *
 * `start1` and `start2` must be distinct blocks. A sequence may consist of a single
 * block, in which case `ext1 = start1` and `ext2 = start2`.
 */
predicate similar_extension(
SimilarBlock start1, SimilarBlock start2, SimilarBlock ext1, SimilarBlock ext2, int start, int ext
) {
start1.getEquivalenceClass() = start and
start2.getEquivalenceClass() = start and
ext1.getEquivalenceClass() = ext and
ext2.getEquivalenceClass() = ext and
start1 != start2 and
(
// Base case: the sequences have length one.
ext1 = start1 and ext2 = start2
or
// Recursive case: step both sequences forward to a block extending the current start.
similar_extension(start1.extendingBlock(), start2.extendingBlock(), ext1, ext2, _, ext)
)
}
/**
 * Holds if there is a sequence of `DuplicateBlock`s `start1, ..., ext1` and another sequence
 * `start2, ..., ext2` such that each block extends the previous one and corresponding blocks
 * have the same equivalence class, with `start` being the equivalence class of `start1` and
 * `start2`, and `ext` the equivalence class of `ext1` and `ext2`.
 *
 * `start1` and `start2` must be distinct blocks. A sequence may consist of a single
 * block, in which case `ext1 = start1` and `ext2 = start2`.
 */
predicate duplicate_extension(
DuplicateBlock start1, DuplicateBlock start2, DuplicateBlock ext1, DuplicateBlock ext2, int start,
int ext
) {
start1.getEquivalenceClass() = start and
start2.getEquivalenceClass() = start and
ext1.getEquivalenceClass() = ext and
ext2.getEquivalenceClass() = ext and
start1 != start2 and
(
// Base case: the sequences have length one.
ext1 = start1 and ext2 = start2
or
// Recursive case: step both sequences forward to a block extending the current start.
duplicate_extension(start1.extendingBlock(), start2.extendingBlock(), ext1, ext2, _, ext)
)
}
/** A `Copy` that represents a block of duplicated code. */
class DuplicateBlock extends Copy, @duplication {
  override string toString() {
    exists(int lines | lines = this.sourceLines() |
      result = "Duplicate code: " + lines + " duplicated lines."
    )
  }
}
/** A `Copy` that represents a block of similar (almost duplicated) code. */
class SimilarBlock extends Copy, @similarity {
  override string toString() {
    exists(int lines | lines = this.sourceLines() |
      result = "Similar code: " + lines + " almost duplicated lines."
    )
  }
}
/**
 * Holds if `stmt1` and `stmt2` are distinct statements, contained in the distinct
 * scopes `scope1` and `scope2` respectively, that are covered by duplicate blocks
 * with the same equivalence classes and the same first and last token indices.
 */
predicate duplicateStatement(Scope scope1, Scope scope2, Stmt stmt1, Stmt stmt2) {
  scope1 != scope2 and
  stmt1 != stmt2 and
  scope1.contains(stmt1) and
  scope2.contains(stmt2) and
  exists(int classOfFirst, int classOfLast, int firstToken, int lastToken |
    duplicateCoversStatement(classOfFirst, classOfLast, firstToken, lastToken, stmt1) and
    duplicateCoversStatement(classOfFirst, classOfLast, firstToken, lastToken, stmt2)
  )
}
/**
 * Holds if `stmt` is covered by a chain of `DuplicateBlock`s. `first` is the index
 * of the token in the first block that starts where `stmt` starts, `last` is the
 * index of the token in the last block that ends where the last statement of
 * `stmt` ends, and `equivstart` and `equivend` are the equivalence classes of the
 * first and the last block, respectively.
 */
private predicate duplicateCoversStatement(
  int equivstart, int equivend, int first, int last, Stmt stmt
) {
  exists(DuplicateBlock head, DuplicateBlock tail |
    duplicate_extension(head, _, tail, _, equivstart, equivend) and
    head.getEquivalenceClass() = equivstart and
    tail.getEquivalenceClass() = equivend and
    first = head.tokenStartingAt(stmt.getLocation()) and
    last = tail.tokenEndingAt(stmt.getLastStatement().getLocation())
  )
}
/**
 * Holds if `scope1` contains `total` statements, of which `duplicate` have a
 * duplicate statement in `scope2`. Both counts are strict, so the predicate
 * only holds when each of them is at least one.
 */
predicate duplicateStatements(Scope scope1, Scope scope2, int duplicate, int total) {
  total = strictcount(Stmt s | scope1.contains(s)) and
  duplicate = strictcount(Stmt s | duplicateStatement(scope1, scope2, s, _))
}
/**
 * Holds if at least 80 percent of the statements in `s` are duplicated in `other`.
 * `percent` is the share of duplicated statements and `message` is an alert
 * message describing the duplication, with a `$@` placeholder.
 */
predicate duplicateScopes(Scope s, Scope other, float percent, string message) {
  exists(int total, int duplicate |
    duplicateStatements(s, other, duplicate, total) and
    percent = 100.0 * duplicate / total and
    percent >= 80.0
  |
    // Every statement is duplicated.
    duplicate = total and
    message = "All " + total + " statements in " + s.getName() + " are identical in $@."
    or
    // Only some of the statements are duplicated.
    duplicate != total and
    message =
      duplicate + " out of " + total + " statements in " + s.getName() + " are duplicated in $@."
  )
}
/**
 * Holds if `stmt1` and `stmt2` are distinct statements, contained in the distinct
 * scopes `scope1` and `scope2` respectively, that are covered by similar blocks
 * with the same equivalence classes and the same first and last token indices.
 */
private predicate similarStatement(Scope scope1, Scope scope2, Stmt stmt1, Stmt stmt2) {
  scope1 != scope2 and
  stmt1 != stmt2 and
  scope1.contains(stmt1) and
  scope2.contains(stmt2) and
  exists(int classOfFirst, int classOfLast, int firstToken, int lastToken |
    similarCoversStatement(classOfFirst, classOfLast, firstToken, lastToken, stmt1) and
    similarCoversStatement(classOfFirst, classOfLast, firstToken, lastToken, stmt2)
  )
}
/**
 * Holds if `stmt` is covered by a chain of `SimilarBlock`s. `first` is the index
 * of the token in the first block that starts where `stmt` starts, `last` is the
 * index of the token in the last block that ends where the last statement of
 * `stmt` ends, and `equivstart` and `equivend` are the equivalence classes of the
 * first and the last block, respectively.
 */
private predicate similarCoversStatement(
  int equivstart, int equivend, int first, int last, Stmt stmt
) {
  exists(SimilarBlock head, SimilarBlock tail |
    similar_extension(head, _, tail, _, equivstart, equivend) and
    head.getEquivalenceClass() = equivstart and
    tail.getEquivalenceClass() = equivend and
    first = head.tokenStartingAt(stmt.getLocation()) and
    last = tail.tokenEndingAt(stmt.getLastStatement().getLocation())
  )
}
/**
 * Holds if `scope1` contains `total` statements, of which `similar` have a
 * similar statement in `scope2`. Both counts are strict, so the predicate
 * only holds when each of them is at least one.
 */
private predicate similarStatements(Scope scope1, Scope scope2, int similar, int total) {
  total = strictcount(Stmt s | scope1.contains(s)) and
  similar = strictcount(Stmt s | similarStatement(scope1, scope2, s, _))
}
/**
 * Holds if at least 80 percent of the statements in `s` are similar to statements
 * in `other`. `percent` is the share of similar statements and `message` is an
 * alert message describing the similarity, with a `$@` placeholder.
 */
predicate similarScopes(Scope s, Scope other, float percent, string message) {
  exists(int total, int similar |
    similarStatements(s, other, similar, total) and
    percent = 100.0 * similar / total and
    percent >= 80.0
  |
    // Every statement has a similar counterpart.
    similar = total and
    message = "All statements in " + s.getName() + " are similar in $@."
    or
    // Only some of the statements have a similar counterpart.
    similar != total and
    message =
      similar + " out of " + total + " statements in " + s.getName() + " are similar in $@."
  )
}
/**
 * Holds if `line` of file `f` is acceptable as a duplicate, which is the case
 * when an importing statement in `f` starts on that line.
 */
predicate allowlistedLineForDuplication(File f, int line) {
  exists(ImportingStmt imp |
    f = imp.getLocation().getFile() and
    line = imp.getLocation().getStartLine()
  )
}

View File

@@ -16,19 +16,7 @@
*/
import python
import CodeDuplication
predicate sorted_by_location(DuplicateBlock x, DuplicateBlock y) {
if x.sourceFile() = y.sourceFile()
then x.sourceStartLine() < y.sourceStartLine()
else x.sourceFile().getAbsolutePath() < y.sourceFile().getAbsolutePath()
}
from DuplicateBlock d, DuplicateBlock other
where
d.sourceLines() > 10 and
other.getEquivalenceClass() = d.getEquivalenceClass() and
sorted_by_location(other, d)
select d,
"Duplicate code: " + d.sourceLines() + " lines are duplicated at " +
other.sourceFile().getShortName() + ":" + other.sourceStartLine().toString()
from BasicBlock d
where none()
select d, "Duplicate code: " + "-1" + " lines are duplicated at " + "<file>" + ":" + "-1"

View File

@@ -16,15 +16,7 @@
*/
import python
import CodeDuplication
predicate relevant(Function m) { m.getMetrics().getNumberOfLinesOfCode() > 5 }
from Function m, Function other, string message, int percent
where
duplicateScopes(m, other, percent, message) and
relevant(m) and
percent > 95.0 and
not duplicateScopes(m.getEnclosingModule(), other.getEnclosingModule(), _, _) and
not duplicateScopes(m.getScope(), other.getScope(), _, _)
from Function m, Function other, string message
where none()
select m, message, other, other.getName()

View File

@@ -16,11 +16,7 @@
*/
import python
import CodeDuplication
from Class c, Class other, string message
where
duplicateScopes(c, other, _, message) and
count(c.getAStmt()) > 3 and
not duplicateScopes(c.getEnclosingModule(), _, _, _)
where none()
select c, message, other, other.getName()

View File

@@ -16,8 +16,7 @@
*/
import python
import CodeDuplication
from Module m, Module other, int percent, string message
where duplicateScopes(m, other, percent, message)
from Module m, Module other, string message
where none()
select m, message, other, other.getName()

View File

@@ -16,8 +16,7 @@
*/
import python
import CodeDuplication
from Module m, Module other, string message
where similarScopes(m, other, _, message)
where none()
select m, message, other, other.getName()

View File

@@ -16,16 +16,7 @@
*/
import python
import CodeDuplication
predicate relevant(Function m) { m.getMetrics().getNumberOfLinesOfCode() > 10 }
from Function m, Function other, string message, int percent
where
similarScopes(m, other, percent, message) and
relevant(m) and
percent > 95.0 and
not duplicateScopes(m, other, _, _) and
not duplicateScopes(m.getEnclosingModule(), other.getEnclosingModule(), _, _) and
not duplicateScopes(m.getScope(), other.getScope(), _, _)
from Function m, Function other, string message
where none()
select m, message, other, other.getName()

View File

@@ -6,7 +6,7 @@
* directed and labeled; they specify how the components represented by nodes relate to each other.
*/
import python
private import python
import semmle.python.dataflow.new.DataFlow
/**
@@ -55,7 +55,7 @@ module API {
/**
* Gets a call to the function represented by this API component.
*/
DataFlow::Node getACall() { result = getReturn().getAnImmediateUse() }
DataFlow::CallCfgNode getACall() { result = getReturn().getAnImmediateUse() }
/**
* Gets a node representing member `m` of this API component.

View File

@@ -526,3 +526,118 @@ module HTTP {
}
}
}
/** Provides models for cryptographic things. */
module Cryptography {
/** Provides models for public-key cryptography, also called asymmetric cryptography. */
module PublicKey {
/**
* A data-flow node that generates a new key-pair for use with public-key cryptography.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `KeyGeneration::Range` instead.
*/
class KeyGeneration extends DataFlow::Node {
// The `Range` instance that every member predicate of this class delegates to.
KeyGeneration::Range range;
KeyGeneration() { this = range }
/** Gets the name of the cryptographic algorithm (for example `"RSA"` or `"DSA"`). */
string getName() { result = range.getName() }
/** Gets the argument that specifies the size of the key in bits, if available. */
DataFlow::Node getKeySizeArg() { result = range.getKeySizeArg() }
/**
* Gets the size of the key generated (in bits), as well as the `origin` that
* explains how we obtained this specific key size.
*/
int getKeySizeWithOrigin(DataFlow::Node origin) {
result = range.getKeySizeWithOrigin(origin)
}
/** Gets the minimum key size (in bits) for this algorithm to be considered secure. */
int minimumSecureKeySize() { result = range.minimumSecureKeySize() }
}
/** Provides classes for modeling new key-pair generation APIs. */
module KeyGeneration {
/** Gets a back-reference to the keysize argument `arg` that was used to generate a new key-pair. */
private DataFlow::LocalSourceNode keysizeBacktracker(
DataFlow::TypeBackTracker t, DataFlow::Node arg
) {
t.start() and
arg = any(KeyGeneration::Range r).getKeySizeArg() and
result = arg.getALocalSource()
or
// Due to bad performance when using the normal type-tracking setup, we have
// inlined that code and forced a join (see `keysizeBacktracker_first_join`).
exists(DataFlow::TypeBackTracker t2 |
exists(DataFlow::StepSummary summary |
keysizeBacktracker_first_join(t2, arg, result, summary) and
t = t2.prepend(summary)
)
)
}
/**
* Holds if `res` steps, with step summary `summary`, to a node that
* `keysizeBacktracker(t2, arg)` yields.
*
* Kept as a separate `nomagic` predicate to force this join to be computed first.
*/
pragma[nomagic]
private predicate keysizeBacktracker_first_join(
DataFlow::TypeBackTracker t2, DataFlow::Node arg, DataFlow::Node res,
DataFlow::StepSummary summary
) {
DataFlow::StepSummary::step(res, keysizeBacktracker(t2, arg), summary)
}
/** Gets a back-reference to the keysize argument `arg` that was used to generate a new key-pair. */
DataFlow::LocalSourceNode keysizeBacktracker(DataFlow::Node arg) {
result = keysizeBacktracker(DataFlow::TypeBackTracker::end(), arg)
}
/**
* A data-flow node that generates a new key-pair for use with public-key cryptography.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `KeyGeneration` instead.
*/
abstract class Range extends DataFlow::Node {
/** Gets the name of the cryptographic algorithm (for example `"RSA"`). */
abstract string getName();
/** Gets the argument that specifies the size of the key in bits, if available. */
abstract DataFlow::Node getKeySizeArg();
/**
* Gets the size of the key generated (in bits), as well as the `origin` that
* explains how we obtained this specific key size.
*
* This default implementation only has a result when back-tracking from the
* key-size argument reaches an integer literal, whose value is the key size.
*/
int getKeySizeWithOrigin(DataFlow::Node origin) {
origin = keysizeBacktracker(this.getKeySizeArg()) and
result = origin.asExpr().(IntegerLiteral).getValue()
}
/** Gets the minimum key size (in bits) for this algorithm to be considered secure. */
abstract int minimumSecureKeySize();
}
/** A data-flow node that generates a new RSA key-pair. */
abstract class RsaRange extends Range {
final override string getName() { result = "RSA" }
final override int minimumSecureKeySize() { result = 2048 }
}
/** A data-flow node that generates a new DSA key-pair. */
abstract class DsaRange extends Range {
final override string getName() { result = "DSA" }
final override int minimumSecureKeySize() { result = 2048 }
}
/** A data-flow node that generates a new ECC key-pair. */
abstract class EccRange extends Range {
final override string getName() { result = "ECC" }
final override int minimumSecureKeySize() { result = 224 }
}
}
}
}

View File

@@ -2,6 +2,8 @@
* Helper file that imports all framework modeling.
*/
private import semmle.python.frameworks.Cryptodome
private import semmle.python.frameworks.Cryptography
private import semmle.python.frameworks.Dill
private import semmle.python.frameworks.Django
private import semmle.python.frameworks.Fabric

View File

@@ -180,7 +180,7 @@ private newtype TTypeTracker = MkTypeTracker(Boolean hasCall, OptionalAttributeN
* It is recommended that all uses of this type are written in the following form,
* for tracking some type `myType`:
* ```
* DataFlow::LocalSourceNode myType(DataFlow::TypeTracker t) {
* private DataFlow::LocalSourceNode myType(DataFlow::TypeTracker t) {
* t.start() and
* result = < source of myType >
* or
@@ -341,7 +341,7 @@ private newtype TTypeBackTracker = MkTypeBackTracker(Boolean hasReturn, Optional
* for back-tracking some callback type `myCallback`:
*
* ```
* DataFlow::LocalSourceNode myCallback(DataFlow::TypeBackTracker t) {
* private DataFlow::LocalSourceNode myCallback(DataFlow::TypeBackTracker t) {
* t.start() and
* result = (< some API call >).getArgument(< n >).getALocalSource()
* or

View File

@@ -3,6 +3,7 @@
import DataFlowUtil
import DataFlowPublic
private import DataFlowPrivate
private import semmle.python.types.Builtins
/**
* A data flow node that reads or writes an attribute of an object.
@@ -84,8 +85,6 @@ private class AttributeAssignmentAsAttrWrite extends AttrWrite, CfgNode {
override string getAttributeName() { result = node.getName() }
}
import semmle.python.types.Builtins
/** Represents `CallNode`s that may refer to calls to built-in functions or classes. */
private class BuiltInCallNode extends CallNode {
string name;

View File

@@ -26,15 +26,243 @@ predicate accessPathCostLimits(int apLimit, int tupleLimit) {
tupleLimit = 1000
}
/**
* Provides a simple data-flow analysis for resolving lambda calls. The analysis
* currently excludes read-steps, store-steps, and flow-through.
*
* The analysis uses non-linear recursion: When computing a flow path in or out
* of a call, we use the results of the analysis recursively to resolve lamba
* calls. For this reason, we cannot reuse the code from `DataFlowImpl.qll` directly.
*/
private module LambdaFlow {
private predicate viableParamNonLambda(DataFlowCall call, int i, ParameterNode p) {
p.isParameterOf(viableCallable(call), i)
}
private predicate viableParamLambda(DataFlowCall call, int i, ParameterNode p) {
p.isParameterOf(viableCallableLambda(call, _), i)
}
private predicate viableParamArgNonLambda(DataFlowCall call, ParameterNode p, ArgumentNode arg) {
exists(int i |
viableParamNonLambda(call, i, p) and
arg.argumentOf(call, i)
)
}
private predicate viableParamArgLambda(DataFlowCall call, ParameterNode p, ArgumentNode arg) {
exists(int i |
viableParamLambda(call, i, p) and
arg.argumentOf(call, i)
)
}
private newtype TReturnPositionSimple =
TReturnPositionSimple0(DataFlowCallable c, ReturnKind kind) {
exists(ReturnNode ret |
c = getNodeEnclosingCallable(ret) and
kind = ret.getKind()
)
}
pragma[noinline]
private TReturnPositionSimple getReturnPositionSimple(ReturnNode ret, ReturnKind kind) {
result = TReturnPositionSimple0(getNodeEnclosingCallable(ret), kind)
}
pragma[nomagic]
private TReturnPositionSimple viableReturnPosNonLambda(DataFlowCall call, ReturnKind kind) {
result = TReturnPositionSimple0(viableCallable(call), kind)
}
pragma[nomagic]
private TReturnPositionSimple viableReturnPosLambda(
DataFlowCall call, DataFlowCallOption lastCall, ReturnKind kind
) {
result = TReturnPositionSimple0(viableCallableLambda(call, lastCall), kind)
}
private predicate viableReturnPosOutNonLambda(
DataFlowCall call, TReturnPositionSimple pos, OutNode out
) {
exists(ReturnKind kind |
pos = viableReturnPosNonLambda(call, kind) and
out = getAnOutNode(call, kind)
)
}
private predicate viableReturnPosOutLambda(
DataFlowCall call, DataFlowCallOption lastCall, TReturnPositionSimple pos, OutNode out
) {
exists(ReturnKind kind |
pos = viableReturnPosLambda(call, lastCall, kind) and
out = getAnOutNode(call, kind)
)
}
/**
* Holds if data can flow (inter-procedurally) from `node` (of type `t`) to
* the lambda call `lambdaCall`.
*
* The parameter `toReturn` indicates whether the path from `node` to
* `lambdaCall` goes through a return, and `toJump` whether the path goes
* through a jump step.
*
* The call context `lastCall` records the last call on the path from `node`
* to `lambdaCall`, if any. That is, `lastCall` is able to target the enclosing
* callable of `lambdaCall`.
*/
pragma[nomagic]
predicate revLambdaFlow(
DataFlowCall lambdaCall, LambdaCallKind kind, Node node, DataFlowType t, boolean toReturn,
boolean toJump, DataFlowCallOption lastCall
) {
revLambdaFlow0(lambdaCall, kind, node, t, toReturn, toJump, lastCall) and
if node instanceof CastNode or node instanceof ArgumentNode or node instanceof ReturnNode
then compatibleTypes(t, getNodeType(node))
else any()
}
pragma[nomagic]
predicate revLambdaFlow0(
DataFlowCall lambdaCall, LambdaCallKind kind, Node node, DataFlowType t, boolean toReturn,
boolean toJump, DataFlowCallOption lastCall
) {
lambdaCall(lambdaCall, kind, node) and
t = getNodeType(node) and
toReturn = false and
toJump = false and
lastCall = TDataFlowCallNone()
or
// local flow
exists(Node mid, DataFlowType t0 |
revLambdaFlow(lambdaCall, kind, mid, t0, toReturn, toJump, lastCall)
|
simpleLocalFlowStep(node, mid) and
t = t0
or
exists(boolean preservesValue |
additionalLambdaFlowStep(node, mid, preservesValue) and
getNodeEnclosingCallable(node) = getNodeEnclosingCallable(mid)
|
preservesValue = false and
t = getNodeType(node)
or
preservesValue = true and
t = t0
)
)
or
// jump step
exists(Node mid, DataFlowType t0 |
revLambdaFlow(lambdaCall, kind, mid, t0, _, _, _) and
toReturn = false and
toJump = true and
lastCall = TDataFlowCallNone()
|
jumpStep(node, mid) and
t = t0
or
exists(boolean preservesValue |
additionalLambdaFlowStep(node, mid, preservesValue) and
getNodeEnclosingCallable(node) != getNodeEnclosingCallable(mid)
|
preservesValue = false and
t = getNodeType(node)
or
preservesValue = true and
t = t0
)
)
or
// flow into a callable
exists(ParameterNode p, DataFlowCallOption lastCall0, DataFlowCall call |
revLambdaFlowIn(lambdaCall, kind, p, t, toJump, lastCall0) and
(
if lastCall0 = TDataFlowCallNone() and toJump = false
then lastCall = TDataFlowCallSome(call)
else lastCall = lastCall0
) and
toReturn = false
|
viableParamArgNonLambda(call, p, node)
or
viableParamArgLambda(call, p, node) // non-linear recursion
)
or
// flow out of a callable
exists(TReturnPositionSimple pos |
revLambdaFlowOut(lambdaCall, kind, pos, t, toJump, lastCall) and
getReturnPositionSimple(node, node.(ReturnNode).getKind()) = pos and
toReturn = true
)
}
pragma[nomagic]
predicate revLambdaFlowOutLambdaCall(
DataFlowCall lambdaCall, LambdaCallKind kind, OutNode out, DataFlowType t, boolean toJump,
DataFlowCall call, DataFlowCallOption lastCall
) {
revLambdaFlow(lambdaCall, kind, out, t, _, toJump, lastCall) and
exists(ReturnKindExt rk |
out = rk.getAnOutNode(call) and
lambdaCall(call, _, _)
)
}
/**
 * Holds if reverse lambda flow for `lambdaCall` reaches an out node of some
 * call for which `pos` is a viable return position. The return position is
 * resolved either through `viableReturnPosOutNonLambda`, or, when the call is
 * itself a lambda call, through `viableReturnPosOutLambda`.
 */
pragma[nomagic]
predicate revLambdaFlowOut(
DataFlowCall lambdaCall, LambdaCallKind kind, TReturnPositionSimple pos, DataFlowType t,
boolean toJump, DataFlowCallOption lastCall
) {
exists(DataFlowCall call, OutNode out |
revLambdaFlow(lambdaCall, kind, out, t, _, toJump, lastCall) and
viableReturnPosOutNonLambda(call, pos, out)
or
// non-linear recursion
revLambdaFlowOutLambdaCall(lambdaCall, kind, out, t, toJump, call, lastCall) and
viableReturnPosOutLambda(call, _, pos, out)
)
}
/**
 * Holds if reverse lambda flow for `lambdaCall` reaches the parameter `p`,
 * with the fifth argument of `revLambdaFlow` fixed to `false`.
 *
 * NOTE(review): the fifth argument is presumably `toReturn`; confirm against
 * the signature of `revLambdaFlow`.
 */
pragma[nomagic]
predicate revLambdaFlowIn(
DataFlowCall lambdaCall, LambdaCallKind kind, ParameterNode p, DataFlowType t, boolean toJump,
DataFlowCallOption lastCall
) {
revLambdaFlow(lambdaCall, kind, p, t, false, toJump, lastCall)
}
}
/**
 * Gets a viable target of `call`: either one given by `viableCallable`, or
 * one given by `viableCallableLambda` (for any `lastCall`).
 */
private DataFlowCallable viableCallableExt(DataFlowCall call) {
result = viableCallable(call)
or
result = viableCallableLambda(call, _)
}
cached
private module Cached {
/**
 * Gets a viable target for the lambda call `call`.
 *
 * The result is the callable of a lambda creation node (of the same kind as
 * the call) that `LambdaFlow::revLambdaFlow` relates to `call`.
 *
 * `lastCall` records the call required to reach `call` in order for the result
 * to be a viable target, if any.
 */
cached
DataFlowCallable viableCallableLambda(DataFlowCall call, DataFlowCallOption lastCall) {
exists(Node creation, LambdaCallKind kind |
LambdaFlow::revLambdaFlow(call, kind, creation, _, _, _, lastCall) and
lambdaCreation(creation, kind, result)
)
}
/**
* Holds if `p` is the `i`th parameter of a viable dispatch target of `call`.
* The instance parameter is considered to have index `-1`.
*/
pragma[nomagic]
private predicate viableParam(DataFlowCall call, int i, ParameterNode p) {
p.isParameterOf(viableCallable(call), i)
p.isParameterOf(viableCallableExt(call), i)
}
/**
@@ -52,7 +280,7 @@ private module Cached {
pragma[nomagic]
private ReturnPosition viableReturnPos(DataFlowCall call, ReturnKindExt kind) {
viableCallable(call) = result.getCallable() and
viableCallableExt(call) = result.getCallable() and
kind = result.getKind()
}
@@ -317,6 +545,35 @@ private module Cached {
cached
private module DispatchWithCallContext {
/**
 * Holds if the set of viable implementations that can be called by `call`
 * might be improved by knowing the call context.
 *
 * This extends `mayBenefitFromCallContext` with the case where `callable` is
 * the enclosing callable of `call` and `call` has a lambda target that is
 * recorded with a specific required call (`TDataFlowCallSome(_)`).
 */
pragma[nomagic]
private predicate mayBenefitFromCallContextExt(DataFlowCall call, DataFlowCallable callable) {
mayBenefitFromCallContext(call, callable)
or
callable = call.getEnclosingCallable() and
exists(viableCallableLambda(call, TDataFlowCallSome(_)))
}
/**
 * Gets a viable dispatch target of `call` in the context `ctx`. This is
 * restricted to those `call`s for which a context might make a difference.
 *
 * The result is one of:
 * - a target given by `viableImplInCallContext(call, ctx)`;
 * - a lambda target of `call` recorded as requiring exactly the call `ctx`;
 * - a lambda target of `call` recorded without a required call, provided that
 *   `call` may benefit from a call context for an enclosing callable that is
 *   itself a viable target of `ctx`.
 */
pragma[nomagic]
private DataFlowCallable viableImplInCallContextExt(DataFlowCall call, DataFlowCall ctx) {
result = viableImplInCallContext(call, ctx)
or
result = viableCallableLambda(call, TDataFlowCallSome(ctx))
or
exists(DataFlowCallable enclosing |
mayBenefitFromCallContextExt(call, enclosing) and
enclosing = viableCallableExt(ctx) and
result = viableCallableLambda(call, TDataFlowCallNone())
)
}
/**
* Holds if the call context `ctx` reduces the set of viable run-time
* dispatch targets of call `call` in `c`.
@@ -324,10 +581,10 @@ private module Cached {
cached
predicate reducedViableImplInCallContext(DataFlowCall call, DataFlowCallable c, DataFlowCall ctx) {
exists(int tgts, int ctxtgts |
mayBenefitFromCallContext(call, c) and
c = viableCallable(ctx) and
ctxtgts = count(viableImplInCallContext(call, ctx)) and
tgts = strictcount(viableCallable(call)) and
mayBenefitFromCallContextExt(call, c) and
c = viableCallableExt(ctx) and
ctxtgts = count(viableImplInCallContextExt(call, ctx)) and
tgts = strictcount(viableCallableExt(call)) and
ctxtgts < tgts
)
}
@@ -339,7 +596,7 @@ private module Cached {
*/
cached
DataFlowCallable prunedViableImplInCallContext(DataFlowCall call, DataFlowCall ctx) {
result = viableImplInCallContext(call, ctx) and
result = viableImplInCallContextExt(call, ctx) and
reducedViableImplInCallContext(call, _, ctx)
}
@@ -351,10 +608,10 @@ private module Cached {
cached
predicate reducedViableImplInReturn(DataFlowCallable c, DataFlowCall call) {
exists(int tgts, int ctxtgts |
mayBenefitFromCallContext(call, _) and
c = viableCallable(call) and
ctxtgts = count(DataFlowCall ctx | c = viableImplInCallContext(call, ctx)) and
tgts = strictcount(DataFlowCall ctx | viableCallable(ctx) = call.getEnclosingCallable()) and
mayBenefitFromCallContextExt(call, _) and
c = viableCallableExt(call) and
ctxtgts = count(DataFlowCall ctx | c = viableImplInCallContextExt(call, ctx)) and
tgts = strictcount(DataFlowCall ctx | viableCallableExt(ctx) = call.getEnclosingCallable()) and
ctxtgts < tgts
)
}
@@ -367,7 +624,7 @@ private module Cached {
*/
cached
DataFlowCallable prunedViableImplInCallContextReverse(DataFlowCall call, DataFlowCall ctx) {
result = viableImplInCallContext(call, ctx) and
result = viableImplInCallContextExt(call, ctx) and
reducedViableImplInReturn(result, call)
}
}
@@ -481,6 +738,11 @@ private module Cached {
TBooleanNone() or
TBooleanSome(boolean b) { b = true or b = false }
/**
 * An optional `DataFlowCall`: either no call (`TDataFlowCallNone`) or one
 * specific call (`TDataFlowCallSome`).
 */
cached
newtype TDataFlowCallOption =
TDataFlowCallNone() or
TDataFlowCallSome(DataFlowCall call)
cached
newtype TTypedContent = MkTypedContent(Content c, DataFlowType t) { store(_, c, _, _, t) }
@@ -777,7 +1039,7 @@ ReturnPosition getReturnPosition(ReturnNodeExt ret) {
bindingset[cc, callable]
predicate resolveReturn(CallContext cc, DataFlowCallable callable, DataFlowCall call) {
cc instanceof CallContextAny and callable = viableCallable(call)
cc instanceof CallContextAny and callable = viableCallableExt(call)
or
exists(DataFlowCallable c0, DataFlowCall call0 |
call0.getEnclosingCallable() = callable and
@@ -791,14 +1053,14 @@ DataFlowCallable resolveCall(DataFlowCall call, CallContext cc) {
exists(DataFlowCall ctx | cc = TSpecificCall(ctx) |
if reducedViableImplInCallContext(call, _, ctx)
then result = prunedViableImplInCallContext(call, ctx)
else result = viableCallable(call)
else result = viableCallableExt(call)
)
or
result = viableCallable(call) and cc instanceof CallContextSomeCall
result = viableCallableExt(call) and cc instanceof CallContextSomeCall
or
result = viableCallable(call) and cc instanceof CallContextAny
result = viableCallableExt(call) and cc instanceof CallContextAny
or
result = viableCallable(call) and cc instanceof CallContextReturn
result = viableCallableExt(call) and cc instanceof CallContextReturn
}
predicate read = readStep/3;
@@ -812,6 +1074,19 @@ class BooleanOption extends TBooleanOption {
}
}
/** A `DataFlowCall` that may be absent. */
class DataFlowCallOption extends TDataFlowCallOption {
  /**
   * Gets a textual representation of this option: `"(none)"` when no call is
   * present, and otherwise the textual representation of the wrapped call.
   */
  string toString() {
    this = TDataFlowCallNone() and result = "(none)"
    or
    exists(DataFlowCall wrapped | this = TDataFlowCallSome(wrapped) | result = wrapped.toString())
  }
}
/** Content tagged with the type of a containing object. */
class TypedContent extends MkTypedContent {
private Content c;

View File

@@ -1517,10 +1517,13 @@ predicate forReadStep(CfgNode nodeFrom, Content c, Node nodeTo) {
or
c instanceof SetElementContent
or
c instanceof TupleElementContent
c = small_tuple()
)
}
/**
 * Gets a tuple element content whose index is at most 7.
 *
 * NOTE(review): presumably kept out of line (`noinline`) to limit which tuple
 * contents `forReadStep` considers; confirm the intent with the caller.
 */
pragma[noinline]
TupleElementContent small_tuple() { result.getIndex() <= 7 }
/**
* Holds if `nodeTo` is a read of an attribute (corresponding to `c`) of the object in `nodeFrom`.
*
@@ -1605,3 +1608,14 @@ int accessPathLimit() { result = 5 }
/** Holds if `n` should be hidden from path explanations. */
predicate nodeIsHidden(Node n) { none() }
/** The kind of a lambda call; an alias for `Unit`. */
class LambdaCallKind = Unit;
/**
 * Holds if `creation` is an expression that creates a lambda of kind `kind` for `c`.
 *
 * This is currently defined as `none()`, so it holds for nothing.
 */
predicate lambdaCreation(Node creation, LambdaCallKind kind, DataFlowCallable c) { none() }
/**
 * Holds if `call` is a lambda call of kind `kind` where `receiver` is the lambda expression.
 *
 * This is currently defined as `none()`, so it holds for nothing.
 */
predicate lambdaCall(DataFlowCall call, LambdaCallKind kind, Node receiver) { none() }
/**
 * Extra data-flow steps needed for lambda flow analysis.
 *
 * This is currently defined as `none()`, so no extra steps are added.
 */
predicate additionalLambdaFlowStep(Node nodeFrom, Node nodeTo, boolean preservesValue) { none() }

View File

@@ -10,14 +10,22 @@ import python
import DataFlowPublic
private import DataFlowPrivate
/**
 * Holds if `node` can be reached from some `CfgNode` by one or more
 * `simpleLocalFlowStep`s (a first step out of the `CfgNode`, followed by zero
 * or more further steps).
 */
private predicate comes_from_cfgnode(Node node) {
exists(CfgNode first, Node second |
simpleLocalFlowStep(first, second) and
simpleLocalFlowStep*(second, node)
)
}
/**
* A data flow node that is a source of local flow. This includes things like
* - Expressions
* - Function parameters
*/
class LocalSourceNode extends Node {
cached
LocalSourceNode() {
not simpleLocalFlowStep+(any(CfgNode n), this) and
not comes_from_cfgnode(this) and
not this instanceof ModuleVariableNode
or
this = any(ModuleVariableNode mvn).getARead()
@@ -65,15 +73,12 @@ private module Cached {
* The slightly backwards parametering ordering is to force correct indexing.
*/
cached
predicate hasLocalSource(Node sink, Node source) {
// Declaring `source` to be a `LocalSourceNode` currently causes a redundant check in the
// recursive case, so instead we check it explicitly here.
source = sink and
source instanceof LocalSourceNode
predicate hasLocalSource(Node sink, LocalSourceNode source) {
source = sink
or
exists(Node mid |
hasLocalSource(mid, source) and
simpleLocalFlowStep(mid, sink)
exists(Node second |
simpleLocalFlowStep(source, second) and
simpleLocalFlowStep*(second, sink)
)
}

View File

@@ -0,0 +1,104 @@
/**
* Provides classes modeling security-relevant aspects of
* - the `pycryptodome` PyPI package (imported as `Crypto`)
* - the `pycryptodomex` PyPI package (imported as `Cryptodome`)
* See https://pycryptodome.readthedocs.io/en/latest/.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
/**
* Provides models for
* - the `pycryptodome` PyPI package (imported as `Crypto`)
* - the `pycryptodomex` PyPI package (imported as `Cryptodome`)
* See https://pycryptodome.readthedocs.io/en/latest/
*/
private module CryptodomeModel {
// ---------------------------------------------------------------------------
/**
 * A call to `PublicKey.RSA.generate` from either the `Crypto` or the
 * `Cryptodome` package, modeled as an RSA key generation.
 *
 * See https://pycryptodome.readthedocs.io/en/latest/src/public_key/rsa.html#Crypto.PublicKey.RSA.generate
 */
class CryptodomePublicKeyRsaGenerateCall extends Cryptography::PublicKey::KeyGeneration::RsaRange,
  DataFlow::CallCfgNode {
  CryptodomePublicKeyRsaGenerateCall() {
    exists(API::Node rsa |
      rsa = API::moduleImport(["Crypto", "Cryptodome"]).getMember("PublicKey").getMember("RSA")
    |
      this = rsa.getMember("generate").getACall()
    )
  }

  /** Gets the key-size argument: the first positional argument or the `bits` keyword argument. */
  override DataFlow::Node getKeySizeArg() {
    result = this.getArg(0)
    or
    result = this.getArgByName("bits")
  }
}
/**
 * A call to `PublicKey.DSA.generate` from either the `Crypto` or the
 * `Cryptodome` package, modeled as a DSA key generation.
 *
 * See https://pycryptodome.readthedocs.io/en/latest/src/public_key/dsa.html#Crypto.PublicKey.DSA.generate
 */
class CryptodomePublicKeyDsaGenerateCall extends Cryptography::PublicKey::KeyGeneration::DsaRange,
  DataFlow::CallCfgNode {
  CryptodomePublicKeyDsaGenerateCall() {
    exists(API::Node dsa |
      dsa = API::moduleImport(["Crypto", "Cryptodome"]).getMember("PublicKey").getMember("DSA")
    |
      this = dsa.getMember("generate").getACall()
    )
  }

  /** Gets the key-size argument: the first positional argument or the `bits` keyword argument. */
  override DataFlow::Node getKeySizeArg() {
    result = this.getArg(0)
    or
    result = this.getArgByName("bits")
  }
}
/**
 * A call to `PublicKey.ECC.generate` from either the `Crypto` or the
 * `Cryptodome` package, modeled as an elliptic-curve key generation.
 *
 * See https://pycryptodome.readthedocs.io/en/latest/src/public_key/ecc.html#Crypto.PublicKey.ECC.generate
 */
class CryptodomePublicKeyEccGenerateCall extends Cryptography::PublicKey::KeyGeneration::EccRange,
  DataFlow::CallCfgNode {
  CryptodomePublicKeyEccGenerateCall() {
    exists(API::Node ecc |
      ecc = API::moduleImport(["Crypto", "Cryptodome"]).getMember("PublicKey").getMember("ECC")
    |
      this = ecc.getMember("generate").getACall()
    )
  }

  /** Gets the argument that specifies the curve to use (a string), passed as the `curve` keyword argument. */
  DataFlow::Node getCurveArg() { result = this.getArgByName("curve") }

  /**
   * Gets the name of the curve to use, where `origin` is the string constant
   * (a local source of the curve argument) from which the name was read.
   */
  string getCurveWithOrigin(DataFlow::Node origin) {
    exists(StrConst curveLiteral |
      origin = DataFlow::exprNode(curveLiteral) and
      origin = this.getCurveArg().getALocalSource() and
      result = curveLiteral.getText()
    )
  }

  /** Gets the key size (in bits) implied by the curve name, with `origin` explaining where the name came from. */
  override int getKeySizeWithOrigin(DataFlow::Node origin) {
    // using list from https://pycryptodome.readthedocs.io/en/latest/src/public_key/ecc.html
    this.getCurveWithOrigin(origin) in ["NIST P-256", "p256", "P-256", "prime256v1", "secp256r1"] and
    result = 256
    or
    this.getCurveWithOrigin(origin) in ["NIST P-384", "p384", "P-384", "prime384v1", "secp384r1"] and
    result = 384
    or
    this.getCurveWithOrigin(origin) in ["NIST P-521", "p521", "P-521", "prime521v1", "secp521r1"] and
    result = 521
  }

  // Note: There is not really a key-size argument, since it's always specified by the curve.
  override DataFlow::Node getKeySizeArg() { none() }
}
}

View File

@@ -0,0 +1,184 @@
/**
* Provides classes modeling security-relevant aspects of the `cryptography` PyPI package.
* See https://cryptography.io/en/latest/.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
/**
* Provides models for the `cryptography` PyPI package.
* See https://cryptography.io/en/latest/.
*/
private module CryptographyModel {
/**
* Provides helper predicates for the elliptic curve cryptography parts in
* `cryptography.hazmat.primitives.asymmetric.ec`.
*/
module Ecc {
/**
 * Gets a use of one of the predefined curve classes in
 * `cryptography.hazmat.primitives.asymmetric.ec` whose key size (in bits) is
 * `keySize`.
 */
private DataFlow::Node curveClassWithKeySize(int keySize) {
  exists(string curveName, API::Node ec |
    ec =
      API::moduleImport("cryptography")
          .getMember("hazmat")
          .getMember("primitives")
          .getMember("asymmetric")
          .getMember("ec") and
    result = ec.getMember(curveName).getAUse()
  |
    // obtained by manually looking at source code in
    // https://github.com/pyca/cryptography/blob/cba69f1922803f4f29a3fde01741890d88b8e217/src/cryptography/hazmat/primitives/asymmetric/ec.py#L208-L300
    // (curve names are grouped by key size here)
    curveName = "SECT571R1" and keySize = 570 // Indeed the numbers do not match.
    or
    curveName = "SECT571K1" and keySize = 571
    or
    curveName in ["SECT409R1", "SECT409K1"] and keySize = 409
    or
    curveName in ["SECT283R1", "SECT283K1"] and keySize = 283
    or
    curveName in ["SECT233R1", "SECT233K1"] and keySize = 233
    or
    curveName in ["SECT163R2", "SECT163K1"] and keySize = 163
    or
    curveName = "SECP521R1" and keySize = 521
    or
    curveName = "BrainpoolP512R1" and keySize = 512
    or
    curveName in ["SECP384R1", "BrainpoolP384R1"] and keySize = 384
    or
    curveName in ["SECP256R1", "SECP256K1", "BrainpoolP256R1"] and keySize = 256
    or
    curveName = "SECP224R1" and keySize = 224
    or
    curveName = "SECP192R1" and keySize = 192
  )
}
/**
 * Gets a reference to a predefined curve class instance with a specific key size (in bits), as well as the origin of the class.
 *
 * In the base case (`t.start()`) the result is a call whose function is one of
 * the curve classes from `curveClassWithKeySize`, and `origin` is that call.
 */
private DataFlow::LocalSourceNode curveClassInstanceWithKeySize(
DataFlow::TypeTracker t, int keySize, DataFlow::Node origin
) {
t.start() and
result.(DataFlow::CallCfgNode).getFunction() = curveClassWithKeySize(keySize) and
origin = result
or
// Due to bad performance when using the normal setup with
// `curveClassInstanceWithKeySize(t2, keySize, origin).track(t2, t)`,
// we have inlined that code and forced a join.
exists(DataFlow::TypeTracker t2 |
exists(DataFlow::StepSummary summary |
curveClassInstanceWithKeySize_first_join(t2, keySize, origin, result, summary) and
t = t2.append(summary)
)
)
}
/**
 * Holds if `res` is reached by a single type-tracking step, summarized by
 * `summary`, from a curve class instance tracked with `t2`, `keySize` and
 * `origin`.
 *
 * This is a helper for the recursive case of `curveClassInstanceWithKeySize`.
 */
pragma[nomagic]
private predicate curveClassInstanceWithKeySize_first_join(
DataFlow::TypeTracker t2, int keySize, DataFlow::Node origin, DataFlow::Node res,
DataFlow::StepSummary summary
) {
DataFlow::StepSummary::step(curveClassInstanceWithKeySize(t2, keySize, origin), res, summary)
}
/**
 * Gets a node that a predefined curve class instance with key size `keySize`
 * (in bits) flows to, where `origin` is the node the instance originates from.
 */
DataFlow::Node curveClassInstanceWithKeySize(int keySize, DataFlow::Node origin) {
  exists(DataFlow::LocalSourceNode instance |
    instance = curveClassInstanceWithKeySize(DataFlow::TypeTracker::end(), keySize, origin)
  |
    instance.flowsTo(result)
  )
}
}
// ---------------------------------------------------------------------------
/**
 * A call to `cryptography.hazmat.primitives.asymmetric.rsa.generate_private_key`,
 * modeled as an RSA key generation.
 *
 * See https://cryptography.io/en/latest/hazmat/primitives/asymmetric/rsa.html#cryptography.hazmat.primitives.asymmetric.rsa.generate_private_key
 */
class CryptographyRsaGeneratePrivateKeyCall extends Cryptography::PublicKey::KeyGeneration::RsaRange,
  DataFlow::CallCfgNode {
  CryptographyRsaGeneratePrivateKeyCall() {
    exists(API::Node rsa |
      rsa =
        API::moduleImport("cryptography")
            .getMember("hazmat")
            .getMember("primitives")
            .getMember("asymmetric")
            .getMember("rsa")
    |
      this = rsa.getMember("generate_private_key").getACall()
    )
  }

  /** Gets the key-size argument: the second positional argument or the `key_size` keyword argument. */
  override DataFlow::Node getKeySizeArg() {
    result = this.getArg(1)
    or
    result = this.getArgByName("key_size")
  }
}
/**
 * A call to `cryptography.hazmat.primitives.asymmetric.dsa.generate_private_key`,
 * modeled as a DSA key generation.
 *
 * See https://cryptography.io/en/latest/hazmat/primitives/asymmetric/dsa.html#cryptography.hazmat.primitives.asymmetric.dsa.generate_private_key
 */
class CryptographyDsaGeneratePrivateKeyCall extends Cryptography::PublicKey::KeyGeneration::DsaRange,
  DataFlow::CallCfgNode {
  CryptographyDsaGeneratePrivateKeyCall() {
    exists(API::Node dsa |
      dsa =
        API::moduleImport("cryptography")
            .getMember("hazmat")
            .getMember("primitives")
            .getMember("asymmetric")
            .getMember("dsa")
    |
      this = dsa.getMember("generate_private_key").getACall()
    )
  }

  /** Gets the key-size argument: the first positional argument or the `key_size` keyword argument. */
  override DataFlow::Node getKeySizeArg() {
    result = this.getArg(0)
    or
    result = this.getArgByName("key_size")
  }
}
/**
 * A call to `cryptography.hazmat.primitives.asymmetric.ec.generate_private_key`,
 * modeled as an elliptic-curve key generation.
 *
 * See https://cryptography.io/en/latest/hazmat/primitives/asymmetric/ec.html#cryptography.hazmat.primitives.asymmetric.ec.generate_private_key
 */
class CryptographyEcGeneratePrivateKeyCall extends Cryptography::PublicKey::KeyGeneration::EccRange,
  DataFlow::CallCfgNode {
  CryptographyEcGeneratePrivateKeyCall() {
    exists(API::Node ec |
      ec =
        API::moduleImport("cryptography")
            .getMember("hazmat")
            .getMember("primitives")
            .getMember("asymmetric")
            .getMember("ec")
    |
      this = ec.getMember("generate_private_key").getACall()
    )
  }

  /** Gets the argument that specifies the curve to use: the first positional argument or the `curve` keyword argument. */
  DataFlow::Node getCurveArg() {
    result = this.getArg(0)
    or
    result = this.getArgByName("curve")
  }

  /** Gets the key size (in bits) of the curve class instance passed as the curve argument, with `origin` as its origin. */
  override int getKeySizeWithOrigin(DataFlow::Node origin) {
    exists(DataFlow::Node curveArg | curveArg = this.getCurveArg() |
      curveArg = Ecc::curveClassInstanceWithKeySize(result, origin)
    )
  }

  // Note: There is not really a key-size argument, since it's always specified by the curve.
  override DataFlow::Node getKeySizeArg() { none() }
}
}

View File

@@ -8,6 +8,7 @@ private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.frameworks.PEP249
private import semmle.python.regex
@@ -1975,6 +1976,205 @@ private module Django {
}
}
/** Provides models for django forms (defined in the `django.forms` module) */
module Forms {
/**
 * Provides models for the `django.forms.forms.BaseForm` class and subclasses. This
 * is usually used by the `django.forms.forms.Form` class, which is also available
 * under the more commonly used alias `django.forms.Form`.
 *
 * See https://docs.djangoproject.com/en/3.1/ref/forms/api/
 */
module Form {
  /**
   * Gets a reference to the `django.forms.forms.BaseForm` class or any subclass.
   *
   * Besides the canonical definitions and their `django.forms` aliases, this
   * includes a fixed list of form classes defined within Django itself.
   */
  API::Node subclassRef() {
    // canonical definition
    result =
      API::moduleImport("django")
          .getMember("forms")
          .getMember("forms")
          .getMember(["BaseForm", "Form"])
          .getASubclass*()
    or
    result =
      API::moduleImport("django")
          .getMember("forms")
          .getMember("models")
          .getMember(["BaseModelForm", "ModelForm"])
          .getASubclass*()
    or
    // aliases from `django.forms`
    result =
      API::moduleImport("django")
          .getMember("forms")
          .getMember(["BaseForm", "Form", "BaseModelForm", "ModelForm"])
          .getASubclass*()
    or
    // other Form subclasses defined in Django
    result =
      API::moduleImport("django")
          .getMember("contrib")
          .getMember("admin")
          .getMember("forms")
          .getMember(["AdminAuthenticationForm", "AdminPasswordChangeForm"])
          .getASubclass*()
    or
    result =
      API::moduleImport("django")
          .getMember("contrib")
          .getMember("admin")
          .getMember("helpers")
          .getMember("ActionForm")
          .getASubclass*()
    or
    result =
      API::moduleImport("django")
          .getMember("contrib")
          .getMember("admin")
          .getMember("views")
          .getMember("main")
          .getMember("ChangeListSearchForm")
          .getASubclass*()
    or
    result =
      API::moduleImport("django")
          .getMember("contrib")
          .getMember("auth")
          .getMember("forms")
          .getMember([
              "PasswordResetForm", "UserChangeForm", "SetPasswordForm",
              "AdminPasswordChangeForm", "PasswordChangeForm", "AuthenticationForm",
              "UserCreationForm"
            ])
          .getASubclass*()
    or
    result =
      API::moduleImport("django")
          .getMember("contrib")
          .getMember("flatpages")
          .getMember("forms")
          .getMember("FlatpageForm")
          .getASubclass*()
    or
    result =
      API::moduleImport("django")
          .getMember("forms")
          .getMember("formsets")
          .getMember("ManagementForm")
          .getASubclass*()
    // Note: `django.forms.models.{ModelForm,BaseModelForm}` is already covered
    // by the canonical definitions above, so it is not repeated here.
  }
}
/**
 * Provides models for the `django.forms.fields.Field` class and subclasses. This is
 * also available under the more commonly used alias `django.forms.Field`.
 *
 * See https://docs.djangoproject.com/en/3.1/ref/forms/fields/
 */
module Field {
  /**
   * Gets a reference to the `django.forms.fields.Field` class or any subclass.
   *
   * Besides `Field` itself, this includes a fixed list of known field classes
   * from `django.forms.fields` and `django.forms.models` (each also under its
   * `django.forms` alias), and field classes defined in `django.contrib`.
   */
  API::Node subclassRef() {
    exists(string modName, string clsName |
      // canonical definition
      result =
        API::moduleImport("django")
            .getMember("forms")
            .getMember(modName)
            .getMember(clsName)
            .getASubclass*()
      or
      // alias from `django.forms`
      result = API::moduleImport("django").getMember("forms").getMember(clsName).getASubclass*()
    |
      modName = "fields" and
      clsName in [
          "Field",
          // Known subclasses
          "BooleanField", "IntegerField", "CharField", "SlugField", "DateTimeField",
          "EmailField", "DateField", "TimeField", "DurationField", "DecimalField", "FloatField",
          "GenericIPAddressField", "UUIDField", "JSONField", "FilePathField",
          "NullBooleanField", "URLField", "TypedChoiceField", "FileField", "ImageField",
          "RegexField", "ChoiceField", "MultipleChoiceField", "ComboField", "MultiValueField",
          "SplitDateTimeField", "TypedMultipleChoiceField", "BaseTemporalField"
        ]
      or
      // Known subclasses from `django.forms.models`
      modName = "models" and
      clsName in ["ModelChoiceField", "ModelMultipleChoiceField", "InlineForeignKeyField"]
    )
    or
    // other Field subclasses defined in Django
    result =
      API::moduleImport("django")
          .getMember("contrib")
          .getMember("auth")
          .getMember("forms")
          .getMember(["ReadOnlyPasswordHashField", "UsernameField"])
          .getASubclass*()
    or
    result =
      API::moduleImport("django")
          .getMember("contrib")
          .getMember("gis")
          .getMember("forms")
          .getMember("fields")
          .getMember([
              "GeometryCollectionField", "GeometryField", "LineStringField",
              "MultiLineStringField", "MultiPointField", "MultiPolygonField", "PointField",
              "PolygonField"
            ])
          .getASubclass*()
    or
    result =
      API::moduleImport("django")
          .getMember("contrib")
          .getMember("postgres")
          .getMember("forms")
          .getMember("array")
          .getMember(["SimpleArrayField", "SplitArrayField"])
          .getASubclass*()
    or
    result =
      API::moduleImport("django")
          .getMember("contrib")
          .getMember("postgres")
          .getMember("forms")
          .getMember("hstore")
          .getMember("HStoreField")
          .getASubclass*()
    or
    result =
      API::moduleImport("django")
          .getMember("contrib")
          .getMember("postgres")
          .getMember("forms")
          .getMember("ranges")
          .getMember([
              "BaseRangeField", "DateRangeField", "DateTimeRangeField", "DecimalRangeField",
              "IntegerRangeField"
            ])
          .getASubclass*()
    // Note: the `django.forms.models` field classes are already covered by the
    // `modName = "models"` case above, so they are not repeated here.
  }
}
}
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/**
* Gets the last decorator call for the function `func`, if `func` has decorators.
*/
@@ -1983,6 +2183,96 @@ private module Django {
not exists(Call other_decorator | other_decorator.getArg(0) = result)
}
/** A mixin that provides the `getASelfRef` member predicate for a modeled class. */
abstract private class SelfRefMixin extends Class {
  /**
   * Gets a node, tracked with type tracker `t`, that refers to an instance of
   * this class and originates from the first parameter (`self`) of a method
   * defined on this class.
   *
   * Note: TODO: This doesn't take MRO into account
   * Note: TODO: This doesn't take staticmethod/classmethod into account
   */
  private DataFlow::Node getASelfRef(DataFlow::TypeTracker t) {
    t.start() and
    exists(Function method | method = this.getAMethod() |
      result.(DataFlow::ParameterNode).getParameter() = method.getArg(0)
    )
    or
    exists(DataFlow::TypeTracker prev | result = this.getASelfRef(prev).track(prev, t))
  }

  /**
   * Gets a node that refers to an instance of this class, originating from the
   * first parameter (`self`) of a method defined on this class.
   *
   * Note: TODO: This doesn't take MRO into account
   * Note: TODO: This doesn't take staticmethod/classmethod into account
   */
  DataFlow::Node getASelfRef() { result = this.getASelfRef(DataFlow::TypeTracker::end()) }
}
// ---------------------------------------------------------------------------
// Form and form field modeling
// ---------------------------------------------------------------------------
/**
 * A class with a base that refers to the `django.forms.Form` class (or one of
 * its known subclasses), thereby handling user input.
 */
class DjangoFormClass extends Class, SelfRefMixin {
  DjangoFormClass() {
    exists(DataFlow::Node formRef | formRef = Django::Forms::Form::subclassRef().getAUse() |
      this.getABase() = formRef.asExpr()
    )
  }
}
/**
 * A source of cleaned_data (either the return value from `super().clean()`, or a reference to `self.cleaned_data`)
 *
 * Both forms are only recognized in connection with a `DjangoFormClass`: the
 * `super().clean()` call must be in the scope of one of its methods, and the
 * `cleaned_data` attribute must be read from a self reference of the class.
 *
 * See https://docs.djangoproject.com/en/3.1/ref/forms/validation/#form-and-field-validation
 */
private class DjangoFormCleanedData extends RemoteFlowSource::Range, DataFlow::Node {
  DjangoFormCleanedData() {
    exists(DjangoFormClass cls, Function meth |
      cls.getAMethod() = meth and
      (
        this = API::builtin("super").getReturn().getMember("clean").getACall() and
        this.getScope() = meth
        or
        this.(DataFlow::AttrRead).getAttributeName() = "cleaned_data" and
        this.(DataFlow::AttrRead).getObject() = cls.getASelfRef()
      )
    )
  }

  // This source is form-level cleaned data, not a field's value parameter, so
  // it gets its own description rather than the one used for form fields.
  override string getSourceType() { result = "django.forms.Form subclass, cleaned_data" }
}
/**
 * A class with a base that refers to the `django.forms.Field` class (or one of
 * its known subclasses), thereby handling user input.
 */
class DjangoFormFieldClass extends Class {
  DjangoFormFieldClass() {
    exists(DataFlow::Node fieldRef | fieldRef = Django::Forms::Field::subclassRef().getAUse() |
      this.getABase() = fieldRef.asExpr()
    )
  }
}
/**
 * The second parameter (index 1) of a `to_python`, `validate`,
 * `run_validators` or `clean` method on a `DjangoFormFieldClass`, which
 * receives the user-supplied value for the field.
 *
 * See https://docs.djangoproject.com/en/3.1/ref/forms/validation/#form-and-field-validation
 */
private class DjangoFormFieldValueParam extends RemoteFlowSource::Range, DataFlow::ParameterNode {
  DjangoFormFieldValueParam() {
    exists(Function meth |
      meth = any(DjangoFormFieldClass cls).getAMethod() and
      meth.getName() in ["to_python", "validate", "run_validators", "clean"]
    |
      this.getParameter() = meth.getArg(1)
    )
  }

  override string getSourceType() {
    result = "django.forms.Field subclass, value parameter in method"
  }
}
// ---------------------------------------------------------------------------
// routing modeling
// ---------------------------------------------------------------------------
@@ -2068,7 +2358,7 @@ private module Django {
}
/** A class that we consider a django View class. */
abstract class DjangoViewClass extends DjangoViewClassHelper {
abstract class DjangoViewClass extends DjangoViewClassHelper, SelfRefMixin {
/** Gets a function that could handle incoming requests, if any. */
Function getARequestHandler() {
// TODO: This doesn't handle attribute assignment. Should be OK, but analysis is not as complete as with
@@ -2080,29 +2370,6 @@ private module Django {
result.getName() = "get_redirect_url"
)
}
/**
* Gets a reference to instances of this class, originating from a self parameter of
* a method defined on this class.
*
* Note: TODO: This doesn't take MRO into account
* Note: TODO: This doesn't take staticmethod/classmethod into account
*/
private DataFlow::Node getASelfRef(DataFlow::TypeTracker t) {
t.start() and
result.(DataFlow::ParameterNode).getParameter() = this.getAMethod().getArg(0)
or
exists(DataFlow::TypeTracker t2 | result = this.getASelfRef(t2).track(t2, t))
}
/**
* Gets a reference to instances of this class, originating from a self parameter of
* a method defined on this class.
*
* Note: TODO: This doesn't take MRO into account
* Note: TODO: This doesn't take staticmethod/classmethod into account
*/
DataFlow::Node getASelfRef() { result = this.getASelfRef(DataFlow::TypeTracker::end()) }
}
/**
@@ -2393,7 +2660,7 @@ private module Django {
}
override string getSourceType() {
result = "django.http.request.HttpRequest (attribute on self in View class)"
result = "django HttpRequest from self.request in View class"
}
}
@@ -2413,7 +2680,7 @@ private module Django {
}
override string getSourceType() {
result = "django routed param from attribute on self in View class"
result = "django routed param from self.args/kwargs in View class"
}
}

View File

@@ -8,6 +8,7 @@ private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import PEP249
/** Provides models for the Python standard library. */
@@ -684,96 +685,35 @@ private module Stdlib {
// ---------------------------------------------------------------------------
// builtins
// ---------------------------------------------------------------------------
/** Gets a reference to the `builtins` module (called `__builtin__` in Python 2). */
private DataFlow::Node builtins(DataFlow::TypeTracker t) {
t.start() and
result = DataFlow::importNode(["builtins", "__builtin__"])
or
exists(DataFlow::TypeTracker t2 | result = builtins(t2).track(t2, t))
}
/** Gets a reference to the `builtins` module. */
DataFlow::Node builtins() { result = builtins(DataFlow::TypeTracker::end()) }
/**
* Gets a reference to the attribute `attr_name` of the `builtins` module.
* WARNING: Only holds for a few predefined attributes.
*/
private DataFlow::Node builtins_attr(DataFlow::TypeTracker t, string attr_name) {
attr_name in ["exec", "eval", "compile", "open"] and
(
t.start() and
result = DataFlow::importNode(["builtins", "__builtin__"] + "." + attr_name)
or
t.startInAttr(attr_name) and
result = DataFlow::importNode(["builtins", "__builtin__"])
or
// special handling of builtins, that are in scope without any imports
// TODO: Take care of overrides, either `def eval: ...`, `eval = ...`, or `builtins.eval = ...`
t.start() and
exists(NameNode ref | result.asCfgNode() = ref |
ref.isGlobal() and
ref.getId() = attr_name and
ref.isLoad()
)
)
or
// Due to bad performance when using normal setup with `builtins_attr(t2, attr_name).track(t2, t)`
// we have inlined that code and forced a join
exists(DataFlow::TypeTracker t2 |
exists(DataFlow::StepSummary summary |
builtins_attr_first_join(t2, attr_name, result, summary) and
t = t2.append(summary)
)
)
}
pragma[nomagic]
private predicate builtins_attr_first_join(
DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary
) {
DataFlow::StepSummary::step(builtins_attr(t2, attr_name), res, summary)
}
/**
* Gets a reference to the attribute `attr_name` of the `builtins` module.
* WARNING: Only holds for a few predefined attributes.
*/
private DataFlow::Node builtins_attr(string attr_name) {
result = builtins_attr(DataFlow::TypeTracker::end(), attr_name)
}
/**
* A call to the builtin `exec` function.
* See https://docs.python.org/3/library/functions.html#exec
*/
private class BuiltinsExecCall extends CodeExecution::Range, DataFlow::CfgNode {
override CallNode node;
private class BuiltinsExecCall extends CodeExecution::Range, DataFlow::CallCfgNode {
BuiltinsExecCall() { this = API::builtin("exec").getACall() }
BuiltinsExecCall() { node.getFunction() = builtins_attr("exec").asCfgNode() }
override DataFlow::Node getCode() { result.asCfgNode() = node.getArg(0) }
override DataFlow::Node getCode() { result = this.getArg(0) }
}
/**
* A call to the builtin `eval` function.
* See https://docs.python.org/3/library/functions.html#eval
*
* NOTE(review): two variants of this class appear side by side below. One extends
* `DataFlow::CfgNode`, declares a `CallNode node` field and matches the callee against
* `builtins_attr("eval")`; the other extends `DataFlow::CallCfgNode` and matches
* `API::builtin("eval").getACall()`. Both report argument 0 of the call as the evaluated
* code. Presumably only one variant is meant to remain; confirm before editing.
*/
private class BuiltinsEvalCall extends CodeExecution::Range, DataFlow::CfgNode {
private class BuiltinsEvalCall extends CodeExecution::Range, DataFlow::CallCfgNode {
override CallNode node;
BuiltinsEvalCall() { node.getFunction() = builtins_attr("eval").asCfgNode() }
BuiltinsEvalCall() { this = API::builtin("eval").getACall() }
override DataFlow::Node getCode() { result.asCfgNode() = node.getArg(0) }
override DataFlow::Node getCode() { result = this.getArg(0) }
}
/**
* An additional taint step for calls to the builtin function `compile`.
*
* Taint flows from the first positional argument, or from the `source` keyword argument,
* of a `compile` call to the node for the call itself.
*
* NOTE(review): the body of `step` holds two forms of the same `exists` side by side:
* one over a `CallNode` resolved through `builtins_attr("compile")`, one over a
* `DataFlow::CallCfgNode` resolved through `API::builtin("compile").getACall()`.
* Presumably only one is meant to remain; confirm before editing.
*/
private class BuiltinsCompileCallAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
exists(CallNode call |
nodeTo.asCfgNode() = call and
call.getFunction() = builtins_attr("compile").asCfgNode() and
nodeFrom.asCfgNode() in [call.getArg(0), call.getArgByName("source")]
exists(DataFlow::CallCfgNode call |
nodeTo = call and
call = API::builtin("compile").getACall() and
nodeFrom in [call.getArg(0), call.getArgByName("source")]
)
}
}
@@ -782,23 +722,22 @@ private module Stdlib {
* A call to the builtin `open` function.
* See https://docs.python.org/3/library/functions.html#open
*/
private class OpenCall extends FileSystemAccess::Range, DataFlow::CfgNode {
override CallNode node;
private class OpenCall extends FileSystemAccess::Range, DataFlow::CallCfgNode {
// NOTE(review): two variants of this class are interleaved here. One is based on
// `DataFlow::CfgNode` with a `CallNode node` field and resolves the callee through
// `builtins_attr("open")` / `io_attr("open")`; the other is based on
// `DataFlow::CallCfgNode` and resolves it through `API::builtin("open")` /
// `API::moduleImport("io").getMember("open")`. Presumably only one is meant to remain;
// confirm before editing.
OpenCall() {
node.getFunction() = builtins_attr("open").asCfgNode()
this = API::builtin("open").getACall()
or
node.getFunction() = io_attr("open").asCfgNode()
// io.open is a special case, since it is an alias for the builtin `open`
this = API::moduleImport("io").getMember("open").getACall()
}
// The accessed path is the first positional argument or the `file` keyword argument.
override DataFlow::Node getAPathArgument() {
result.asCfgNode() in [node.getArg(0), node.getArgByName("file")]
result in [this.getArg(0), this.getArgByName("file")]
}
}
/**
* An exec statement (only Python 2).
* Se ehttps://docs.python.org/2/reference/simple_stmts.html#the-exec-statement.
* See https://docs.python.org/2/reference/simple_stmts.html#the-exec-statement.
*/
private class ExecStatement extends CodeExecution::Range {
ExecStatement() {
@@ -942,59 +881,6 @@ private module Stdlib {
}
}
// ---------------------------------------------------------------------------
// io
// ---------------------------------------------------------------------------
/** Gets a reference to the `io` module, as tracked by the type tracker `t`. */
private DataFlow::Node io(DataFlow::TypeTracker t) {
  // Base case: the import of `io`, where tracking starts.
  result = DataFlow::importNode("io") and
  t.start()
  or
  // Recursive case: extend an earlier reference via `track`.
  exists(DataFlow::TypeTracker prev | io(prev).track(prev, t) = result)
}
/** Gets a reference to the `io` module (type tracking taken to its end state). */
DataFlow::Node io() {
  exists(DataFlow::TypeTracker endState | endState = DataFlow::TypeTracker::end() |
    result = io(endState)
  )
}
/**
* Gets a reference to the attribute `attr_name` of the `io` module, as tracked by the
* type tracker `t`.
* WARNING: Only holds for a few predefined attributes.
*/
private DataFlow::Node io_attr(DataFlow::TypeTracker t, string attr_name) {
// Only `open` is modeled.
attr_name in ["open"] and
(
// Start of tracking: the import node for `io.<attr_name>`.
t.start() and
result = DataFlow::importNode("io" + "." + attr_name)
or
// Start of tracking: a reference to the `io` module, with the tracker started in
// attribute `attr_name`.
t.startInAttr(attr_name) and
result = io()
)
or
// Due to bad performance when using normal setup with `io_attr(t2, attr_name).track(t2, t)`
// we have inlined that code and forced a join
exists(DataFlow::TypeTracker t2 |
exists(DataFlow::StepSummary summary |
io_attr_first_join(t2, attr_name, result, summary) and
t = t2.append(summary)
)
)
}
/**
* Holds if `StepSummary::step` relates a node `io_attr(t2, attr_name)` to `res` with
* step summary `summary`.
*
* Helper for the type-tracker variant of `io_attr`, which appends `summary` to `t2`.
* It is kept as a separate `pragma[nomagic]` predicate; the caller's comment says this
* was done to force a join for performance reasons.
*/
pragma[nomagic]
private predicate io_attr_first_join(
DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary
) {
DataFlow::StepSummary::step(io_attr(t2, attr_name), res, summary)
}
/**
 * Gets a reference to the attribute `attr_name` of the `io` module, as found by the
 * type-tracker variant of this predicate with an end-state tracker.
 * WARNING: Only holds for a few predefined attributes.
 */
private DataFlow::Node io_attr(string attr_name) {
  exists(DataFlow::TypeTracker endState | endState = DataFlow::TypeTracker::end() |
    result = io_attr(endState, attr_name)
  )
}
// ---------------------------------------------------------------------------
// json
// ---------------------------------------------------------------------------

View File

@@ -1,119 +1,72 @@
/**
* Provides classes modeling security-relevant aspects of the PyYAML package
* https://pyyaml.org/wiki/PyYAMLDocumentation (obtained via `import yaml`).
* Provides classes modeling security-relevant aspects of the PyYAML package (obtained
* via `import yaml`)
*
* See
* - https://pyyaml.org/wiki/PyYAMLDocumentation
* - https://pyyaml.docsforge.com/master/documentation/
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.Concepts
/**
* Provides type-tracking helpers for references to the `yaml` module and to a fixed set
* of its attributes (the loading functions plus `SafeLoader` and `BaseLoader`).
*/
private module Yaml {
/** Gets a reference to the `yaml` module. */
private DataFlow::Node yaml(DataFlow::TypeTracker t) {
// Tracking starts at the import of `yaml`.
t.start() and
result = DataFlow::importNode("yaml")
or
// Otherwise extend an earlier reference via `track`.
exists(DataFlow::TypeTracker t2 | result = yaml(t2).track(t2, t))
}
/** Gets a reference to the `yaml` module. */
DataFlow::Node yaml() { result = yaml(DataFlow::TypeTracker::end()) }
/** Provides models for the `yaml` module. */
module yaml {
/**
* Gets a reference to the attribute `attr_name` of the `yaml` module.
* WARNING: Only holds for a few predefined attributes.
*
* For example, using `attr_name = "load"` will get all uses of `yaml.load`.
*/
private DataFlow::Node yaml_attr(DataFlow::TypeTracker t, string attr_name) {
attr_name in [
// functions
"load", "load_all", "full_load", "full_load_all", "unsafe_load", "unsafe_load_all",
"safe_load", "safe_load_all",
// Classes
"SafeLoader", "BaseLoader"
] and
(
// Start of tracking: the import node for `yaml.<attr_name>`.
t.start() and
result = DataFlow::importNode("yaml." + attr_name)
or
// Start of tracking: a reference to the `yaml` module, with the tracker started in
// attribute `attr_name`.
t.startInAttr(attr_name) and
result = yaml()
)
or
// Due to bad performance when using normal setup with `yaml_attr(t2, attr_name).track(t2, t)`
// we have inlined that code and forced a join
exists(DataFlow::TypeTracker t2 |
exists(DataFlow::StepSummary summary |
yaml_attr_first_join(t2, attr_name, result, summary) and
t = t2.append(summary)
)
)
}
/**
* Holds if `StepSummary::step` relates a node `yaml_attr(t2, attr_name)` to `res` with
* step summary `summary`. Helper for `yaml_attr`, kept separate and `pragma[nomagic]`
* to force the join mentioned in the comment at its call site.
*/
pragma[nomagic]
private predicate yaml_attr_first_join(
DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary
) {
DataFlow::StepSummary::step(yaml_attr(t2, attr_name), res, summary)
}
/**
* Gets a reference to the attribute `attr_name` of the `yaml` module.
* WARNING: Only holds for a few predefined attributes.
*
* For example, using `attr_name = "load"` will get all uses of `yaml.load`.
*/
DataFlow::Node yaml_attr(string attr_name) {
result = yaml_attr(DataFlow::TypeTracker::end(), attr_name)
}
}
}
private import semmle.python.ApiGraphs
/**
* A call to any of the loading functions in `yaml` (`load`, `load_all`, `full_load`,
* `full_load_all`, `unsafe_load`, `unsafe_load_all`, `safe_load`, `safe_load_all`)
* Provides classes modeling security-relevant aspects of the PyYAML package (obtained
* via `import yaml`)
*
* See https://pyyaml.org/wiki/PyYAMLDocumentation (you will have to scroll down).
* See
* - https://pyyaml.org/wiki/PyYAMLDocumentation
* - https://pyyaml.docsforge.com/master/documentation/
*/
private class YamlLoadCall extends Decoding::Range, DataFlow::CfgNode {
override CallNode node;
string func_name;
YamlLoadCall() {
func_name in [
"load", "load_all", "full_load", "full_load_all", "unsafe_load", "unsafe_load_all",
"safe_load", "safe_load_all"
] and
node.getFunction() = Yaml::yaml::yaml_attr(func_name).asCfgNode()
}
private module Yaml {
/**
* This function was thought safe from the 5.1 release in 2017, when the default loader was changed to `FullLoader`.
* In 2020 new exploits were found, meaning it's not safe. The current plan is to change the default to `SafeLoader` in release 6.0
* (as explained in https://github.com/yaml/pyyaml/issues/420#issuecomment-696752389).
* Until 6.0 is released, we will mark `yaml.load` as possibly leading to arbitrary code execution.
* See https://github.com/yaml/pyyaml/wiki/PyYAML-yaml.load(input)-Deprecation for more details.
* A call to any of the loading functions in `yaml` (`load`, `load_all`, `full_load`,
* `full_load_all`, `unsafe_load`, `unsafe_load_all`, `safe_load`, `safe_load_all`)
*
* See https://pyyaml.org/wiki/PyYAMLDocumentation (you will have to scroll down).
*/
override predicate mayExecuteInput() {
func_name in ["full_load", "full_load_all", "unsafe_load", "unsafe_load_all"]
or
func_name in ["load", "load_all"] and
// If the `Loader` is not set to either `SafeLoader` or `BaseLoader` or not set at all,
// then the default loader will be used, which is not safe.
not exists(DataFlow::Node loader_arg |
loader_arg.asCfgNode() in [node.getArg(1), node.getArgByName("Loader")]
|
loader_arg = Yaml::yaml::yaml_attr(["SafeLoader", "BaseLoader"])
)
private class YamlLoadCall extends Decoding::Range, DataFlow::CallCfgNode {
override CallNode node;
string func_name;
YamlLoadCall() {
func_name in [
"load", "load_all", "full_load", "full_load_all", "unsafe_load", "unsafe_load_all",
"safe_load", "safe_load_all"
] and
this = API::moduleImport("yaml").getMember(func_name).getACall()
}
/**
* This function was thought safe from the 5.1 release in 2017, when the default loader was changed to `FullLoader`.
* In 2020 new exploits were found, meaning it's not safe. The current plan is to change the default to `SafeLoader` in release 6.0
* (as explained in https://github.com/yaml/pyyaml/issues/420#issuecomment-696752389).
* Until 6.0 is released, we will mark `yaml.load` as possibly leading to arbitrary code execution.
* See https://github.com/yaml/pyyaml/wiki/PyYAML-yaml.load(input)-Deprecation for more details.
*/
override predicate mayExecuteInput() {
func_name in ["full_load", "full_load_all", "unsafe_load", "unsafe_load_all"]
or
func_name in ["load", "load_all"] and
// If the `Loader` is not set to either `SafeLoader` or `BaseLoader` or not set at all,
// then the default loader will be used, which is not safe.
not exists(DataFlow::Node loader_arg |
loader_arg in [this.getArg(1), this.getArgByName("Loader")]
|
loader_arg =
API::moduleImport("yaml")
.getMember(["SafeLoader", "BaseLoader", "CSafeLoader", "CBaseLoader"])
.getAUse()
)
}
override DataFlow::Node getAnInput() { result = this.getArg(0) }
override DataFlow::Node getOutput() { result = this }
override string getFormat() { result = "YAML" }
}
override DataFlow::Node getAnInput() { result.asCfgNode() = node.getArg(0) }
override DataFlow::Node getOutput() { result = this }
override string getFormat() { result = "YAML" }
}

View File

@@ -61,8 +61,8 @@ abstract class ClassObjectInternal extends ObjectInternal {
pragma[noinline]
override predicate binds(ObjectInternal instance, string name, ObjectInternal descriptor) {
// Only holds when `instance` is this object itself.
instance = this and
// NOTE(review): `attributeRequired` and `lookup` are each stated twice below, once with
// a bare `name` and once with `name` wrapped in `pragma[only_bind_into]`. These look
// like two variants of the same conjuncts; presumably only one pair is meant to remain.
PointsToInternal::attributeRequired(this, name) and
this.lookup(name, descriptor, _) and
PointsToInternal::attributeRequired(this, pragma[only_bind_into](name)) and
this.lookup(pragma[only_bind_into](name), descriptor, _) and
// The looked-up value must be a descriptor.
descriptor.isDescriptor() = true
}

View File

@@ -34,9 +34,11 @@ abstract class ConstantObjectInternal extends ObjectInternal {
pragma[noinline]
override predicate attribute(string name, ObjectInternal value, CfgOrigin origin) {
PointsToInternal::attributeRequired(this, name) and
PointsToInternal::attributeRequired(pragma[only_bind_into](this), pragma[only_bind_into](name)) and
exists(ObjectInternal cls_attr, CfgOrigin attr_orig |
this.getClass().(ClassObjectInternal).lookup(name, cls_attr, attr_orig) and
this.getClass()
.(ClassObjectInternal)
.lookup(pragma[only_bind_into](name), cls_attr, attr_orig) and
cls_attr.isDescriptor() = true and
cls_attr.descriptorGetInstance(this, value, origin)
)

View File

@@ -30,18 +30,19 @@ abstract class InstanceObject extends ObjectInternal {
/**
* Holds if attribute `name` is required on this object and looking `name` up on this
* object's class (as a `ClassObjectInternal`) yields `cls_attr`.
*
* NOTE(review): both conjuncts appear twice in the body, once with a bare `name` and
* once with `name` wrapped in `pragma[only_bind_into]`. These look like two variants of
* the same body; presumably only one pair is meant to remain.
*/
pragma[noinline]
private predicate classAttribute(string name, ObjectInternal cls_attr) {
PointsToInternal::attributeRequired(this, name) and
this.getClass().(ClassObjectInternal).lookup(name, cls_attr, _)
PointsToInternal::attributeRequired(this, pragma[only_bind_into](name)) and
this.getClass().(ClassObjectInternal).lookup(pragma[only_bind_into](name), cls_attr, _)
}
/**
* Holds if attribute `name` is required on this object and, for an initializer `init`
* of this object with context `callee`, a `self` variable in the scope of `init` that
* satisfies `self_variable_reaching_init_exit` has attribute `name` pointing to `value`
* with origin `origin`.
*
* NOTE(review): the `attributeRequired` and `variableAttributePointsTo` conjuncts each
* appear twice, once with a bare `name` and once with `name` wrapped in
* `pragma[only_bind_into]`. These look like two variants of the same body; presumably
* only one of each is meant to remain.
*/
pragma[noinline]
private predicate selfAttribute(string name, ObjectInternal value, CfgOrigin origin) {
PointsToInternal::attributeRequired(this, name) and
PointsToInternal::attributeRequired(this, pragma[only_bind_into](name)) and
exists(EssaVariable self, PythonFunctionObjectInternal init, Context callee |
this.initializer(init, callee) and
self_variable_reaching_init_exit(self) and
self.getScope() = init.getScope() and
AttributePointsTo::variableAttributePointsTo(self, callee, name, value, origin)
AttributePointsTo::variableAttributePointsTo(self, callee, pragma[only_bind_into](name),
value, origin)
)
}
@@ -316,9 +317,11 @@ class UnknownInstanceInternal extends TUnknownInstance, ObjectInternal {
pragma[noinline]
override predicate attribute(string name, ObjectInternal value, CfgOrigin origin) {
PointsToInternal::attributeRequired(this, name) and
PointsToInternal::attributeRequired(this, pragma[only_bind_into](name)) and
exists(ObjectInternal cls_attr, CfgOrigin attr_orig |
this.getClass().(ClassObjectInternal).lookup(name, cls_attr, attr_orig)
this.getClass()
.(ClassObjectInternal)
.lookup(pragma[only_bind_into](name), cls_attr, attr_orig)
|
cls_attr.isDescriptor() = false and value = cls_attr and origin = attr_orig
or
@@ -456,8 +459,8 @@ class SuperInstance extends TSuperInstance, ObjectInternal {
/* Helper for `attribute` */
pragma[noinline]
private predicate attribute_descriptor(string name, ObjectInternal cls_attr, CfgOrigin attr_orig) {
// Holds if attribute `name` is required on this object and `lookup` resolves it to
// `cls_attr` with origin `attr_orig`.
// NOTE(review): both conjuncts appear twice, once with a bare `name` and once with
// `name` wrapped in `pragma[only_bind_into]`; presumably only one pair is meant to remain.
PointsToInternal::attributeRequired(this, name) and
this.lookup(name, cls_attr, attr_orig)
PointsToInternal::attributeRequired(this, pragma[only_bind_into](name)) and
this.lookup(pragma[only_bind_into](name), cls_attr, attr_orig)
}
private predicate lookup(string name, ObjectInternal value, CfgOrigin origin) {

View File

@@ -524,6 +524,7 @@ module PointsToInternal {
)
}
pragma[noinline]
private boolean ssa_filter_definition_bool(
PyEdgeRefinement def, PointsToContext context, ObjectInternal value, ControlFlowNode origin
) {

View File

@@ -184,7 +184,11 @@ class PointsToContext extends TPointsToContext {
/** Holds if this context can apply to the CFG node `n`. */
pragma[inline]
// NOTE(review): two definitions of `appliesTo` follow. The one-line form passes
// `n.getScope()` straight to `appliesToScope`; the longer form introduces an explicit
// `Scope s` and wraps both of its uses in `pragma[only_bind_into]`. Both require
// `appliesToScope` to hold for the scope of `n`. Presumably only one is meant to remain.
predicate appliesTo(ControlFlowNode n) { this.appliesToScope(n.getScope()) }
predicate appliesTo(ControlFlowNode n) {
exists(Scope s |
this.appliesToScope(pragma[only_bind_into](s)) and pragma[only_bind_into](s) = n.getScope()
)
}
/** Holds if this context is a call context. */
predicate isCall() { this = TCallContext(_, _, _) }