mirror of https://github.com/github/codeql.git
Resolve merge conflict
@@ -31,7 +31,7 @@ updated to use a context manager.</p>
</example>
<references>

<li>Effbot: <a href="http://effbot.org/zone/python-with-statement.htm">Python with statement</a>.</li>
<li>Effbot: <a href="https://web.archive.org/web/20201012110738/http://effbot.org/zone/python-with-statement.htm">Python with statement</a>.</li>
<li>Python Standard Library: <a href="http://docs.python.org/library/stdtypes.html#context-manager-types">Context manager
</a>.</li>
<li>Python Language Reference: <a href="http://docs.python.org/2.7/reference/datamodel.html#with-statement-context-managers">
23 python/ql/src/Diagnostics/ExtractionErrors.ql Normal file
@@ -0,0 +1,23 @@
/**
 * @name Python extraction errors
 * @description List all extraction errors for Python files in the source code directory.
 * @kind diagnostic
 * @id py/diagnostics/extraction-errors
 */

import python

/**
 * Gets the SARIF severity for errors.
 *
 * See point 3.27.10 in https://docs.oasis-open.org/sarif/sarif/v2.0/sarif-v2.0.html for
 * what error means.
 */
int getErrorSeverity() { result = 2 }

from SyntaxError error, File file
where
  file = error.getFile() and
  exists(file.getRelativePath())
select error, "Extraction failed in " + file + " with error " + error.getMessage(),
  getErrorSeverity()
15 python/ql/src/Diagnostics/SuccessfullyExtractedFiles.ql Normal file
@@ -0,0 +1,15 @@
/**
 * @name Successfully extracted Python files
 * @description Lists all Python files in the source code directory that were extracted
 *              without encountering an error.
 * @kind diagnostic
 * @id py/diagnostics/successfully-extracted-files
 */

import python

from File file
where
  not exists(SyntaxError e | e.getFile() = file) and
  exists(file.getRelativePath())
select file, ""
@@ -4,18 +4,21 @@
 * @kind problem
 * @tags security
 *       correctness
 *       security/cwe/cwe-78
 * @problem.severity error
 * @security-severity 5.9
 * @sub-severity high
 * @precision high
 * @id py/use-of-input
 */

import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.ApiGraphs

from CallNode call, Context context, ControlFlowNode func
from DataFlow::CallCfgNode call
where
  context.getAVersion().includes(2, _) and
  call.getFunction() = func and
  func.pointsTo(context, Value::named("input"), _) and
  not func.pointsTo(context, Value::named("raw_input"), _)
  major_version() = 2 and
  call = API::builtin("input").getACall() and
  call != API::builtin("raw_input").getACall()
select call, "The unsafe built-in function 'input' is used in Python 2."
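For illustration, the behaviour the rewritten query targets: in Python 2, `input()` is roughly `eval(raw_input())`, so whatever the user types is evaluated as an expression. A minimal sketch (hypothetical, Python 2 only, not part of this commit):

```python
# Python 2 only: raw_input() does not exist in Python 3.
value = input("enter a number: ")      # BAD: typing __import__('os').system('id') executes code
text = raw_input("enter a number: ")   # GOOD: returns the typed characters as a plain string
number = int(text)                     # validate and convert explicitly instead
```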
@@ -36,7 +36,7 @@ function with a default of <code>default=None</code>, check if the parameter is
</example>
<references>

<li>Effbot: <a href="http://effbot.org/zone/default-values.htm">Default Parameter Values in Python</a>.</li>
<li>Effbot: <a href="https://web.archive.org/web/20201112004749/http://effbot.org/zone/default-values.htm">Default Parameter Values in Python</a>.</li>
<li>Python Language Reference: <a href="http://docs.python.org/2/reference/compound_stmts.html#function-definitions">Function definitions</a>.</li>
@@ -9,7 +9,7 @@ information being thrown away.</p>

<p>A return value is considered to be trivial if it is <code>None</code> or it is a parameter (parameters, usually <code>self</code> are often
returned to assist with method chaining, but can be ignored).
A return value is also assumed to be trivial if it is ignored for 75% or more of calls.
A return value is also assumed to be trivial if it is ignored for more than 25% of calls.
</p>

</overview>
@@ -29,7 +29,7 @@ import that.

<li>Python Language Reference: <a href="http://docs.python.org/2/reference/simple_stmts.html#import">The import statement</a>.</li>
<li>Python: <a href="http://docs.python.org/2/tutorial/modules.html">Modules</a>.</li>
<li> Effbot: <a href="http://effbot.org/zone/import-confusion.htm">Import Confusion</a>.</li>
<li> Effbot: <a href="https://web.archive.org/web/20200917011425/https://effbot.org/zone/import-confusion.htm">Import Confusion</a>.</li>

</references>

@@ -33,7 +33,7 @@ import that.

<li>Python Language Reference: <a href="http://docs.python.org/2/reference/simple_stmts.html#import">The import statement</a>.</li>
<li>Python: <a href="http://docs.python.org/2/tutorial/modules.html">Modules</a>.</li>
<li> Effbot: <a href="http://effbot.org/zone/import-confusion.htm">Import Confusion</a>.</li>
<li> Effbot: <a href="https://web.archive.org/web/20200917011425/https://effbot.org/zone/import-confusion.htm">Import Confusion</a>.</li>

</references>
@@ -49,7 +49,7 @@ so the general technique is quite widely applicable.

<li>
IBM developerWorks: <a href="http://www.ibm.com/developerworks/library/j-eaed6/">Evolutionary architecture and emergent design: Emergent design through metrics</a>.
IBM developerWorks: <a href="https://web.archive.org/web/20190919085934/https://www.ibm.com/developerworks/library/j-eaed6/">Evolutionary architecture and emergent design: Emergent design through metrics</a>.
</li>
<li>
R. Martin, <em>Agile Software Development: Principles, Patterns and Practices</em>. Pearson, 2011.

@@ -29,7 +29,7 @@ You can reduce efferent coupling by splitting up a module so that each part depe

<li>
IBM developerWorks: <a href="http://www.ibm.com/developerworks/library/j-eaed6/">Evolutionary architecture and emergent design: Emergent design through metrics</a>.
IBM developerWorks: <a href="https://web.archive.org/web/20190919085934/https://www.ibm.com/developerworks/library/j-eaed6/">Evolutionary architecture and emergent design: Emergent design through metrics</a>.
</li>
<li>
R. Martin, <em>Agile Software Development: Principles, Patterns and Practices</em>. Pearson, 2011.
@@ -4,7 +4,9 @@
|
||||
* and is therefore associated with security risks.
|
||||
* @kind problem
|
||||
* @tags security
|
||||
* external/cwe/cwe-200
|
||||
* @problem.severity error
|
||||
* @security-severity 3.6
|
||||
* @sub-severity low
|
||||
* @precision high
|
||||
* @id py/bind-socket-all-network-interfaces
|
||||
@@ -32,21 +34,7 @@ private DataFlow::LocalSourceNode vulnerableHostnameRef(DataFlow::TypeTracker t,
|
||||
result.asExpr() = allInterfacesStrConst
|
||||
)
|
||||
or
|
||||
// Due to bad performance when using normal setup with `vulnerableHostnameRef(t2, hostname).track(t2, t)`
|
||||
// we have inlined that code and forced a join
|
||||
exists(DataFlow::TypeTracker t2 |
|
||||
exists(DataFlow::StepSummary summary |
|
||||
vulnerableHostnameRef_first_join(t2, hostname, result, summary) and
|
||||
t = t2.append(summary)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private predicate vulnerableHostnameRef_first_join(
|
||||
DataFlow::TypeTracker t2, string hostname, DataFlow::Node res, DataFlow::StepSummary summary
|
||||
) {
|
||||
DataFlow::StepSummary::step(vulnerableHostnameRef(t2, hostname), res, summary)
|
||||
exists(DataFlow::TypeTracker t2 | result = vulnerableHostnameRef(t2, hostname).track(t2, t))
|
||||
}
|
||||
|
||||
/** Gets a reference to a hostname that can be used to bind to all interfaces. */
|
||||
@@ -59,21 +47,7 @@ private DataFlow::LocalSourceNode vulnerableAddressTuple(DataFlow::TypeTracker t
|
||||
t.start() and
|
||||
result.asExpr() = any(Tuple tup | tup.getElt(0) = vulnerableHostnameRef(hostname).asExpr())
|
||||
or
|
||||
// Due to bad performance when using normal setup with `vulnerableAddressTuple(t2, hostname).track(t2, t)`
|
||||
// we have inlined that code and forced a join
|
||||
exists(DataFlow::TypeTracker t2 |
|
||||
exists(DataFlow::StepSummary summary |
|
||||
vulnerableAddressTuple_first_join(t2, hostname, result, summary) and
|
||||
t = t2.append(summary)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private predicate vulnerableAddressTuple_first_join(
|
||||
DataFlow::TypeTracker t2, string hostname, DataFlow::Node res, DataFlow::StepSummary summary
|
||||
) {
|
||||
DataFlow::StepSummary::step(vulnerableAddressTuple(t2, hostname), res, summary)
|
||||
exists(DataFlow::TypeTracker t2 | result = vulnerableAddressTuple(t2, hostname).track(t2, t))
|
||||
}
|
||||
|
||||
/** Gets a reference to a tuple for which the first element is a hostname that can be used to bind to all interfaces. */
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
* @kind path-problem
|
||||
* @precision low
|
||||
* @problem.severity error
|
||||
* @security-severity 5.9
|
||||
* @tags security external/cwe/cwe-20
|
||||
*/
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
* @description Matching a URL or hostname against a regular expression that contains an unescaped dot as part of the hostname might match more hostnames than expected.
|
||||
* @kind problem
|
||||
* @problem.severity warning
|
||||
* @security-severity 5.9
|
||||
* @precision high
|
||||
* @id py/incomplete-hostname-regexp
|
||||
* @tags correctness
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
* @description Security checks on the substrings of an unparsed URL are often vulnerable to bypassing.
|
||||
* @kind problem
|
||||
* @problem.severity warning
|
||||
* @security-severity 5.9
|
||||
* @precision high
|
||||
* @id py/incomplete-url-substring-sanitization
|
||||
* @tags correctness
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
* @description Accessing paths influenced by users can allow an attacker to access unexpected resources.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @security-severity 6.4
|
||||
* @sub-severity high
|
||||
* @precision high
|
||||
* @id py/path-injection
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
* @kind path-problem
|
||||
* @id py/tarslip
|
||||
* @problem.severity error
|
||||
* @security-severity 6.4
|
||||
* @precision medium
|
||||
* @tags security
|
||||
* external/cwe/cwe-022
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
* user to change the meaning of the command.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @security-severity 5.9
|
||||
* @sub-severity high
|
||||
* @precision high
|
||||
* @id py/command-line-injection
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
* cause a cross-site scripting vulnerability.
|
||||
* @kind problem
|
||||
* @problem.severity error
|
||||
* @security-severity 2.9
|
||||
* @precision medium
|
||||
* @id py/jinja2/autoescape-false
|
||||
* @tags security
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
* allows for a cross-site scripting vulnerability.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @security-severity 2.9
|
||||
* @sub-severity high
|
||||
* @precision high
|
||||
* @id py/reflective-xss
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
* malicious SQL code by the user.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @security-severity 6.4
|
||||
* @precision high
|
||||
* @id py/sql-injection
|
||||
* @tags security
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
* code execution.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @security-severity 10.0
|
||||
* @sub-severity high
|
||||
* @precision high
|
||||
* @id py/code-injection
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
* developing a subsequent exploit.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @security-severity 3.6
|
||||
* @precision high
|
||||
* @id py/stack-trace-exposure
|
||||
* @tags security
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
* @description Running a Flask app in debug mode may allow an attacker to run arbitrary code through the Werkzeug debugger.
|
||||
* @kind problem
|
||||
* @problem.severity error
|
||||
* @security-severity 6.4
|
||||
* @precision high
|
||||
* @id py/flask-debug
|
||||
* @tags security
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
* @description Accepting unknown host keys can allow man-in-the-middle attacks.
|
||||
* @kind problem
|
||||
* @problem.severity error
|
||||
* @security-severity 5.2
|
||||
* @precision high
|
||||
* @id py/paramiko-missing-host-key-validation
|
||||
* @tags security
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
* @description Making a request without certificate validation can allow man-in-the-middle attacks.
|
||||
* @kind problem
|
||||
* @problem.severity error
|
||||
* @security-severity 5.2
|
||||
* @precision medium
|
||||
* @id py/request-without-cert-validation
|
||||
* @tags security
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
* expose it to an attacker.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @security-severity 5.9
|
||||
* @precision high
|
||||
* @id py/clear-text-logging-sensitive-data
|
||||
* @tags security
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
* attacker.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @security-severity 5.9
|
||||
* @precision high
|
||||
* @id py/clear-text-storage-sensitive-data
|
||||
* @tags security
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
* @description Use of a cryptographic key that is too small may allow the encryption to be broken.
|
||||
* @kind problem
|
||||
* @problem.severity error
|
||||
* @security-severity 5.2
|
||||
* @precision high
|
||||
* @id py/weak-crypto-key
|
||||
* @tags security
|
||||
|
||||
@@ -15,22 +15,28 @@
|
||||
secure than it appears to be.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
This query alerts on any use of a weak cryptographic algorithm that is
not a hashing algorithm. Use of broken or weak cryptographic hash
functions is handled by the
|
||||
<code>py/weak-sensitive-data-hashing</code> query.
|
||||
</p>
|
||||
|
||||
</overview>
|
||||
<recommendation>
|
||||
|
||||
<p>
|
||||
Ensure that you use a strong, modern cryptographic
|
||||
algorithm. Use at least AES-128 or RSA-2048 for
|
||||
encryption, and SHA-2 or SHA-3 for secure hashing.
|
||||
algorithm, such as AES-128 or RSA-2048.
|
||||
</p>
|
||||
|
||||
</recommendation>
|
||||
<example>
|
||||
|
||||
<p>
|
||||
The following code uses the <code>pycrypto</code>
|
||||
The following code uses the <code>pycryptodome</code>
|
||||
library to encrypt some secret data. When you create a cipher using
|
||||
<code>pycrypto</code> you must specify the encryption
|
||||
<code>pycryptodome</code> you must specify the encryption
|
||||
algorithm to use. The first example uses DES, which is an
|
||||
older algorithm that is now considered weak. The second
|
||||
example uses AES, which is a stronger modern algorithm.
|
||||
@@ -39,8 +45,12 @@
|
||||
<sample src="examples/broken_crypto.py" />
|
||||
|
||||
<p>
|
||||
WARNING: Although the second example above is more robust,
|
||||
pycrypto is no longer actively maintained so we recommend using <code>cryptography</code> instead.
|
||||
NOTICE: the original
|
||||
<code><a href="https://pypi.org/project/pycrypto/">pycrypto</a></code>
|
||||
PyPI package that provided the <code>Crypto</code> module is no longer
|
||||
actively maintained, so you should use the
|
||||
<code><a href="https://pypi.org/project/pycryptodome/">pycryptodome</a></code>
|
||||
PyPI package instead (which has a compatible API).
|
||||
</p>
|
||||
|
||||
</example>
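The sample file `examples/broken_crypto.py` referenced above is not included in this hunk. A hedged sketch of the kind of code the example describes, using the pycryptodome `Crypto` package (the key sizes and cipher modes here are illustrative choices):

```python
from Crypto.Cipher import AES, DES      # provided by the pycryptodome PyPI package
from Crypto.Random import get_random_bytes

secret = b"some secret message"

# BAD: DES is an old, weak algorithm.
weak_cipher = DES.new(get_random_bytes(8), DES.MODE_OFB)
weak_ciphertext = weak_cipher.encrypt(secret)

# GOOD: AES (here AES-128 in EAX mode) is a strong, modern algorithm.
strong_cipher = AES.new(get_random_bytes(16), AES.MODE_EAX)
strong_ciphertext, tag = strong_cipher.encrypt_and_digest(secret)
```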
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
/**
|
||||
* @name Use of a broken or weak cryptographic algorithm
|
||||
* @description Using broken or weak cryptographic algorithms can compromise security.
|
||||
* @kind path-problem
|
||||
* @kind problem
|
||||
* @problem.severity warning
|
||||
* @security-severity 5.2
|
||||
* @precision high
|
||||
* @id py/weak-cryptographic-algorithm
|
||||
* @tags security
|
||||
@@ -10,21 +11,15 @@
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.security.Paths
|
||||
import semmle.python.security.SensitiveData
|
||||
import semmle.python.security.Crypto
|
||||
import semmle.python.Concepts
|
||||
|
||||
class BrokenCryptoConfiguration extends TaintTracking::Configuration {
|
||||
BrokenCryptoConfiguration() { this = "Broken crypto configuration" }
|
||||
|
||||
override predicate isSource(TaintTracking::Source source) {
|
||||
source instanceof SensitiveDataSource
|
||||
}
|
||||
|
||||
override predicate isSink(TaintTracking::Sink sink) { sink instanceof WeakCryptoSink }
|
||||
}
|
||||
|
||||
from BrokenCryptoConfiguration config, TaintedPathSource src, TaintedPathSink sink
|
||||
where config.hasFlowPath(src, sink)
|
||||
select sink.getSink(), src, sink, "$@ is used in a broken or weak cryptographic algorithm.",
|
||||
src.getSource(), "Sensitive data"
|
||||
from Cryptography::CryptographicOperation operation, Cryptography::CryptographicAlgorithm algorithm
|
||||
where
|
||||
algorithm = operation.getAlgorithm() and
|
||||
algorithm.isWeak() and
|
||||
// `Cryptography::HashingAlgorithm` and `Cryptography::PasswordHashingAlgorithm` are
|
||||
// handled by `py/weak-sensitive-data-hashing`
|
||||
algorithm instanceof Cryptography::EncryptionAlgorithm
|
||||
select operation,
|
||||
"The cryptographic algorithm " + algorithm.getName() +
|
||||
" is broken or weak, and should not be used."
|
||||
|
||||
103 python/ql/src/Security/CWE-327/FluentApiModel.qll Normal file
@@ -0,0 +1,103 @@
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
import TlsLibraryModel
|
||||
|
||||
/**
|
||||
* Configuration to determine the state of a context being used to create
|
||||
* a connection. There is one configuration for each pair of `TlsLibrary` and `ProtocolVersion`,
|
||||
* such that a single configuration only tracks contexts where a specific `ProtocolVersion` is allowed.
|
||||
*
|
||||
* The state is in terms of whether a specific protocol is allowed. This is
|
||||
* either true or false when the context is created and can then be modified
|
||||
* later by either restricting or unrestricting the protocol (see the predicates
|
||||
* `isRestriction` and `isUnrestriction`).
|
||||
*
|
||||
* Since we are interested in the final state, we want the flow to start from
|
||||
* the last unrestriction, so we disallow flow into unrestrictions. We also
|
||||
* model the creation as an unrestriction of everything it allows, to account
|
||||
* for the common case where the creation plays the role of "last unrestriction".
|
||||
*
|
||||
* Since we really want "the last unrestriction, not nullified by a restriction",
|
||||
* we also disallow flow into restrictions.
|
||||
*/
|
||||
class InsecureContextConfiguration extends DataFlow::Configuration {
|
||||
TlsLibrary library;
|
||||
ProtocolVersion tracked_version;
|
||||
|
||||
InsecureContextConfiguration() {
|
||||
this = library + "Allows" + tracked_version and
|
||||
tracked_version.isInsecure()
|
||||
}
|
||||
|
||||
ProtocolVersion getTrackedVersion() { result = tracked_version }
|
||||
|
||||
override predicate isSource(DataFlow::Node source) { this.isUnrestriction(source) }
|
||||
|
||||
override predicate isSink(DataFlow::Node sink) {
|
||||
sink = library.connection_creation().getContext()
|
||||
}
|
||||
|
||||
override predicate isBarrierIn(DataFlow::Node node) {
|
||||
this.isRestriction(node)
|
||||
or
|
||||
this.isUnrestriction(node)
|
||||
}
|
||||
|
||||
private predicate isRestriction(DataFlow::Node node) {
|
||||
exists(ProtocolRestriction r |
|
||||
r = library.protocol_restriction() and
|
||||
r.getRestriction() = tracked_version
|
||||
|
|
||||
node = r.getContext()
|
||||
)
|
||||
}
|
||||
|
||||
private predicate isUnrestriction(DataFlow::Node node) {
|
||||
exists(ProtocolUnrestriction pu |
|
||||
pu = library.protocol_unrestriction() and
|
||||
pu.getUnrestriction() = tracked_version
|
||||
|
|
||||
node = pu.getContext()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `connectionCreation` marks the creation of a connection based on the context
|
||||
* found at `contextOrigin` and allowing `insecure_version`.
|
||||
*
|
||||
* `specific` is true iff the context is configured for a specific protocol version (`ssl.PROTOCOL_TLSv1_2`) rather
|
||||
* than for a family of protocols (`ssl.PROTOCOL_TLS`).
|
||||
*/
|
||||
predicate unsafe_connection_creation_with_context(
|
||||
DataFlow::Node connectionCreation, ProtocolVersion insecure_version, DataFlow::Node contextOrigin,
|
||||
boolean specific
|
||||
) {
|
||||
// Connection created from a context allowing `insecure_version`.
|
||||
exists(InsecureContextConfiguration c | c.hasFlow(contextOrigin, connectionCreation) |
|
||||
insecure_version = c.getTrackedVersion() and
|
||||
specific = false
|
||||
)
|
||||
or
|
||||
// Connection created from a context specifying `insecure_version`.
|
||||
exists(TlsLibrary l |
|
||||
connectionCreation = l.insecure_connection_creation(insecure_version) and
|
||||
contextOrigin = connectionCreation and
|
||||
specific = true
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `connectionCreation` marks the creation of a connection without reference to a context
|
||||
* and allowing `insecure_version`.
|
||||
*/
|
||||
predicate unsafe_connection_creation_without_context(
|
||||
DataFlow::CallCfgNode connectionCreation, string insecure_version
|
||||
) {
|
||||
exists(TlsLibrary l | connectionCreation = l.insecure_connection_creation(insecure_version))
|
||||
}
|
||||
|
||||
/** Holds if `contextCreation` is creating a context tied to a specific insecure version. */
|
||||
predicate unsafe_context_creation(DataFlow::CallCfgNode contextCreation, string insecure_version) {
|
||||
exists(TlsLibrary l | contextCreation = l.insecure_context_creation(insecure_version))
|
||||
}
|
||||
@@ -5,6 +5,7 @@
|
||||
* @id py/insecure-default-protocol
|
||||
* @kind problem
|
||||
* @problem.severity warning
|
||||
* @security-severity 5.2
|
||||
* @precision high
|
||||
* @tags security
|
||||
* external/cwe/cwe-327
|
||||
|
||||
@@ -13,8 +13,8 @@
|
||||
|
||||
<p>
|
||||
Ensure that a modern, strong protocol is used. All versions of SSL,
|
||||
and TLS 1.0 are known to be vulnerable to attacks. Using TLS 1.1 or
|
||||
above is strongly recommended.
|
||||
and TLS versions 1.0 and 1.1 are known to be vulnerable to attacks.
|
||||
Using TLS 1.2 or above is strongly recommended.
|
||||
</p>
|
||||
|
||||
</recommendation>
|
||||
@@ -30,20 +30,35 @@
|
||||
|
||||
<p>
|
||||
All cases should be updated to use a secure protocol, such as
|
||||
<code>PROTOCOL_TLSv1_1</code>.
|
||||
<code>PROTOCOL_TLSv1_2</code>.
|
||||
</p>
|
||||
<p>
|
||||
Note that <code>ssl.wrap_socket</code> has been deprecated in
|
||||
Python 3.7. A preferred alternative is to use
|
||||
<code>ssl.SSLContext</code>, which is supported in Python 2.7.9 and
|
||||
3.2 and later versions.
|
||||
Python 3.7. The recommended alternatives are:
|
||||
</p>
|
||||
<ul>
|
||||
<li><code>ssl.SSLContext</code> - supported in Python 2.7.9,
|
||||
3.2, and later versions</li>
|
||||
<li><code>ssl.create_default_context</code> - a convenience function,
|
||||
supported in Python 3.4 and later versions.</li>
|
||||
</ul>
|
||||
<p>
|
||||
Even when you use these alternatives, you should
|
||||
ensure that a safe protocol is used. The following code illustrates
|
||||
how to use flags (available since Python 3.2) or the `minimum_version`
|
||||
field (favored since Python 3.7) to restrict the protocols accepted when
|
||||
creating a connection.
|
||||
</p>
|
||||
|
||||
<sample src="examples/secure_default_protocol.py" />
|
||||
</example>
|
||||
|
||||
<references>
|
||||
<li>Wikipedia: <a href="https://en.wikipedia.org/wiki/Transport_Layer_Security"> Transport Layer Security</a>.</li>
|
||||
<li>Python 3 documentation: <a href="https://docs.python.org/3/library/ssl.html#ssl.SSLContext"> class ssl.SSLContext</a>.</li>
|
||||
<li>Python 3 documentation: <a href="https://docs.python.org/3/library/ssl.html#ssl.wrap_socket"> ssl.wrap_socket</a>.</li>
|
||||
<li>Python 3 documentation: <a href="https://docs.python.org/3/library/ssl.html#functions-constants-and-exceptions"> notes on context creation</a>.</li>
|
||||
<li>Python 3 documentation: <a href="https://docs.python.org/3/library/ssl.html#ssl-security"> notes on security considerations</a>.</li>
|
||||
<li>pyOpenSSL documentation: <a href="https://pyopenssl.org/en/stable/api/ssl.html"> An interface to the SSL-specific parts of OpenSSL</a>.</li>
|
||||
</references>
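The sample file `examples/secure_default_protocol.py` referenced above is not shown in this hunk. A hedged sketch of the two approaches the paragraph describes (protocol flags and the `minimum_version` field); the host name is a placeholder:

```python
import socket
import ssl

HOSTNAME = "example.com"  # placeholder

# Preferred since Python 3.7: require at least TLS 1.2 on the context.
context = ssl.create_default_context()
context.minimum_version = ssl.TLSVersion.TLSv1_2

# Alternative using flags (available since Python 3.2): disable the older protocols.
legacy_context = ssl.SSLContext(ssl.PROTOCOL_TLS)
legacy_context.options |= ssl.OP_NO_TLSv1 | ssl.OP_NO_TLSv1_1

with socket.create_connection((HOSTNAME, 443)) as sock:
    with context.wrap_socket(sock, server_hostname=HOSTNAME) as tls_sock:
        print(tls_sock.version())  # e.g. "TLSv1.2" or "TLSv1.3"
```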
|
||||
|
||||
|
||||
@@ -4,92 +4,84 @@
|
||||
* @id py/insecure-protocol
|
||||
* @kind problem
|
||||
* @problem.severity warning
|
||||
* @security-severity 5.2
|
||||
* @precision high
|
||||
* @tags security
|
||||
* external/cwe/cwe-327
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.dataflow.new.DataFlow
|
||||
import FluentApiModel
|
||||
|
||||
private ModuleValue the_ssl_module() { result = Module::named("ssl") }
|
||||
|
||||
FunctionValue ssl_wrap_socket() { result = the_ssl_module().attr("wrap_socket") }
|
||||
|
||||
ClassValue ssl_Context_class() { result = the_ssl_module().attr("SSLContext") }
|
||||
|
||||
private ModuleValue the_pyOpenSSL_module() { result = Value::named("pyOpenSSL.SSL") }
|
||||
|
||||
ClassValue the_pyOpenSSL_Context_class() { result = Value::named("pyOpenSSL.SSL.Context") }
|
||||
|
||||
string insecure_version_name() {
|
||||
// For `pyOpenSSL.SSL`
|
||||
result = "SSLv2_METHOD" or
|
||||
result = "SSLv23_METHOD" or
|
||||
result = "SSLv3_METHOD" or
|
||||
result = "TLSv1_METHOD" or
|
||||
// For the `ssl` module
|
||||
result = "PROTOCOL_SSLv2" or
|
||||
result = "PROTOCOL_SSLv3" or
|
||||
result = "PROTOCOL_SSLv23" or
|
||||
result = "PROTOCOL_TLS" or
|
||||
result = "PROTOCOL_TLSv1"
|
||||
}
|
||||
|
||||
/*
|
||||
* A syntactic check for cases where points-to analysis cannot infer the presence of
|
||||
* a protocol constant, e.g. if it has been removed in later versions of the `ssl`
|
||||
* library.
|
||||
*/
|
||||
|
||||
bindingset[named_argument]
|
||||
predicate probable_insecure_ssl_constant(
|
||||
CallNode call, string insecure_version, string named_argument
|
||||
) {
|
||||
exists(ControlFlowNode arg |
|
||||
arg = call.getArgByName(named_argument) or
|
||||
arg = call.getArg(0)
|
||||
|
|
||||
arg.(AttrNode).getObject(insecure_version).pointsTo(the_ssl_module())
|
||||
// Helper for pretty printer `configName`.
|
||||
// This is a consequence of missing pretty printing.
|
||||
// We do not want to evaluate our bespoke pretty printer
|
||||
// for all `DataFlow::Node`s so we define a sub class of interesting ones.
|
||||
class ProtocolConfiguration extends DataFlow::Node {
|
||||
ProtocolConfiguration() {
|
||||
unsafe_connection_creation_with_context(_, _, this, _)
|
||||
or
|
||||
arg.(NameNode).getId() = insecure_version and
|
||||
exists(Import imp |
|
||||
imp.getAnImportedModuleName() = "ssl" and
|
||||
imp.getAName().getAsname().(Name).getId() = insecure_version
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
predicate unsafe_ssl_wrap_socket_call(
|
||||
CallNode call, string method_name, string insecure_version, string named_argument
|
||||
) {
|
||||
(
|
||||
call = ssl_wrap_socket().getACall() and
|
||||
method_name = "deprecated method ssl.wrap_socket" and
|
||||
named_argument = "ssl_version"
|
||||
unsafe_connection_creation_without_context(this, _)
|
||||
or
|
||||
call = ssl_Context_class().getACall() and
|
||||
named_argument = "protocol" and
|
||||
method_name = "ssl.SSLContext"
|
||||
) and
|
||||
insecure_version = insecure_version_name() and
|
||||
(
|
||||
call.getArgByName(named_argument).pointsTo(the_ssl_module().attr(insecure_version))
|
||||
unsafe_context_creation(this, _)
|
||||
}
|
||||
|
||||
AstNode getNode() { result = this.asCfgNode().(CallNode).getFunction().getNode() }
|
||||
}
|
||||
|
||||
// Helper for pretty printer `callName`.
|
||||
// This is a consequence of missing pretty printing.
|
||||
// We do not want to evaluate our bespoke pretty printer
|
||||
// for all `AstNode`s so we define a sub class of interesting ones.
|
||||
//
|
||||
// Note that AstNode is abstract and AstNode_ is a library class, so
|
||||
// we have to extend @py_ast_node.
|
||||
class Nameable extends @py_ast_node {
|
||||
Nameable() {
|
||||
this = any(ProtocolConfiguration pc).getNode()
|
||||
or
|
||||
probable_insecure_ssl_constant(call, insecure_version, named_argument)
|
||||
)
|
||||
exists(Nameable attr | this = attr.(Attribute).getObject())
|
||||
}
|
||||
|
||||
string toString() { result = "AstNode" }
|
||||
}
|
||||
|
||||
predicate unsafe_pyOpenSSL_Context_call(CallNode call, string insecure_version) {
|
||||
call = the_pyOpenSSL_Context_class().getACall() and
|
||||
insecure_version = insecure_version_name() and
|
||||
call.getArg(0).pointsTo(the_pyOpenSSL_module().attr(insecure_version))
|
||||
}
|
||||
|
||||
from CallNode call, string method_name, string insecure_version
|
||||
where
|
||||
unsafe_ssl_wrap_socket_call(call, method_name, insecure_version, _)
|
||||
string callName(Nameable call) {
|
||||
result = call.(Name).getId()
|
||||
or
|
||||
unsafe_pyOpenSSL_Context_call(call, insecure_version) and method_name = "pyOpenSSL.SSL.Context"
|
||||
select call,
|
||||
"Insecure SSL/TLS protocol version " + insecure_version + " specified in call to " + method_name +
|
||||
"."
|
||||
exists(Attribute a | a = call | result = callName(a.getObject()) + "." + a.getName())
|
||||
}
|
||||
|
||||
string configName(ProtocolConfiguration protocolConfiguration) {
|
||||
result =
|
||||
"call to " + callName(protocolConfiguration.asCfgNode().(CallNode).getFunction().getNode())
|
||||
or
|
||||
not protocolConfiguration.asCfgNode() instanceof CallNode and
|
||||
not protocolConfiguration instanceof ContextCreation and
|
||||
result = "context modification"
|
||||
}
|
||||
|
||||
string verb(boolean specific) {
|
||||
specific = true and result = "specified"
|
||||
or
|
||||
specific = false and result = "allowed"
|
||||
}
|
||||
|
||||
from
|
||||
DataFlow::Node connectionCreation, string insecure_version, DataFlow::Node protocolConfiguration,
|
||||
boolean specific
|
||||
where
|
||||
unsafe_connection_creation_with_context(connectionCreation, insecure_version,
|
||||
protocolConfiguration, specific)
|
||||
or
|
||||
unsafe_connection_creation_without_context(connectionCreation, insecure_version) and
|
||||
protocolConfiguration = connectionCreation and
|
||||
specific = true
|
||||
or
|
||||
unsafe_context_creation(protocolConfiguration, insecure_version) and
|
||||
connectionCreation = protocolConfiguration and
|
||||
specific = true
|
||||
select connectionCreation,
|
||||
"Insecure SSL/TLS protocol version " + insecure_version + " " + verb(specific) + " by $@ ",
|
||||
protocolConfiguration, configName(protocolConfiguration)
|
||||
|
||||
83 python/ql/src/Security/CWE-327/PyOpenSSL.qll Normal file
@@ -0,0 +1,83 @@
|
||||
/**
|
||||
* Provides modeling of SSL/TLS functionality of the `OpenSSL` module from the `pyOpenSSL` PyPI package.
|
||||
* See https://www.pyopenssl.org/en/stable/
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.ApiGraphs
|
||||
import TlsLibraryModel
|
||||
|
||||
class PyOpenSSLContextCreation extends ContextCreation, DataFlow::CallCfgNode {
|
||||
PyOpenSSLContextCreation() {
|
||||
this = API::moduleImport("OpenSSL").getMember("SSL").getMember("Context").getACall()
|
||||
}
|
||||
|
||||
override string getProtocol() {
|
||||
exists(ControlFlowNode protocolArg, PyOpenSSL pyo |
|
||||
protocolArg in [node.getArg(0), node.getArgByName("method")]
|
||||
|
|
||||
protocolArg =
|
||||
[pyo.specific_version(result).getAUse(), pyo.unspecific_version(result).getAUse()]
|
||||
.asCfgNode()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
class ConnectionCall extends ConnectionCreation, DataFlow::CallCfgNode {
|
||||
ConnectionCall() {
|
||||
this = API::moduleImport("OpenSSL").getMember("SSL").getMember("Connection").getACall()
|
||||
}
|
||||
|
||||
override DataFlow::CfgNode getContext() {
|
||||
result.getNode() in [node.getArg(0), node.getArgByName("context")]
|
||||
}
|
||||
}
|
||||
|
||||
// This cannot be used to unrestrict,
|
||||
// see https://www.pyopenssl.org/en/stable/api/ssl.html#OpenSSL.SSL.Context.set_options
|
||||
class SetOptionsCall extends ProtocolRestriction, DataFlow::CallCfgNode {
|
||||
SetOptionsCall() { node.getFunction().(AttrNode).getName() = "set_options" }
|
||||
|
||||
override DataFlow::CfgNode getContext() {
|
||||
result.getNode() = node.getFunction().(AttrNode).getObject()
|
||||
}
|
||||
|
||||
override ProtocolVersion getRestriction() {
|
||||
API::moduleImport("OpenSSL").getMember("SSL").getMember("OP_NO_" + result).getAUse().asCfgNode() in [
|
||||
node.getArg(0), node.getArgByName("options")
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
class UnspecificPyOpenSSLContextCreation extends PyOpenSSLContextCreation, UnspecificContextCreation {
|
||||
UnspecificPyOpenSSLContextCreation() { library instanceof PyOpenSSL }
|
||||
}
|
||||
|
||||
class PyOpenSSL extends TlsLibrary {
|
||||
PyOpenSSL() { this = "pyOpenSSL" }
|
||||
|
||||
override string specific_version_name(ProtocolVersion version) { result = version + "_METHOD" }
|
||||
|
||||
override string unspecific_version_name(ProtocolFamily family) {
|
||||
// `"TLS_METHOD"` is not actually available in pyOpenSSL yet, but should be coming soon..
|
||||
result = family + "_METHOD"
|
||||
}
|
||||
|
||||
override API::Node version_constants() { result = API::moduleImport("OpenSSL").getMember("SSL") }
|
||||
|
||||
override ContextCreation default_context_creation() { none() }
|
||||
|
||||
override ContextCreation specific_context_creation() {
|
||||
result instanceof PyOpenSSLContextCreation
|
||||
}
|
||||
|
||||
override DataFlow::Node insecure_connection_creation(ProtocolVersion version) { none() }
|
||||
|
||||
override ConnectionCreation connection_creation() { result instanceof ConnectionCall }
|
||||
|
||||
override ProtocolRestriction protocol_restriction() { result instanceof SetOptionsCall }
|
||||
|
||||
override ProtocolUnrestriction protocol_unrestriction() {
|
||||
result instanceof UnspecificPyOpenSSLContextCreation
|
||||
}
|
||||
}
|
||||
24 python/ql/src/Security/CWE-327/README.md Normal file
@@ -0,0 +1,24 @@
# Current status (Feb 2021)

This should be kept up to date; the world is moving fast and protocols are being broken.

## Protocols

- All versions of SSL are insecure
- TLS 1.0 and TLS 1.1 are insecure
- TLS 1.2 has some issues, but TLS 1.3 is not widely supported

## Connection methods

- `ssl.wrap_socket` creates insecure connections; use `SSLContext.wrap_socket` instead. [link](https://docs.python.org/3/library/ssl.html#ssl.wrap_socket)
  > Deprecated since version 3.7: Since Python 3.2 and 2.7.9, it is recommended to use the `SSLContext.wrap_socket()` instead of `wrap_socket()`. The top-level function is limited and creates an insecure client socket without server name indication or hostname matching.
- Default constructors are fine; a fluent API is used to constrain the possible protocols later.

## Current recommendation

TLS 1.2 or TLS 1.3

## Queries

- `InsecureProtocol` detects uses of insecure protocols.
- `InsecureDefaultProtocol` detects default constructions, which are no longer unsafe.
214 python/ql/src/Security/CWE-327/Ssl.qll Normal file
@@ -0,0 +1,214 @@
|
||||
/**
|
||||
* Provides modeling of SSL/TLS functionality of the `ssl` module from the standard library.
|
||||
* See https://docs.python.org/3.9/library/ssl.html
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.ApiGraphs
|
||||
import TlsLibraryModel
|
||||
|
||||
class SSLContextCreation extends ContextCreation, DataFlow::CallCfgNode {
|
||||
SSLContextCreation() { this = API::moduleImport("ssl").getMember("SSLContext").getACall() }
|
||||
|
||||
override string getProtocol() {
|
||||
exists(ControlFlowNode protocolArg, Ssl ssl |
|
||||
protocolArg in [node.getArg(0), node.getArgByName("protocol")]
|
||||
|
|
||||
protocolArg =
|
||||
[ssl.specific_version(result).getAUse(), ssl.unspecific_version(result).getAUse()]
|
||||
.asCfgNode()
|
||||
)
|
||||
or
|
||||
not exists(node.getAnArg()) and
|
||||
result = "TLS"
|
||||
}
|
||||
}
|
||||
|
||||
class SSLDefaultContextCreation extends ContextCreation {
|
||||
SSLDefaultContextCreation() {
|
||||
this = API::moduleImport("ssl").getMember("create_default_context").getACall()
|
||||
}
|
||||
|
||||
// Allowed insecure versions are "TLSv1" and "TLSv1_1"
|
||||
// see https://docs.python.org/3/library/ssl.html#context-creation
|
||||
override string getProtocol() { result = "TLS" }
|
||||
}
|
||||
|
||||
/** Gets a reference to an `ssl.Context` instance. */
|
||||
API::Node sslContextInstance() {
|
||||
result = API::moduleImport("ssl").getMember(["SSLContext", "create_default_context"]).getReturn()
|
||||
}
|
||||
|
||||
class WrapSocketCall extends ConnectionCreation, DataFlow::CallCfgNode {
|
||||
WrapSocketCall() { this = sslContextInstance().getMember("wrap_socket").getACall() }
|
||||
|
||||
override DataFlow::Node getContext() {
|
||||
result = this.getFunction().(DataFlow::AttrRead).getObject()
|
||||
}
|
||||
}
|
||||
|
||||
class OptionsAugOr extends ProtocolRestriction, DataFlow::CfgNode {
|
||||
ProtocolVersion restriction;
|
||||
|
||||
OptionsAugOr() {
|
||||
exists(AugAssign aa, AttrNode attr, Expr flag |
|
||||
aa.getOperation().getOp() instanceof BitOr and
|
||||
aa.getTarget() = attr.getNode() and
|
||||
attr.getName() = "options" and
|
||||
attr.getObject() = node and
|
||||
flag = API::moduleImport("ssl").getMember("OP_NO_" + restriction).getAUse().asExpr() and
|
||||
(
|
||||
aa.getValue() = flag
|
||||
or
|
||||
impliesBitSet(aa.getValue(), flag, false, false)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
override DataFlow::Node getContext() { result = this }
|
||||
|
||||
override ProtocolVersion getRestriction() { result = restriction }
|
||||
}
|
||||
|
||||
class OptionsAugAndNot extends ProtocolUnrestriction, DataFlow::CfgNode {
|
||||
ProtocolVersion restriction;
|
||||
|
||||
OptionsAugAndNot() {
|
||||
exists(AugAssign aa, AttrNode attr, Expr flag, UnaryExpr notFlag |
|
||||
aa.getOperation().getOp() instanceof BitAnd and
|
||||
aa.getTarget() = attr.getNode() and
|
||||
attr.getName() = "options" and
|
||||
attr.getObject() = node and
|
||||
notFlag.getOp() instanceof Invert and
|
||||
notFlag.getOperand() = flag and
|
||||
flag = API::moduleImport("ssl").getMember("OP_NO_" + restriction).getAUse().asExpr() and
|
||||
(
|
||||
aa.getValue() = notFlag
|
||||
or
|
||||
impliesBitSet(aa.getValue(), notFlag, true, true)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
override DataFlow::Node getContext() { result = this }
|
||||
|
||||
override ProtocolVersion getUnrestriction() { result = restriction }
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if
|
||||
* for every bit, _b_:
|
||||
* `wholeHasBitSet` represents that _b_ is set in `whole`
|
||||
* implies
|
||||
* `partHasBitSet` represents that _b_ is set in `part`
|
||||
*
|
||||
* As an example take `whole` = `part1 & part2`. Then
|
||||
* `impliesBitSet(whole, part1, true, true)` holds
|
||||
* because for any bit in `whole`, if that bit is set it must also be set in `part1`.
|
||||
*
|
||||
* Similarly for `whole` = `part1 | part2`. Here
|
||||
* `impliesBitSet(whole, part1, false, false)` holds
|
||||
* because for any bit in `whole`, if that bit is not set, it cannot be set in `part1`.
|
||||
*/
|
||||
predicate impliesBitSet(BinaryExpr whole, Expr part, boolean partHasBitSet, boolean wholeHasBitSet) {
|
||||
whole.getOp() instanceof BitAnd and
|
||||
(
|
||||
wholeHasBitSet = true and partHasBitSet = true and part in [whole.getLeft(), whole.getRight()]
|
||||
or
|
||||
wholeHasBitSet = true and
|
||||
impliesBitSet([whole.getLeft(), whole.getRight()], part, partHasBitSet, wholeHasBitSet)
|
||||
)
|
||||
or
|
||||
whole.getOp() instanceof BitOr and
|
||||
(
|
||||
wholeHasBitSet = false and partHasBitSet = false and part in [whole.getLeft(), whole.getRight()]
|
||||
or
|
||||
wholeHasBitSet = false and
|
||||
impliesBitSet([whole.getLeft(), whole.getRight()], part, partHasBitSet, wholeHasBitSet)
|
||||
)
|
||||
}
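For reference, the `OptionsAugOr` and `OptionsAugAndNot` classes above match augmented assignments to a context's `options` attribute, and `impliesBitSet` lets the flag appear inside a larger bit expression. A minimal Python illustration of both patterns (not part of this commit):

```python
import ssl

context = ssl.SSLContext(ssl.PROTOCOL_TLS)

# Restriction (OptionsAugOr): the OP_NO_* flag is OR-ed into options,
# possibly combined with other flags in a single expression.
context.options |= ssl.OP_NO_TLSv1 | ssl.OP_NO_COMPRESSION

# Unrestriction (OptionsAugAndNot): the same flag is cleared again with AND-NOT.
context.options &= ~ssl.OP_NO_TLSv1
```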
|
||||
|
||||
class ContextSetVersion extends ProtocolRestriction, ProtocolUnrestriction, DataFlow::CfgNode {
|
||||
ProtocolVersion restriction;
|
||||
|
||||
ContextSetVersion() {
|
||||
exists(DataFlow::AttrWrite aw |
|
||||
aw.getObject().asCfgNode() = node and
|
||||
aw.getAttributeName() = "minimum_version" and
|
||||
aw.getValue() =
|
||||
API::moduleImport("ssl").getMember("TLSVersion").getMember(restriction).getAUse()
|
||||
)
|
||||
}
|
||||
|
||||
override DataFlow::Node getContext() { result = this }
|
||||
|
||||
override ProtocolVersion getRestriction() { result.lessThan(restriction) }
|
||||
|
||||
override ProtocolVersion getUnrestriction() {
|
||||
restriction = result or restriction.lessThan(result)
|
||||
}
|
||||
}
|
||||
|
||||
class UnspecificSSLContextCreation extends SSLContextCreation, UnspecificContextCreation {
|
||||
UnspecificSSLContextCreation() { library instanceof Ssl }
|
||||
|
||||
override ProtocolVersion getUnrestriction() {
|
||||
result = UnspecificContextCreation.super.getUnrestriction() and
|
||||
// These are turned off by default since Python 3.6
|
||||
// see https://docs.python.org/3.6/library/ssl.html#ssl.SSLContext
|
||||
not result in ["SSLv2", "SSLv3"]
|
||||
}
|
||||
}
|
||||
|
||||
class UnspecificSSLDefaultContextCreation extends SSLDefaultContextCreation, ProtocolUnrestriction {
|
||||
override DataFlow::Node getContext() { result = this }
|
||||
|
||||
// see https://docs.python.org/3/library/ssl.html#ssl.create_default_context
|
||||
override ProtocolVersion getUnrestriction() {
|
||||
result in ["TLSv1", "TLSv1_1", "TLSv1_2", "TLSv1_3"]
|
||||
}
|
||||
}
|
||||
|
||||
class Ssl extends TlsLibrary {
|
||||
Ssl() { this = "ssl" }
|
||||
|
||||
override string specific_version_name(ProtocolVersion version) { result = "PROTOCOL_" + version }
|
||||
|
||||
override string unspecific_version_name(ProtocolFamily family) {
|
||||
family = "SSLv23" and result = "PROTOCOL_" + family
|
||||
or
|
||||
family = "TLS" and result = "PROTOCOL_" + family + ["", "_CLIENT", "_SERVER"]
|
||||
}
|
||||
|
||||
override API::Node version_constants() { result = API::moduleImport("ssl") }
|
||||
|
||||
override ContextCreation default_context_creation() {
|
||||
result instanceof SSLDefaultContextCreation
|
||||
}
|
||||
|
||||
override ContextCreation specific_context_creation() { result instanceof SSLContextCreation }
|
||||
|
||||
override DataFlow::CallCfgNode insecure_connection_creation(ProtocolVersion version) {
|
||||
result = API::moduleImport("ssl").getMember("wrap_socket").getACall() and
|
||||
this.specific_version(version).getAUse() = result.getArgByName("ssl_version") and
|
||||
version.isInsecure()
|
||||
}
|
||||
|
||||
override ConnectionCreation connection_creation() { result instanceof WrapSocketCall }
|
||||
|
||||
override ProtocolRestriction protocol_restriction() {
|
||||
result instanceof OptionsAugOr
|
||||
or
|
||||
result instanceof ContextSetVersion
|
||||
}
|
||||
|
||||
override ProtocolUnrestriction protocol_unrestriction() {
|
||||
result instanceof OptionsAugAndNot
|
||||
or
|
||||
result instanceof ContextSetVersion
|
||||
or
|
||||
result instanceof UnspecificSSLContextCreation
|
||||
or
|
||||
result instanceof UnspecificSSLDefaultContextCreation
|
||||
}
|
||||
}
|
||||
137 python/ql/src/Security/CWE-327/TlsLibraryModel.qll Normal file
@@ -0,0 +1,137 @@
|
||||
private import python
|
||||
private import semmle.python.ApiGraphs
|
||||
import Ssl
|
||||
import PyOpenSSL
|
||||
|
||||
/**
|
||||
* A specific protocol version of SSL or TLS.
|
||||
*/
|
||||
class ProtocolVersion extends string {
|
||||
ProtocolVersion() { this in ["SSLv2", "SSLv3", "TLSv1", "TLSv1_1", "TLSv1_2", "TLSv1_3"] }
|
||||
|
||||
/** Gets a `ProtocolVersion` that is less than this `ProtocolVersion`, if any. */
|
||||
predicate lessThan(ProtocolVersion version) {
|
||||
this = "SSLv2" and version = "SSLv3"
|
||||
or
|
||||
this = "TLSv1" and version = ["TLSv1_1", "TLSv1_2", "TLSv1_3"]
|
||||
or
|
||||
this = ["TLSv1", "TLSv1_1"] and version = ["TLSv1_2", "TLSv1_3"]
|
||||
or
|
||||
this = ["TLSv1", "TLSv1_1", "TLSv1_2"] and version = "TLSv1_3"
|
||||
}
|
||||
|
||||
/** Holds if this protocol version is known to be insecure. */
|
||||
predicate isInsecure() { this in ["SSLv2", "SSLv3", "TLSv1", "TLSv1_1"] }
|
||||
}
|
||||
|
||||
/** An unspecific protocol version */
|
||||
class ProtocolFamily extends string {
|
||||
ProtocolFamily() { this in ["SSLv23", "TLS"] }
|
||||
}
|
||||
|
||||
/** The creation of a context. */
|
||||
abstract class ContextCreation extends DataFlow::Node {
|
||||
/** Gets the protocol version or family for this context. */
|
||||
abstract string getProtocol();
|
||||
}
|
||||
|
||||
/** The creation of a connection from a context. */
|
||||
abstract class ConnectionCreation extends DataFlow::Node {
|
||||
/** Gets the context used to create the connection. */
|
||||
abstract DataFlow::Node getContext();
|
||||
}
|
||||
|
||||
/** A context is being restricted on which protocols it can accept. */
|
||||
abstract class ProtocolRestriction extends DataFlow::Node {
|
||||
/** Gets the context being restricted. */
|
||||
abstract DataFlow::Node getContext();
|
||||
|
||||
/** Gets the protocol version being disallowed. */
|
||||
abstract ProtocolVersion getRestriction();
|
||||
}
|
||||
|
||||
/** A context is being relaxed on which protocols it can accept. */
|
||||
abstract class ProtocolUnrestriction extends DataFlow::Node {
|
||||
/** Gets the context being relaxed. */
|
||||
abstract DataFlow::Node getContext();
|
||||
|
||||
/** Gets the protocol version being allowed. */
|
||||
abstract ProtocolVersion getUnrestriction();
|
||||
}
|
||||
|
||||
/**
|
||||
* A context is being created with a range of allowed protocols.
|
||||
* This also serves as unrestricting these protocols.
|
||||
*/
|
||||
abstract class UnspecificContextCreation extends ContextCreation, ProtocolUnrestriction {
|
||||
TlsLibrary library;
|
||||
ProtocolFamily family;
|
||||
|
||||
UnspecificContextCreation() { this.getProtocol() = family }
|
||||
|
||||
override DataFlow::CfgNode getContext() { result = this }
|
||||
|
||||
override ProtocolVersion getUnrestriction() {
|
||||
// There is only one family, the two names are aliases in OpenSSL.
|
||||
// see https://github.com/openssl/openssl/blob/13888e797c5a3193e91d71e5f5a196a2d68d266f/include/openssl/ssl.h.in#L1953-L1955
|
||||
family in ["SSLv23", "TLS"] and
|
||||
// see https://docs.python.org/3/library/ssl.html#ssl-contexts
|
||||
result in ["SSLv2", "SSLv3", "TLSv1", "TLSv1_1", "TLSv1_2", "TLSv1_3"]
|
||||
}
|
||||
}
|
||||
|
||||
/** A model of an SSL/TLS library. */
|
||||
abstract class TlsLibrary extends string {
|
||||
bindingset[this]
|
||||
TlsLibrary() { any() }
|
||||
|
||||
/** The name of a specific protocol version. */
|
||||
abstract string specific_version_name(ProtocolVersion version);
|
||||
|
||||
/** Gets a name, which is a member of `version_constants`, that can be used to specify the protocol family `family`. */
|
||||
abstract string unspecific_version_name(ProtocolFamily family);
|
||||
|
||||
/** Gets an API node representing the module or class holding the version constants. */
|
||||
abstract API::Node version_constants();
|
||||
|
||||
/** Gets an API node representing a specific protocol version. */
|
||||
API::Node specific_version(ProtocolVersion version) {
|
||||
result = version_constants().getMember(specific_version_name(version))
|
||||
}
|
||||
|
||||
/** Gets an API node representing the protocol family `family`. */
|
||||
API::Node unspecific_version(ProtocolFamily family) {
|
||||
result = version_constants().getMember(unspecific_version_name(family))
|
||||
}
|
||||
|
||||
/** Gets a creation of a context with a default protocol. */
|
||||
abstract ContextCreation default_context_creation();
|
||||
|
||||
/** Gets a creation of a context with a specific protocol. */
|
||||
abstract ContextCreation specific_context_creation();
|
||||
|
||||
/** Gets a creation of a context with a specific protocol version, known to be insecure. */
|
||||
ContextCreation insecure_context_creation(ProtocolVersion version) {
|
||||
result in [specific_context_creation(), default_context_creation()] and
|
||||
result.getProtocol() = version and
|
||||
version.isInsecure()
|
||||
}
|
||||
|
||||
/** Gets a context that was created using `family`, known to have insecure instances. */
|
||||
ContextCreation unspecific_context_creation(ProtocolFamily family) {
|
||||
result in [specific_context_creation(), default_context_creation()] and
|
||||
result.getProtocol() = family
|
||||
}
|
||||
|
||||
/** Gets a dataflow node representing a connection being created in an insecure manner, not from a context. */
|
||||
abstract DataFlow::Node insecure_connection_creation(ProtocolVersion version);
|
||||
|
||||
/** Gets a dataflow node representing a connection being created from a context. */
|
||||
abstract ConnectionCreation connection_creation();
|
||||
|
||||
/** Gets a dataflow node representing a context being restricted on which protocols it can accept. */
|
||||
abstract ProtocolRestriction protocol_restriction();
|
||||
|
||||
/** Gets a dataflow node representing a context being relaxed on which protocols it can accept. */
|
||||
abstract ProtocolUnrestriction protocol_unrestriction();
|
||||
}
|
||||
104 python/ql/src/Security/CWE-327/WeakSensitiveDataHashing.qhelp Normal file
@@ -0,0 +1,104 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
<qhelp>
|
||||
<overview>
|
||||
<p>
|
||||
Using a broken or weak cryptographic hash function can leave data
|
||||
vulnerable, and should not be used in security related code.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
A strong cryptographic hash function should be resistant to:
|
||||
</p>
|
||||
<ul>
|
||||
<li>
|
||||
pre-image attacks: if you know a hash value <code>h(x)</code>,
|
||||
you should not be able to easily find the input <code>x</code>.
|
||||
</li>
|
||||
<li>
|
||||
collision attacks: if you know a hash value <code>h(x)</code>,
|
||||
you should not be able to easily find a different input <code>y</code>
|
||||
with the same hash value <code>h(x) = h(y)</code>.
|
||||
</li>
|
||||
</ul>
|
||||
<p>
|
||||
In cases with a limited input space, such as for passwords, the hash
|
||||
function also needs to be computationally expensive to be resistant to
|
||||
brute-force attacks. Passwords should also have a unique salt applied
|
||||
before hashing, but that is not considered by this query.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
As an example, both MD5 and SHA-1 are known to be vulnerable to collision attacks.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
Since it's OK to use a weak cryptographic hash function in a non-security
|
||||
context, this query only alerts when these are used to hash sensitive
|
||||
data (such as passwords, certificates, usernames).
|
||||
</p>
|
||||
|
||||
<p>
|
||||
Use of broken or weak cryptographic algorithms that are not hashing algorithms is
|
||||
handled by the <code>py/weak-cryptographic-algorithm</code> query.
|
||||
</p>
|
||||
|
||||
</overview>
|
||||
<recommendation>
|
||||
|
||||
<p>
|
||||
Ensure that you use a strong, modern cryptographic hash function:
|
||||
</p>
|
||||
|
||||
<ul>
|
||||
<li>
|
||||
such as Argon2, scrypt, bcrypt, or PBKDF2 for passwords and other data with limited input space.
|
||||
</li>
|
||||
<li>
|
||||
such as SHA-2, or SHA-3 in other cases.
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
</recommendation>
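As a concrete sketch of the first recommendation, password hashing with the standard library's `hashlib.pbkdf2_hmac` and a per-password random salt (the iteration count below is an illustrative choice, not a value taken from this commit):

```python
import hashlib
import os

def hash_password(password: str) -> tuple:
    salt = os.urandom(16)
    digest = hashlib.pbkdf2_hmac("sha256", password.encode("utf-8"), salt, 600000)
    return salt, digest  # store both; recompute with the stored salt to verify
```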
|
||||
<example>
|
||||
|
||||
<p>
|
||||
The following example shows two functions for checking whether the hash
|
||||
of a certificate matches a known value -- to prevent tampering.
|
||||
|
||||
The first function uses MD5 that is known to be vulnerable to collision attacks.
|
||||
|
||||
The second function uses SHA-256 that is a strong cryptographic hashing function.
|
||||
</p>
|
||||
|
||||
<sample src="examples/weak_certificate_hashing.py" />
|
||||
|
||||
</example>
|
||||
<example>
|
||||
<p>
|
||||
The following example shows two functions for hashing passwords.
|
||||
|
||||
The first function uses SHA-256 to hash passwords. Although SHA-256 is a
|
||||
strong cryptographic hash function, it is not suitable for password
|
||||
hashing since it is not computationally expensive.
|
||||
</p>
|
||||
|
||||
<sample src="examples/weak_password_hashing_bad.py" />
|
||||
|
||||
|
||||
<p>
|
||||
The second function uses Argon2 (through the <code>argon2-cffi</code>
|
||||
PyPI package), which is a strong password hashing algorithm (and
|
||||
includes a per-password salt by default).
|
||||
</p>
|
||||
|
||||
<sample src="examples/weak_password_hashing_good.py" />
|
||||
|
||||
</example>
|
||||
|
||||
<references>
|
||||
<li>OWASP: <a href="https://cheatsheetseries.owasp.org/cheatsheets/Password_Storage_Cheat_Sheet.html">Password Storage Cheat Sheet</a></li>
|
||||
</references>
|
||||
|
||||
</qhelp>
|
||||
48 python/ql/src/Security/CWE-327/WeakSensitiveDataHashing.ql Normal file
@@ -0,0 +1,48 @@
|
||||
/**
|
||||
* @name Use of a broken or weak cryptographic hashing algorithm on sensitive data
|
||||
* @description Using broken or weak cryptographic hashing algorithms can compromise security.
|
||||
* @kind path-problem
|
||||
* @problem.severity warning
|
||||
* @security-severity 5.9
|
||||
* @precision high
|
||||
* @id py/weak-sensitive-data-hashing
|
||||
* @tags security
|
||||
* external/cwe/cwe-327
|
||||
* external/cwe/cwe-916
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.security.dataflow.WeakSensitiveDataHashing
|
||||
import semmle.python.dataflow.new.DataFlow
|
||||
import semmle.python.dataflow.new.TaintTracking
|
||||
import DataFlow::PathGraph
|
||||
|
||||
from
|
||||
DataFlow::PathNode source, DataFlow::PathNode sink, string ending, string algorithmName,
|
||||
string classification
|
||||
where
|
||||
exists(NormalHashFunction::Configuration config |
|
||||
config.hasFlowPath(source, sink) and
|
||||
algorithmName = sink.getNode().(NormalHashFunction::Sink).getAlgorithmName() and
|
||||
classification = source.getNode().(NormalHashFunction::Source).getClassification() and
|
||||
ending = "."
|
||||
)
|
||||
or
|
||||
exists(ComputationallyExpensiveHashFunction::Configuration config |
|
||||
config.hasFlowPath(source, sink) and
|
||||
algorithmName = sink.getNode().(ComputationallyExpensiveHashFunction::Sink).getAlgorithmName() and
|
||||
classification =
|
||||
source.getNode().(ComputationallyExpensiveHashFunction::Source).getClassification() and
|
||||
(
|
||||
sink.getNode().(ComputationallyExpensiveHashFunction::Sink).isComputationallyExpensive() and
|
||||
ending = "."
|
||||
or
|
||||
not sink.getNode().(ComputationallyExpensiveHashFunction::Sink).isComputationallyExpensive() and
|
||||
ending =
|
||||
" for " + classification +
|
||||
" hashing, since it is not a computationally expensive hash function."
|
||||
)
|
||||
)
|
||||
select sink.getNode(), source, sink,
|
||||
"$@ is used in a hashing algorithm (" + algorithmName + ") that is insecure" + ending,
|
||||
source.getNode(), "Sensitive data (" + classification + ")"
|
||||
@@ -0,0 +1,9 @@
|
||||
import hashlib
|
||||
|
||||
def certificate_matches_known_hash_bad(certificate, known_hash):
|
||||
hash = hashlib.md5(certificate).hexdigest() # BAD
|
||||
return hash == known_hash
|
||||
|
||||
def certificate_matches_known_hash_good(certificate, known_hash):
|
||||
hash = hashlib.sha256(certificate).hexdigest() # GOOD
|
||||
return hash == known_hash
|
||||
@@ -0,0 +1,4 @@
|
||||
import hashlib
|
||||
|
||||
def get_password_hash(password: str, salt: str):
|
||||
return hashlib.sha256(password + salt).hexdigest() # BAD
|
||||
@@ -0,0 +1,9 @@
|
||||
from argon2 import PasswordHasher
|
||||
|
||||
def get_initial_hash(password: str):
|
||||
ph = PasswordHasher()
|
||||
return ph.hash(password) # GOOD
|
||||
|
||||
def check_password(password: str, known_hash):
|
||||
ph = PasswordHasher()
|
||||
return ph.verify(known_hash, password) # GOOD
|
||||
@@ -4,6 +4,7 @@
|
||||
* @kind problem
|
||||
* @id py/insecure-temporary-file
|
||||
* @problem.severity error
|
||||
* @security-severity 5.9
|
||||
* @sub-severity high
|
||||
* @precision high
|
||||
* @tags external/cwe/cwe-377
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
* @kind path-problem
|
||||
* @id py/unsafe-deserialization
|
||||
* @problem.severity error
|
||||
* @security-severity 5.9
|
||||
* @sub-severity high
|
||||
* @precision high
|
||||
* @tags external/cwe/cwe-502
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
* may cause redirection to malicious web sites.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @security-severity 2.7
|
||||
* @sub-severity low
|
||||
* @id py/url-redirection
|
||||
* @tags security
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
* @kind problem
|
||||
* @id py/overly-permissive-file
|
||||
* @problem.severity warning
|
||||
* @security-severity 5.9
|
||||
* @sub-severity high
|
||||
* @precision medium
|
||||
* @tags external/cwe/cwe-732
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
* @description Credentials are hard coded in the source code of the application.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @security-severity 5.9
|
||||
* @precision medium
|
||||
* @id py/hardcoded-credentials
|
||||
* @tags security
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
* @tags security
|
||||
* correctness
|
||||
* @problem.severity error
|
||||
* @security-severity 4.2
|
||||
* @sub-severity high
|
||||
* @precision low
|
||||
* @id py/use-of-exec
|
||||
|
||||
13
python/ql/src/Summary/LinesOfCode.ql
Normal file
@@ -0,0 +1,13 @@
|
||||
/**
|
||||
* @name Total lines of Python code in the database
|
||||
* @description The total number of lines of Python code across all files, including
|
||||
* external libraries and auto-generated files. This is a useful metric of the size of a
|
||||
* database. This query counts the lines of code, excluding whitespace or comments.
|
||||
* @kind metric
|
||||
* @tags summary
|
||||
* @id py/summary/lines-of-code
|
||||
*/
|
||||
|
||||
import python
|
||||
|
||||
select sum(Module m | | m.getMetrics().getNumberOfLinesOfCode())
|
||||
22
python/ql/src/Summary/LinesOfUserCode.ql
Normal file
@@ -0,0 +1,22 @@
|
||||
/**
|
||||
* @name Total lines of user written Python code in the database
|
||||
* @description The total number of lines of Python code from the source code directory,
|
||||
* excluding auto-generated files. This query counts the lines of code, excluding
|
||||
* whitespace or comments. Note: If external libraries are included in the codebase
|
||||
* either in a checked-in virtual environment or as vendored code, that will currently
|
||||
* be counted as user written code.
|
||||
* @kind metric
|
||||
* @tags summary
|
||||
* lines-of-code
|
||||
* @id py/summary/lines-of-user-code
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.filters.GeneratedCode
|
||||
|
||||
select sum(Module m |
|
||||
exists(m.getFile().getRelativePath()) and
|
||||
not m.getFile() instanceof GeneratedFile
|
||||
|
|
||||
m.getMetrics().getNumberOfLinesOfCode()
|
||||
)
|
||||
@@ -26,9 +26,8 @@ variable should be renamed to make the code easier to interpret.</p>
|
||||
</example>
|
||||
<references>
|
||||
|
||||
<li>J. Lusth, <i>The Art and Craft of Programming - Python Edition</i>, Section: Scope. University of Alabama, 2012. (<a href="http://troll.cs.ua.edu/ACP-PY/index_13.html">Published online</a>).</li>
|
||||
<li>New Mexico Tech Computer Center: <a href="http://infohost.nmt.edu/tcc/help/pubs/python/web/global-statement.html">The global
|
||||
statement: Declare access to a global name</a>.</li>
|
||||
<li>J. Lusth, <i>The Art and Craft of Programming - Python Edition</i>, Section: Scope. University of Alabama, 2012. (<a href="https://web.archive.org/web/20190919091129/http://troll.cs.ua.edu/ACP-PY/index_13.html">Published online</a>).</li>
|
||||
<li>Python Language Reference: <a href="http://docs.python.org/reference/simple_stmts.html#the-global-statement">The global statement</a>.</li>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,30 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
<qhelp>
|
||||
|
||||
|
||||
<overview>
|
||||
<p>A class name that begins with a lowercase letter does not follow standard
|
||||
naming conventions. This decreases code readability. For example, <code>class background</code>.
|
||||
</p>
|
||||
|
||||
</overview>
|
||||
<recommendation>
|
||||
|
||||
<p>
|
||||
Write the class name beginning with an uppercase letter. For example, <code>class Background</code>.
|
||||
</p>
|
||||
|
||||
</recommendation>
|
||||
|
||||
<references>
|
||||
|
||||
<li>
|
||||
Guido van Rossum, Barry Warsaw, Nick Coghlan <em>PEP 8 -- Style Guide for Python Code</em>
|
||||
<a href="https://www.python.org/dev/peps/pep-0008/#class-names">Python Class Names</a>
|
||||
</li>
|
||||
|
||||
</references>
|
||||
|
||||
</qhelp>
|
||||
@@ -0,0 +1,28 @@
|
||||
/**
|
||||
* @name Misnamed class
|
||||
* @description A class name that begins with a lowercase letter decreases readability.
|
||||
* @kind problem
|
||||
* @problem.severity recommendation
|
||||
* @id py/misnamed-class
|
||||
* @tags maintainability
|
||||
*/
|
||||
|
||||
import python
|
||||
|
||||
predicate lower_case_class(Class c) {
|
||||
exists(string first_char |
|
||||
first_char = c.getName().prefix(1) and
|
||||
not first_char = first_char.toUpperCase()
|
||||
)
|
||||
}
|
||||
|
||||
from Class c
|
||||
where
|
||||
c.inSource() and
|
||||
lower_case_class(c) and
|
||||
not exists(Class c1 |
|
||||
c1 != c and
|
||||
c1.getLocation().getFile() = c.getLocation().getFile() and
|
||||
lower_case_class(c1)
|
||||
)
|
||||
select c, "Class names should start in uppercase."
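As a rough illustration of the query's behavior (the file names are hypothetical): a lowercase-named class is only reported when it is the sole such class in its file, so a file that consistently uses lowercase class names is treated as following a deliberate local convention and left alone.

```python
# flagged.py - `background` is the only lowercase-named class here, so the
# query reports it and suggests renaming it to `Background`.
class background:
    pass

class Sprite:
    pass
```

```python
# not_flagged.py - every class name here starts in lowercase, so the query
# stays silent rather than flooding the file with alerts.
class background:
    pass

class sprite:
    pass
```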
|
||||
@@ -0,0 +1,30 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
<qhelp>
|
||||
|
||||
|
||||
<overview>
|
||||
<p>A function name that begins with an uppercase letter does not follow standard
|
||||
naming conventions. This decreases code readability. For example, <code>Jump</code>.
|
||||
</p>
|
||||
|
||||
</overview>
|
||||
<recommendation>
|
||||
|
||||
<p>
|
||||
Write the function name beginning with a lowercase letter. For example, <code>jump</code>.
|
||||
</p>
|
||||
|
||||
</recommendation>
|
||||
|
||||
<references>
|
||||
|
||||
<li>
|
||||
Guido van Rossum, Barry Warsaw, Nick Coghlan <em>PEP 8 -- Style Guide for Python Code</em>
|
||||
<a href="https://www.python.org/dev/peps/pep-0008/#function-and-variable-names">Python Function and Variable Names</a>
|
||||
</li>
|
||||
|
||||
</references>
|
||||
|
||||
</qhelp>
|
||||
@@ -0,0 +1,28 @@
|
||||
/**
|
||||
* @name Misnamed function
|
||||
* @description A function name that begins with an uppercase letter decreases readability.
|
||||
* @kind problem
|
||||
* @problem.severity recommendation
|
||||
* @id py/misnamed-function
|
||||
* @tags maintainability
|
||||
*/
|
||||
|
||||
import python
|
||||
|
||||
predicate upper_case_function(Function func) {
|
||||
exists(string first_char |
|
||||
first_char = func.getName().prefix(1) and
|
||||
not first_char = first_char.toLowerCase()
|
||||
)
|
||||
}
|
||||
|
||||
from Function func
|
||||
where
|
||||
func.inSource() and
|
||||
upper_case_function(func) and
|
||||
not exists(Function func1 |
|
||||
func1 != func and
|
||||
func1.getLocation().getFile() = func.getLocation().getFile() and
|
||||
upper_case_function(func1)
|
||||
)
|
||||
select func, "Function names should start in lowercase."
|
||||
@@ -0,0 +1,28 @@
|
||||
/**
|
||||
* @name OLD QUERY: Use of a broken or weak cryptographic algorithm
|
||||
* @description Using broken or weak cryptographic algorithms can compromise security.
|
||||
* @kind path-problem
|
||||
* @problem.severity warning
|
||||
* @id py/old/weak-cryptographic-algorithm
|
||||
* @deprecated
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.security.Paths
|
||||
import semmle.python.security.SensitiveData
|
||||
import semmle.python.security.Crypto
|
||||
|
||||
class BrokenCryptoConfiguration extends TaintTracking::Configuration {
|
||||
BrokenCryptoConfiguration() { this = "Broken crypto configuration" }
|
||||
|
||||
override predicate isSource(TaintTracking::Source source) {
|
||||
source instanceof SensitiveDataSource
|
||||
}
|
||||
|
||||
override predicate isSink(TaintTracking::Sink sink) { sink instanceof WeakCryptoSink }
|
||||
}
|
||||
|
||||
from BrokenCryptoConfiguration config, TaintedPathSource src, TaintedPathSink sink
|
||||
where config.hasFlowPath(src, sink)
|
||||
select sink.getSink(), src, sink, "$@ is used in a broken or weak cryptographic algorithm.",
|
||||
src.getSource(), "Sensitive data"
|
||||
@@ -0,0 +1,50 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
<qhelp>
|
||||
<overview>
|
||||
<p>If an LDAP query or DN is built using string concatenation or string formatting, and the
components of the concatenation include user input without proper sanitization, an attacker
may be able to run malicious LDAP queries.</p>
|
||||
</overview>
|
||||
|
||||
<recommendation>
|
||||
<p>If user input must be included in an LDAP query or DN, it should be escaped to
prevent a malicious user from providing special characters that change the meaning
of the query. In Python 2, user input should be escaped with <code>ldap.dn.escape_dn_chars</code>
or <code>ldap.filter.escape_filter_chars</code>, while in Python 3 it should be escaped with
<code>ldap3.utils.dn.escape_rdn</code> or <code>ldap3.utils.conv.escape_filter_chars</code>,
depending on which component is tainted by the user. A good practice is to escape the filter characters
that could change the meaning of the query (see RFC 4515, https://tools.ietf.org/search/rfc4515#section-3).</p>
|
||||
</recommendation>
|
||||
|
||||
<example>
|
||||
<p>In the following examples, the code accepts both <code>username</code> and <code>dc</code> from the user,
which it then uses to build an LDAP query and DN.</p>

<p>The first and second examples use the unsanitized user input directly
in the search filter and DN for the LDAP query.
A malicious user could provide special characters to change the meaning of these
components, and search for a completely different set of values.</p>

<sample src="examples/example_bad1.py" />
<sample src="examples/example_bad2.py" />

<p>In the third and fourth examples, the input provided by the user is sanitized before it is included in the search filter or DN.
This ensures the meaning of the query cannot be changed by a malicious user.</p>
|
||||
|
||||
<sample src="examples/example_good1.py" />
|
||||
<sample src="examples/example_good2.py" />
|
||||
</example>
|
||||
|
||||
<references>
|
||||
<li>OWASP: <a href="https://cheatsheetseries.owasp.org/cheatsheets/LDAP_Injection_Prevention_Cheat_Sheet.html">LDAP Injection Prevention Cheat Sheet</a>.</li>
|
||||
<li>OWASP: <a href="https://owasp.org/www-community/attacks/LDAP_Injection">LDAP Injection</a>.</li>
|
||||
<li>SonarSource: <a href="https://rules.sonarsource.com/python/RSPEC-2078">RSPEC-2078</a>.</li>
|
||||
<li>Python2: <a href="https://www.python-ldap.org/en/python-ldap-3.3.0/reference/ldap.html">LDAP Documentation</a>.</li>
|
||||
<li>Python3: <a href="https://ldap3.readthedocs.io/en/latest/">LDAP Documentation</a>.</li>
|
||||
<li>Wikipedia: <a href="https://en.wikipedia.org/wiki/LDAP_injection">LDAP injection</a>.</li>
|
||||
<li>BlackHat: <a href="https://www.blackhat.com/presentations/bh-europe-08/Alonso-Parada/Whitepaper/bh-eu-08-alonso-parada-WP.pdf">LDAP Injection and Blind LDAP Injection</a>.</li>
|
||||
<li>LDAP: <a href="https://ldap.com/2018/05/04/understanding-and-defending-against-ldap-injection-attacks/">Understanding and Defending Against LDAP Injection Attacks</a>.</li>
|
||||
</references>
|
||||
</qhelp>
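To see why the escaping matters, consider the following small, self-contained sketch (the attacker-controlled value is invented for illustration). An unescaped value containing `)` and `*` turns a single equality test into a much broader filter, whereas `ldap3.utils.conv.escape_filter_chars` neutralizes the meta-characters.

```python
from ldap3.utils.conv import escape_filter_chars

# A hypothetical attacker-controlled value.
malicious_username = "*)(objectClass=*"

unsafe_filter = "(user={})".format(malicious_username)
print(unsafe_filter)   # (user=*)(objectClass=*)  -- no longer a single equality test

safe_filter = "(user={})".format(escape_filter_chars(malicious_username))
print(safe_filter)     # the meta-characters are escaped, so the filter means what it says
```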
|
||||
21
python/ql/src/experimental/Security/CWE-090/LDAPInjection.ql
Normal file
@@ -0,0 +1,21 @@
|
||||
/**
|
||||
* @name LDAP query built from user-controlled sources
|
||||
* @description Building an LDAP query from user-controlled sources is vulnerable to insertion of
|
||||
* malicious LDAP code by the user.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @id py/ldap-injection
|
||||
* @tags experimental
|
||||
* security
|
||||
* external/cwe/cwe-090
|
||||
*/
|
||||
|
||||
// Determine precision above
|
||||
import python
|
||||
import experimental.semmle.python.security.injection.LDAP
|
||||
import DataFlow::PathGraph
|
||||
|
||||
from LDAPInjectionFlowConfig config, DataFlow::PathNode source, DataFlow::PathNode sink
|
||||
where config.hasFlowPath(source, sink)
|
||||
select sink.getNode(), source, sink, "$@ LDAP query parameter comes from $@.", sink.getNode(),
|
||||
"This", source.getNode(), "a user-provided value"
|
||||
@@ -0,0 +1,15 @@
|
||||
from flask import request, Flask
import ldap

app = Flask(__name__)


@app.route("/normal")
def normal():
    unsafe_dc = request.args['dc']
    unsafe_filter = request.args['username']

    dn = "dc={}".format(unsafe_dc)
    search_filter = "(user={})".format(unsafe_filter)

    ldap_connection = ldap.initialize("ldap://127.0.0.1")
    user = ldap_connection.search_s(
        dn, ldap.SCOPE_SUBTREE, search_filter)
|
||||
@@ -0,0 +1,15 @@
|
||||
from flask import request, Flask
import ldap3

app = Flask(__name__)


@app.route("/normal")
def normal():
    unsafe_dc = request.args['dc']
    unsafe_filter = request.args['username']

    dn = "dc={}".format(unsafe_dc)
    search_filter = "(user={})".format(unsafe_filter)

    srv = ldap3.Server('ldap://127.0.0.1')
    conn = ldap3.Connection(srv, user=dn, auto_bind=True)
    conn.search(dn, search_filter)
|
||||
@@ -0,0 +1,20 @@
|
||||
from flask import request, Flask
import ldap
import ldap.filter
import ldap.dn

app = Flask(__name__)


@app.route("/normal")
def normal():
    unsafe_dc = request.args['dc']
    unsafe_filter = request.args['username']

    safe_dc = ldap.dn.escape_dn_chars(unsafe_dc)
    safe_filter = ldap.filter.escape_filter_chars(unsafe_filter)

    dn = "dc={}".format(safe_dc)
    search_filter = "(user={})".format(safe_filter)

    ldap_connection = ldap.initialize("ldap://127.0.0.1")
    user = ldap_connection.search_s(
        dn, ldap.SCOPE_SUBTREE, search_filter)
|
||||
@@ -0,0 +1,20 @@
|
||||
from flask import request, Flask
import ldap3
from ldap3.utils.dn import escape_rdn
from ldap3.utils.conv import escape_filter_chars

app = Flask(__name__)


@app.route("/normal")
def normal():
    unsafe_dc = request.args['dc']
    unsafe_filter = request.args['username']

    safe_dc = escape_rdn(unsafe_dc)
    safe_filter = escape_filter_chars(unsafe_filter)

    dn = "dc={}".format(safe_dc)
    search_filter = "(user={})".format(safe_filter)

    srv = ldap3.Server('ldap://127.0.0.1')
    conn = ldap3.Connection(srv, user=dn, auto_bind=True)
    conn.search(dn, search_filter)
|
||||
@@ -0,0 +1,45 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
<qhelp>
|
||||
<overview>
|
||||
<p>
Constructing a regular expression with unsanitized user input is dangerous, as a malicious user may
be able to modify the meaning of the expression. In particular, such a user may be able to provide
a regular expression fragment that requires exponential time to match in the worst case, and use it to
perform a denial-of-service attack.
</p>
|
||||
</overview>
|
||||
|
||||
<recommendation>
|
||||
<p>
Before embedding user input into a regular expression, use a sanitization function such as
<code>re.escape</code> to escape meta-characters that have a special meaning in
regular expression syntax.
</p>
|
||||
</recommendation>
|
||||
|
||||
<example>
|
||||
<p>
|
||||
The following examples are based on a simple Flask web server environment.
|
||||
</p>
|
||||
<p>
The following example shows an HTTP request parameter that is used to construct a regular expression
without sanitizing it first:
</p>
|
||||
<sample src="re_bad.py" />
|
||||
<p>
|
||||
Instead, the request parameter should be sanitized first, for example using the function
|
||||
<code>re.escape</code>. This ensures that the user cannot insert characters which have a
|
||||
special meaning in regular expressions.
|
||||
</p>
|
||||
<sample src="re_good.py" />
|
||||
</example>
|
||||
|
||||
<references>
|
||||
<li>OWASP: <a href="https://www.owasp.org/index.php/Regular_expression_Denial_of_Service_-_ReDoS">Regular expression Denial of Service - ReDoS</a>.</li>
|
||||
<li>Wikipedia: <a href="https://en.wikipedia.org/wiki/ReDoS">ReDoS</a>.</li>
|
||||
<li>Python docs: <a href="https://docs.python.org/3/library/re.html">re</a>.</li>
|
||||
<li>SonarSource: <a href="https://rules.sonarsource.com/python/type/Vulnerability/RSPEC-2631">RSPEC-2631</a>.</li>
|
||||
</references>
|
||||
</qhelp>
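The exponential worst case mentioned in the overview can be demonstrated directly. The following stand-alone sketch (not one of the query's sample files) times the classic catastrophic-backtracking pattern `(a+)+$` against inputs that almost match; the matching time roughly doubles with every extra character.

```python
import re
import time

evil_pattern = "(a+)+$"   # the kind of pattern an attacker might inject

for n in range(16, 25, 2):
    subject = "a" * n + "!"            # almost matches, forcing heavy backtracking
    start = time.perf_counter()
    re.match(evil_pattern, subject)    # fails, but only after an exponential search
    print(n, f"{time.perf_counter() - start:.3f}s")
```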
|
||||
@@ -0,0 +1,29 @@
|
||||
/**
|
||||
* @name Regular expression injection
|
||||
* @description User input should not be used in regular expressions without first being escaped,
|
||||
* otherwise a malicious user may be able to inject an expression that could require
|
||||
* exponential time on certain inputs.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @id py/regex-injection
|
||||
* @tags security
|
||||
* external/cwe/cwe-730
|
||||
* external/cwe/cwe-400
|
||||
*/
|
||||
|
||||
// determine precision above
|
||||
import python
|
||||
import experimental.semmle.python.security.injection.RegexInjection
|
||||
import DataFlow::PathGraph
|
||||
|
||||
from
|
||||
RegexInjectionFlowConfig config, DataFlow::PathNode source, DataFlow::PathNode sink,
|
||||
RegexInjectionSink regexInjectionSink, Attribute methodAttribute
|
||||
where
|
||||
config.hasFlowPath(source, sink) and
|
||||
regexInjectionSink = sink.getNode() and
|
||||
methodAttribute = regexInjectionSink.getRegexMethod()
|
||||
select sink.getNode(), source, sink,
|
||||
"$@ regular expression is constructed from a $@ and executed by $@.", sink.getNode(), "This",
|
||||
source.getNode(), "user-provided value", methodAttribute,
|
||||
regexInjectionSink.getRegexModule() + "." + methodAttribute.getName()
|
||||
15
python/ql/src/experimental/Security/CWE-730/re_bad.py
Normal file
@@ -0,0 +1,15 @@
|
||||
from flask import request, Flask
import re

app = Flask(__name__)


@app.route("/direct")
def direct():
    unsafe_pattern = request.args["pattern"]
    re.search(unsafe_pattern, "")


@app.route("/compile")
def compile():
    unsafe_pattern = request.args["pattern"]
    compiled_pattern = re.compile(unsafe_pattern)
    compiled_pattern.search("")
|
||||
17
python/ql/src/experimental/Security/CWE-730/re_good.py
Normal file
@@ -0,0 +1,17 @@
|
||||
from flask import request, Flask
import re

app = Flask(__name__)


@app.route("/direct")
def direct():
    unsafe_pattern = request.args['pattern']
    safe_pattern = re.escape(unsafe_pattern)
    re.search(safe_pattern, "")


@app.route("/compile")
def compile():
    unsafe_pattern = request.args['pattern']
    safe_pattern = re.escape(unsafe_pattern)
    compiled_pattern = re.compile(safe_pattern)
    compiled_pattern.search("")
|
||||
@@ -14,6 +14,139 @@ private import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import semmle.python.dataflow.new.TaintTracking
|
||||
private import experimental.semmle.python.Frameworks
|
||||
|
||||
/** Provides classes for modeling Regular Expression-related APIs. */
|
||||
module RegexExecution {
|
||||
/**
|
||||
* A data-flow node that executes a regular expression.
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `RegexExecution` instead.
|
||||
*/
|
||||
abstract class Range extends DataFlow::Node {
|
||||
/**
|
||||
* Gets the argument containing the executed expression.
|
||||
*/
|
||||
abstract DataFlow::Node getRegexNode();
|
||||
|
||||
/**
|
||||
* Gets the library used to execute the regular expression.
|
||||
*/
|
||||
abstract string getRegexModule();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A data-flow node that executes a regular expression.
|
||||
*
|
||||
* Extend this class to refine existing API models. If you want to model new APIs,
|
||||
* extend `RegexExecution::Range` instead.
|
||||
*/
|
||||
class RegexExecution extends DataFlow::Node {
|
||||
RegexExecution::Range range;
|
||||
|
||||
RegexExecution() { this = range }
|
||||
|
||||
DataFlow::Node getRegexNode() { result = range.getRegexNode() }
|
||||
|
||||
string getRegexModule() { result = range.getRegexModule() }
|
||||
}
|
||||
|
||||
/** Provides classes for modeling Regular Expression escape-related APIs. */
|
||||
module RegexEscape {
|
||||
/**
|
||||
* A data-flow node that escapes a regular expression.
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `RegexEscape` instead.
|
||||
*/
|
||||
abstract class Range extends DataFlow::Node {
|
||||
/**
|
||||
* Gets the argument containing the escaped expression.
|
||||
*/
|
||||
abstract DataFlow::Node getRegexNode();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A data-flow node that escapes a regular expression.
|
||||
*
|
||||
* Extend this class to refine existing API models. If you want to model new APIs,
|
||||
* extend `RegexEscape::Range` instead.
|
||||
*/
|
||||
class RegexEscape extends DataFlow::Node {
|
||||
RegexEscape::Range range;
|
||||
|
||||
RegexEscape() { this = range }
|
||||
|
||||
DataFlow::Node getRegexNode() { result = range.getRegexNode() }
|
||||
}
|
||||
|
||||
/** Provides classes for modeling LDAP query execution-related APIs. */
|
||||
module LDAPQuery {
|
||||
/**
|
||||
* A data-flow node that collects methods executing an LDAP query.
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `LDAPQuery` instead.
|
||||
*/
|
||||
abstract class Range extends DataFlow::Node {
|
||||
/**
|
||||
* Gets the argument containing the executed expression.
|
||||
*/
|
||||
abstract DataFlow::Node getQuery();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A data-flow node that collects methods executing an LDAP query.
|
||||
*
|
||||
* Extend this class to refine existing API models. If you want to model new APIs,
|
||||
* extend `LDAPQuery::Range` instead.
|
||||
*/
|
||||
class LDAPQuery extends DataFlow::Node {
|
||||
LDAPQuery::Range range;
|
||||
|
||||
LDAPQuery() { this = range }
|
||||
|
||||
/**
|
||||
* Gets the argument containing the executed expression.
|
||||
*/
|
||||
DataFlow::Node getQuery() { result = range.getQuery() }
|
||||
}
|
||||
|
||||
/** Provides classes for modeling LDAP components escape-related APIs. */
|
||||
module LDAPEscape {
|
||||
/**
|
||||
* A data-flow node that collects functions escaping LDAP components.
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `LDAPEscape` instead.
|
||||
*/
|
||||
abstract class Range extends DataFlow::Node {
|
||||
/**
|
||||
* Gets the argument containing the escaped expression.
|
||||
*/
|
||||
abstract DataFlow::Node getAnInput();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A data-flow node that collects functions escaping LDAP components.
|
||||
*
|
||||
* Extend this class to refine existing API models. If you want to model new APIs,
|
||||
* extend `LDAPEscape::Range` instead.
|
||||
*/
|
||||
class LDAPEscape extends DataFlow::Node {
|
||||
LDAPEscape::Range range;
|
||||
|
||||
LDAPEscape() { this = range }
|
||||
|
||||
/**
|
||||
* Gets the argument containing the escaped expression.
|
||||
*/
|
||||
DataFlow::Node getAnInput() { result = range.getAnInput() }
|
||||
}
|
||||
|
||||
/** Provides classes for modeling HTTP Header APIs. */
|
||||
module HeaderDeclaration {
|
||||
/**
|
||||
|
||||
@@ -6,3 +6,4 @@ private import experimental.semmle.python.frameworks.Stdlib
|
||||
private import experimental.semmle.python.frameworks.Flask
|
||||
private import experimental.semmle.python.frameworks.Django
|
||||
private import experimental.semmle.python.frameworks.Werkzeug
|
||||
private import experimental.semmle.python.frameworks.LDAP
|
||||
|
||||
153
python/ql/src/experimental/semmle/python/frameworks/LDAP.qll
Normal file
@@ -0,0 +1,153 @@
|
||||
/**
|
||||
* Provides classes modeling security-relevant aspects of the LDAP libraries.
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.dataflow.new.TaintTracking
|
||||
private import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import experimental.semmle.python.Concepts
|
||||
private import semmle.python.ApiGraphs
|
||||
|
||||
/**
|
||||
* Provides models for Python's ldap-related libraries.
|
||||
*/
|
||||
private module LDAP {
|
||||
/**
|
||||
* Provides models for the `python-ldap` PyPI package (imported as `ldap`).
|
||||
*
|
||||
* See https://www.python-ldap.org/en/python-ldap-3.3.0/index.html
|
||||
*/
|
||||
private module LDAP2 {
|
||||
/**
|
||||
* List of `ldap` methods used to execute a query.
|
||||
*
|
||||
* See https://www.python-ldap.org/en/python-ldap-3.3.0/reference/ldap.html#functions
|
||||
*/
|
||||
private class LDAP2QueryMethods extends string {
|
||||
LDAP2QueryMethods() {
|
||||
this in ["search", "search_s", "search_st", "search_ext", "search_ext_s"]
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A class to find `ldap` methods executing a query.
|
||||
*
|
||||
* See `LDAP2QueryMethods`
|
||||
*/
|
||||
private class LDAP2Query extends DataFlow::CallCfgNode, LDAPQuery::Range {
|
||||
DataFlow::Node ldapQuery;
|
||||
|
||||
LDAP2Query() {
|
||||
exists(DataFlow::AttrRead searchMethod |
|
||||
this.getFunction() = searchMethod and
|
||||
API::moduleImport("ldap").getMember("initialize").getACall() =
|
||||
searchMethod.getObject().getALocalSource() and
|
||||
searchMethod.getAttributeName() instanceof LDAP2QueryMethods and
|
||||
(
|
||||
ldapQuery = this.getArg(0)
|
||||
or
|
||||
(
|
||||
ldapQuery = this.getArg(2) or
|
||||
ldapQuery = this.getArgByName("filterstr")
|
||||
)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
override DataFlow::Node getQuery() { result = ldapQuery }
|
||||
}
|
||||
|
||||
/**
|
||||
* A class to find calls to `ldap.dn.escape_dn_chars`.
|
||||
*
|
||||
* See https://github.com/python-ldap/python-ldap/blob/7ce471e238cdd9a4dd8d17baccd1c9e05e6f894a/Lib/ldap/dn.py#L17
|
||||
*/
|
||||
private class LDAP2EscapeDNCall extends DataFlow::CallCfgNode, LDAPEscape::Range {
|
||||
LDAP2EscapeDNCall() {
|
||||
this = API::moduleImport("ldap").getMember("dn").getMember("escape_dn_chars").getACall()
|
||||
}
|
||||
|
||||
override DataFlow::Node getAnInput() { result = this.getArg(0) }
|
||||
}
|
||||
|
||||
/**
|
||||
* A class to find calls to `ldap.filter.escape_filter_chars`.
|
||||
*
|
||||
* See https://www.python-ldap.org/en/python-ldap-3.3.0/reference/ldap-filter.html#ldap.filter.escape_filter_chars
|
||||
*/
|
||||
private class LDAP2EscapeFilterCall extends DataFlow::CallCfgNode, LDAPEscape::Range {
|
||||
LDAP2EscapeFilterCall() {
|
||||
this =
|
||||
API::moduleImport("ldap").getMember("filter").getMember("escape_filter_chars").getACall()
|
||||
}
|
||||
|
||||
override DataFlow::Node getAnInput() { result = this.getArg(0) }
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Provides models for the `ldap3` PyPI package
|
||||
*
|
||||
* See https://pypi.org/project/ldap3/
|
||||
*/
|
||||
private module LDAP3 {
|
||||
/**
|
||||
* A class to find `ldap3` methods executing a query.
|
||||
*/
|
||||
private class LDAP3Query extends DataFlow::CallCfgNode, LDAPQuery::Range {
|
||||
DataFlow::Node ldapQuery;
|
||||
|
||||
LDAP3Query() {
|
||||
exists(DataFlow::AttrRead searchMethod |
|
||||
this.getFunction() = searchMethod and
|
||||
API::moduleImport("ldap3").getMember("Connection").getACall() =
|
||||
searchMethod.getObject().getALocalSource() and
|
||||
searchMethod.getAttributeName() = "search" and
|
||||
(
|
||||
ldapQuery = this.getArg(0) or
|
||||
ldapQuery = this.getArg(1)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
override DataFlow::Node getQuery() { result = ldapQuery }
|
||||
}
|
||||
|
||||
/**
|
||||
* A class to find calls to `ldap3.utils.dn.escape_rdn`.
|
||||
*
|
||||
* See https://github.com/cannatag/ldap3/blob/4d33166f0869b929f59c6e6825a1b9505eb99967/ldap3/utils/dn.py#L390
|
||||
*/
|
||||
private class LDAP3EscapeDNCall extends DataFlow::CallCfgNode, LDAPEscape::Range {
|
||||
LDAP3EscapeDNCall() {
|
||||
this =
|
||||
API::moduleImport("ldap3")
|
||||
.getMember("utils")
|
||||
.getMember("dn")
|
||||
.getMember("escape_rdn")
|
||||
.getACall()
|
||||
}
|
||||
|
||||
override DataFlow::Node getAnInput() { result = this.getArg(0) }
|
||||
}
|
||||
|
||||
/**
|
||||
* A class to find calls to `ldap3.utils.conv.escape_filter_chars`.
|
||||
*
|
||||
* See https://github.com/cannatag/ldap3/blob/4d33166f0869b929f59c6e6825a1b9505eb99967/ldap3/utils/conv.py#L91
|
||||
*/
|
||||
private class LDAP3EscapeFilterCall extends DataFlow::CallCfgNode, LDAPEscape::Range {
|
||||
LDAP3EscapeFilterCall() {
|
||||
this =
|
||||
API::moduleImport("ldap3")
|
||||
.getMember("utils")
|
||||
.getMember("conv")
|
||||
.getMember("escape_filter_chars")
|
||||
.getACall()
|
||||
}
|
||||
|
||||
override DataFlow::Node getAnInput() { result = this.getArg(0) }
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -9,3 +9,92 @@ private import semmle.python.dataflow.new.TaintTracking
|
||||
private import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import experimental.semmle.python.Concepts
|
||||
private import semmle.python.ApiGraphs
|
||||
|
||||
/**
|
||||
* Provides models for Python's `re` library.
|
||||
*
|
||||
* See https://docs.python.org/3/library/re.html
|
||||
*/
|
||||
private module Re {
|
||||
/**
|
||||
* List of `re` methods immediately executing an expression.
|
||||
*
|
||||
* See https://docs.python.org/3/library/re.html#module-contents
|
||||
*/
|
||||
private class RegexExecutionMethods extends string {
|
||||
RegexExecutionMethods() {
|
||||
this in ["match", "fullmatch", "search", "split", "findall", "finditer", "sub", "subn"]
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A class to find `re` methods immediately executing an expression.
|
||||
*
|
||||
* See `RegexExecutionMethods`
|
||||
*/
|
||||
private class DirectRegex extends DataFlow::CallCfgNode, RegexExecution::Range {
|
||||
DataFlow::Node regexNode;
|
||||
|
||||
DirectRegex() {
|
||||
this = API::moduleImport("re").getMember(any(RegexExecutionMethods m)).getACall() and
|
||||
regexNode = this.getArg(0)
|
||||
}
|
||||
|
||||
override DataFlow::Node getRegexNode() { result = regexNode }
|
||||
|
||||
override string getRegexModule() { result = "re" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A class to find `re` methods immediately executing a compiled expression by `re.compile`.
|
||||
*
|
||||
* Given the following example:
|
||||
*
|
||||
* ```py
|
||||
* pattern = re.compile(input)
|
||||
* pattern.match(s)
|
||||
* ```
|
||||
*
|
||||
* This class will identify that `re.compile` compiles `input` and afterwards
|
||||
* executes `re`'s `match`. As a result, `this` will refer to `pattern.match(s)`
|
||||
* and `this.getRegexNode()` will return the node for `input` (`re.compile`'s first argument)
|
||||
*
|
||||
*
|
||||
* See `RegexExecutionMethods`
|
||||
*
|
||||
* See https://docs.python.org/3/library/re.html#regular-expression-objects
|
||||
*/
|
||||
private class CompiledRegex extends DataFlow::CallCfgNode, RegexExecution::Range {
|
||||
DataFlow::Node regexNode;
|
||||
|
||||
CompiledRegex() {
|
||||
exists(DataFlow::CallCfgNode patternCall, DataFlow::AttrRead reMethod |
|
||||
this.getFunction() = reMethod and
|
||||
patternCall = API::moduleImport("re").getMember("compile").getACall() and
|
||||
patternCall.flowsTo(reMethod.getObject()) and
|
||||
reMethod.getAttributeName() instanceof RegexExecutionMethods and
|
||||
regexNode = patternCall.getArg(0)
|
||||
)
|
||||
}
|
||||
|
||||
override DataFlow::Node getRegexNode() { result = regexNode }
|
||||
|
||||
override string getRegexModule() { result = "re" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A class to find `re` methods escaping an expression.
|
||||
*
|
||||
* See https://docs.python.org/3/library/re.html#re.escape
|
||||
*/
|
||||
class ReEscape extends DataFlow::CallCfgNode, RegexEscape::Range {
|
||||
DataFlow::Node regexNode;
|
||||
|
||||
ReEscape() {
|
||||
this = API::moduleImport("re").getMember("escape").getACall() and
|
||||
regexNode = this.getArg(0)
|
||||
}
|
||||
|
||||
override DataFlow::Node getRegexNode() { result = regexNode }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,24 @@
|
||||
/**
|
||||
* Provides a taint-tracking configuration for detecting LDAP injection vulnerabilities
|
||||
*/
|
||||
|
||||
import python
|
||||
import experimental.semmle.python.Concepts
|
||||
import semmle.python.dataflow.new.DataFlow
|
||||
import semmle.python.dataflow.new.TaintTracking
|
||||
import semmle.python.dataflow.new.RemoteFlowSources
|
||||
|
||||
/**
|
||||
* A taint-tracking configuration for detecting LDAP injections.
|
||||
*/
|
||||
class LDAPInjectionFlowConfig extends TaintTracking::Configuration {
|
||||
LDAPInjectionFlowConfig() { this = "LDAPInjectionFlowConfig" }
|
||||
|
||||
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
|
||||
|
||||
override predicate isSink(DataFlow::Node sink) { sink = any(LDAPQuery ldapQuery).getQuery() }
|
||||
|
||||
override predicate isSanitizer(DataFlow::Node sanitizer) {
|
||||
sanitizer = any(LDAPEscape ldapEsc).getAnInput()
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,53 @@
|
||||
/**
|
||||
* Provides a taint-tracking configuration for detecting regular expression injection
|
||||
* vulnerabilities.
|
||||
*/
|
||||
|
||||
import python
|
||||
import experimental.semmle.python.Concepts
|
||||
import semmle.python.dataflow.new.DataFlow
|
||||
import semmle.python.dataflow.new.TaintTracking
|
||||
import semmle.python.dataflow.new.RemoteFlowSources
|
||||
|
||||
/**
|
||||
* A class to find methods executing regular expressions.
|
||||
*
|
||||
* See `RegexExecution`
|
||||
*/
|
||||
class RegexInjectionSink extends DataFlow::Node {
|
||||
string regexModule;
|
||||
Attribute regexMethod;
|
||||
|
||||
RegexInjectionSink() {
|
||||
exists(RegexExecution reExec |
|
||||
this = reExec.getRegexNode() and
|
||||
regexModule = reExec.getRegexModule() and
|
||||
regexMethod = reExec.(DataFlow::CallCfgNode).getFunction().asExpr().(Attribute)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the name of the module used to execute the regular expression.
|
||||
*/
|
||||
string getRegexModule() { result = regexModule }
|
||||
|
||||
/**
|
||||
* Gets the method used to execute the regular expression.
|
||||
*/
|
||||
Attribute getRegexMethod() { result = regexMethod }
|
||||
}
|
||||
|
||||
/**
|
||||
* A taint-tracking configuration for detecting regular expression injections.
|
||||
*/
|
||||
class RegexInjectionFlowConfig extends TaintTracking::Configuration {
|
||||
RegexInjectionFlowConfig() { this = "RegexInjectionFlowConfig" }
|
||||
|
||||
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
|
||||
|
||||
override predicate isSink(DataFlow::Node sink) { sink instanceof RegexInjectionSink }
|
||||
|
||||
override predicate isSanitizer(DataFlow::Node sanitizer) {
|
||||
sanitizer = any(RegexEscape reEscape).getRegexNode()
|
||||
}
|
||||
}
|
||||
@@ -25,7 +25,7 @@ duplicate classes.</p>
|
||||
</recommendation>
|
||||
<references>
|
||||
|
||||
<li>E. Juergens, F. Deissenboeck, B. Hummel and S. Wagner, <em>Do Code Clones Matter?</em>, 2009. (<a href="http://www4.in.tum.de/~juergens/publications/ICSE2009_RP_0110_juergens.pdf">available online</a>).</li>
|
||||
<li>E. Juergens, F. Deissenboeck, B. Hummel and S. Wagner, <em>Do Code Clones Matter?</em>, 2009. (<a href="https://wwwbroy.in.tum.de/~juergens/publications/ICSE2009_RP_0110_juergens.pdf">available online</a>).</li>
|
||||
|
||||
</references>
|
||||
</qhelp>
|
||||
|
||||
@@ -25,7 +25,7 @@ importing that module into the original module.</p>
|
||||
</recommendation>
|
||||
<references>
|
||||
|
||||
<li>E. Juergens, F. Deissenboeck, B. Hummel and S. Wagner, <em>Do Code Clones Matter?</em>, 2009. (<a href="http://www4.in.tum.de/~juergens/publications/ICSE2009_RP_0110_juergens.pdf">available online</a>).</li>
|
||||
<li>E. Juergens, F. Deissenboeck, B. Hummel and S. Wagner, <em>Do Code Clones Matter?</em>, 2009. (<a href="https://wwwbroy.in.tum.de/~juergens/publications/ICSE2009_RP_0110_juergens.pdf">available online</a>).</li>
|
||||
|
||||
</references>
|
||||
</qhelp>
|
||||
|
||||
@@ -19,7 +19,7 @@ of the shared code into its own module and import that module into the original.
|
||||
</recommendation>
|
||||
<references>
|
||||
|
||||
<li>E. Juergens, F. Deissenboeck, B. Hummel and S. Wagner, <em>Do Code Clones Matter?</em>, 2009. (<a href="http://www4.in.tum.de/~juergens/publications/ICSE2009_RP_0110_juergens.pdf">available online</a>).</li>
|
||||
<li>E. Juergens, F. Deissenboeck, B. Hummel and S. Wagner, <em>Do Code Clones Matter?</em>, 2009. (<a href="https://wwwbroy.in.tum.de/~juergens/publications/ICSE2009_RP_0110_juergens.pdf">available online</a>).</li>
|
||||
|
||||
</references>
|
||||
</qhelp>
|
||||
|
||||
2
python/ql/src/external/SimilarFunction.qhelp
vendored
@@ -25,7 +25,7 @@ almost all of their lines are the same, then consider extracting the same lines
|
||||
</recommendation>
|
||||
<references>
|
||||
|
||||
<li>E. Juergens, F. Deissenboeck, B. Hummel and S. Wagner, <em>Do Code Clones Matter?</em>, 2009. (<a href="http://www4.in.tum.de/~juergens/publications/ICSE2009_RP_0110_juergens.pdf">available online</a>).</li>
|
||||
<li>E. Juergens, F. Deissenboeck, B. Hummel and S. Wagner, <em>Do Code Clones Matter?</em>, 2009. (<a href="https://wwwbroy.in.tum.de/~juergens/publications/ICSE2009_RP_0110_juergens.pdf">available online</a>).</li>
|
||||
|
||||
</references>
|
||||
</qhelp>
|
||||
|
||||
@@ -1,174 +1,3 @@
|
||||
/**
|
||||
* Provides classes modeling cryptographic algorithms, separated into strong and weak variants.
|
||||
*
|
||||
* The classification into strong and weak are based on Wikipedia, OWASP and google (2017).
|
||||
*/
|
||||
/** DEPRECATED: Use `semmle.python.concepts.CryptoAlgorithms` instead. */
|
||||
|
||||
/**
|
||||
* Names of cryptographic algorithms, separated into strong and weak variants.
|
||||
*
|
||||
* The names are normalized: upper-case, no spaces, dashes or underscores.
|
||||
*
|
||||
* The names are inspired by the names used in real world crypto libraries.
|
||||
*
|
||||
* The classification into strong and weak are based on Wikipedia, OWASP and google (2017).
|
||||
*/
|
||||
private module AlgorithmNames {
|
||||
predicate isStrongHashingAlgorithm(string name) {
|
||||
name = "DSA" or
|
||||
name = "ED25519" or
|
||||
name = "ES256" or
|
||||
name = "ECDSA256" or
|
||||
name = "ES384" or
|
||||
name = "ECDSA384" or
|
||||
name = "ES512" or
|
||||
name = "ECDSA512" or
|
||||
name = "SHA2" or
|
||||
name = "SHA224" or
|
||||
name = "SHA256" or
|
||||
name = "SHA384" or
|
||||
name = "SHA512" or
|
||||
name = "SHA3"
|
||||
}
|
||||
|
||||
predicate isWeakHashingAlgorithm(string name) {
|
||||
name = "HAVEL128" or
|
||||
name = "MD2" or
|
||||
name = "MD4" or
|
||||
name = "MD5" or
|
||||
name = "PANAMA" or
|
||||
name = "RIPEMD" or
|
||||
name = "RIPEMD128" or
|
||||
name = "RIPEMD256" or
|
||||
name = "RIPEMD160" or
|
||||
name = "RIPEMD320" or
|
||||
name = "SHA0" or
|
||||
name = "SHA1"
|
||||
}
|
||||
|
||||
predicate isStrongEncryptionAlgorithm(string name) {
|
||||
name = "AES" or
|
||||
name = "AES128" or
|
||||
name = "AES192" or
|
||||
name = "AES256" or
|
||||
name = "AES512" or
|
||||
name = "RSA" or
|
||||
name = "RABBIT" or
|
||||
name = "BLOWFISH"
|
||||
}
|
||||
|
||||
predicate isWeakEncryptionAlgorithm(string name) {
|
||||
name = "DES" or
|
||||
name = "3DES" or
|
||||
name = "TRIPLEDES" or
|
||||
name = "TDEA" or
|
||||
name = "TRIPLEDEA" or
|
||||
name = "ARC2" or
|
||||
name = "RC2" or
|
||||
name = "ARC4" or
|
||||
name = "RC4" or
|
||||
name = "ARCFOUR" or
|
||||
name = "ARC5" or
|
||||
name = "RC5"
|
||||
}
|
||||
|
||||
predicate isStrongPasswordHashingAlgorithm(string name) {
|
||||
name = "ARGON2" or
|
||||
name = "PBKDF2" or
|
||||
name = "BCRYPT" or
|
||||
name = "SCRYPT"
|
||||
}
|
||||
|
||||
predicate isWeakPasswordHashingAlgorithm(string name) { none() }
|
||||
}
|
||||
|
||||
private import AlgorithmNames
|
||||
|
||||
/**
|
||||
* A cryptographic algorithm.
|
||||
*/
|
||||
private newtype TCryptographicAlgorithm =
|
||||
MkHashingAlgorithm(string name, boolean isWeak) {
|
||||
isStrongHashingAlgorithm(name) and isWeak = false
|
||||
or
|
||||
isWeakHashingAlgorithm(name) and isWeak = true
|
||||
} or
|
||||
MkEncryptionAlgorithm(string name, boolean isWeak) {
|
||||
isStrongEncryptionAlgorithm(name) and isWeak = false
|
||||
or
|
||||
isWeakEncryptionAlgorithm(name) and isWeak = true
|
||||
} or
|
||||
MkPasswordHashingAlgorithm(string name, boolean isWeak) {
|
||||
isStrongPasswordHashingAlgorithm(name) and isWeak = false
|
||||
or
|
||||
isWeakPasswordHashingAlgorithm(name) and isWeak = true
|
||||
}
|
||||
|
||||
/**
|
||||
* A cryptographic algorithm.
|
||||
*/
|
||||
abstract class CryptographicAlgorithm extends TCryptographicAlgorithm {
|
||||
/** Gets a textual representation of this element. */
|
||||
string toString() { result = getName() }
|
||||
|
||||
/**
|
||||
* Gets the normalized name of this algorithm (upper-case, no spaces, dashes or underscores).
|
||||
*/
|
||||
abstract string getName();
|
||||
|
||||
/**
|
||||
* Holds if the name of this algorithm matches `name` modulo case,
|
||||
* white space, dashes, and underscores.
|
||||
*/
|
||||
bindingset[name]
|
||||
predicate matchesName(string name) {
|
||||
name.toUpperCase().regexpReplaceAll("[-_ ]", "") = getName()
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if this algorithm is weak.
|
||||
*/
|
||||
abstract predicate isWeak();
|
||||
}
|
||||
|
||||
/**
|
||||
* A hashing algorithm such as `MD5` or `SHA512`.
|
||||
*/
|
||||
class HashingAlgorithm extends MkHashingAlgorithm, CryptographicAlgorithm {
|
||||
string name;
|
||||
boolean isWeak;
|
||||
|
||||
HashingAlgorithm() { this = MkHashingAlgorithm(name, isWeak) }
|
||||
|
||||
override string getName() { result = name }
|
||||
|
||||
override predicate isWeak() { isWeak = true }
|
||||
}
|
||||
|
||||
/**
|
||||
* An encryption algorithm such as `DES` or `AES512`.
|
||||
*/
|
||||
class EncryptionAlgorithm extends MkEncryptionAlgorithm, CryptographicAlgorithm {
|
||||
string name;
|
||||
boolean isWeak;
|
||||
|
||||
EncryptionAlgorithm() { this = MkEncryptionAlgorithm(name, isWeak) }
|
||||
|
||||
override string getName() { result = name }
|
||||
|
||||
override predicate isWeak() { isWeak = true }
|
||||
}
|
||||
|
||||
/**
|
||||
* A password hashing algorithm such as `PBKDF2` or `SCRYPT`.
|
||||
*/
|
||||
class PasswordHashingAlgorithm extends MkPasswordHashingAlgorithm, CryptographicAlgorithm {
|
||||
string name;
|
||||
boolean isWeak;
|
||||
|
||||
PasswordHashingAlgorithm() { this = MkPasswordHashingAlgorithm(name, isWeak) }
|
||||
|
||||
override string getName() { result = name }
|
||||
|
||||
override predicate isWeak() { isWeak = true }
|
||||
}
|
||||
import semmle.python.concepts.CryptoAlgorithms
|
||||
|
||||
@@ -97,6 +97,11 @@ module API {
|
||||
*/
|
||||
Node getASubclass() { result = getASuccessor(Label::subclass()) }
|
||||
|
||||
/**
|
||||
* Gets a node representing the result from awaiting this node.
|
||||
*/
|
||||
Node getAwaited() { result = getASuccessor(Label::await()) }
|
||||
|
||||
/**
|
||||
* Gets a string representation of the lexicographically least among all shortest access paths
|
||||
* from the root to this node.
|
||||
@@ -349,22 +354,95 @@ module API {
|
||||
)
|
||||
}
|
||||
|
||||
private import semmle.python.types.Builtins as Builtins
|
||||
/** Gets the name of a known built-in. */
|
||||
private string getBuiltInName() {
|
||||
// These lists were created by inspecting the `builtins` and `__builtin__` modules in
|
||||
// Python 3 and 2 respectively, using the `dir` built-in.
|
||||
// Built-in functions and exceptions shared between Python 2 and 3
|
||||
result in [
|
||||
"abs", "all", "any", "bin", "bool", "bytearray", "callable", "chr", "classmethod",
|
||||
"compile", "complex", "delattr", "dict", "dir", "divmod", "enumerate", "eval", "filter",
|
||||
"float", "format", "frozenset", "getattr", "globals", "hasattr", "hash", "help", "hex",
|
||||
"id", "input", "int", "isinstance", "issubclass", "iter", "len", "list", "locals", "map",
|
||||
"max", "memoryview", "min", "next", "object", "oct", "open", "ord", "pow", "print",
|
||||
"property", "range", "repr", "reversed", "round", "set", "setattr", "slice", "sorted",
|
||||
"staticmethod", "str", "sum", "super", "tuple", "type", "vars", "zip", "__import__",
|
||||
// Exceptions
|
||||
"ArithmeticError", "AssertionError", "AttributeError", "BaseException", "BufferError",
|
||||
"BytesWarning", "DeprecationWarning", "EOFError", "EnvironmentError", "Exception",
|
||||
"FloatingPointError", "FutureWarning", "GeneratorExit", "IOError", "ImportError",
|
||||
"ImportWarning", "IndentationError", "IndexError", "KeyError", "KeyboardInterrupt",
|
||||
"LookupError", "MemoryError", "NameError", "NotImplemented", "NotImplementedError",
|
||||
"OSError", "OverflowError", "PendingDeprecationWarning", "ReferenceError", "RuntimeError",
|
||||
"RuntimeWarning", "StandardError", "StopIteration", "SyntaxError", "SyntaxWarning",
|
||||
"SystemError", "SystemExit", "TabError", "TypeError", "UnboundLocalError",
|
||||
"UnicodeDecodeError", "UnicodeEncodeError", "UnicodeError", "UnicodeTranslateError",
|
||||
"UnicodeWarning", "UserWarning", "ValueError", "Warning", "ZeroDivisionError",
|
||||
// Added for compatibility
|
||||
"exec"
|
||||
]
|
||||
or
|
||||
// Built-in constants shared between Python 2 and 3
|
||||
result in ["False", "True", "None", "NotImplemented", "Ellipsis", "__debug__"]
|
||||
or
|
||||
// Python 3 only
|
||||
result in [
|
||||
"ascii", "breakpoint", "bytes", "exec",
|
||||
// Exceptions
|
||||
"BlockingIOError", "BrokenPipeError", "ChildProcessError", "ConnectionAbortedError",
|
||||
"ConnectionError", "ConnectionRefusedError", "ConnectionResetError", "FileExistsError",
|
||||
"FileNotFoundError", "InterruptedError", "IsADirectoryError", "ModuleNotFoundError",
|
||||
"NotADirectoryError", "PermissionError", "ProcessLookupError", "RecursionError",
|
||||
"ResourceWarning", "StopAsyncIteration", "TimeoutError"
|
||||
]
|
||||
or
|
||||
// Python 2 only
|
||||
result in [
|
||||
"basestring", "cmp", "execfile", "file", "long", "raw_input", "reduce", "reload",
|
||||
"unichr", "unicode", "xrange"
|
||||
]
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a data flow node that is likely to refer to a built-in with the name `name`.
|
||||
*
|
||||
* Currently this is an over-approximation, and does not account for things like overwriting a
|
||||
* Currently this is an over-approximation, and may not account for things like overwriting a
|
||||
* built-in with a different value.
|
||||
*/
|
||||
private DataFlow::Node likely_builtin(string name) {
|
||||
result.asCfgNode() =
|
||||
any(NameNode n |
|
||||
n.isGlobal() and
|
||||
n.isLoad() and
|
||||
name = n.getId() and
|
||||
name = any(Builtins::Builtin b).getName()
|
||||
)
|
||||
exists(Module m |
|
||||
result.asCfgNode() =
|
||||
any(NameNode n |
|
||||
possible_builtin_accessed_in_module(n, name, m) and
|
||||
not possible_builtin_defined_in_module(name, m)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if a global variable called `name` (which is also the name of a built-in) is assigned
|
||||
* a value in the module `m`.
|
||||
*/
|
||||
private predicate possible_builtin_defined_in_module(string name, Module m) {
|
||||
exists(NameNode n |
|
||||
not exists(LocalVariable v | n.defines(v)) and
|
||||
n.isStore() and
|
||||
name = n.getId() and
|
||||
name = getBuiltInName() and
|
||||
m = n.getEnclosingModule()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `n` is an access of a global variable called `name` (which is also the name of a
|
||||
* built-in) inside the module `m`.
|
||||
*/
|
||||
private predicate possible_builtin_accessed_in_module(NameNode n, string name, Module m) {
|
||||
n.isGlobal() and
|
||||
n.isLoad() and
|
||||
name = n.getId() and
|
||||
name = getBuiltInName() and
|
||||
m = n.getEnclosingModule()
|
||||
}
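As a small illustration of what the two helper predicates above distinguish (the module names are hypothetical): in the first module a load of `input` is still treated as a likely reference to the built-in, while in the second the module-level assignment means later loads of `input` are no longer considered uses of the built-in.

```python
# plain_use.py: no global named `input` is ever assigned in this module,
# so the heuristic treats this load of `input` as the built-in.
value = input("> ")
```

```python
# shadowed.py: the module defines its own global `input`, so the heuristic
# no longer treats loads of `input` in this module as the built-in.
def input(prompt):
    return "always the same answer"

value = input("> ")
```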
|
||||
|
||||
/**
|
||||
@@ -396,6 +474,14 @@ module API {
|
||||
exists(DataFlow::Node superclass | pred.flowsTo(superclass) |
|
||||
ref.asExpr().(ClassExpr).getABase() = superclass.asExpr()
|
||||
)
|
||||
or
|
||||
// awaiting
|
||||
exists(Await await, DataFlow::Node awaitedValue |
|
||||
lbl = Label::await() and
|
||||
ref.asExpr() = await and
|
||||
await.getValue() = awaitedValue.asExpr() and
|
||||
pred.flowsTo(awaitedValue)
|
||||
)
|
||||
)
|
||||
or
|
||||
// Built-ins, treated as members of the module `builtins`
|
||||
@@ -422,9 +508,9 @@ module API {
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a data-flow node to which `nd`, which is a use of an API-graph node, flows.
|
||||
* Gets a data-flow node to which `src`, which is a use of an API-graph node, flows.
|
||||
*
|
||||
* The flow from `nd` to that node may be inter-procedural.
|
||||
* The flow from `src` to that node may be inter-procedural.
|
||||
*/
|
||||
private DataFlow::LocalSourceNode trackUseNode(
|
||||
DataFlow::LocalSourceNode src, DataFlow::TypeTracker t
|
||||
@@ -433,30 +519,26 @@ module API {
|
||||
use(_, src) and
|
||||
result = src
|
||||
or
|
||||
// Due to bad performance when using `trackUseNode(t2, attr_name).track(t2, t)`
|
||||
// we have inlined that code and forced a join
|
||||
exists(DataFlow::StepSummary summary |
|
||||
t = trackUseNode_first_join(src, result, summary).append(summary)
|
||||
)
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private DataFlow::TypeTracker trackUseNode_first_join(
|
||||
DataFlow::LocalSourceNode src, DataFlow::LocalSourceNode res, DataFlow::StepSummary summary
|
||||
) {
|
||||
DataFlow::StepSummary::step(trackUseNode(src, result), res, summary)
|
||||
exists(DataFlow::TypeTracker t2 | result = trackUseNode(src, t2).track(t2, t))
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a data-flow node to which `src`, which is a use of an API-graph node, flows.
|
||||
*
|
||||
* The flow from `src` to that node may be inter-procedural.
|
||||
*/
|
||||
cached
|
||||
DataFlow::LocalSourceNode trackUseNode(DataFlow::LocalSourceNode src) {
|
||||
result = trackUseNode(src, DataFlow::TypeTracker::end())
|
||||
result = trackUseNode(src, DataFlow::TypeTracker::end()) and
|
||||
// We exclude module variable nodes, as these do not correspond to real uses.
|
||||
not result instanceof DataFlow::ModuleVariableNode
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if there is an edge from `pred` to `succ` in the API graph that is labeled with `lbl`.
|
||||
*/
|
||||
cached
|
||||
predicate edge(Node pred, string lbl, Node succ) {
|
||||
predicate edge(TApiNode pred, string lbl, TApiNode succ) {
|
||||
/* There's an edge from the root node for each imported module. */
|
||||
exists(string m |
|
||||
pred = MkRoot() and
|
||||
@@ -516,5 +598,9 @@ private module Label {
|
||||
/** Gets the `return` edge label. */
|
||||
string return() { result = "getReturn()" }
|
||||
|
||||
/** Gets the `subclass` edge label. */
|
||||
string subclass() { result = "getASubclass()" }
|
||||
|
||||
/** Gets the `await` edge label. */
|
||||
string await() { result = "getAwaited()" }
|
||||
}
|
||||
|
||||
@@ -527,7 +527,14 @@ module HTTP {
|
||||
}
|
||||
}
|
||||
|
||||
/** Provides models for cryptographic things. */
|
||||
/**
|
||||
* Provides models for cryptographic things.
|
||||
*
|
||||
* Note: The `CryptographicAlgorithm` class currently doesn't take weak keys into
|
||||
* consideration for the `isWeak` member predicate. So RSA is always considered
|
||||
* secure, although using a low number of bits will actually make it insecure. We plan
|
||||
* to improve our libraries in the future to more precisely capture this aspect.
|
||||
*/
|
||||
module Cryptography {
|
||||
/** Provides models for public-key cryptography, also called asymmetric cryptography. */
|
||||
module PublicKey {
|
||||
@@ -570,21 +577,7 @@ module Cryptography {
|
||||
arg = any(KeyGeneration::Range r).getKeySizeArg() and
|
||||
result = arg.getALocalSource()
|
||||
or
|
||||
// Due to bad performance when using normal setup with we have inlined that code and forced a join
|
||||
exists(DataFlow::TypeBackTracker t2 |
|
||||
exists(DataFlow::StepSummary summary |
|
||||
keysizeBacktracker_first_join(t2, arg, result, summary) and
|
||||
t = t2.prepend(summary)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private predicate keysizeBacktracker_first_join(
|
||||
DataFlow::TypeBackTracker t2, DataFlow::Node arg, DataFlow::Node res,
|
||||
DataFlow::StepSummary summary
|
||||
) {
|
||||
DataFlow::StepSummary::step(res, keysizeBacktracker(t2, arg), summary)
|
||||
exists(DataFlow::TypeBackTracker t2 | result = keysizeBacktracker(t2, arg).backtrack(t2, t))
|
||||
}
|
||||
|
||||
/** Gets a back-reference to the keysize argument `arg` that was used to generate a new key-pair. */
|
||||
@@ -640,4 +633,43 @@ module Cryptography {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
import semmle.python.concepts.CryptoAlgorithms
|
||||
|
||||
/**
|
||||
* A data-flow node that is an application of a cryptographic algorithm. For example,
|
||||
* encryption, decryption, signature-validation.
|
||||
*
|
||||
* Extend this class to refine existing API models. If you want to model new APIs,
|
||||
* extend `CryptographicOperation::Range` instead.
|
||||
*/
|
||||
class CryptographicOperation extends DataFlow::Node {
|
||||
CryptographicOperation::Range range;
|
||||
|
||||
CryptographicOperation() { this = range }
|
||||
|
||||
/** Gets the algorithm used, if it matches a known `CryptographicAlgorithm`. */
|
||||
CryptographicAlgorithm getAlgorithm() { result = range.getAlgorithm() }
|
||||
|
||||
/** Gets an input the algorithm is used on, for example the plain text input to be encrypted. */
|
||||
DataFlow::Node getAnInput() { result = range.getAnInput() }
|
||||
}

/** Provides classes for modeling new applications of cryptographic algorithms. */
module CryptographicOperation {
/**
* A data-flow node that is an application of a cryptographic algorithm. For example,
* encryption, decryption, signature-validation.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `CryptographicOperation` instead.
*/
abstract class Range extends DataFlow::Node {
/** Gets the algorithm used, if it matches a known `CryptographicAlgorithm`. */
abstract CryptographicAlgorithm getAlgorithm();

/** Gets an input the algorithm is used on, for example the plain text input to be encrypted. */
abstract DataFlow::Node getAnInput();
}
}
}

@@ -72,6 +72,33 @@ class File extends Container {
* are specified to be extracted.
*/
string getContents() { file_contents(this, result) }

/** Holds if this file is likely to get executed directly, and thus act as an entry point for execution. */
predicate isPossibleEntryPoint() {
// Only consider files in the source code, and not things like the standard library
exists(this.getRelativePath()) and
(
// The file doesn't have the extension `.py` but still contains Python statements
not this.getExtension().matches("py%") and
exists(Stmt s | s.getLocation().getFile() = this)
or
// The file contains the usual `if __name__ == '__main__':` construction
exists(If i, Name name, StrConst main, Cmpop op |
i.getScope().(Module).getFile() = this and
op instanceof Eq and
i.getTest().(Compare).compares(name, op, main) and
name.getId() = "__name__" and
main.getText() = "__main__"
)
or
// The file contains a `#!` line referencing the python interpreter
exists(Comment c |
c.getLocation().getFile() = this and
c.getLocation().getStartLine() = 1 and
c.getText().regexpMatch("^#! */.*python(2|3)?[ \\\\t]*$")
)
)
}
}
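// For example, a file like the following (the file itself is hypothetical) would
// satisfy `isPossibleEntryPoint()`, both via the shebang line and via the
// `__main__` check:
//
// ```python
// #!/usr/bin/env python3
// def main():
//     print("hello")
//
// if __name__ == '__main__':
//     main()
// ```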

private predicate occupied_line(File f, int n) {

@@ -2,17 +2,27 @@
* Helper file that imports all framework modeling.
*/

// If you add modeling of a new framework/library, remember to add it to the docs in
// `docs/codeql/support/reusables/frameworks.rst`
private import semmle.python.frameworks.Aioch
private import semmle.python.frameworks.Aiohttp
private import semmle.python.frameworks.ClickhouseDriver
private import semmle.python.frameworks.Cryptodome
private import semmle.python.frameworks.Cryptography
private import semmle.python.frameworks.Dill
private import semmle.python.frameworks.Django
private import semmle.python.frameworks.Fabric
private import semmle.python.frameworks.Flask
private import semmle.python.frameworks.Idna
private import semmle.python.frameworks.Invoke
private import semmle.python.frameworks.MysqlConnectorPython
private import semmle.python.frameworks.Multidict
private import semmle.python.frameworks.Mysql
private import semmle.python.frameworks.MySQLdb
private import semmle.python.frameworks.Psycopg2
private import semmle.python.frameworks.PyMySQL
private import semmle.python.frameworks.Simplejson
private import semmle.python.frameworks.Stdlib
private import semmle.python.frameworks.Tornado
private import semmle.python.frameworks.Ujson
private import semmle.python.frameworks.Yaml
private import semmle.python.frameworks.Yarl

@@ -205,11 +205,38 @@ private string moduleNameFromBase(Container file) {
file instanceof File and result = file.getStem()
}

/**
* Holds if `file` may be transitively imported from a file that may serve as the entry point of
* the execution.
*/
private predicate transitively_imported_from_entry_point(File file) {
file.getExtension().matches("%py%") and
exists(File importer |
// Only consider files that are in the source archive
exists(importer.getRelativePath()) and
importer.getParent() = file.getParent() and
exists(ImportExpr i |
i.getLocation().getFile() = importer and
i.getName() = file.getStem() and
// Disregard relative imports
i.getLevel() = 0
)
|
importer.isPossibleEntryPoint() or transitively_imported_from_entry_point(importer)
)
}
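// As an illustration (file names are hypothetical): if `main.py` below is a
// possible entry point, then its sibling `helper.py` is transitively imported from
// an entry point and may be referred to simply as `helper`:
//
// ```python
// # main.py
// import helper            # non-relative import of a sibling file
//
// if __name__ == '__main__':
//     helper.run()
// ```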

string moduleNameFromFile(Container file) {
exists(string basename |
basename = moduleNameFromBase(file) and
legalShortName(basename) and
legalShortName(basename)
|
result = moduleNameFromFile(file.getParent()) + "." + basename
or
// If `file` is a transitive import of a file that's executed directly, we allow references
// to it by its `basename`.
transitively_imported_from_entry_point(file) and
result = basename
)
or
isPotentialSourcePackage(file) and

174
python/ql/src/semmle/python/concepts/CryptoAlgorithms.qll
Normal file
@@ -0,0 +1,174 @@
/**
* Provides classes modeling cryptographic algorithms, separated into strong and weak variants.
*
* The classification into strong and weak is based on Wikipedia, OWASP and Google (2017).
*/

/**
* Names of cryptographic algorithms, separated into strong and weak variants.
*
* The names are normalized: upper-case, no spaces, dashes or underscores.
*
* The names are inspired by the names used in real world crypto libraries.
*
* The classification into strong and weak is based on Wikipedia, OWASP and Google (2017).
*/
private module AlgorithmNames {
predicate isStrongHashingAlgorithm(string name) {
name = "DSA" or
name = "ED25519" or
name = "ES256" or
name = "ECDSA256" or
name = "ES384" or
name = "ECDSA384" or
name = "ES512" or
name = "ECDSA512" or
name = "SHA2" or
name = "SHA224" or
name = "SHA256" or
name = "SHA384" or
name = "SHA512" or
name = "SHA3"
}

predicate isWeakHashingAlgorithm(string name) {
name = "HAVEL128" or
name = "MD2" or
name = "MD4" or
name = "MD5" or
name = "PANAMA" or
name = "RIPEMD" or
name = "RIPEMD128" or
name = "RIPEMD256" or
name = "RIPEMD160" or
name = "RIPEMD320" or
name = "SHA0" or
name = "SHA1"
}

predicate isStrongEncryptionAlgorithm(string name) {
name = "AES" or
name = "AES128" or
name = "AES192" or
name = "AES256" or
name = "AES512" or
name = "RSA" or
name = "RABBIT" or
name = "BLOWFISH"
}

predicate isWeakEncryptionAlgorithm(string name) {
name = "DES" or
name = "3DES" or
name = "TRIPLEDES" or
name = "TDEA" or
name = "TRIPLEDEA" or
name = "ARC2" or
name = "RC2" or
name = "ARC4" or
name = "RC4" or
name = "ARCFOUR" or
name = "ARC5" or
name = "RC5"
}

predicate isStrongPasswordHashingAlgorithm(string name) {
name = "ARGON2" or
name = "PBKDF2" or
name = "BCRYPT" or
name = "SCRYPT"
}

predicate isWeakPasswordHashingAlgorithm(string name) { none() }
}

private import AlgorithmNames

/**
* A cryptographic algorithm.
*/
private newtype TCryptographicAlgorithm =
MkHashingAlgorithm(string name, boolean isWeak) {
isStrongHashingAlgorithm(name) and isWeak = false
or
isWeakHashingAlgorithm(name) and isWeak = true
} or
MkEncryptionAlgorithm(string name, boolean isWeak) {
isStrongEncryptionAlgorithm(name) and isWeak = false
or
isWeakEncryptionAlgorithm(name) and isWeak = true
} or
MkPasswordHashingAlgorithm(string name, boolean isWeak) {
isStrongPasswordHashingAlgorithm(name) and isWeak = false
or
isWeakPasswordHashingAlgorithm(name) and isWeak = true
}

/**
* A cryptographic algorithm.
*/
abstract class CryptographicAlgorithm extends TCryptographicAlgorithm {
/** Gets a textual representation of this element. */
string toString() { result = getName() }

/**
* Gets the normalized name of this algorithm (upper-case, no spaces, dashes or underscores).
*/
abstract string getName();

/**
* Holds if the name of this algorithm matches `name` modulo case,
* white space, dashes, and underscores.
*/
bindingset[name]
predicate matchesName(string name) {
name.toUpperCase().regexpReplaceAll("[-_ ]", "") = getName()
}

/**
* Holds if this algorithm is weak.
*/
abstract predicate isWeak();
}
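// The normalization used by `matchesName` corresponds to the following Python
// sketch (a re-implementation for illustration only):
//
// ```python
// import re
//
// def normalize(name):
//     # upper-case, then strip spaces, dashes and underscores
//     return re.sub(r"[-_ ]", "", name.upper())
//
// assert normalize("sha-256") == "SHA256"
// assert normalize("Triple DES") == "TRIPLEDES"
// ```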

/**
* A hashing algorithm such as `MD5` or `SHA512`.
*/
class HashingAlgorithm extends MkHashingAlgorithm, CryptographicAlgorithm {
string name;
boolean isWeak;

HashingAlgorithm() { this = MkHashingAlgorithm(name, isWeak) }

override string getName() { result = name }

override predicate isWeak() { isWeak = true }
}

/**
* An encryption algorithm such as `DES` or `AES512`.
*/
class EncryptionAlgorithm extends MkEncryptionAlgorithm, CryptographicAlgorithm {
string name;
boolean isWeak;

EncryptionAlgorithm() { this = MkEncryptionAlgorithm(name, isWeak) }

override string getName() { result = name }

override predicate isWeak() { isWeak = true }
}

/**
* A password hashing algorithm such as `PBKDF2` or `SCRYPT`.
*/
class PasswordHashingAlgorithm extends MkPasswordHashingAlgorithm, CryptographicAlgorithm {
string name;
boolean isWeak;

PasswordHashingAlgorithm() { this = MkPasswordHashingAlgorithm(name, isWeak) }

override string getName() { result = name }

override predicate isWeak() { isWeak = true }
}
@@ -0,0 +1,265 @@
/**
* Provides an extension point for modeling sensitive data, such as secrets, certificates, or passwords.
* Sensitive data can be interesting to use as data-flow sources in security queries.
*/

private import python
private import semmle.python.dataflow.new.DataFlow
// Need to import `semmle.python.Frameworks` since frameworks can extend `SensitiveDataSource::Range`
private import semmle.python.Frameworks
private import semmle.python.security.internal.SensitiveDataHeuristics as SensitiveDataHeuristics

// We export these explicitly, so we don't also export the `HeuristicNames` module.
class SensitiveDataClassification = SensitiveDataHeuristics::SensitiveDataClassification;

module SensitiveDataClassification = SensitiveDataHeuristics::SensitiveDataClassification;

/**
* A data flow source of sensitive data, such as secrets, certificates, or passwords.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `SensitiveDataSource::Range` instead.
*/
class SensitiveDataSource extends DataFlow::Node {
SensitiveDataSource::Range range;

SensitiveDataSource() { this = range }

/**
* Gets the classification of the sensitive data.
*/
SensitiveDataClassification getClassification() { result = range.getClassification() }
}

/** Provides a class for modeling new sources of sensitive data, such as secrets, certificates, or passwords. */
module SensitiveDataSource {
/**
* A data flow source of sensitive data, such as secrets, certificates, or passwords.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `SensitiveDataSource` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets the classification of the sensitive data.
*/
abstract SensitiveDataClassification getClassification();
}
}

/** Actual sensitive data modeling */
private module SensitiveDataModeling {
private import SensitiveDataHeuristics::HeuristicNames

/**
* Gets a reference to a function that is considered to be a sensitive source of
* `classification`.
*/
private DataFlow::LocalSourceNode sensitiveFunction(
DataFlow::TypeTracker t, SensitiveDataClassification classification
) {
t.start() and
exists(Function f |
nameIndicatesSensitiveData(f.getName(), classification) and
result.asExpr() = f.getDefinition()
)
or
exists(DataFlow::TypeTracker t2 | result = sensitiveFunction(t2, classification).track(t2, t))
}

/**
* Gets a reference to a function that is considered to be a sensitive source of
* `classification`.
*/
DataFlow::Node sensitiveFunction(SensitiveDataClassification classification) {
sensitiveFunction(DataFlow::TypeTracker::end(), classification).flowsTo(result)
}

/**
* Gets a reference to a string constant that, if used as the key in a lookup,
* indicates the presence of sensitive data with `classification`.
*/
private DataFlow::LocalSourceNode sensitiveLookupStringConst(
DataFlow::TypeTracker t, SensitiveDataClassification classification
) {
t.start() and
nameIndicatesSensitiveData(result.asExpr().(StrConst).getText(), classification)
or
exists(DataFlow::TypeTracker t2 |
result = sensitiveLookupStringConst(t2, classification).track(t2, t)
)
}

/**
* Gets a reference to a string constant that, if used as the key in a lookup,
* indicates the presence of sensitive data with `classification`.
*
* Also see `extraStepForCalls`.
*/
DataFlow::Node sensitiveLookupStringConst(SensitiveDataClassification classification) {
sensitiveLookupStringConst(DataFlow::TypeTracker::end(), classification).flowsTo(result)
}

/** A function call that is considered a source of sensitive data. */
class SensitiveFunctionCall extends SensitiveDataSource::Range, DataFlow::CallCfgNode {
SensitiveDataClassification classification;

SensitiveFunctionCall() {
this.getFunction() = sensitiveFunction(classification)
or
// to cover functions that we don't have the definition for, and where the
// reference to the function has not already been marked as being sensitive
nameIndicatesSensitiveData(this.getFunction().asCfgNode().(NameNode).getId(), classification)
}

override SensitiveDataClassification getClassification() { result = classification }
}
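// For example (all names are hypothetical), both calls below would be treated as
// `SensitiveFunctionCall` sources with a password classification: the first because
// the called function's definition has a sensitive name, the second because the
// call goes directly to a sensitively named identifier with no known definition:
//
// ```python
// def get_password():
//     return vault.read("db")   # `vault` is an assumed helper
//
// pw = get_password()
// pw2 = fetch_password()        # no definition available; the name alone suffices
// ```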

/**
* Tracks any modeled source of sensitive data (with any classification),
* to limit the scope of `extraStepForCalls`. See its QLDoc for more context.
*/
private DataFlow::LocalSourceNode possibleSensitiveCallable(DataFlow::TypeTracker t) {
t.start() and
result instanceof SensitiveDataSource
or
exists(DataFlow::TypeTracker t2 | result = possibleSensitiveCallable(t2).track(t2, t))
}

/**
* Tracks any modeled source of sensitive data (with any classification),
* to limit the scope of `extraStepForCalls`. See its QLDoc for more context.
*/
private DataFlow::Node possibleSensitiveCallable() {
possibleSensitiveCallable(DataFlow::TypeTracker::end()).flowsTo(result)
}

/**
* Holds if the step from `nodeFrom` to `nodeTo` should be considered a
* taint-flow step for sensitive data, to ensure calls are handled correctly.
*
* To handle calls properly, while preserving a good source for path explanations,
* you need to include this predicate as an additional taint step in your taint-tracking
* configurations.
*
* The core problem can be illustrated by the example below. If we consider the
* `print` call a sink, what path and what source do we want to show? My initial approach
* would be to use type-tracking to propagate from the `not_found.get_passwd` attribute
* lookup, to the use of `non_sensitive_name`, and then create a new `SensitiveDataSource::Range`
* like `SensitiveFunctionCall`. Although that seems likely to work, it will also end up
* with a non-optimal path, which starts at _bad source_, and therefore doesn't show
* how we figured out that `non_sensitive_name`
* could be a function that returns a password (and in cases where there are many calls to
* `my_func` it will be annoying for someone to figure this out manually).
*
* By including this additional taint-step in the taint-tracking configuration, it's possible
* to get a path explanation going from _good source_ to the sink.
*
* ```python
* def my_func(non_sensitive_name):
*     x = non_sensitive_name() # <-- bad source
*     print(x) # <-- sink
*
* import not_found
* f = not_found.get_passwd # <-- good source
* my_func(f)
* ```
*/
predicate extraStepForCalls(DataFlow::Node nodeFrom, DataFlow::CallCfgNode nodeTo) {
// However, we do still use the type-tracking approach to limit the size of this
// predicate.
nodeTo.getFunction() = nodeFrom and
nodeFrom = possibleSensitiveCallable()
}

/**
* Any kind of variable assignment (also including with/for) where the name indicates
* it contains sensitive data.
*
* Note: We _could_ make any access to a variable with a sensitive name a source of
* sensitive data, but to make path explanations in data-flow/taint-tracking good,
* we don't want that, since it works against allowing users to understand the flow
* in the program (which is the whole point).
*
* Note: To make data-flow/taint-tracking work, the expression that is _assigned_ to
* the variable is marked as the source (as compared to marking the variable as the
* source).
*/
class SensitiveVariableAssignment extends SensitiveDataSource::Range {
SensitiveDataClassification classification;

SensitiveVariableAssignment() {
exists(DefinitionNode def |
nameIndicatesSensitiveData(def.(NameNode).getId(), classification) and
(
this.asCfgNode() = def.getValue()
or
this.asCfgNode() = def.getValue().(ForNode).getSequence()
) and
not this.asExpr() instanceof FunctionExpr and
not this.asExpr() instanceof ClassExpr
)
or
exists(With with |
nameIndicatesSensitiveData(with.getOptionalVars().(Name).getId(), classification) and
this.asExpr() = with.getContextExpr()
)
}

override SensitiveDataClassification getClassification() { result = classification }
}
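// Illustrative examples (all helper names are hypothetical); in each case it is the
// right-hand side or context expression that is marked as the source:
//
// ```python
// password = load_config()["db"]        # plain assignment
//
// for token in fetch_tokens():          # for-loop: the sequence is the source
//     use(token)
//
// with open_keyfile() as private_key:   # with: the context expression is the source
//     sign(private_key)
// ```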

/** An attribute access that is considered a source of sensitive data. */
class SensitiveAttributeAccess extends SensitiveDataSource::Range {
SensitiveDataClassification classification;

SensitiveAttributeAccess() {
// Things like `foo.<sensitive-name>` or `from <module> import <sensitive-name>`
// I considered excluding any `from ... import something_sensitive`, but then realized that
// we should flag up `from ... import password as ...` as a password
nameIndicatesSensitiveData(this.(DataFlow::AttrRead).getAttributeName(), classification)
or
// Things like `getattr(foo, <reference-to-string>)`
this.(DataFlow::AttrRead).getAttributeNameExpr() = sensitiveLookupStringConst(classification)
}

override SensitiveDataClassification getClassification() { result = classification }
}

/** A subscript, where the key indicates the result will be sensitive data. */
class SensitiveSubscript extends SensitiveDataSource::Range {
SensitiveDataClassification classification;

SensitiveSubscript() {
this.asCfgNode().(SubscriptNode).getIndex() =
sensitiveLookupStringConst(classification).asCfgNode()
}

override SensitiveDataClassification getClassification() { result = classification }
}

/** A call to `get` on an object, where the key indicates the result will be sensitive data. */
class SensitiveGetCall extends SensitiveDataSource::Range, DataFlow::CallCfgNode {
SensitiveDataClassification classification;

SensitiveGetCall() {
this.getFunction().asCfgNode().(AttrNode).getName() = "get" and
this.getArg(0) = sensitiveLookupStringConst(classification)
}

override SensitiveDataClassification getClassification() { result = classification }
}
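// Typical shapes matched by the three classes above (illustrative only; `settings`
// and `config` are assumed application objects):
//
// ```python
// import os
//
// secret = settings.api_key               # SensitiveAttributeAccess
// secret = getattr(settings, "api_key")   # SensitiveAttributeAccess via getattr
// secret = config["password"]             # SensitiveSubscript
// secret = os.environ.get("DB_PASSWORD")  # SensitiveGetCall
// ```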

/** A parameter where the name indicates it will receive sensitive data. */
class SensitiveParameter extends SensitiveDataSource::Range, DataFlow::ParameterNode {
SensitiveDataClassification classification;

SensitiveParameter() {
nameIndicatesSensitiveData(this.getParameter().getName(), classification)
}

override SensitiveDataClassification getClassification() { result = classification }
}
}

predicate sensitiveDataExtraStepForCalls = SensitiveDataModeling::extraStepForCalls/2;
@@ -1,173 +1,16 @@
|
||||
/** Step Summaries and Type Tracking */
|
||||
/**
|
||||
* This file acts as a wrapper for `internal.TypeTracker`, exposing some of the functionality with
|
||||
* names that are more appropriate for Python.
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import internal.DataFlowPublic
|
||||
private import internal.DataFlowPrivate
|
||||
private import internal.TypeTracker as Internal
|
||||
|
||||
/** Any string that may appear as the name of an attribute or access path. */
|
||||
class AttributeName extends string {
|
||||
AttributeName() { this = any(AttrRef a).getAttributeName() }
|
||||
}
|
||||
class AttributeName = Internal::ContentName;
|
||||
|
||||
/** Either an attribute name, or the empty string (representing no attribute). */
|
||||
class OptionalAttributeName extends string {
|
||||
OptionalAttributeName() { this instanceof AttributeName or this = "" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A description of a step on an inter-procedural data flow path.
|
||||
*/
|
||||
private newtype TStepSummary =
|
||||
LevelStep() or
|
||||
CallStep() or
|
||||
ReturnStep() or
|
||||
StoreStep(AttributeName attr) or
|
||||
LoadStep(AttributeName attr)
|
||||
|
||||
/**
|
||||
* INTERNAL: Use `TypeTracker` or `TypeBackTracker` instead.
|
||||
*
|
||||
* A description of a step on an inter-procedural data flow path.
|
||||
*/
|
||||
class StepSummary extends TStepSummary {
|
||||
/** Gets a textual representation of this step summary. */
|
||||
string toString() {
|
||||
this instanceof LevelStep and result = "level"
|
||||
or
|
||||
this instanceof CallStep and result = "call"
|
||||
or
|
||||
this instanceof ReturnStep and result = "return"
|
||||
or
|
||||
exists(string attr | this = StoreStep(attr) | result = "store " + attr)
|
||||
or
|
||||
exists(string attr | this = LoadStep(attr) | result = "load " + attr)
|
||||
}
|
||||
}
|
||||
|
||||
/** Provides predicates for updating step summaries (`StepSummary`s). */
|
||||
module StepSummary {
|
||||
/**
|
||||
* Gets the summary that corresponds to having taken a forwards
|
||||
* heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
|
||||
*/
|
||||
cached
|
||||
predicate step(LocalSourceNode nodeFrom, Node nodeTo, StepSummary summary) {
|
||||
exists(Node mid | typePreservingStep*(nodeFrom, mid) and smallstep(mid, nodeTo, summary))
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the summary that corresponds to having taken a forwards
|
||||
* local, heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
|
||||
*
|
||||
* Unlike `StepSummary::step`, this predicate does not compress
|
||||
* type-preserving steps.
|
||||
*/
|
||||
predicate smallstep(Node nodeFrom, Node nodeTo, StepSummary summary) {
|
||||
typePreservingStep(nodeFrom, nodeTo) and
|
||||
summary = LevelStep()
|
||||
or
|
||||
callStep(nodeFrom, nodeTo) and summary = CallStep()
|
||||
or
|
||||
returnStep(nodeFrom, nodeTo) and
|
||||
summary = ReturnStep()
|
||||
or
|
||||
exists(string attr |
|
||||
basicStoreStep(nodeFrom, nodeTo, attr) and
|
||||
summary = StoreStep(attr)
|
||||
or
|
||||
basicLoadStep(nodeFrom, nodeTo, attr) and summary = LoadStep(attr)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/** Holds if it's reasonable to expect the data flow step from `nodeFrom` to `nodeTo` to preserve types. */
|
||||
private predicate typePreservingStep(Node nodeFrom, Node nodeTo) {
|
||||
simpleLocalFlowStep(nodeFrom, nodeTo) or
|
||||
jumpStep(nodeFrom, nodeTo)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a callable for the call where `nodeFrom` is used as the `i`'th argument.
|
||||
*
|
||||
* Helper predicate to avoid bad join order experienced in `callStep`.
|
||||
* This happened when `isParameterOf` was joined _before_ `getCallable`.
|
||||
*/
|
||||
pragma[nomagic]
|
||||
private DataFlowCallable getCallableForArgument(ArgumentNode nodeFrom, int i) {
|
||||
exists(DataFlowCall call |
|
||||
nodeFrom.argumentOf(call, i) and
|
||||
result = call.getCallable()
|
||||
)
|
||||
}
|
||||
|
||||
/** Holds if `nodeFrom` steps to `nodeTo` by being passed as a parameter in a call. */
|
||||
predicate callStep(ArgumentNode nodeFrom, ParameterNode nodeTo) {
|
||||
// TODO: Support special methods?
|
||||
exists(DataFlowCallable callable, int i |
|
||||
callable = getCallableForArgument(nodeFrom, i) and
|
||||
nodeTo.isParameterOf(callable, i)
|
||||
)
|
||||
}
|
||||
|
||||
/** Holds if `nodeFrom` steps to `nodeTo` by being returned from a call. */
|
||||
predicate returnStep(ReturnNode nodeFrom, Node nodeTo) {
|
||||
exists(DataFlowCall call |
|
||||
nodeFrom.getEnclosingCallable() = call.getCallable() and nodeTo.asCfgNode() = call.getNode()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `nodeFrom` is being written to the `attr` attribute of the object in `nodeTo`.
|
||||
*
|
||||
* Note that the choice of `nodeTo` does not have to make sense "chronologically".
|
||||
* All we care about is whether the `attr` attribute of `nodeTo` can have a specific type,
|
||||
* and the assumption is that if a specific type appears here, then any access of that
|
||||
* particular attribute can yield something of that particular type.
|
||||
*
|
||||
* Thus, in an example such as
|
||||
*
|
||||
* ```python
|
||||
* def foo(y):
|
||||
* x = Foo()
|
||||
* bar(x)
|
||||
* x.attr = y
|
||||
* baz(x)
|
||||
*
|
||||
* def bar(x):
|
||||
* z = x.attr
|
||||
* ```
|
||||
* for the attribute write `x.attr = y`, we will have `attr` being the literal string `"attr"`,
|
||||
* `nodeFrom` will be `y`, and `nodeTo` will be the object `Foo()` created on the first line of the
|
||||
* function. This means we will track the fact that `x.attr` can have the type of `y` into the
|
||||
* assignment to `z` inside `bar`, even though this attribute write happens _after_ `bar` is called.
|
||||
*/
|
||||
predicate basicStoreStep(Node nodeFrom, LocalSourceNode nodeTo, string attr) {
|
||||
exists(AttrWrite a |
|
||||
a.mayHaveAttributeName(attr) and
|
||||
nodeFrom = a.getValue() and
|
||||
nodeTo.flowsTo(a.getObject())
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `nodeTo` is the result of accessing the `attr` attribute of `nodeFrom`.
|
||||
*/
|
||||
predicate basicLoadStep(Node nodeFrom, Node nodeTo, string attr) {
|
||||
exists(AttrRead a |
|
||||
a.mayHaveAttributeName(attr) and
|
||||
nodeFrom = a.getObject() and
|
||||
nodeTo = a
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* A utility class that is equivalent to `boolean` but does not require type joining.
|
||||
*/
|
||||
private class Boolean extends boolean {
|
||||
Boolean() { this = true or this = false }
|
||||
}
|
||||
|
||||
private newtype TTypeTracker = MkTypeTracker(Boolean hasCall, OptionalAttributeName attr)
|
||||
class OptionalAttributeName = Internal::OptionalContentName;
|
||||
|
||||
/**
|
||||
* Summary of the steps needed to track a value to a given dataflow node.
|
||||
@@ -179,8 +22,8 @@ private newtype TTypeTracker = MkTypeTracker(Boolean hasCall, OptionalAttributeN
|
||||
*
|
||||
* It is recommended that all uses of this type are written in the following form,
|
||||
* for tracking some type `myType`:
|
||||
* ```
|
||||
* private DataFlow::LocalSourceNode myType(DataFlow::TypeTracker t) {
|
||||
* ```ql
|
||||
* DataFlow::LocalSourceNode myType(DataFlow::TypeTracker t) {
|
||||
* t.start() and
|
||||
* result = < source of myType >
|
||||
* or
|
||||
@@ -189,279 +32,34 @@ private newtype TTypeTracker = MkTypeTracker(Boolean hasCall, OptionalAttributeN
|
||||
* )
|
||||
* }
|
||||
*
|
||||
* DataFlow::Node myType() { myType(DataFlow::TypeTracker::end()).flowsTo(result) }
|
||||
* DataFlow::LocalSourceNode myType() { myType(DataFlow::TypeTracker::end()) }
|
||||
* ```
|
||||
*
|
||||
* Instead of `result = myType(t2).track(t2, t)`, you can also use the equivalent
|
||||
* `t = t2.step(myType(t2), result)`. If you additionally want to track individual
|
||||
* intra-procedural steps, use `t = t2.smallstep(myCallback(t2), result)`.
|
||||
*/
|
||||
class TypeTracker extends TTypeTracker {
|
||||
Boolean hasCall;
|
||||
OptionalAttributeName attr;
|
||||
|
||||
TypeTracker() { this = MkTypeTracker(hasCall, attr) }
|
||||
|
||||
/** Gets the summary resulting from appending `step` to this type-tracking summary. */
|
||||
cached
|
||||
TypeTracker append(StepSummary step) {
|
||||
step = LevelStep() and result = this
|
||||
or
|
||||
step = CallStep() and result = MkTypeTracker(true, attr)
|
||||
or
|
||||
step = ReturnStep() and hasCall = false and result = this
|
||||
or
|
||||
step = LoadStep(attr) and result = MkTypeTracker(hasCall, "")
|
||||
or
|
||||
exists(string p | step = StoreStep(p) and attr = "" and result = MkTypeTracker(hasCall, p))
|
||||
}
|
||||
|
||||
/** Gets a textual representation of this summary. */
|
||||
string toString() {
|
||||
exists(string withCall, string withAttr |
|
||||
(if hasCall = true then withCall = "with" else withCall = "without") and
|
||||
(if attr != "" then withAttr = " with attribute " + attr else withAttr = "") and
|
||||
result = "type tracker " + withCall + " call steps" + withAttr
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if this is the starting point of type tracking.
|
||||
*/
|
||||
predicate start() { hasCall = false and attr = "" }
|
||||
|
||||
class TypeTracker extends Internal::TypeTracker {
|
||||
/**
|
||||
* Holds if this is the starting point of type tracking, and the value starts in the attribute named `attrName`.
|
||||
* The type tracking only ends after the attribute has been loaded.
|
||||
*/
|
||||
predicate startInAttr(AttributeName attrName) { hasCall = false and attr = attrName }
|
||||
|
||||
/**
|
||||
* Holds if this is the starting point of type tracking
|
||||
* when tracking a parameter into a call, but not out of it.
|
||||
*/
|
||||
predicate call() { hasCall = true and attr = "" }
|
||||
|
||||
/**
|
||||
* Holds if this is the end point of type tracking.
|
||||
*/
|
||||
predicate end() { attr = "" }
|
||||
|
||||
/**
|
||||
* INTERNAL. DO NOT USE.
|
||||
*
|
||||
* Holds if this type has been tracked into a call.
|
||||
*/
|
||||
boolean hasCall() { result = hasCall }
|
||||
predicate startInAttr(string attrName) { this.startInContent(attrName) }
|
||||
|
||||
/**
|
||||
* INTERNAL. DO NOT USE.
|
||||
*
|
||||
* Gets the attribute associated with this type tracker.
|
||||
*/
|
||||
string getAttr() { result = attr }
|
||||
|
||||
/**
|
||||
* Gets a type tracker that starts where this one has left off to allow continued
|
||||
* tracking.
|
||||
*
|
||||
* This predicate is only defined if the type has not been tracked into an attribute.
|
||||
*/
|
||||
TypeTracker continue() { attr = "" and result = this }
|
||||
|
||||
/**
|
||||
* Gets the summary that corresponds to having taken a forwards
|
||||
* heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
|
||||
*/
|
||||
pragma[inline]
|
||||
TypeTracker step(LocalSourceNode nodeFrom, Node nodeTo) {
|
||||
exists(StepSummary summary |
|
||||
StepSummary::step(nodeFrom, nodeTo, summary) and
|
||||
result = this.append(summary)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the summary that corresponds to having taken a forwards
|
||||
* local, heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
|
||||
*
|
||||
* Unlike `TypeTracker::step`, this predicate exposes all edges
|
||||
* in the flow graph, and not just the edges between `Node`s.
|
||||
* It may therefore be less performant.
|
||||
*
|
||||
* Type tracking predicates using small steps typically take the following form:
|
||||
* ```ql
|
||||
* DataFlow::Node myType(DataFlow::TypeTracker t) {
|
||||
* t.start() and
|
||||
* result = < source of myType >
|
||||
* or
|
||||
* exists (DataFlow::TypeTracker t2 |
|
||||
* t = t2.smallstep(myType(t2), result)
|
||||
* )
|
||||
* }
|
||||
*
|
||||
* DataFlow::Node myType() {
|
||||
* result = myType(DataFlow::TypeTracker::end())
|
||||
* }
|
||||
* ```
|
||||
*/
|
||||
pragma[inline]
|
||||
TypeTracker smallstep(Node nodeFrom, Node nodeTo) {
|
||||
exists(StepSummary summary |
|
||||
StepSummary::smallstep(nodeFrom, nodeTo, summary) and
|
||||
result = this.append(summary)
|
||||
)
|
||||
or
|
||||
typePreservingStep(nodeFrom, nodeTo) and
|
||||
result = this
|
||||
}
|
||||
string getAttr() { result = this.getContent() }
|
||||
}
|
||||
|
||||
/** Provides predicates for implementing custom `TypeTracker`s. */
|
||||
module TypeTracker {
|
||||
/**
|
||||
* Gets a valid end point of type tracking.
|
||||
*/
|
||||
TypeTracker end() { result.end() }
|
||||
}
|
||||
module TypeTracker = Internal::TypeTracker;
|
||||
|
||||
private newtype TTypeBackTracker = MkTypeBackTracker(Boolean hasReturn, OptionalAttributeName attr)
|
||||
class StepSummary = Internal::StepSummary;
|
||||
|
||||
/**
|
||||
* Summary of the steps needed to back-track a use of a value to a given dataflow node.
|
||||
*
|
||||
* This can for example be used to track callbacks that are passed to a certain API,
|
||||
* so we can model specific parameters of that callback as having a certain type.
|
||||
*
|
||||
* Note that type back-tracking does not provide a source/sink relation, that is,
|
||||
* it may determine that a node will be used in an API call somewhere, but it won't
|
||||
* determine exactly where that use was, or the path that led to the use.
|
||||
*
|
||||
* It is recommended that all uses of this type are written in the following form,
|
||||
* for back-tracking some callback type `myCallback`:
|
||||
*
|
||||
* ```
|
||||
* private DataFlow::LocalSourceNode myCallback(DataFlow::TypeBackTracker t) {
|
||||
* t.start() and
|
||||
* result = (< some API call >).getArgument(< n >).getALocalSource()
|
||||
* or
|
||||
* exists (DataFlow::TypeBackTracker t2 |
|
||||
* result = myCallback(t2).backtrack(t2, t)
|
||||
* )
|
||||
* }
|
||||
*
|
||||
* DataFlow::LocalSourceNode myCallback() { result = myCallback(DataFlow::TypeBackTracker::end()) }
|
||||
* ```
|
||||
*
|
||||
* Instead of `result = myCallback(t2).backtrack(t2, t)`, you can also use the equivalent
|
||||
* `t2 = t.step(result, myCallback(t2))`. If you additionally want to track individual
|
||||
* intra-procedural steps, use `t2 = t.smallstep(result, myCallback(t2))`.
|
||||
*/
|
||||
class TypeBackTracker extends TTypeBackTracker {
|
||||
Boolean hasReturn;
|
||||
string attr;
|
||||
module StepSummary = Internal::StepSummary;
|
||||
|
||||
TypeBackTracker() { this = MkTypeBackTracker(hasReturn, attr) }
|
||||
class TypeBackTracker = Internal::TypeBackTracker;
|
||||
|
||||
/** Gets the summary resulting from prepending `step` to this type-tracking summary. */
|
||||
TypeBackTracker prepend(StepSummary step) {
|
||||
step = LevelStep() and result = this
|
||||
or
|
||||
step = CallStep() and hasReturn = false and result = this
|
||||
or
|
||||
step = ReturnStep() and result = MkTypeBackTracker(true, attr)
|
||||
or
|
||||
exists(string p | step = LoadStep(p) and attr = "" and result = MkTypeBackTracker(hasReturn, p))
|
||||
or
|
||||
step = StoreStep(attr) and result = MkTypeBackTracker(hasReturn, "")
|
||||
}
|
||||
|
||||
/** Gets a textual representation of this summary. */
|
||||
string toString() {
|
||||
exists(string withReturn, string withAttr |
|
||||
(if hasReturn = true then withReturn = "with" else withReturn = "without") and
|
||||
(if attr != "" then withAttr = " with attribute " + attr else withAttr = "") and
|
||||
result = "type back-tracker " + withReturn + " return steps" + withAttr
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if this is the starting point of type tracking.
|
||||
*/
|
||||
predicate start() { hasReturn = false and attr = "" }
|
||||
|
||||
/**
|
||||
* Holds if this is the end point of type tracking.
|
||||
*/
|
||||
predicate end() { attr = "" }
|
||||
|
||||
/**
|
||||
* INTERNAL. DO NOT USE.
|
||||
*
|
||||
* Holds if this type has been back-tracked into a call through return edge.
|
||||
*/
|
||||
boolean hasReturn() { result = hasReturn }
|
||||
|
||||
/**
|
||||
* Gets a type tracker that starts where this one has left off to allow continued
|
||||
* tracking.
|
||||
*
|
||||
* This predicate is only defined if the type has not been tracked into an attribute.
|
||||
*/
|
||||
TypeBackTracker continue() { attr = "" and result = this }
|
||||
|
||||
/**
|
||||
* Gets the summary that corresponds to having taken a backwards
|
||||
* heap and/or inter-procedural step from `nodeTo` to `nodeFrom`.
|
||||
*/
|
||||
pragma[inline]
|
||||
TypeBackTracker step(LocalSourceNode nodeFrom, LocalSourceNode nodeTo) {
|
||||
exists(StepSummary summary |
|
||||
StepSummary::step(nodeFrom, nodeTo, summary) and
|
||||
this = result.prepend(summary)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the summary that corresponds to having taken a backwards
|
||||
* local, heap and/or inter-procedural step from `nodeTo` to `nodeFrom`.
|
||||
*
|
||||
* Unlike `TypeBackTracker::step`, this predicate exposes all edges
|
||||
* in the flow graph, and not just the edges between
|
||||
* `LocalSourceNode`s. It may therefore be less performant.
|
||||
*
|
||||
* Type tracking predicates using small steps typically take the following form:
|
||||
* ```ql
|
||||
* DataFlow::Node myType(DataFlow::TypeBackTracker t) {
|
||||
* t.start() and
|
||||
* result = < some API call >.getArgument(< n >)
|
||||
* or
|
||||
* exists (DataFlow::TypeBackTracker t2 |
|
||||
* t = t2.smallstep(result, myType(t2))
|
||||
* )
|
||||
* }
|
||||
*
|
||||
* DataFlow::Node myType() {
|
||||
* result = myType(DataFlow::TypeBackTracker::end())
|
||||
* }
|
||||
* ```
|
||||
*/
|
||||
pragma[inline]
|
||||
TypeBackTracker smallstep(Node nodeFrom, Node nodeTo) {
|
||||
exists(StepSummary summary |
|
||||
StepSummary::smallstep(nodeFrom, nodeTo, summary) and
|
||||
this = result.prepend(summary)
|
||||
)
|
||||
or
|
||||
typePreservingStep(nodeFrom, nodeTo) and
|
||||
this = result
|
||||
}
|
||||
}
|
||||
|
||||
/** Provides predicates for implementing custom `TypeBackTracker`s. */
|
||||
module TypeBackTracker {
|
||||
/**
|
||||
* Gets a valid end point of type back-tracking.
|
||||
*/
|
||||
TypeBackTracker end() { result.end() }
|
||||
}
|
||||
module TypeBackTracker = Internal::TypeBackTracker;
|
||||
|
||||
File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -31,26 +31,26 @@ predicate accessPathCostLimits(int apLimit, int tupleLimit) {
|
||||
* currently excludes read-steps, store-steps, and flow-through.
|
||||
*
|
||||
* The analysis uses non-linear recursion: When computing a flow path in or out
|
||||
* of a call, we use the results of the analysis recursively to resolve lamba
|
||||
* of a call, we use the results of the analysis recursively to resolve lambda
|
||||
* calls. For this reason, we cannot reuse the code from `DataFlowImpl.qll` directly.
|
||||
*/
|
||||
private module LambdaFlow {
|
||||
private predicate viableParamNonLambda(DataFlowCall call, int i, ParameterNode p) {
|
||||
private predicate viableParamNonLambda(DataFlowCall call, int i, ParamNode p) {
|
||||
p.isParameterOf(viableCallable(call), i)
|
||||
}
|
||||
|
||||
private predicate viableParamLambda(DataFlowCall call, int i, ParameterNode p) {
|
||||
private predicate viableParamLambda(DataFlowCall call, int i, ParamNode p) {
|
||||
p.isParameterOf(viableCallableLambda(call, _), i)
|
||||
}
|
||||
|
||||
private predicate viableParamArgNonLambda(DataFlowCall call, ParameterNode p, ArgumentNode arg) {
|
||||
private predicate viableParamArgNonLambda(DataFlowCall call, ParamNode p, ArgNode arg) {
|
||||
exists(int i |
|
||||
viableParamNonLambda(call, i, p) and
|
||||
arg.argumentOf(call, i)
|
||||
)
|
||||
}
|
||||
|
||||
private predicate viableParamArgLambda(DataFlowCall call, ParameterNode p, ArgumentNode arg) {
|
||||
private predicate viableParamArgLambda(DataFlowCall call, ParamNode p, ArgNode arg) {
|
||||
exists(int i |
|
||||
viableParamLambda(call, i, p) and
|
||||
arg.argumentOf(call, i)
|
||||
@@ -118,8 +118,8 @@ private module LambdaFlow {
|
||||
boolean toJump, DataFlowCallOption lastCall
|
||||
) {
|
||||
revLambdaFlow0(lambdaCall, kind, node, t, toReturn, toJump, lastCall) and
|
||||
if node instanceof CastNode or node instanceof ArgumentNode or node instanceof ReturnNode
|
||||
then compatibleTypes(t, getNodeType(node))
|
||||
if castNode(node) or node instanceof ArgNode or node instanceof ReturnNode
|
||||
then compatibleTypes(t, getNodeDataFlowType(node))
|
||||
else any()
|
||||
}
|
||||
|
||||
@@ -129,7 +129,7 @@ private module LambdaFlow {
|
||||
boolean toJump, DataFlowCallOption lastCall
|
||||
) {
|
||||
lambdaCall(lambdaCall, kind, node) and
|
||||
t = getNodeType(node) and
|
||||
t = getNodeDataFlowType(node) and
|
||||
toReturn = false and
|
||||
toJump = false and
|
||||
lastCall = TDataFlowCallNone()
|
||||
@@ -146,7 +146,7 @@ private module LambdaFlow {
|
||||
getNodeEnclosingCallable(node) = getNodeEnclosingCallable(mid)
|
||||
|
|
||||
preservesValue = false and
|
||||
t = getNodeType(node)
|
||||
t = getNodeDataFlowType(node)
|
||||
or
|
||||
preservesValue = true and
|
||||
t = t0
|
||||
@@ -160,7 +160,7 @@ private module LambdaFlow {
|
||||
toJump = true and
|
||||
lastCall = TDataFlowCallNone()
|
||||
|
|
||||
jumpStep(node, mid) and
|
||||
jumpStepCached(node, mid) and
|
||||
t = t0
|
||||
or
|
||||
exists(boolean preservesValue |
|
||||
@@ -168,7 +168,7 @@ private module LambdaFlow {
|
||||
getNodeEnclosingCallable(node) != getNodeEnclosingCallable(mid)
|
||||
|
|
||||
preservesValue = false and
|
||||
t = getNodeType(node)
|
||||
t = getNodeDataFlowType(node)
|
||||
or
|
||||
preservesValue = true and
|
||||
t = t0
|
||||
@@ -176,7 +176,7 @@ private module LambdaFlow {
|
||||
)
|
||||
or
|
||||
// flow into a callable
|
||||
exists(ParameterNode p, DataFlowCallOption lastCall0, DataFlowCall call |
|
||||
exists(ParamNode p, DataFlowCallOption lastCall0, DataFlowCall call |
|
||||
revLambdaFlowIn(lambdaCall, kind, p, t, toJump, lastCall0) and
|
||||
(
|
||||
if lastCall0 = TDataFlowCallNone() and toJump = false
|
||||
@@ -227,7 +227,7 @@ private module LambdaFlow {
|
||||
|
||||
pragma[nomagic]
|
||||
predicate revLambdaFlowIn(
|
||||
DataFlowCall lambdaCall, LambdaCallKind kind, ParameterNode p, DataFlowType t, boolean toJump,
|
||||
DataFlowCall lambdaCall, LambdaCallKind kind, ParamNode p, DataFlowType t, boolean toJump,
|
||||
DataFlowCallOption lastCall
|
||||
) {
|
||||
revLambdaFlow(lambdaCall, kind, p, t, false, toJump, lastCall)
|
||||
@@ -242,6 +242,89 @@ private DataFlowCallable viableCallableExt(DataFlowCall call) {
|
||||
|
||||
cached
|
||||
private module Cached {
|
||||
/**
|
||||
* If needed, call this predicate from `DataFlowImplSpecific.qll` in order to
|
||||
* force a stage-dependency on the `DataFlowImplCommon.qll` stage and thereby
|
||||
* collapsing the two stages.
|
||||
*/
|
||||
cached
|
||||
predicate forceCachingInSameStage() { any() }
|
||||
|
||||
cached
|
||||
predicate nodeEnclosingCallable(Node n, DataFlowCallable c) { c = n.getEnclosingCallable() }
|
||||
|
||||
cached
|
||||
predicate callEnclosingCallable(DataFlowCall call, DataFlowCallable c) {
|
||||
c = call.getEnclosingCallable()
|
||||
}
|
||||
|
||||
cached
|
||||
predicate nodeDataFlowType(Node n, DataFlowType t) { t = getNodeType(n) }
|
||||
|
||||
cached
|
||||
predicate jumpStepCached(Node node1, Node node2) { jumpStep(node1, node2) }
|
||||
|
||||
cached
|
||||
predicate clearsContentCached(Node n, Content c) { clearsContent(n, c) }
|
||||
|
||||
cached
|
||||
predicate isUnreachableInCallCached(Node n, DataFlowCall call) { isUnreachableInCall(n, call) }
|
||||
|
||||
cached
|
||||
predicate outNodeExt(Node n) {
|
||||
n instanceof OutNode
|
||||
or
|
||||
n.(PostUpdateNode).getPreUpdateNode() instanceof ArgNode
|
||||
}
|
||||
|
||||
cached
|
||||
predicate hiddenNode(Node n) { nodeIsHidden(n) }
|
||||
|
||||
cached
|
||||
OutNodeExt getAnOutNodeExt(DataFlowCall call, ReturnKindExt k) {
|
||||
result = getAnOutNode(call, k.(ValueReturnKind).getKind())
|
||||
or
|
||||
exists(ArgNode arg |
|
||||
result.(PostUpdateNode).getPreUpdateNode() = arg and
|
||||
arg.argumentOf(call, k.(ParamUpdateReturnKind).getPosition())
|
||||
)
|
||||
}
|
||||
|
||||
cached
|
||||
predicate returnNodeExt(Node n, ReturnKindExt k) {
|
||||
k = TValueReturn(n.(ReturnNode).getKind())
|
||||
or
|
||||
exists(ParamNode p, int pos |
|
||||
parameterValueFlowsToPreUpdate(p, n) and
|
||||
p.isParameterOf(_, pos) and
|
||||
k = TParamUpdate(pos)
|
||||
)
|
||||
}
|
||||
|
||||
cached
|
||||
predicate castNode(Node n) { n instanceof CastNode }
|
||||
|
||||
cached
|
||||
predicate castingNode(Node n) {
|
||||
castNode(n) or
|
||||
n instanceof ParamNode or
|
||||
n instanceof OutNodeExt or
|
||||
// For reads, `x.f`, we want to check that the tracked type after the read (which
|
||||
// is obtained by popping the head of the access path stack) is compatible with
|
||||
// the type of `x.f`.
|
||||
read(_, _, n)
|
||||
}
|
||||
|
||||
cached
|
||||
predicate parameterNode(Node n, DataFlowCallable c, int i) {
|
||||
n.(ParameterNode).isParameterOf(c, i)
|
||||
}
|
||||
|
||||
cached
|
||||
predicate argumentNode(Node n, DataFlowCall call, int pos) {
|
||||
n.(ArgumentNode).argumentOf(call, pos)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a viable target for the lambda call `call`.
|
||||
*
|
||||
@@ -261,7 +344,7 @@ private module Cached {
|
||||
* The instance parameter is considered to have index `-1`.
|
||||
*/
|
||||
pragma[nomagic]
|
||||
private predicate viableParam(DataFlowCall call, int i, ParameterNode p) {
|
||||
private predicate viableParam(DataFlowCall call, int i, ParamNode p) {
|
||||
p.isParameterOf(viableCallableExt(call), i)
|
||||
}
|
||||
|
||||
@@ -270,11 +353,11 @@ private module Cached {
|
||||
* dispatch into account.
|
||||
*/
|
||||
cached
|
||||
predicate viableParamArg(DataFlowCall call, ParameterNode p, ArgumentNode arg) {
|
||||
predicate viableParamArg(DataFlowCall call, ParamNode p, ArgNode arg) {
|
||||
exists(int i |
|
||||
viableParam(call, i, p) and
|
||||
arg.argumentOf(call, i) and
|
||||
compatibleTypes(getNodeType(arg), getNodeType(p))
|
||||
compatibleTypes(getNodeDataFlowType(arg), getNodeDataFlowType(p))
|
||||
)
|
||||
}
|
||||
|
||||
@@ -312,7 +395,7 @@ private module Cached {
|
||||
* `read` indicates whether it is contents of `p` that can flow to `node`.
|
||||
*/
|
||||
pragma[nomagic]
|
||||
private predicate parameterValueFlowCand(ParameterNode p, Node node, boolean read) {
|
||||
private predicate parameterValueFlowCand(ParamNode p, Node node, boolean read) {
|
||||
p = node and
|
||||
read = false
|
||||
or
|
||||
@@ -325,30 +408,30 @@ private module Cached {
|
||||
// read
|
||||
exists(Node mid |
|
||||
parameterValueFlowCand(p, mid, false) and
|
||||
readStep(mid, _, node) and
|
||||
read(mid, _, node) and
|
||||
read = true
|
||||
)
|
||||
or
|
||||
// flow through: no prior read
|
||||
exists(ArgumentNode arg |
|
||||
exists(ArgNode arg |
|
||||
parameterValueFlowArgCand(p, arg, false) and
|
||||
argumentValueFlowsThroughCand(arg, node, read)
|
||||
)
|
||||
or
|
||||
// flow through: no read inside method
|
||||
exists(ArgumentNode arg |
|
||||
exists(ArgNode arg |
|
||||
parameterValueFlowArgCand(p, arg, read) and
|
||||
argumentValueFlowsThroughCand(arg, node, false)
|
||||
)
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private predicate parameterValueFlowArgCand(ParameterNode p, ArgumentNode arg, boolean read) {
|
||||
private predicate parameterValueFlowArgCand(ParamNode p, ArgNode arg, boolean read) {
|
||||
parameterValueFlowCand(p, arg, read)
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
predicate parameterValueFlowsToPreUpdateCand(ParameterNode p, PostUpdateNode n) {
|
||||
predicate parameterValueFlowsToPreUpdateCand(ParamNode p, PostUpdateNode n) {
|
||||
parameterValueFlowCand(p, n.getPreUpdateNode(), false)
|
||||
}
|
||||
|
||||
@@ -360,7 +443,7 @@ private module Cached {
|
||||
* `read` indicates whether it is contents of `p` that can flow to the return
|
||||
* node.
|
||||
*/
|
||||
predicate parameterValueFlowReturnCand(ParameterNode p, ReturnKind kind, boolean read) {
|
||||
predicate parameterValueFlowReturnCand(ParamNode p, ReturnKind kind, boolean read) {
|
||||
exists(ReturnNode ret |
|
||||
parameterValueFlowCand(p, ret, read) and
|
||||
kind = ret.getKind()
|
||||
@@ -369,9 +452,9 @@ private module Cached {
|
||||
|
||||
pragma[nomagic]
|
||||
private predicate argumentValueFlowsThroughCand0(
|
||||
DataFlowCall call, ArgumentNode arg, ReturnKind kind, boolean read
|
||||
DataFlowCall call, ArgNode arg, ReturnKind kind, boolean read
|
||||
) {
|
||||
exists(ParameterNode param | viableParamArg(call, param, arg) |
|
||||
exists(ParamNode param | viableParamArg(call, param, arg) |
|
||||
parameterValueFlowReturnCand(param, kind, read)
|
||||
)
|
||||
}
|
||||
@@ -382,14 +465,14 @@ private module Cached {
|
||||
*
|
||||
* `read` indicates whether it is contents of `arg` that can flow to `out`.
|
||||
*/
|
||||
predicate argumentValueFlowsThroughCand(ArgumentNode arg, Node out, boolean read) {
|
||||
predicate argumentValueFlowsThroughCand(ArgNode arg, Node out, boolean read) {
|
||||
exists(DataFlowCall call, ReturnKind kind |
|
||||
argumentValueFlowsThroughCand0(call, arg, kind, read) and
|
||||
out = getAnOutNode(call, kind)
|
||||
)
|
||||
}
|
||||
|
||||
predicate cand(ParameterNode p, Node n) {
|
||||
predicate cand(ParamNode p, Node n) {
|
||||
parameterValueFlowCand(p, n, _) and
|
||||
(
|
||||
parameterValueFlowReturnCand(p, _, _)
|
||||
@@ -416,21 +499,21 @@ private module Cached {
|
||||
* If a read step was taken, then `read` captures the `Content`, the
|
||||
* container type, and the content type.
|
||||
*/
|
||||
predicate parameterValueFlow(ParameterNode p, Node node, ReadStepTypesOption read) {
|
||||
predicate parameterValueFlow(ParamNode p, Node node, ReadStepTypesOption read) {
|
||||
parameterValueFlow0(p, node, read) and
|
||||
if node instanceof CastingNode
|
||||
then
|
||||
// normal flow through
|
||||
read = TReadStepTypesNone() and
|
||||
compatibleTypes(getNodeType(p), getNodeType(node))
|
||||
compatibleTypes(getNodeDataFlowType(p), getNodeDataFlowType(node))
|
||||
or
|
||||
// getter
|
||||
compatibleTypes(read.getContentType(), getNodeType(node))
|
||||
compatibleTypes(read.getContentType(), getNodeDataFlowType(node))
|
||||
else any()
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private predicate parameterValueFlow0(ParameterNode p, Node node, ReadStepTypesOption read) {
|
||||
private predicate parameterValueFlow0(ParamNode p, Node node, ReadStepTypesOption read) {
|
||||
p = node and
|
||||
Cand::cand(p, _) and
|
||||
read = TReadStepTypesNone()
|
||||
@@ -447,7 +530,7 @@ private module Cached {
|
||||
readStepWithTypes(mid, read.getContainerType(), read.getContent(), node,
|
||||
read.getContentType()) and
|
||||
Cand::parameterValueFlowReturnCand(p, _, true) and
|
||||
compatibleTypes(getNodeType(p), read.getContainerType())
|
||||
compatibleTypes(getNodeDataFlowType(p), read.getContainerType())
|
||||
)
|
||||
or
|
||||
parameterValueFlow0_0(TReadStepTypesNone(), p, node, read)
|
||||
@@ -455,34 +538,32 @@ private module Cached {
|
||||
|
||||
pragma[nomagic]
|
||||
private predicate parameterValueFlow0_0(
|
||||
ReadStepTypesOption mustBeNone, ParameterNode p, Node node, ReadStepTypesOption read
|
||||
ReadStepTypesOption mustBeNone, ParamNode p, Node node, ReadStepTypesOption read
|
||||
) {
|
||||
// flow through: no prior read
|
||||
exists(ArgumentNode arg |
|
||||
exists(ArgNode arg |
|
||||
parameterValueFlowArg(p, arg, mustBeNone) and
|
||||
argumentValueFlowsThrough(arg, read, node)
|
||||
)
|
||||
or
|
||||
// flow through: no read inside method
|
||||
exists(ArgumentNode arg |
|
||||
exists(ArgNode arg |
|
||||
parameterValueFlowArg(p, arg, read) and
|
||||
argumentValueFlowsThrough(arg, mustBeNone, node)
|
||||
)
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private predicate parameterValueFlowArg(
|
||||
ParameterNode p, ArgumentNode arg, ReadStepTypesOption read
|
||||
) {
|
||||
private predicate parameterValueFlowArg(ParamNode p, ArgNode arg, ReadStepTypesOption read) {
|
||||
parameterValueFlow(p, arg, read) and
|
||||
Cand::argumentValueFlowsThroughCand(arg, _, _)
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private predicate argumentValueFlowsThrough0(
|
||||
DataFlowCall call, ArgumentNode arg, ReturnKind kind, ReadStepTypesOption read
|
||||
DataFlowCall call, ArgNode arg, ReturnKind kind, ReadStepTypesOption read
|
||||
) {
|
||||
exists(ParameterNode param | viableParamArg(call, param, arg) |
|
||||
exists(ParamNode param | viableParamArg(call, param, arg) |
|
||||
parameterValueFlowReturn(param, kind, read)
|
||||
)
|
||||
}
|
||||
@@ -496,18 +577,18 @@ private module Cached {
|
||||
* container type, and the content type.
|
||||
*/
|
||||
pragma[nomagic]
|
||||
predicate argumentValueFlowsThrough(ArgumentNode arg, ReadStepTypesOption read, Node out) {
|
||||
predicate argumentValueFlowsThrough(ArgNode arg, ReadStepTypesOption read, Node out) {
|
||||
exists(DataFlowCall call, ReturnKind kind |
|
||||
argumentValueFlowsThrough0(call, arg, kind, read) and
|
||||
out = getAnOutNode(call, kind)
|
||||
|
|
||||
// normal flow through
|
||||
read = TReadStepTypesNone() and
|
||||
compatibleTypes(getNodeType(arg), getNodeType(out))
|
||||
compatibleTypes(getNodeDataFlowType(arg), getNodeDataFlowType(out))
|
||||
or
|
||||
// getter
|
||||
compatibleTypes(getNodeType(arg), read.getContainerType()) and
|
||||
compatibleTypes(read.getContentType(), getNodeType(out))
|
||||
compatibleTypes(getNodeDataFlowType(arg), read.getContainerType()) and
|
||||
compatibleTypes(read.getContentType(), getNodeDataFlowType(out))
|
||||
)
|
||||
}
|
||||
|
||||
@@ -516,7 +597,7 @@ private module Cached {
|
||||
* value-preserving steps and a single read step, not taking call
|
||||
* contexts into account, thus representing a getter-step.
|
||||
*/
|
||||
predicate getterStep(ArgumentNode arg, Content c, Node out) {
|
||||
predicate getterStep(ArgNode arg, Content c, Node out) {
|
||||
argumentValueFlowsThrough(arg, TReadStepTypesSome(_, c, _), out)
|
||||
}
|
||||
|
||||
@@ -529,7 +610,7 @@ private module Cached {
|
||||
* container type, and the content type.
|
||||
*/
|
||||
private predicate parameterValueFlowReturn(
|
||||
ParameterNode p, ReturnKind kind, ReadStepTypesOption read
|
||||
ParamNode p, ReturnKind kind, ReadStepTypesOption read
|
||||
) {
|
||||
exists(ReturnNode ret |
|
||||
parameterValueFlow(p, ret, read) and
|
||||
@@ -553,7 +634,7 @@ private module Cached {
|
||||
private predicate mayBenefitFromCallContextExt(DataFlowCall call, DataFlowCallable callable) {
|
||||
mayBenefitFromCallContext(call, callable)
|
||||
or
|
||||
callable = call.getEnclosingCallable() and
|
||||
callEnclosingCallable(call, callable) and
|
||||
exists(viableCallableLambda(call, TDataFlowCallSome(_)))
|
||||
}
|
||||
|
||||
@@ -611,7 +692,7 @@ private module Cached {
|
||||
mayBenefitFromCallContextExt(call, _) and
|
||||
c = viableCallableExt(call) and
|
||||
ctxtgts = count(DataFlowCall ctx | c = viableImplInCallContextExt(call, ctx)) and
|
||||
tgts = strictcount(DataFlowCall ctx | viableCallableExt(ctx) = call.getEnclosingCallable()) and
|
||||
tgts = strictcount(DataFlowCall ctx | callEnclosingCallable(call, viableCallableExt(ctx))) and
|
||||
ctxtgts < tgts
|
||||
)
|
||||
}
|
||||
@@ -635,8 +716,7 @@ private module Cached {
|
||||
* Holds if `p` can flow to the pre-update node associated with post-update
|
||||
* node `n`, in the same callable, using only value-preserving steps.
|
||||
*/
|
||||
cached
|
||||
predicate parameterValueFlowsToPreUpdate(ParameterNode p, PostUpdateNode n) {
|
||||
private predicate parameterValueFlowsToPreUpdate(ParamNode p, PostUpdateNode n) {
|
||||
parameterValueFlow(p, n.getPreUpdateNode(), TReadStepTypesNone())
|
||||
}
|
||||
|
||||
@@ -644,9 +724,9 @@ private module Cached {
|
||||
Node node1, Content c, Node node2, DataFlowType contentType, DataFlowType containerType
|
||||
) {
|
||||
storeStep(node1, c, node2) and
|
||||
readStep(_, c, _) and
|
||||
contentType = getNodeType(node1) and
|
||||
containerType = getNodeType(node2)
|
||||
read(_, c, _) and
|
||||
contentType = getNodeDataFlowType(node1) and
|
||||
containerType = getNodeDataFlowType(node2)
|
||||
or
|
||||
exists(Node n1, Node n2 |
|
||||
n1 = node1.(PostUpdateNode).getPreUpdateNode() and
|
||||
@@ -654,12 +734,15 @@ private module Cached {
|
||||
|
|
||||
argumentValueFlowsThrough(n2, TReadStepTypesSome(containerType, c, contentType), n1)
|
||||
or
|
||||
readStep(n2, c, n1) and
|
||||
contentType = getNodeType(n1) and
|
||||
containerType = getNodeType(n2)
|
||||
read(n2, c, n1) and
|
||||
contentType = getNodeDataFlowType(n1) and
|
||||
containerType = getNodeDataFlowType(n2)
|
||||
)
|
||||
}
|
||||
|
||||
cached
|
||||
predicate read(Node node1, Content c, Node node2) { readStep(node1, c, node2) }
|
||||
|
||||
/**
|
||||
* Holds if data can flow from `node1` to `node2` via a direct assignment to
|
||||
* `f`.
|
||||
@@ -678,8 +761,9 @@ private module Cached {
|
||||
* are aliases. A typical example is a function returning `this`, implementing a fluent
|
||||
* interface.
|
||||
*/
|
||||
cached
|
||||
predicate reverseStepThroughInputOutputAlias(PostUpdateNode fromNode, PostUpdateNode toNode) {
|
||||
private predicate reverseStepThroughInputOutputAlias(
|
||||
PostUpdateNode fromNode, PostUpdateNode toNode
|
||||
) {
|
||||
exists(Node fromPre, Node toPre |
|
||||
fromPre = fromNode.getPreUpdateNode() and
|
||||
toPre = toNode.getPreUpdateNode()
|
||||
@@ -688,14 +772,20 @@ private module Cached {
|
||||
// Does the language-specific simpleLocalFlowStep already model flow
|
||||
// from function input to output?
|
||||
fromPre = getAnOutNode(c, _) and
|
||||
toPre.(ArgumentNode).argumentOf(c, _) and
|
||||
simpleLocalFlowStep(toPre.(ArgumentNode), fromPre)
|
||||
toPre.(ArgNode).argumentOf(c, _) and
|
||||
simpleLocalFlowStep(toPre.(ArgNode), fromPre)
|
||||
)
|
||||
or
|
||||
argumentValueFlowsThrough(toPre, TReadStepTypesNone(), fromPre)
|
||||
)
|
||||
}
|
||||
|
||||
cached
|
||||
predicate simpleLocalFlowStepExt(Node node1, Node node2) {
|
||||
simpleLocalFlowStep(node1, node2) or
|
||||
reverseStepThroughInputOutputAlias(node1, node2)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if the call context `call` either improves virtual dispatch in
|
||||
* `callable` or if it allows us to prune unreachable nodes in `callable`.
|
||||
@@ -704,7 +794,7 @@ private module Cached {
|
||||
predicate recordDataFlowCallSite(DataFlowCall call, DataFlowCallable callable) {
|
||||
reducedViableImplInCallContext(_, callable, call)
|
||||
or
|
||||
exists(Node n | getNodeEnclosingCallable(n) = callable | isUnreachableInCall(n, call))
|
||||
exists(Node n | getNodeEnclosingCallable(n) = callable | isUnreachableInCallCached(n, call))
|
||||
}
|
||||
|
||||
cached
|
||||
@@ -726,12 +816,12 @@ private module Cached {
|
||||
cached
|
||||
newtype TLocalFlowCallContext =
|
||||
TAnyLocalCall() or
|
||||
TSpecificLocalCall(DataFlowCall call) { isUnreachableInCall(_, call) }
|
||||
TSpecificLocalCall(DataFlowCall call) { isUnreachableInCallCached(_, call) }
|
||||
|
||||
cached
|
||||
newtype TReturnKindExt =
|
||||
TValueReturn(ReturnKind kind) or
|
||||
TParamUpdate(int pos) { exists(ParameterNode p | p.isParameterOf(_, pos)) }
|
||||
TParamUpdate(int pos) { exists(ParamNode p | p.isParameterOf(_, pos)) }
|
||||
|
||||
cached
|
||||
newtype TBooleanOption =
|
||||
@@ -761,23 +851,15 @@ private module Cached {
|
||||
* A `Node` at which a cast can occur such that the type should be checked.
|
||||
*/
|
||||
class CastingNode extends Node {
|
||||
CastingNode() {
|
||||
this instanceof ParameterNode or
|
||||
this instanceof CastNode or
|
||||
this instanceof OutNodeExt or
|
||||
// For reads, `x.f`, we want to check that the tracked type after the read (which
|
||||
// is obtained by popping the head of the access path stack) is compatible with
|
||||
// the type of `x.f`.
|
||||
readStep(_, _, this)
|
||||
}
|
||||
CastingNode() { castingNode(this) }
|
||||
}
|
||||
|
||||
private predicate readStepWithTypes(
|
||||
Node n1, DataFlowType container, Content c, Node n2, DataFlowType content
|
||||
) {
|
||||
readStep(n1, c, n2) and
|
||||
container = getNodeType(n1) and
|
||||
content = getNodeType(n2)
|
||||
read(n1, c, n2) and
|
||||
container = getNodeDataFlowType(n1) and
|
||||
content = getNodeDataFlowType(n2)
|
||||
}
|
||||
|
||||
private newtype TReadStepTypesOption =
|
||||
@@ -854,7 +936,7 @@ class CallContextSomeCall extends CallContextCall, TSomeCall {
|
||||
override string toString() { result = "CcSomeCall" }
|
||||
|
||||
override predicate relevantFor(DataFlowCallable callable) {
|
||||
exists(ParameterNode p | getNodeEnclosingCallable(p) = callable)
|
||||
exists(ParamNode p | getNodeEnclosingCallable(p) = callable)
|
||||
}
|
||||
|
||||
override predicate matchesCall(DataFlowCall call) { any() }
|
||||
@@ -866,7 +948,7 @@ class CallContextReturn extends CallContextNoCall, TReturn {
|
||||
}
|
||||
|
||||
override predicate relevantFor(DataFlowCallable callable) {
|
||||
exists(DataFlowCall call | this = TReturn(_, call) and call.getEnclosingCallable() = callable)
|
||||
exists(DataFlowCall call | this = TReturn(_, call) and callEnclosingCallable(call, callable))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -899,7 +981,7 @@ class LocalCallContextSpecificCall extends LocalCallContext, TSpecificLocalCall
|
||||
}
|
||||
|
||||
private predicate relevantLocalCCtx(DataFlowCall call, DataFlowCallable callable) {
|
||||
exists(Node n | getNodeEnclosingCallable(n) = callable and isUnreachableInCall(n, call))
|
||||
exists(Node n | getNodeEnclosingCallable(n) = callable and isUnreachableInCallCached(n, call))
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -913,26 +995,37 @@ LocalCallContext getLocalCallContext(CallContext ctx, DataFlowCallable callable)
|
||||
else result instanceof LocalCallContextAny
|
||||
}
|
||||
|
||||
/**
|
||||
* The value of a parameter at function entry, viewed as a node in a data
|
||||
* flow graph.
|
||||
*/
|
||||
class ParamNode extends Node {
|
||||
ParamNode() { parameterNode(this, _, _) }
|
||||
|
||||
/**
|
||||
* Holds if this node is the parameter of callable `c` at the specified
|
||||
* (zero-based) position.
|
||||
*/
|
||||
predicate isParameterOf(DataFlowCallable c, int i) { parameterNode(this, c, i) }
|
||||
}
|
||||
|
||||
/** A data-flow node that represents a call argument. */
|
||||
class ArgNode extends Node {
|
||||
ArgNode() { argumentNode(this, _, _) }
|
||||
|
||||
/** Holds if this argument occurs at the given position in the given call. */
|
||||
final predicate argumentOf(DataFlowCall call, int pos) { argumentNode(this, call, pos) }
|
||||
}
|
||||
|
||||
/**
|
||||
* A node from which flow can return to the caller. This is either a regular
|
||||
* `ReturnNode` or a `PostUpdateNode` corresponding to the value of a parameter.
|
||||
*/
|
||||
class ReturnNodeExt extends Node {
|
||||
ReturnNodeExt() {
|
||||
this instanceof ReturnNode or
|
||||
parameterValueFlowsToPreUpdate(_, this)
|
||||
}
|
||||
ReturnNodeExt() { returnNodeExt(this, _) }
|
||||
|
||||
/** Gets the kind of this returned value. */
|
||||
ReturnKindExt getKind() {
|
||||
result = TValueReturn(this.(ReturnNode).getKind())
|
||||
or
|
||||
exists(ParameterNode p, int pos |
|
||||
parameterValueFlowsToPreUpdate(p, this) and
|
||||
p.isParameterOf(_, pos) and
|
||||
result = TParamUpdate(pos)
|
||||
)
|
||||
}
|
||||
ReturnKindExt getKind() { returnNodeExt(this, result) }
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -940,11 +1033,7 @@ class ReturnNodeExt extends Node {
|
||||
* or a post-update node associated with a call argument.
|
||||
*/
|
||||
class OutNodeExt extends Node {
|
||||
OutNodeExt() {
|
||||
this instanceof OutNode
|
||||
or
|
||||
this.(PostUpdateNode).getPreUpdateNode() instanceof ArgumentNode
|
||||
}
|
||||
OutNodeExt() { outNodeExt(this) }
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -957,7 +1046,7 @@ abstract class ReturnKindExt extends TReturnKindExt {
|
||||
abstract string toString();
|
||||
|
||||
/** Gets a node corresponding to data flow out of `call`. */
|
||||
abstract OutNodeExt getAnOutNode(DataFlowCall call);
|
||||
final OutNodeExt getAnOutNode(DataFlowCall call) { result = getAnOutNodeExt(call, this) }
|
||||
}
|
||||
|
||||
class ValueReturnKind extends ReturnKindExt, TValueReturn {
|
||||
@@ -968,10 +1057,6 @@ class ValueReturnKind extends ReturnKindExt, TValueReturn {
|
||||
ReturnKind getKind() { result = kind }
|
||||
|
||||
override string toString() { result = kind.toString() }
|
||||
|
||||
override OutNodeExt getAnOutNode(DataFlowCall call) {
|
||||
result = getAnOutNode(call, this.getKind())
|
||||
}
|
||||
}
|
||||
|
||||
class ParamUpdateReturnKind extends ReturnKindExt, TParamUpdate {
|
||||
@@ -982,13 +1067,6 @@ class ParamUpdateReturnKind extends ReturnKindExt, TParamUpdate {
|
||||
int getPosition() { result = pos }
|
||||
|
||||
override string toString() { result = "param update " + pos }
|
||||
|
||||
override OutNodeExt getAnOutNode(DataFlowCall call) {
|
||||
exists(ArgumentNode arg |
|
||||
result.(PostUpdateNode).getPreUpdateNode() = arg and
|
||||
arg.argumentOf(call, this.getPosition())
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/** A callable tagged with a relevant return kind. */
|
||||
@@ -1015,10 +1093,13 @@ class ReturnPosition extends TReturnPosition0 {
|
||||
*/
|
||||
pragma[inline]
|
||||
DataFlowCallable getNodeEnclosingCallable(Node n) {
|
||||
exists(Node n0 |
|
||||
pragma[only_bind_into](n0) = n and
|
||||
pragma[only_bind_into](result) = n0.getEnclosingCallable()
|
||||
)
|
||||
nodeEnclosingCallable(pragma[only_bind_out](n), pragma[only_bind_into](result))
|
||||
}
|
||||
|
||||
/** Gets the type of `n` used for type pruning. */
|
||||
pragma[inline]
|
||||
DataFlowType getNodeDataFlowType(Node n) {
|
||||
nodeDataFlowType(pragma[only_bind_out](n), pragma[only_bind_into](result))
|
||||
}
|
||||
|
||||
pragma[noinline]
|
||||
@@ -1042,7 +1123,7 @@ predicate resolveReturn(CallContext cc, DataFlowCallable callable, DataFlowCall
|
||||
cc instanceof CallContextAny and callable = viableCallableExt(call)
|
||||
or
|
||||
exists(DataFlowCallable c0, DataFlowCall call0 |
|
||||
call0.getEnclosingCallable() = callable and
|
||||
callEnclosingCallable(call0, callable) and
|
||||
cc = TReturn(c0, call0) and
|
||||
c0 = prunedViableImplInCallContextReverse(call0, call)
|
||||
)
|
||||
@@ -1063,8 +1144,6 @@ DataFlowCallable resolveCall(DataFlowCall call, CallContext cc) {
|
||||
result = viableCallableExt(call) and cc instanceof CallContextReturn
|
||||
}
|
||||
|
||||
predicate read = readStep/3;
|
||||
|
||||
/** An optional Boolean value. */
|
||||
class BooleanOption extends TBooleanOption {
|
||||
string toString() {
|
||||
@@ -1116,7 +1195,7 @@ abstract class AccessPathFront extends TAccessPathFront {
|
||||
|
||||
TypedContent getHead() { this = TFrontHead(result) }
|
||||
|
||||
predicate isClearedAt(Node n) { clearsContent(n, getHead().getContent()) }
|
||||
predicate isClearedAt(Node n) { clearsContentCached(n, getHead().getContent()) }
|
||||
}
|
||||
|
||||
class AccessPathFrontNil extends AccessPathFront, TFrontNil {
|
||||
|
||||
@@ -168,7 +168,13 @@ module Consistency {
|
||||
msg = "ArgumentNode is missing PostUpdateNode."
|
||||
}
|
||||
|
||||
query predicate postWithInFlow(PostUpdateNode n, string msg) {
|
||||
// This predicate helps the compiler forget that in some languages
|
||||
// it is impossible for a `PostUpdateNode` to be the target of
|
||||
// `simpleLocalFlowStep`.
|
||||
private predicate isPostUpdateNode(Node n) { n instanceof PostUpdateNode or none() }
|
||||
|
||||
query predicate postWithInFlow(Node n, string msg) {
|
||||
isPostUpdateNode(n) and
|
||||
simpleLocalFlowStep(_, n) and
|
||||
msg = "PostUpdateNode should not be the target of local flow."
|
||||
}
|
||||
|
||||
@@ -228,7 +228,6 @@ module EssaFlow {
|
||||
* data flow. It is a strict subset of the `localFlowStep` predicate, as it
|
||||
* excludes SSA flow through instance fields.
|
||||
*/
|
||||
cached
|
||||
predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
|
||||
// If there is ESSA-flow out of a node `node`, we want flow
|
||||
// both out of `node` and any post-update node of `node`.
|
||||
@@ -1559,7 +1558,6 @@ predicate kwUnpackReadStep(CfgNode nodeFrom, DictionaryElementContent c, Node no
|
||||
* any value stored inside `f` is cleared at the pre-update node associated with `x`
|
||||
* in `x.f = newValue`.
|
||||
*/
|
||||
cached
|
||||
predicate clearsContent(Node n, Content c) {
|
||||
exists(CallNode call, CallableValue callable, string name |
|
||||
call_unpacks(call, _, callable, name, _) and
|
||||
@@ -1617,5 +1615,5 @@ predicate lambdaCreation(Node creation, LambdaCallKind kind, DataFlowCallable c)
|
||||
/** Holds if `call` is a lambda call of kind `kind` where `receiver` is the lambda expression. */
|
||||
predicate lambdaCall(DataFlowCall call, LambdaCallKind kind, Node receiver) { none() }
|
||||
|
||||
/** Extra data-flow steps needed for lamba flow analysis. */
|
||||
/** Extra data-flow steps needed for lambda flow analysis. */
|
||||
predicate additionalLambdaFlowStep(Node nodeFrom, Node nodeTo, boolean preservesValue) { none() }
|
||||
|
||||
@@ -119,22 +119,6 @@ class Node extends TNode {
|
||||
/** Gets the expression corresponding to this node, if any. */
|
||||
Expr asExpr() { none() }
|
||||
|
||||
/**
|
||||
* Gets a node that this node may flow to using one heap and/or interprocedural step.
|
||||
*
|
||||
* See `TypeTracker` for more details about how to use this.
|
||||
*/
|
||||
pragma[inline]
|
||||
Node track(TypeTracker t2, TypeTracker t) { t = t2.step(this, result) }
|
||||
|
||||
/**
|
||||
* Gets a node that may flow into this one using one heap and/or interprocedural step.
|
||||
*
|
||||
* See `TypeBackTracker` for more details about how to use this.
|
||||
*/
|
||||
pragma[inline]
|
||||
LocalSourceNode backtrack(TypeBackTracker t2, TypeBackTracker t) { t2 = t.step(result, this) }
|
||||
|
||||
/**
|
||||
* Gets a local source node from which data may flow to this node in zero or more local data-flow steps.
|
||||
*/
|
||||
|
||||
@@ -10,23 +10,37 @@ import python
|
||||
import DataFlowPublic
|
||||
private import DataFlowPrivate
|
||||
|
||||
private predicate comes_from_cfgnode(Node node) {
  exists(CfgNode first, Node second |
    simpleLocalFlowStep(first, second) and
    simpleLocalFlowStep*(second, node)
  )
}

/**
 * A data flow node that is a source of local flow. This includes things like
 * - Expressions
 * - Function parameters
 *
 *
 * Local source nodes and the `flowsTo` relation should be thought of in terms of the reference
 * semantics of the underlying object. For instance, in the following snippet of code
 *
 * ```python
 * x = []
 * x.append(1)
 * x.append(2)
 * ```
 *
 * the local source node corresponding to the occurrences of `x` is the empty list that is assigned to `x`
 * originally. Even though the two `append` calls modify the value of `x`, they do not change the fact that
 * `x` still points to the same object. If, however, we next do `x = x + [3]`, then the expression `x + [3]`
 * will be the new local source of what `x` now points to.
 */
class LocalSourceNode extends Node {
  cached
  LocalSourceNode() {
    not comes_from_cfgnode(this) and
    not this instanceof ModuleVariableNode
    not simpleLocalFlowStep(_, this) and
    // Currently, we create synthetic post-update nodes for
    // - arguments to calls that may modify said argument
    // - direct reads and writes of object attributes
    // Both of these preserve the identity of the underlying pointer, and hence we exclude these as
    // local source nodes.
    // We do, however, allow the post-update nodes that arise from object creation (which are non-synthetic).
    not this instanceof SyntheticPostUpdateNode
    or
    this = any(ModuleVariableNode mvn).getARead()
  }
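To illustrate how this class is typically consumed, here is a minimal sketch (not part of this change) that uses `getALocalSource()` to relate each `append` call back to the node that created the object being mutated, mirroring the list example in the QLDoc above:

```ql
import python
import semmle.python.dataflow.new.DataFlow

// For each call like `x.append(...)`, find the local source of `x`, i.e. the
// node that originally created the object being mutated.
from DataFlow::CallCfgNode call, DataFlow::AttrRead method, DataFlow::LocalSourceNode creation
where
  call.getFunction() = method and
  method.getAttributeName() = "append" and
  creation = method.getObject().getALocalSource()
select call, creation
```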
|
||||
@@ -63,6 +77,22 @@ class LocalSourceNode extends Node {
|
||||
* Gets a call to this node.
|
||||
*/
|
||||
CallCfgNode getACall() { Cached::call(this, result) }
|
||||
|
||||
/**
|
||||
* Gets a node that this node may flow to using one heap and/or interprocedural step.
|
||||
*
|
||||
* See `TypeTracker` for more details about how to use this.
|
||||
*/
|
||||
pragma[inline]
|
||||
LocalSourceNode track(TypeTracker t2, TypeTracker t) { t = t2.step(this, result) }
|
||||
|
||||
/**
|
||||
* Gets a node that may flow into this one using one heap and/or interprocedural step.
|
||||
*
|
||||
* See `TypeBackTracker` for more details about how to use this.
|
||||
*/
|
||||
pragma[inline]
|
||||
LocalSourceNode backtrack(TypeBackTracker t2, TypeBackTracker t) { t2 = t.step(result, this) }
|
||||
}
|
||||
|
||||
cached
|
||||
|
||||
@@ -9,36 +9,42 @@ private import semmle.python.dataflow.new.internal.TaintTrackingPublic
|
||||
*/
|
||||
predicate defaultTaintSanitizer(DataFlow::Node node) { none() }
|
||||
|
||||
/**
|
||||
* Holds if the additional step from `nodeFrom` to `nodeTo` should be included in all
|
||||
* global taint flow configurations.
|
||||
*/
|
||||
predicate defaultAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
|
||||
localAdditionalTaintStep(nodeFrom, nodeTo)
|
||||
or
|
||||
any(AdditionalTaintStep a).step(nodeFrom, nodeTo)
|
||||
private module Cached {
|
||||
/**
|
||||
* Holds if the additional step from `nodeFrom` to `nodeTo` should be included in all
|
||||
* global taint flow configurations.
|
||||
*/
|
||||
cached
|
||||
predicate defaultAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
|
||||
localAdditionalTaintStep(nodeFrom, nodeTo)
|
||||
or
|
||||
any(AdditionalTaintStep a).step(nodeFrom, nodeTo)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if taint can flow in one local step from `nodeFrom` to `nodeTo` excluding
|
||||
* local data flow steps. That is, `nodeFrom` and `nodeTo` are likely to represent
|
||||
* different objects.
|
||||
*/
|
||||
cached
|
||||
predicate localAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
|
||||
concatStep(nodeFrom, nodeTo)
|
||||
or
|
||||
subscriptStep(nodeFrom, nodeTo)
|
||||
or
|
||||
stringManipulation(nodeFrom, nodeTo)
|
||||
or
|
||||
containerStep(nodeFrom, nodeTo)
|
||||
or
|
||||
copyStep(nodeFrom, nodeTo)
|
||||
or
|
||||
forStep(nodeFrom, nodeTo)
|
||||
or
|
||||
unpackingAssignmentStep(nodeFrom, nodeTo)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if taint can flow in one local step from `nodeFrom` to `nodeTo` excluding
|
||||
* local data flow steps. That is, `nodeFrom` and `nodeTo` are likely to represent
|
||||
* different objects.
|
||||
*/
|
||||
predicate localAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
|
||||
concatStep(nodeFrom, nodeTo)
|
||||
or
|
||||
subscriptStep(nodeFrom, nodeTo)
|
||||
or
|
||||
stringManipulation(nodeFrom, nodeTo)
|
||||
or
|
||||
containerStep(nodeFrom, nodeTo)
|
||||
or
|
||||
copyStep(nodeFrom, nodeTo)
|
||||
or
|
||||
forStep(nodeFrom, nodeTo)
|
||||
or
|
||||
unpackingAssignmentStep(nodeFrom, nodeTo)
|
||||
}
|
||||
import Cached
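The `any(AdditionalTaintStep a).step(...)` disjunct above is the hook for user-defined steps. A minimal sketch of such an extension is shown below; the `mylib.parse` API is hypothetical and only serves to show the shape of the class.

```ql
import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.ApiGraphs

// Hypothetical extra taint step: the result of `mylib.parse(x)` is tainted
// whenever its first argument is tainted.
class MylibParseTaintStep extends TaintTracking::AdditionalTaintStep {
  override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
    exists(DataFlow::CallCfgNode call |
      call = API::moduleImport("mylib").getMember("parse").getACall() and
      nodeFrom = call.getArg(0) and
      nodeTo = call
    )
  }
}
```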
|
||||
|
||||
/**
|
||||
* Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to concatenation.
|
||||
|
||||
@@ -0,0 +1,470 @@
|
||||
/** Step Summaries and Type Tracking */
|
||||
|
||||
private import TypeTrackerSpecific
|
||||
|
||||
/**
|
||||
* Any string that may appear as the name of a piece of content. This will usually include things like:
|
||||
* - Attribute names (in Python)
|
||||
* - Property names (in JavaScript)
|
||||
*
|
||||
* In general, this can also be used to model things like stores to specific list indices. To ensure
|
||||
* correctness, it is important that
|
||||
*
|
||||
* - different types of content do not have overlapping names, and
|
||||
* - the empty string `""` is not a valid piece of content, as it is used to indicate the absence of
|
||||
* content instead.
|
||||
*/
|
||||
class ContentName extends string {
|
||||
ContentName() { this = getPossibleContentName() }
|
||||
}
|
||||
|
||||
/** Either a content name, or the empty string (representing no content). */
|
||||
class OptionalContentName extends string {
|
||||
OptionalContentName() { this instanceof ContentName or this = "" }
|
||||
}
|
||||
|
||||
cached
|
||||
private module Cached {
|
||||
/**
|
||||
* A description of a step on an inter-procedural data flow path.
|
||||
*/
|
||||
cached
|
||||
newtype TStepSummary =
|
||||
LevelStep() or
|
||||
CallStep() or
|
||||
ReturnStep() or
|
||||
StoreStep(ContentName content) or
|
||||
LoadStep(ContentName content)
|
||||
|
||||
/** Gets the summary resulting from appending `step` to type-tracking summary `tt`. */
|
||||
cached
|
||||
TypeTracker append(TypeTracker tt, StepSummary step) {
|
||||
exists(Boolean hasCall, OptionalContentName content | tt = MkTypeTracker(hasCall, content) |
|
||||
step = LevelStep() and result = tt
|
||||
or
|
||||
step = CallStep() and result = MkTypeTracker(true, content)
|
||||
or
|
||||
step = ReturnStep() and hasCall = false and result = tt
|
||||
or
|
||||
step = LoadStep(content) and result = MkTypeTracker(hasCall, "")
|
||||
or
|
||||
exists(string p | step = StoreStep(p) and content = "" and result = MkTypeTracker(hasCall, p))
|
||||
)
|
||||
}
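As a sanity check on this algebra, here is a sketch written against the definitions in this file (not part of the change): storing some content and then loading the same content should bring a fresh tracker back to its starting state.

```ql
// Holds if appending a store of `c` followed by a load of `c` to the initial
// tracker yields the initial tracker again.
predicate storeThenLoadRoundTrip(ContentName c, TypeTracker start) {
  start.start() and
  start.append(StoreStep(c)).append(LoadStep(c)) = start
}
```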
|
||||
|
||||
/**
|
||||
* Gets the summary that corresponds to having taken a forwards
|
||||
* heap and/or intra-procedural step from `nodeFrom` to `nodeTo`.
|
||||
*
|
||||
* Steps contained in this predicate should _not_ depend on the call graph.
|
||||
*/
|
||||
cached
|
||||
predicate stepNoCall(LocalSourceNode nodeFrom, LocalSourceNode nodeTo, StepSummary summary) {
|
||||
exists(Node mid | nodeFrom.flowsTo(mid) and smallstepNoCall(mid, nodeTo, summary))
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the summary that corresponds to having taken a forwards
|
||||
* inter-procedural step from `nodeFrom` to `nodeTo`.
|
||||
*/
|
||||
cached
|
||||
predicate stepCall(LocalSourceNode nodeFrom, LocalSourceNode nodeTo, StepSummary summary) {
|
||||
exists(Node mid | nodeFrom.flowsTo(mid) and smallstepCall(mid, nodeTo, summary))
|
||||
}
|
||||
}
|
||||
|
||||
private import Cached
|
||||
|
||||
/**
|
||||
* INTERNAL: Use `TypeTracker` or `TypeBackTracker` instead.
|
||||
*
|
||||
* A description of a step on an inter-procedural data flow path.
|
||||
*/
|
||||
class StepSummary extends TStepSummary {
|
||||
/** Gets a textual representation of this step summary. */
|
||||
string toString() {
|
||||
this instanceof LevelStep and result = "level"
|
||||
or
|
||||
this instanceof CallStep and result = "call"
|
||||
or
|
||||
this instanceof ReturnStep and result = "return"
|
||||
or
|
||||
exists(string content | this = StoreStep(content) | result = "store " + content)
|
||||
or
|
||||
exists(string content | this = LoadStep(content) | result = "load " + content)
|
||||
}
|
||||
}
|
||||
|
||||
pragma[noinline]
|
||||
private predicate smallstepNoCall(Node nodeFrom, LocalSourceNode nodeTo, StepSummary summary) {
|
||||
jumpStep(nodeFrom, nodeTo) and
|
||||
summary = LevelStep()
|
||||
or
|
||||
exists(string content |
|
||||
StepSummary::localSourceStoreStep(nodeFrom, nodeTo, content) and
|
||||
summary = StoreStep(content)
|
||||
or
|
||||
basicLoadStep(nodeFrom, nodeTo, content) and summary = LoadStep(content)
|
||||
)
|
||||
}
|
||||
|
||||
pragma[noinline]
|
||||
private predicate smallstepCall(Node nodeFrom, LocalSourceNode nodeTo, StepSummary summary) {
|
||||
callStep(nodeFrom, nodeTo) and summary = CallStep()
|
||||
or
|
||||
returnStep(nodeFrom, nodeTo) and
|
||||
summary = ReturnStep()
|
||||
}
|
||||
|
||||
/** Provides predicates for updating step summaries (`StepSummary`s). */
|
||||
module StepSummary {
|
||||
/**
|
||||
* Gets the summary that corresponds to having taken a forwards
|
||||
* heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
|
||||
*
|
||||
* This predicate is inlined, which enables better join-orders when
|
||||
* the call graph construction and type tracking are mutually recursive.
|
||||
* In such cases, non-linear recursion involving `step` will be limited
|
||||
* to non-linear recursion for the parts of `step` that involve the
|
||||
* call graph.
|
||||
*/
|
||||
pragma[inline]
|
||||
predicate step(LocalSourceNode nodeFrom, LocalSourceNode nodeTo, StepSummary summary) {
|
||||
stepNoCall(nodeFrom, nodeTo, summary)
|
||||
or
|
||||
stepCall(nodeFrom, nodeTo, summary)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the summary that corresponds to having taken a forwards
|
||||
* local, heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
|
||||
*
|
||||
* Unlike `StepSummary::step`, this predicate does not compress
|
||||
* type-preserving steps.
|
||||
*/
|
||||
pragma[inline]
|
||||
predicate smallstep(Node nodeFrom, LocalSourceNode nodeTo, StepSummary summary) {
|
||||
smallstepNoCall(nodeFrom, nodeTo, summary)
|
||||
or
|
||||
smallstepCall(nodeFrom, nodeTo, summary)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `nodeFrom` is being written to the `content` content of the object in `nodeTo`.
|
||||
*
|
||||
* Note that `nodeTo` will always be a local source node that flows to the place where the content
|
||||
* is written in `basicStoreStep`. This may lead to the flow of information going "back in time"
|
||||
* from the point of view of the execution of the program.
|
||||
*
|
||||
* For instance, if we interpret attribute writes in Python as writing to content with the same
|
||||
* name as the attribute and consider the following snippet
|
||||
*
|
||||
* ```python
|
||||
* def foo(y):
|
||||
* x = Foo()
|
||||
* bar(x)
|
||||
* x.attr = y
|
||||
* baz(x)
|
||||
*
|
||||
* def bar(x):
|
||||
* z = x.attr
|
||||
* ```
|
||||
* for the attribute write `x.attr = y`, we will have `content` being the literal string `"attr"`,
|
||||
* `nodeFrom` will be `y`, and `nodeTo` will be the object `Foo()` created on the first line of the
|
||||
* function. This means we will track the fact that `x.attr` can have the type of `y` into the
|
||||
* assignment to `z` inside `bar`, even though this attribute write happens _after_ `bar` is called.
|
||||
*/
|
||||
predicate localSourceStoreStep(Node nodeFrom, LocalSourceNode nodeTo, string content) {
|
||||
exists(Node obj | nodeTo.flowsTo(obj) and basicStoreStep(nodeFrom, obj, content))
|
||||
}
|
||||
}
|
||||
|
||||
private newtype TTypeTracker = MkTypeTracker(Boolean hasCall, OptionalContentName content)
|
||||
|
||||
/**
|
||||
* Summary of the steps needed to track a value to a given dataflow node.
|
||||
*
|
||||
* This can be used to track objects that implement a certain API in order to
|
||||
* recognize calls to that API. Note that type-tracking does not by itself provide a
|
||||
* source/sink relation, that is, it may determine that a node has a given type,
|
||||
* but it won't determine where that type came from.
|
||||
*
|
||||
* It is recommended that all uses of this type are written in the following form,
|
||||
* for tracking some type `myType`:
|
||||
* ```ql
|
||||
* DataFlow::LocalSourceNode myType(DataFlow::TypeTracker t) {
|
||||
* t.start() and
|
||||
* result = < source of myType >
|
||||
* or
|
||||
* exists (DataFlow::TypeTracker t2 |
|
||||
* result = myType(t2).track(t2, t)
|
||||
* )
|
||||
* }
|
||||
*
|
||||
* DataFlow::Node myType() { myType(DataFlow::TypeTracker::end()).flowsTo(result) }
|
||||
* ```
|
||||
*
|
||||
* Instead of `result = myType(t2).track(t2, t)`, you can also use the equivalent
|
||||
* `t = t2.step(myType(t2), result)`. If you additionally want to track individual
|
||||
* intra-procedural steps, use `t = t2.smallstep(myCallback(t2), result)`.
|
||||
*/
|
||||
class TypeTracker extends TTypeTracker {
|
||||
Boolean hasCall;
|
||||
OptionalContentName content;
|
||||
|
||||
TypeTracker() { this = MkTypeTracker(hasCall, content) }
|
||||
|
||||
/** Gets the summary resulting from appending `step` to this type-tracking summary. */
|
||||
TypeTracker append(StepSummary step) { result = append(this, step) }
|
||||
|
||||
/** Gets a textual representation of this summary. */
|
||||
string toString() {
|
||||
exists(string withCall, string withContent |
|
||||
(if hasCall = true then withCall = "with" else withCall = "without") and
|
||||
(if content != "" then withContent = " with content " + content else withContent = "") and
|
||||
result = "type tracker " + withCall + " call steps" + withContent
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if this is the starting point of type tracking.
|
||||
*/
|
||||
predicate start() { hasCall = false and content = "" }
|
||||
|
||||
/**
|
||||
* Holds if this is the starting point of type tracking, and the value starts in the content named `contentName`.
|
||||
* The type tracking only ends after the content has been loaded.
|
||||
*/
|
||||
predicate startInContent(ContentName contentName) { hasCall = false and content = contentName }
|
||||
|
||||
/**
|
||||
* Holds if this is the starting point of type tracking
|
||||
* when tracking a parameter into a call, but not out of it.
|
||||
*/
|
||||
predicate call() { hasCall = true and content = "" }
|
||||
|
||||
/**
|
||||
* Holds if this is the end point of type tracking.
|
||||
*/
|
||||
predicate end() { content = "" }
|
||||
|
||||
/**
|
||||
* INTERNAL. DO NOT USE.
|
||||
*
|
||||
* Holds if this type has been tracked into a call.
|
||||
*/
|
||||
boolean hasCall() { result = hasCall }
|
||||
|
||||
/**
|
||||
* INTERNAL. DO NOT USE.
|
||||
*
|
||||
* Gets the content associated with this type tracker.
|
||||
*/
|
||||
string getContent() { result = content }
|
||||
|
||||
/**
|
||||
* Gets a type tracker that starts where this one has left off to allow continued
|
||||
* tracking.
|
||||
*
|
||||
* This predicate is only defined if the type is not associated to a piece of content.
|
||||
*/
|
||||
TypeTracker continue() { content = "" and result = this }
|
||||
|
||||
/**
|
||||
* Gets the summary that corresponds to having taken a forwards
|
||||
* heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
|
||||
*/
|
||||
pragma[inline]
|
||||
TypeTracker step(LocalSourceNode nodeFrom, LocalSourceNode nodeTo) {
|
||||
exists(StepSummary summary |
|
||||
StepSummary::step(nodeFrom, pragma[only_bind_out](nodeTo), pragma[only_bind_into](summary)) and
|
||||
result = this.append(pragma[only_bind_into](summary))
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the summary that corresponds to having taken a forwards
|
||||
* local, heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
|
||||
*
|
||||
* Unlike `TypeTracker::step`, this predicate exposes all edges
|
||||
* in the flow graph, and not just the edges between `Node`s.
|
||||
* It may therefore be less performant.
|
||||
*
|
||||
* Type tracking predicates using small steps typically take the following form:
|
||||
* ```ql
|
||||
* DataFlow::Node myType(DataFlow::TypeTracker t) {
|
||||
* t.start() and
|
||||
* result = < source of myType >
|
||||
* or
|
||||
* exists (DataFlow::TypeTracker t2 |
|
||||
* t = t2.smallstep(myType(t2), result)
|
||||
* )
|
||||
* }
|
||||
*
|
||||
* DataFlow::Node myType() {
|
||||
* result = myType(DataFlow::TypeTracker::end())
|
||||
* }
|
||||
* ```
|
||||
*/
|
||||
pragma[inline]
|
||||
TypeTracker smallstep(Node nodeFrom, Node nodeTo) {
|
||||
exists(StepSummary summary |
|
||||
StepSummary::smallstep(nodeFrom, nodeTo, summary) and
|
||||
result = this.append(summary)
|
||||
)
|
||||
or
|
||||
simpleLocalFlowStep(nodeFrom, nodeTo) and
|
||||
result = this
|
||||
}
|
||||
}
|
||||
|
||||
/** Provides predicates for implementing custom `TypeTracker`s. */
|
||||
module TypeTracker {
|
||||
/**
|
||||
* Gets a valid end point of type tracking.
|
||||
*/
|
||||
TypeTracker end() { result.end() }
|
||||
}
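A concrete instantiation of the pattern described in the class documentation might look as follows. This is a sketch: `mypkg.Client` is a made-up API, and the `DataFlow::` qualifiers assume the usual re-exports from the Python data-flow library.

```ql
import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.ApiGraphs

/** Gets a node to which a `mypkg.Client` instance may flow, as tracked by `t`. */
DataFlow::LocalSourceNode clientInstance(DataFlow::TypeTracker t) {
  t.start() and
  result = API::moduleImport("mypkg").getMember("Client").getACall()
  or
  exists(DataFlow::TypeTracker t2 | result = clientInstance(t2).track(t2, t))
}

/** Gets any node that may hold a `mypkg.Client` instance. */
DataFlow::Node clientInstance() { clientInstance(DataFlow::TypeTracker::end()).flowsTo(result) }
```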
|
||||
|
||||
private newtype TTypeBackTracker = MkTypeBackTracker(Boolean hasReturn, OptionalContentName content)
|
||||
|
||||
/**
|
||||
* Summary of the steps needed to back-track a use of a value to a given dataflow node.
|
||||
*
|
||||
* This can for example be used to track callbacks that are passed to a certain API,
|
||||
* so we can model specific parameters of that callback as having a certain type.
|
||||
*
|
||||
* Note that type back-tracking does not provide a source/sink relation, that is,
|
||||
* it may determine that a node will be used in an API call somewhere, but it won't
|
||||
* determine exactly where that use was, or the path that led to the use.
|
||||
*
|
||||
* It is recommended that all uses of this type are written in the following form,
|
||||
* for back-tracking some callback type `myCallback`:
|
||||
*
|
||||
* ```ql
|
||||
* DataFlow::LocalSourceNode myCallback(DataFlow::TypeBackTracker t) {
|
||||
* t.start() and
|
||||
* result = (< some API call >).getArgument(< n >).getALocalSource()
|
||||
* or
|
||||
* exists (DataFlow::TypeBackTracker t2 |
|
||||
* result = myCallback(t2).backtrack(t2, t)
|
||||
* )
|
||||
* }
|
||||
*
|
||||
* DataFlow::LocalSourceNode myCallback() { result = myCallback(DataFlow::TypeBackTracker::end()) }
|
||||
* ```
|
||||
*
|
||||
* Instead of `result = myCallback(t2).backtrack(t2, t)`, you can also use the equivalent
|
||||
* `t2 = t.step(result, myCallback(t2))`. If you additionally want to track individual
|
||||
* intra-procedural steps, use `t2 = t.smallstep(result, myCallback(t2))`.
|
||||
*/
|
||||
class TypeBackTracker extends TTypeBackTracker {
|
||||
Boolean hasReturn;
|
||||
string content;
|
||||
|
||||
TypeBackTracker() { this = MkTypeBackTracker(hasReturn, content) }
|
||||
|
||||
/** Gets the summary resulting from prepending `step` to this type-tracking summary. */
|
||||
TypeBackTracker prepend(StepSummary step) {
|
||||
step = LevelStep() and result = this
|
||||
or
|
||||
step = CallStep() and hasReturn = false and result = this
|
||||
or
|
||||
step = ReturnStep() and result = MkTypeBackTracker(true, content)
|
||||
or
|
||||
exists(string p |
|
||||
step = LoadStep(p) and content = "" and result = MkTypeBackTracker(hasReturn, p)
|
||||
)
|
||||
or
|
||||
step = StoreStep(content) and result = MkTypeBackTracker(hasReturn, "")
|
||||
}
|
||||
|
||||
/** Gets a textual representation of this summary. */
|
||||
string toString() {
|
||||
exists(string withReturn, string withContent |
|
||||
(if hasReturn = true then withReturn = "with" else withReturn = "without") and
|
||||
(if content != "" then withContent = " with content " + content else withContent = "") and
|
||||
result = "type back-tracker " + withReturn + " return steps" + withContent
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if this is the starting point of type tracking.
|
||||
*/
|
||||
predicate start() { hasReturn = false and content = "" }
|
||||
|
||||
/**
|
||||
* Holds if this is the end point of type tracking.
|
||||
*/
|
||||
predicate end() { content = "" }
|
||||
|
||||
/**
|
||||
* INTERNAL. DO NOT USE.
|
||||
*
|
||||
* Holds if this type has been back-tracked into a call through a return edge.
|
||||
*/
|
||||
boolean hasReturn() { result = hasReturn }
|
||||
|
||||
/**
|
||||
* Gets a type tracker that starts where this one has left off to allow continued
|
||||
* tracking.
|
||||
*
|
||||
* This predicate is only defined if the type has not been tracked into a piece of content.
|
||||
*/
|
||||
TypeBackTracker continue() { content = "" and result = this }
|
||||
|
||||
/**
|
||||
* Gets the summary that corresponds to having taken a backwards
|
||||
* heap and/or inter-procedural step from `nodeTo` to `nodeFrom`.
|
||||
*/
|
||||
pragma[inline]
|
||||
TypeBackTracker step(LocalSourceNode nodeFrom, LocalSourceNode nodeTo) {
|
||||
exists(StepSummary summary |
|
||||
StepSummary::step(pragma[only_bind_out](nodeFrom), nodeTo, pragma[only_bind_into](summary)) and
|
||||
this = result.prepend(pragma[only_bind_into](summary))
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the summary that corresponds to having taken a backwards
|
||||
* local, heap and/or inter-procedural step from `nodeTo` to `nodeFrom`.
|
||||
*
|
||||
* Unlike `TypeBackTracker::step`, this predicate exposes all edges
|
||||
* in the flow graph, and not just the edges between
|
||||
* `LocalSourceNode`s. It may therefore be less performant.
|
||||
*
|
||||
* Type tracking predicates using small steps typically take the following form:
|
||||
* ```ql
|
||||
* DataFlow::Node myType(DataFlow::TypeBackTracker t) {
|
||||
* t.start() and
|
||||
* result = < some API call >.getArgument(< n >)
|
||||
* or
|
||||
* exists (DataFlow::TypeBackTracker t2 |
|
||||
* t = t2.smallstep(result, myType(t2))
|
||||
* )
|
||||
* }
|
||||
*
|
||||
* DataFlow::Node myType() {
|
||||
* result = myType(DataFlow::TypeBackTracker::end())
|
||||
* }
|
||||
* ```
|
||||
*/
|
||||
pragma[inline]
|
||||
TypeBackTracker smallstep(Node nodeFrom, Node nodeTo) {
|
||||
exists(StepSummary summary |
|
||||
StepSummary::smallstep(nodeFrom, nodeTo, summary) and
|
||||
this = result.prepend(summary)
|
||||
)
|
||||
or
|
||||
simpleLocalFlowStep(nodeFrom, nodeTo) and
|
||||
this = result
|
||||
}
|
||||
}
|
||||
|
||||
/** Provides predicates for implementing custom `TypeBackTracker`s. */
|
||||
module TypeBackTracker {
|
||||
/**
|
||||
* Gets a valid end point of type back-tracking.
|
||||
*/
|
||||
TypeBackTracker end() { result.end() }
|
||||
}
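The corresponding back-tracking pattern, sketched here for a made-up `mypkg.register(callback)` API, finds the function values that may end up being passed as the callback:

```ql
import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.ApiGraphs

/** Gets a node that may flow into the first argument of `mypkg.register`, as back-tracked by `t`. */
DataFlow::LocalSourceNode registeredCallback(DataFlow::TypeBackTracker t) {
  t.start() and
  result = API::moduleImport("mypkg").getMember("register").getACall().getArg(0).getALocalSource()
  or
  exists(DataFlow::TypeBackTracker t2 | result = registeredCallback(t2).backtrack(t2, t))
}

/** Gets a value that may be registered as a callback. */
DataFlow::LocalSourceNode registeredCallback() {
  result = registeredCallback(DataFlow::TypeBackTracker::end())
}
```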
|
||||
@@ -0,0 +1,82 @@
|
||||
/**
|
||||
* Provides Python-specific definitions for use in the type tracker library.
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.internal.DataFlowPublic as DataFlowPublic
|
||||
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
|
||||
|
||||
class Node = DataFlowPublic::Node;
|
||||
|
||||
class LocalSourceNode = DataFlowPublic::LocalSourceNode;
|
||||
|
||||
predicate simpleLocalFlowStep = DataFlowPrivate::simpleLocalFlowStep/2;
|
||||
|
||||
predicate jumpStep = DataFlowPrivate::jumpStep/2;
|
||||
|
||||
/**
|
||||
* Gets the name of a possible piece of content. For Python, this is currently only attribute names,
|
||||
* using the name of the attribute for the corresponding content.
|
||||
*/
|
||||
string getPossibleContentName() { result = any(DataFlowPublic::AttrRef a).getAttributeName() }
|
||||
|
||||
/**
|
||||
* Gets a callable for the call where `nodeFrom` is used as the `i`'th argument.
|
||||
*
|
||||
* Helper predicate to avoid bad join order experienced in `callStep`.
|
||||
* This happened when `isParameterOf` was joined _before_ `getCallable`.
|
||||
*/
|
||||
pragma[nomagic]
|
||||
private DataFlowPrivate::DataFlowCallable getCallableForArgument(
|
||||
DataFlowPublic::ArgumentNode nodeFrom, int i
|
||||
) {
|
||||
exists(DataFlowPrivate::DataFlowCall call |
|
||||
nodeFrom.argumentOf(call, i) and
|
||||
result = call.getCallable()
|
||||
)
|
||||
}
|
||||
|
||||
/** Holds if `nodeFrom` steps to `nodeTo` by being passed as a parameter in a call. */
|
||||
predicate callStep(DataFlowPublic::ArgumentNode nodeFrom, DataFlowPublic::ParameterNode nodeTo) {
|
||||
// TODO: Support special methods?
|
||||
exists(DataFlowPrivate::DataFlowCallable callable, int i |
|
||||
callable = getCallableForArgument(nodeFrom, i) and
|
||||
nodeTo.isParameterOf(callable, i)
|
||||
)
|
||||
}
|
||||
|
||||
/** Holds if `nodeFrom` steps to `nodeTo` by being returned from a call. */
|
||||
predicate returnStep(DataFlowPrivate::ReturnNode nodeFrom, Node nodeTo) {
|
||||
exists(DataFlowPrivate::DataFlowCall call |
|
||||
nodeFrom.getEnclosingCallable() = call.getCallable() and nodeTo.asCfgNode() = call.getNode()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `nodeFrom` is being written to the `content` content of the object in `nodeTo`.
|
||||
*/
|
||||
predicate basicStoreStep(Node nodeFrom, Node nodeTo, string content) {
|
||||
exists(DataFlowPublic::AttrWrite a |
|
||||
a.mayHaveAttributeName(content) and
|
||||
nodeFrom = a.getValue() and
|
||||
nodeTo = a.getObject()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `nodeTo` is the result of accessing the `content` content of `nodeFrom`.
|
||||
*/
|
||||
predicate basicLoadStep(Node nodeFrom, Node nodeTo, string content) {
|
||||
exists(DataFlowPublic::AttrRead a |
|
||||
a.mayHaveAttributeName(content) and
|
||||
nodeFrom = a.getObject() and
|
||||
nodeTo = a
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* A utility class that is equivalent to `boolean` but does not require type joining.
|
||||
*/
|
||||
class Boolean extends boolean {
|
||||
Boolean() { this = true or this = false }
|
||||
}
|
||||
52
python/ql/src/semmle/python/frameworks/Aioch.qll
Normal file
@@ -0,0 +1,52 @@
|
||||
/**
|
||||
* Provides classes modeling security-relevant aspects of the `aioch` PyPI package (an
|
||||
* async-io version of the `clickhouse-driver` PyPI package).
|
||||
*
|
||||
* See https://pypi.org/project/aioch/
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.Concepts
|
||||
private import semmle.python.ApiGraphs
|
||||
private import semmle.python.frameworks.PEP249
|
||||
private import semmle.python.frameworks.ClickhouseDriver
|
||||
|
||||
/**
|
||||
* INTERNAL: Do not use.
|
||||
*
|
||||
* Provides models for `aioch` PyPI package (an async-io version of the
|
||||
* `clickhouse-driver` PyPI package).
|
||||
*
|
||||
* See https://pypi.org/project/aioch/
|
||||
*/
|
||||
module Aioch {
|
||||
/** Provides models for `aioch.Client` class and subclasses. */
|
||||
module Client {
|
||||
/** Gets a reference to the `aioch.Client` class or any subclass. */
|
||||
API::Node subclassRef() {
|
||||
result = API::moduleImport("aioch").getMember("Client").getASubclass*()
|
||||
}
|
||||
|
||||
/** Gets a reference to an instance of `aioch.Client` or any subclass. */
|
||||
API::Node instance() { result = subclassRef().getReturn() }
|
||||
}
|
||||
|
||||
/**
|
||||
* A call to any of the execute methods on an `aioch.Client`, which are just async
|
||||
* versions of the methods in the `clickhouse-driver` PyPI package.
|
||||
*
|
||||
* See
|
||||
* - https://clickhouse-driver.readthedocs.io/en/latest/api.html#clickhouse_driver.Client.execute
|
||||
* - https://clickhouse-driver.readthedocs.io/en/latest/api.html#clickhouse_driver.Client.execute_iter
|
||||
* - https://clickhouse-driver.readthedocs.io/en/latest/api.html#clickhouse_driver.Client.execute_with_progress
|
||||
*/
|
||||
class ClientExecuteCall extends SqlExecution::Range, DataFlow::CallCfgNode {
|
||||
ClientExecuteCall() {
|
||||
exists(string methodName | methodName = ClickhouseDriver::getExecuteMethodName() |
|
||||
this = Client::instance().getMember(methodName).getACall()
|
||||
)
|
||||
}
|
||||
|
||||
override DataFlow::Node getSql() { result in [this.getArg(0), this.getArgByName("query")] }
|
||||
}
|
||||
}
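Once this model is in place, the call sites it recognizes surface through the generic `SqlExecution` concept. A minimal query over that concept might look like this sketch (not part of the change):

```ql
import python
import semmle.python.Concepts

// List every modeled SQL execution (including the aioch `execute*` methods
// above) together with the expression holding the SQL text.
from SqlExecution exec
select exec, exec.getSql()
```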
|
||||
562
python/ql/src/semmle/python/frameworks/Aiohttp.qll
Normal file
@@ -0,0 +1,562 @@
|
||||
/**
|
||||
* Provides classes modeling security-relevant aspects of the `aiohttp` PyPI package.
|
||||
* See https://docs.aiohttp.org/en/stable/index.html
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import semmle.python.dataflow.new.TaintTracking
|
||||
private import semmle.python.Concepts
|
||||
private import semmle.python.ApiGraphs
|
||||
private import semmle.python.frameworks.internal.PoorMansFunctionResolution
|
||||
private import semmle.python.frameworks.internal.SelfRefMixin
|
||||
private import semmle.python.frameworks.Multidict
|
||||
private import semmle.python.frameworks.Yarl
|
||||
|
||||
/**
|
||||
* INTERNAL: Do not use.
|
||||
*
|
||||
* Provides models for the web server part (`aiohttp.web`) of the `aiohttp` PyPI package.
|
||||
* See https://docs.aiohttp.org/en/stable/web.html
|
||||
*/
|
||||
module AiohttpWebModel {
|
||||
/**
|
||||
* Provides models for the `aiohttp.web.View` class and subclasses.
|
||||
*
|
||||
* See https://docs.aiohttp.org/en/stable/web_reference.html#view.
|
||||
*/
|
||||
module View {
|
||||
/** Gets a reference to the `aiohttp.web.View` class or any subclass. */
|
||||
API::Node subclassRef() {
|
||||
result = API::moduleImport("aiohttp").getMember("web").getMember("View").getASubclass*()
|
||||
}
|
||||
}
|
||||
|
||||
// -- route modeling --
|
||||
/** Gets a reference to an `aiohttp.web.Application` instance. */
|
||||
API::Node applicationInstance() {
|
||||
// Not sure whether you're allowed to add routes _after_ starting the app, for
|
||||
// example in the middle of handling a http request... but I'm guessing that for 99%
|
||||
// for all code, not modeling that `request.app` is a reference to an application
|
||||
// should be good enough for the route-setup part of the modeling :+1:
|
||||
result = API::moduleImport("aiohttp").getMember("web").getMember("Application").getReturn()
|
||||
}
|
||||
|
||||
/** Gets a reference to an `aiohttp.web.UrlDispatcher` instance. */
|
||||
API::Node urlDispathcerInstance() {
|
||||
result = API::moduleImport("aiohttp").getMember("web").getMember("UrlDispatcher").getReturn()
|
||||
or
|
||||
result = applicationInstance().getMember("router")
|
||||
}
|
||||
|
||||
/**
|
||||
* A route setup in `aiohttp.web`. Since all route-setups can technically use either
|
||||
* coroutines or view-classes as the handler argument (although that's not how you're
|
||||
* **supposed** to do things), we also need to handle this.
|
||||
*
|
||||
* Extend this class to refine existing API models. If you want to model new APIs,
|
||||
* extend `AiohttpRouteSetup::Range` instead.
|
||||
*/
|
||||
class AiohttpRouteSetup extends HTTP::Server::RouteSetup::Range {
|
||||
AiohttpRouteSetup::Range range;
|
||||
|
||||
AiohttpRouteSetup() { this = range }
|
||||
|
||||
override Parameter getARoutedParameter() { none() }
|
||||
|
||||
override string getFramework() { result = "aiohttp.web" }
|
||||
|
||||
/** Gets the argument specifying the handler (either a coroutine or a view-class). */
|
||||
DataFlow::Node getHandlerArg() { result = range.getHandlerArg() }
|
||||
|
||||
override DataFlow::Node getUrlPatternArg() { result = range.getUrlPatternArg() }
|
||||
|
||||
/** Gets the view-class that is referenced in the view-class handler argument, if any. */
|
||||
Class getViewClass() { result = range.getViewClass() }
|
||||
|
||||
override Function getARequestHandler() { result = range.getARequestHandler() }
|
||||
}
|
||||
|
||||
/** Provides a class for modeling new aiohttp.web route setups. */
|
||||
private module AiohttpRouteSetup {
|
||||
/**
|
||||
* A route setup in `aiohttp.web`. Since all route-setups can technically use either
|
||||
* coroutines or view-classes as the handler argument (although that's not how you're
|
||||
* **supposed** to do things), we also need to handle this.
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `AiohttpRouteSetup` instead.
|
||||
*/
|
||||
abstract class Range extends DataFlow::Node {
|
||||
/** Gets the argument used to set the URL pattern. */
|
||||
abstract DataFlow::Node getUrlPatternArg();
|
||||
|
||||
/** Gets the argument specifying the handler (either a coroutine or a view-class). */
|
||||
abstract DataFlow::Node getHandlerArg();
|
||||
|
||||
/** Gets the view-class that is referenced in the view-class handler argument, if any. */
|
||||
Class getViewClass() { result = getBackTrackedViewClass(this.getHandlerArg()) }
|
||||
|
||||
/**
|
||||
* Gets a function that will handle incoming requests for this route, if any.
|
||||
*
|
||||
* NOTE: This will be modified in the near future to have a `RequestHandler` result, instead of a `Function`.
|
||||
*/
|
||||
Function getARequestHandler() {
|
||||
this.getHandlerArg() = poorMansFunctionTracker(result)
|
||||
or
|
||||
result = this.getViewClass().(AiohttpViewClass).getARequestHandler()
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a reference to a class, that has been backtracked from the view-class handler
|
||||
* argument `origin` (to a route-setup for view-classes).
|
||||
*/
|
||||
private DataFlow::LocalSourceNode viewClassBackTracker(
|
||||
DataFlow::TypeBackTracker t, DataFlow::Node origin
|
||||
) {
|
||||
t.start() and
|
||||
origin = any(Range rs).getHandlerArg() and
|
||||
result = origin.getALocalSource()
|
||||
or
|
||||
exists(DataFlow::TypeBackTracker t2 |
|
||||
result = viewClassBackTracker(t2, origin).backtrack(t2, t)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a reference to a class, that has been backtracked from the view-class handler
|
||||
* argument `origin` (to a route-setup for view-classes).
|
||||
*/
|
||||
DataFlow::LocalSourceNode viewClassBackTracker(DataFlow::Node origin) {
|
||||
result = viewClassBackTracker(DataFlow::TypeBackTracker::end(), origin)
|
||||
}
|
||||
|
||||
Class getBackTrackedViewClass(DataFlow::Node origin) {
|
||||
result.getParent() = viewClassBackTracker(origin).asExpr()
|
||||
}
|
||||
}
|
||||
|
||||
/** An aiohttp route setup that uses coroutines (async function) as request handlers. */
|
||||
class AiohttpCoroutineRouteSetup extends AiohttpRouteSetup {
|
||||
AiohttpCoroutineRouteSetup() { this.getHandlerArg() = poorMansFunctionTracker(_) }
|
||||
}
|
||||
|
||||
/** An aiohttp route setup that uses view-classes as request handlers. */
|
||||
class AiohttpViewRouteSetup extends AiohttpRouteSetup {
|
||||
AiohttpViewRouteSetup() { exists(this.getViewClass()) }
|
||||
}
|
||||
|
||||
/**
|
||||
* A route-setup from
|
||||
* - `add_route`, `add_view`, `add_get`, `add_post`, etc. on an `aiohttp.web.UrlDispatcher`.
|
||||
* - `route`, `view`, `get`, `post`, etc. functions from `aiohttp.web`.
|
||||
*/
|
||||
class AiohttpAddRouteCall extends AiohttpRouteSetup::Range, DataFlow::CallCfgNode {
|
||||
/** At what index route arguments start, so we can handle the "route" version together with get/post/... */
|
||||
int routeArgsStart;
|
||||
|
||||
AiohttpAddRouteCall() {
|
||||
exists(string funcName |
|
||||
funcName = HTTP::httpVerbLower() and
|
||||
routeArgsStart = 0
|
||||
or
|
||||
funcName = "view" and
|
||||
routeArgsStart = 0
|
||||
or
|
||||
funcName = "route" and
|
||||
routeArgsStart = 1
|
||||
|
|
||||
this = urlDispathcerInstance().getMember("add_" + funcName).getACall()
|
||||
or
|
||||
this = API::moduleImport("aiohttp").getMember("web").getMember(funcName).getACall()
|
||||
)
|
||||
}
|
||||
|
||||
override DataFlow::Node getUrlPatternArg() {
|
||||
result in [this.getArg(routeArgsStart + 0), this.getArgByName("path")]
|
||||
}
|
||||
|
||||
override DataFlow::Node getHandlerArg() {
|
||||
result in [this.getArg(routeArgsStart + 1), this.getArgByName("handler")]
|
||||
}
|
||||
}
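Route setups registered this way are exposed through the generic `HTTP::Server::RouteSetup` concept. A small query consuming that concept might look like the following sketch (assuming the usual `Concepts` API):

```ql
import python
import semmle.python.Concepts

// Pair each aiohttp route setup with its URL-pattern argument and a function
// that handles requests for that route.
from HTTP::Server::RouteSetup setup, Function handler
where
  setup.getFramework() = "aiohttp.web" and
  handler = setup.getARequestHandler()
select setup, setup.getUrlPatternArg(), handler
```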
|
||||
|
||||
/** A route-setup using a decorator, such as `route`, `view`, `get`, `post`, etc. on an `aiohttp.web.RouteTableDef`. */
|
||||
class AiohttpDecoratorRouteSetup extends AiohttpRouteSetup::Range, DataFlow::CallCfgNode {
|
||||
/** At what index route arguments start, so we can handle the "route" version together with get/post/... */
|
||||
int routeArgsStart;
|
||||
|
||||
AiohttpDecoratorRouteSetup() {
|
||||
exists(string decoratorName |
|
||||
decoratorName = HTTP::httpVerbLower() and
|
||||
routeArgsStart = 0
|
||||
or
|
||||
decoratorName = "view" and
|
||||
routeArgsStart = 0
|
||||
or
|
||||
decoratorName = "route" and
|
||||
routeArgsStart = 1
|
||||
|
|
||||
this =
|
||||
API::moduleImport("aiohttp")
|
||||
.getMember("web")
|
||||
.getMember("RouteTableDef")
|
||||
.getReturn()
|
||||
.getMember(decoratorName)
|
||||
.getACall()
|
||||
)
|
||||
}
|
||||
|
||||
override DataFlow::Node getUrlPatternArg() {
|
||||
result in [this.getArg(routeArgsStart + 0), this.getArgByName("path")]
|
||||
}
|
||||
|
||||
override DataFlow::Node getHandlerArg() { none() }
|
||||
|
||||
override Class getViewClass() { result.getADecorator() = this.asExpr() }
|
||||
|
||||
override Function getARequestHandler() {
|
||||
// we're decorating a class
|
||||
exists(this.getViewClass()) and
|
||||
result = super.getARequestHandler()
|
||||
or
|
||||
// we're decorating a function
|
||||
not exists(this.getViewClass()) and
|
||||
result.getADecorator() = this.asExpr()
|
||||
}
|
||||
}
|
||||
|
||||
/** A class that we consider an aiohttp.web View class. */
|
||||
abstract class AiohttpViewClass extends Class, SelfRefMixin {
|
||||
/** Gets a function that could handle incoming requests, if any. */
|
||||
Function getARequestHandler() {
|
||||
// TODO: This doesn't handle attribute assignment. Should be OK, but analysis is not as complete as with
|
||||
// points-to and `.lookup`, which would handle `post = my_post_handler` inside class def
|
||||
result = this.getAMethod() and
|
||||
result.getName() = HTTP::httpVerbLower()
|
||||
}
|
||||
}
|
||||
|
||||
/** A class that has a super-type which is an aiohttp.web View class. */
|
||||
class AiohttpViewClassFromSuperClass extends AiohttpViewClass {
|
||||
AiohttpViewClassFromSuperClass() { this.getABase() = View::subclassRef().getAUse().asExpr() }
|
||||
}
|
||||
|
||||
/** A class that is used in a route-setup, therefore being considered an aiohttp.web View class. */
|
||||
class AiohttpViewClassFromRouteSetup extends AiohttpViewClass {
|
||||
AiohttpViewClassFromRouteSetup() { this = any(AiohttpRouteSetup rs).getViewClass() }
|
||||
}
|
||||
|
||||
/** A request handler defined in an `aiohttp.web` view class, that has no known route. */
|
||||
private class AiohttpViewClassRequestHandlerWithoutKnownRoute extends HTTP::Server::RequestHandler::Range {
|
||||
AiohttpViewClassRequestHandlerWithoutKnownRoute() {
|
||||
exists(AiohttpViewClass vc | vc.getARequestHandler() = this) and
|
||||
not exists(AiohttpRouteSetup setup | setup.getARequestHandler() = this)
|
||||
}
|
||||
|
||||
override Parameter getARoutedParameter() { none() }
|
||||
|
||||
override string getFramework() { result = "aiohttp.web" }
|
||||
}
|
||||
|
||||
  // ---------------------------------------------------------------------------
  // aiohttp.web.Request taint modeling
  // ---------------------------------------------------------------------------
  /**
   * Provides models for the `aiohttp.web.Request` class.
   *
   * See https://docs.aiohttp.org/en/stable/web_reference.html#request-and-base-request
   */
  module Request {
    /**
     * A source of instances of `aiohttp.web.Request`. Extend this class to model new instances.
     *
     * This can include instantiations of the class, return values from function
     * calls, or a special parameter that will be set when functions are called by an external
     * library.
     *
     * Use the `Request::instance()` predicate to get
     * references to instances of `aiohttp.web.Request`.
     */
    abstract class InstanceSource extends DataFlow::LocalSourceNode { }

    /** Gets a reference to an instance of `aiohttp.web.Request`. */
    private DataFlow::LocalSourceNode instance(DataFlow::TypeTracker t) {
      t.start() and
      result instanceof InstanceSource
      or
      exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
    }

    /** Gets a reference to an instance of `aiohttp.web.Request`. */
    DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
  }
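
  // For illustration only: a hypothetical handler (all names invented) showing the kind of local
  // flow the type tracker above is intended to follow -- the `request` parameter is an
  // `InstanceSource`, and the alias `req` and the helper's `r` parameter are later uses that
  // `Request::instance()` is intended to capture as well:
  // ```py
  // from aiohttp import web
  //
  // def log_client(r):
  //     print(r.remote, r.headers.get("User-Agent"))
  //
  // async def handler(request):
  //     req = request          # tracked alias
  //     log_client(req)        # tracked into the call
  //     return web.Response(text="ok")
  // ```
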
  /**
   * Provides models for the `aiohttp.StreamReader` class.
   *
   * See https://docs.aiohttp.org/en/stable/streams.html#aiohttp.StreamReader
   */
  module StreamReader {
    /**
     * A source of instances of `aiohttp.StreamReader`. Extend this class to model new instances.
     *
     * This can include instantiations of the class, return values from function
     * calls, or a special parameter that will be set when functions are called by an external
     * library.
     *
     * Use the `StreamReader::instance()` predicate to get
     * references to instances of `aiohttp.StreamReader`.
     */
    abstract class InstanceSource extends DataFlow::LocalSourceNode { }

    /** Gets a reference to an instance of `aiohttp.StreamReader`. */
    private DataFlow::LocalSourceNode instance(DataFlow::TypeTracker t) {
      t.start() and
      result instanceof InstanceSource
      or
      exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
    }

    /** Gets a reference to an instance of `aiohttp.StreamReader`. */
    DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }

    /**
     * Taint propagation for `aiohttp.StreamReader`.
     */
    private class AiohttpStreamReaderAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
      override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
        // Methods
        //
        // TODO: When we have tools that make it easy, model these properly to handle
        // `meth = obj.meth; meth()`. Until then, we'll use this more syntactic approach
        // (since it allows us to at least capture the most common cases).
        nodeFrom = StreamReader::instance() and
        exists(DataFlow::AttrRead attr | attr.getObject() = nodeFrom |
          // normal methods
          attr.getAttributeName() in ["read_nowait"] and
          nodeTo.(DataFlow::CallCfgNode).getFunction() = attr
          or
          // async methods
          exists(Await await, DataFlow::CallCfgNode call |
            attr.getAttributeName() in [
                "read", "readany", "readexactly", "readline", "readchunk", "iter_chunked",
                "iter_any", "iter_chunks"
              ] and
            call.getFunction() = attr and
            await.getValue() = call.asExpr() and
            nodeTo.asExpr() = await
          )
        )
      }
    }
  }
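
  // For illustration only: a hypothetical handler (names invented) showing the taint steps the
  // class above is meant to add -- `request.content` is an `aiohttp.StreamReader`, so the awaited
  // result of its `read`-style methods is considered tainted as well:
  // ```py
  // from aiohttp import web
  //
  // async def upload(request):
  //     body = await request.content.read()        # tainted: awaited async method
  //     chunk = request.content.read_nowait()      # tainted: normal method call
  //     return web.Response(text=str(len(body) + len(chunk)))
  // ```
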
  /**
   * A parameter that will receive an `aiohttp.web.Request` instance when a request
   * handler is invoked.
   */
  class AiohttpRequestHandlerRequestParam extends Request::InstanceSource, RemoteFlowSource::Range,
    DataFlow::ParameterNode {
    AiohttpRequestHandlerRequestParam() {
      exists(Function requestHandler |
        requestHandler = any(AiohttpCoroutineRouteSetup setup).getARequestHandler() and
        // We select the _last_ parameter for the request since that is what they do in
        // `aiohttp-jinja2`.
        // https://github.com/aio-libs/aiohttp-jinja2/blob/7fb4daf2c3003921d34031d38c2311ee0e02c18b/aiohttp_jinja2/__init__.py#L235
        //
        // I assume that is just to handle cases such as the one below
        // ```py
        // class MyCustomHandlerClass:
        //     async def foo_handler(self, request):
        //         ...
        //
        // my_custom_handler = MyCustomHandlerClass()
        // app.router.add_get("/MyCustomHandlerClass/foo", my_custom_handler.foo_handler)
        // ```
        this.getParameter() =
          max(Parameter param, int i | param = requestHandler.getArg(i) | param order by i)
      )
    }

    override string getSourceType() { result = "aiohttp.web.Request" }
  }
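
  // For illustration only: in the common case (hypothetical names) the request parameter is simply
  // the last -- and usually only -- parameter of a routed coroutine, and it is treated as a remote
  // flow source:
  // ```py
  // from aiohttp import web
  //
  // async def greet(request):              # `request` is the selected parameter
  //     name = request.match_info["name"]  # user-controlled data
  //     return web.Response(text="hi " + name)
  //
  // app = web.Application()
  // app.router.add_get("/greet/{name}", greet)
  // ```
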
  /**
   * A read of the `request` attribute on an instance of an aiohttp.web View class,
   * which gives the request currently being processed.
   */
  class AiohttpViewClassRequestAttributeRead extends Request::InstanceSource,
    RemoteFlowSource::Range, DataFlow::Node {
    AiohttpViewClassRequestAttributeRead() {
      this.(DataFlow::AttrRead).getObject() = any(AiohttpViewClass vc).getASelfRef() and
      this.(DataFlow::AttrRead).getAttributeName() = "request"
    }

    override string getSourceType() {
      result = "aiohttp.web.Request from self.request in View class"
    }
  }
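
  // For illustration only: a hypothetical View class showing the `self.request` reads the class
  // above is meant to flag as remote flow sources:
  // ```py
  // from aiohttp import web
  //
  // class SearchView(web.View):
  //     async def get(self):
  //         query = self.request.query.get("q", "")  # `self.request` is the current request
  //         return web.Response(text=query)
  // ```
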
  /**
   * Taint propagation for `aiohttp.web.Request`.
   *
   * See https://docs.aiohttp.org/en/stable/web_reference.html#request-and-base-request
   */
  private class AiohttpRequestAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
    override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
      // Methods
      //
      // TODO: When we have tools that make it easy, model these properly to handle
      // `meth = obj.meth; meth()`. Until then, we'll use this more syntactic approach
      // (since it allows us to at least capture the most common cases).
      nodeFrom = Request::instance() and
      exists(DataFlow::AttrRead attr | attr.getObject() = nodeFrom |
        // normal methods
        attr.getAttributeName() in ["clone", "get_extra_info"] and
        nodeTo.(DataFlow::CallCfgNode).getFunction() = attr
        or
        // async methods
        exists(Await await, DataFlow::CallCfgNode call |
          attr.getAttributeName() in ["read", "text", "json", "multipart", "post"] and
          call.getFunction() = attr and
          await.getValue() = call.asExpr() and
          nodeTo.asExpr() = await
        )
      )
      or
      // Attributes
      nodeFrom = Request::instance() and
      nodeTo.(DataFlow::AttrRead).getObject() = nodeFrom and
      nodeTo.(DataFlow::AttrRead).getAttributeName() in [
          "url", "rel_url", "forwarded", "host", "remote", "path", "path_qs", "raw_path", "query",
          "headers", "transport", "cookies", "content", "_payload", "content_type", "charset",
          "http_range", "if_modified_since", "if_unmodified_since", "if_range", "match_info"
        ]
    }
  }
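
  // For illustration only (hypothetical handler): with the taint steps above, data read from the
  // request via attribute reads or awaited method calls is itself considered tainted:
  // ```py
  // from aiohttp import web
  //
  // async def submit(request):
  //     agent = request.headers.get("User-Agent", "")  # attribute read -> tainted
  //     body = await request.text()                    # awaited method -> tainted
  //     return web.Response(text=agent + body)
  // ```
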
  /** An attribute read on an `aiohttp.web.Request` that is a `MultiDictProxy` instance. */
  class AiohttpRequestMultiDictProxyInstances extends Multidict::MultiDictProxy::InstanceSource {
    AiohttpRequestMultiDictProxyInstances() {
      this.(DataFlow::AttrRead).getObject() = Request::instance() and
      this.(DataFlow::AttrRead).getAttributeName() in ["query", "headers"]
      or
      // Handle the common case of `x = await request.post()`
      // but don't try to handle anything else, since we don't have an easy way to do this yet.
      // TODO: more complete handling of `await request.post()`
      exists(Await await, DataFlow::CallCfgNode call, DataFlow::AttrRead read |
        this.asExpr() = await
      |
        read.(DataFlow::AttrRead).getObject() = Request::instance() and
        read.(DataFlow::AttrRead).getAttributeName() = "post" and
        call.getFunction() = read and
        await.getValue() = call.asExpr()
      )
    }
  }
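
  // For illustration only (hypothetical handler): the `MultiDictProxy` values the class above
  // models, namely `request.query`, `request.headers`, and the awaited result of `request.post()`:
  // ```py
  // from aiohttp import web
  //
  // async def create_user(request):
  //     form = await request.post()         # MultiDictProxy of form data
  //     name = form.get("name", "")
  //     page = request.query.get("page", "1")
  //     return web.Response(text=name + page)
  // ```
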
  /** An attribute read on an `aiohttp.web.Request` that is a `yarl.URL` instance. */
  class AiohttpRequestYarlUrlInstances extends Yarl::Url::InstanceSource {
    AiohttpRequestYarlUrlInstances() {
      this.(DataFlow::AttrRead).getObject() = Request::instance() and
      this.(DataFlow::AttrRead).getAttributeName() in ["url", "rel_url"]
    }
  }

  /** An attribute read on an `aiohttp.web.Request` that is an `aiohttp.StreamReader` instance. */
  class AiohttpRequestStreamReaderInstances extends StreamReader::InstanceSource {
    AiohttpRequestStreamReaderInstances() {
      this.(DataFlow::AttrRead).getObject() = Request::instance() and
      this.(DataFlow::AttrRead).getAttributeName() in ["content", "_payload"]
    }
  }
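
  // For illustration only (hypothetical handler): the attribute reads the two classes above model,
  // yielding a `yarl.URL` and an `aiohttp.StreamReader` respectively:
  // ```py
  // from aiohttp import web
  //
  // async def echo(request):
  //     path = request.rel_url.path          # yarl.URL instance
  //     raw = await request.content.read()   # aiohttp.StreamReader instance
  //     return web.Response(text=path + str(len(raw)))
  // ```
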
  // ---------------------------------------------------------------------------
  // aiohttp.web Response modeling
  // ---------------------------------------------------------------------------
  /**
   * An instantiation of `aiohttp.web.Response`.
   *
   * Note that `aiohttp.web.HTTPException` (and its subclasses) is a subclass of `aiohttp.web.Response`.
   *
   * See
   * - https://docs.aiohttp.org/en/stable/web_reference.html#aiohttp.web.Response
   * - https://docs.aiohttp.org/en/stable/web_quickstart.html#aiohttp-web-exceptions
   */
  class AiohttpWebResponseInstantiation extends HTTP::Server::HttpResponse::Range,
    DataFlow::CallCfgNode {
    AiohttpWebResponseInstantiation() {
      this = API::moduleImport("aiohttp").getMember("web").getMember("Response").getACall()
      or
      exists(string httpExceptionClassName |
        httpExceptionClassName in [
            "HTTPException", "HTTPSuccessful", "HTTPOk", "HTTPCreated", "HTTPAccepted",
            "HTTPNonAuthoritativeInformation", "HTTPNoContent", "HTTPResetContent",
            "HTTPPartialContent", "HTTPRedirection", "HTTPMultipleChoices", "HTTPMovedPermanently",
            "HTTPFound", "HTTPSeeOther", "HTTPNotModified", "HTTPUseProxy", "HTTPTemporaryRedirect",
            "HTTPPermanentRedirect", "HTTPError", "HTTPClientError", "HTTPBadRequest",
            "HTTPUnauthorized", "HTTPPaymentRequired", "HTTPForbidden", "HTTPNotFound",
            "HTTPMethodNotAllowed", "HTTPNotAcceptable", "HTTPProxyAuthenticationRequired",
            "HTTPRequestTimeout", "HTTPConflict", "HTTPGone", "HTTPLengthRequired",
            "HTTPPreconditionFailed", "HTTPRequestEntityTooLarge", "HTTPRequestURITooLong",
            "HTTPUnsupportedMediaType", "HTTPRequestRangeNotSatisfiable", "HTTPExpectationFailed",
            "HTTPMisdirectedRequest", "HTTPUnprocessableEntity", "HTTPFailedDependency",
            "HTTPUpgradeRequired", "HTTPPreconditionRequired", "HTTPTooManyRequests",
            "HTTPRequestHeaderFieldsTooLarge", "HTTPUnavailableForLegalReasons", "HTTPServerError",
            "HTTPInternalServerError", "HTTPNotImplemented", "HTTPBadGateway",
            "HTTPServiceUnavailable", "HTTPGatewayTimeout", "HTTPVersionNotSupported",
            "HTTPVariantAlsoNegotiates", "HTTPInsufficientStorage", "HTTPNotExtended",
            "HTTPNetworkAuthenticationRequired"
          ] and
        this =
          API::moduleImport("aiohttp").getMember("web").getMember(httpExceptionClassName).getACall()
      )
    }

    override DataFlow::Node getBody() {
      result in [this.getArgByName("text"), this.getArgByName("body")]
    }

    override DataFlow::Node getMimetypeOrContentTypeArg() {
      result = this.getArgByName("content_type")
    }

    override string getMimetypeDefault() {
      exists(this.getArgByName("text")) and
      result = "text/plain"
      or
      not exists(this.getArgByName("text")) and
      result = "application/octet-stream"
    }
  }
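
  // For illustration only (hypothetical handler): responses the class above models, with the body
  // taken from `text`/`body` and the content type from `content_type`:
  // ```py
  // from aiohttp import web
  //
  // async def profile(request):
  //     name = request.query.get("name", "")
  //     if not name:
  //         raise web.HTTPNotFound(text="no such user")           # HTTPException is a Response
  //     return web.Response(text=name, content_type="text/html")  # explicit content type
  // ```
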
  /**
   * An instantiation of an aiohttp.web HTTP redirect exception.
   *
   * See the part about redirects at https://docs.aiohttp.org/en/stable/web_quickstart.html#aiohttp-web-exceptions
   */
  class AiohttpRedirectExceptionInstantiation extends AiohttpWebResponseInstantiation,
    HTTP::Server::HttpRedirectResponse::Range {
    AiohttpRedirectExceptionInstantiation() {
      exists(string httpRedirectExceptionClassName |
        httpRedirectExceptionClassName in [
            "HTTPMultipleChoices", "HTTPMovedPermanently", "HTTPFound", "HTTPSeeOther",
            "HTTPNotModified", "HTTPUseProxy", "HTTPTemporaryRedirect", "HTTPPermanentRedirect"
          ] and
        this =
          API::moduleImport("aiohttp")
              .getMember("web")
              .getMember(httpRedirectExceptionClassName)
              .getACall()
      )
    }

    override DataFlow::Node getRedirectLocation() {
      result in [this.getArg(0), this.getArgByName("location")]
    }
  }
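
  // For illustration only (hypothetical handler): a redirect whose location argument -- positional
  // or passed as `location=` -- is what `getRedirectLocation()` refers to:
  // ```py
  // from aiohttp import web
  //
  // async def go(request):
  //     target = request.query.get("next", "/")
  //     raise web.HTTPFound(location=target)  # potentially an open redirect
  // ```
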
}