Merge remote-tracking branch 'origin/jorgectf/python/ldapimproperauth' into jorgectf/python/ldapinsecureauth

This commit is contained in:
jorgectf
2021-07-22 02:51:19 +02:00
2651 changed files with 93032 additions and 32712 deletions

View File

@@ -31,7 +31,7 @@ updated to use a context manager.</p>
</example>
<references>
<li>Effbot: <a href="http://effbot.org/zone/python-with-statement.htm">Python with statement</a>.</li>
<li>Effbot: <a href="https://web.archive.org/web/20201012110738/http://effbot.org/zone/python-with-statement.htm">Python with statement</a>.</li>
<li>Python Standard Library: <a href="http://docs.python.org/library/stdtypes.html#context-manager-types">Context manager
</a>.</li>
<li>Python Language Reference: <a href="http://docs.python.org/2.7/reference/datamodel.html#with-statement-context-managers">

View File

@@ -0,0 +1,23 @@
/**
* @name Python extraction errors
* @description List all extraction errors for Python files in the source code directory.
* @kind diagnostic
* @id py/diagnostics/extraction-errors
*/
import python
/**
* Gets the SARIF severity for errors.
*
* See point 3.27.10 in https://docs.oasis-open.org/sarif/sarif/v2.0/sarif-v2.0.html for
* what error means.
*/
int getErrorSeverity() { result = 2 }
from SyntaxError error, File file
where
file = error.getFile() and
exists(file.getRelativePath())
select error, "Extraction failed in " + file + " with error " + error.getMessage(),
getErrorSeverity()

View File

@@ -0,0 +1,15 @@
/**
* @name Successfully extracted Python files
* @description Lists all Python files in the source code directory that were extracted
* without encountering an error.
* @kind diagnostic
* @id py/diagnostics/successfully-extracted-files
*/
import python
from File file
where
not exists(SyntaxError e | e.getFile() = file) and
exists(file.getRelativePath())
select file, ""

View File

@@ -4,18 +4,21 @@
* @kind problem
* @tags security
* correctness
* security/cwe/cwe-78
* @problem.severity error
* @security-severity 5.9
* @sub-severity high
* @precision high
* @id py/use-of-input
*/
import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.ApiGraphs
from CallNode call, Context context, ControlFlowNode func
from DataFlow::CallCfgNode call
where
context.getAVersion().includes(2, _) and
call.getFunction() = func and
func.pointsTo(context, Value::named("input"), _) and
not func.pointsTo(context, Value::named("raw_input"), _)
major_version() = 2 and
call = API::builtin("input").getACall() and
call != API::builtin("raw_input").getACall()
select call, "The unsafe built-in function 'input' is used in Python 2."

View File

@@ -1,14 +0,0 @@
/**
* @name Filter: non-generated files
* @description Only keep results that aren't (or don't appear to be) generated.
* @kind problem
* @id py/not-generated-file-filter
*/
import python
import external.DefectFilter
import semmle.python.filters.GeneratedCode
from DefectResult res
where not exists(GeneratedFile f | res.getFile() = f)
select res, res.getMessage()

View File

@@ -1,14 +0,0 @@
/**
* @name Filter: non-test files
* @description Only keep results that aren't in tests
* @kind problem
* @id py/not-test-file-filter
*/
import python
import external.DefectFilter
import semmle.python.filters.Tests
from DefectResult res
where not exists(TestScope s | contains(s.getLocation(), res))
select res, res.getMessage()

View File

@@ -36,7 +36,7 @@ function with a default of <code>default=None</code>, check if the parameter is
</example>
<references>
<li>Effbot: <a href="http://effbot.org/zone/default-values.htm">Default Parameter Values in Python</a>.</li>
<li>Effbot: <a href="https://web.archive.org/web/20201112004749/http://effbot.org/zone/default-values.htm">Default Parameter Values in Python</a>.</li>
<li>Python Language Reference: <a href="http://docs.python.org/2/reference/compound_stmts.html#function-definitions">Function definitions</a>.</li>

View File

@@ -9,7 +9,7 @@ information being thrown away.</p>
<p>A return value is considered to be trivial if it is <code>None</code> or it is a parameter (parameters, usually <code>self</code> are often
returned to assist with method chaining, but can be ignored).
A return value is also assumed to be trivial if it is ignored for 75% or more of calls.
A return value is also assumed to be trivial if it is ignored for more than 25% of calls.
</p>
</overview>

View File

@@ -29,7 +29,7 @@ import that.
<li>Python Language Reference: <a href="http://docs.python.org/2/reference/simple_stmts.html#import">The import statement</a>.</li>
<li>Python: <a href="http://docs.python.org/2/tutorial/modules.html">Modules</a>.</li>
<li> Effbot: <a href="http://effbot.org/zone/import-confusion.htm">Import Confusion</a>.</li>
<li> Effbot: <a href="https://web.archive.org/web/20200917011425/https://effbot.org/zone/import-confusion.htm">Import Confusion</a>.</li>
</references>

View File

@@ -33,7 +33,7 @@ import that.
<li>Python Language Reference: <a href="http://docs.python.org/2/reference/simple_stmts.html#import">The import statement</a>.</li>
<li>Python: <a href="http://docs.python.org/2/tutorial/modules.html">Modules</a>.</li>
<li> Effbot: <a href="http://effbot.org/zone/import-confusion.htm">Import Confusion</a>.</li>
<li> Effbot: <a href="https://web.archive.org/web/20200917011425/https://effbot.org/zone/import-confusion.htm">Import Confusion</a>.</li>
</references>

View File

@@ -4,7 +4,6 @@
* @kind treemap
* @treemap.warnOn highValues
* @metricType file
* @precision high
* @tags maintainability
* @id py/lines-of-commented-out-code-in-files
*/

View File

@@ -49,7 +49,7 @@ so the general technique is quite widely applicable.
<li>
IBM developerWorks: <a href="http://www.ibm.com/developerworks/library/j-eaed6/">Evolutionary architecture and emergent design: Emergent design through metrics</a>.
IBM developerWorks: <a href="https://web.archive.org/web/20190919085934/https://www.ibm.com/developerworks/library/j-eaed6/">Evolutionary architecture and emergent design: Emergent design through metrics</a>.
</li>
<li>
R. Martin, <em>Agile Software Development: Principles, Patterns and Practices</em>. Pearson, 2011.

View File

@@ -5,7 +5,6 @@
* @kind treemap
* @treemap.warnOn highValues
* @metricType externalDependency
* @precision medium
* @id py/external-dependencies
*/

View File

@@ -6,7 +6,6 @@
* @treemap.warnOn highValues
* @metricType file
* @metricAggregate avg sum max
* @precision very-high
* @tags maintainability
* @id py/lines-of-code-in-files
*/

View File

@@ -6,7 +6,6 @@
* @treemap.warnOn lowValues
* @metricType file
* @metricAggregate avg sum max
* @precision very-high
* @id py/lines-of-comments-in-files
*/

View File

@@ -7,21 +7,12 @@
* @treemap.warnOn highValues
* @metricType file
* @metricAggregate avg sum max
* @precision high
* @tags testability
* @id py/duplicated-lines-in-files
*/
import python
import external.CodeDuplication
from File f, int n
where
n =
count(int line |
exists(DuplicateBlock d | d.sourceFile() = f |
line in [d.sourceStartLine() .. d.sourceEndLine()] and
not allowlistedLineForDuplication(f, line)
)
)
where none()
select f, n order by n desc

View File

@@ -7,21 +7,12 @@
* @treemap.warnOn highValues
* @metricType file
* @metricAggregate avg sum max
* @precision high
* @tags testability
* @id py/similar-lines-in-files
*/
import python
import external.CodeDuplication
from File f, int n
where
n =
count(int line |
exists(SimilarBlock d | d.sourceFile() = f |
line in [d.sourceStartLine() .. d.sourceEndLine()] and
not allowlistedLineForDuplication(f, line)
)
)
where none()
select f, n order by n desc

View File

@@ -5,7 +5,6 @@
* @treemap.warnOn lowValues
* @metricType file
* @metricAggregate avg sum max
* @precision medium
* @id py/tests-in-files
*/

View File

@@ -29,7 +29,7 @@ You can reduce efferent coupling by splitting up a module so that each part depe
<li>
IBM developerWorks: <a href="http://www.ibm.com/developerworks/library/j-eaed6/">Evolutionary architecture and emergent design: Emergent design through metrics</a>.
IBM developerWorks: <a href="https://web.archive.org/web/20190919085934/https://www.ibm.com/developerworks/library/j-eaed6/">Evolutionary architecture and emergent design: Emergent design through metrics</a>.
</li>
<li>
R. Martin, <em>Agile Software Development: Principles, Patterns and Practices</em>. Pearson, 2011.

View File

@@ -4,7 +4,9 @@
* and is therefore associated with security risks.
* @kind problem
* @tags security
* external/cwe/cwe-200
* @problem.severity error
* @security-severity 3.6
* @sub-severity low
* @precision high
* @id py/bind-socket-all-network-interfaces
@@ -32,21 +34,7 @@ private DataFlow::LocalSourceNode vulnerableHostnameRef(DataFlow::TypeTracker t,
result.asExpr() = allInterfacesStrConst
)
or
// Due to bad performance when using normal setup with `vulnerableHostnameRef(t2, hostname).track(t2, t)`
// we have inlined that code and forced a join
exists(DataFlow::TypeTracker t2 |
exists(DataFlow::StepSummary summary |
vulnerableHostnameRef_first_join(t2, hostname, result, summary) and
t = t2.append(summary)
)
)
}
pragma[nomagic]
private predicate vulnerableHostnameRef_first_join(
DataFlow::TypeTracker t2, string hostname, DataFlow::Node res, DataFlow::StepSummary summary
) {
DataFlow::StepSummary::step(vulnerableHostnameRef(t2, hostname), res, summary)
exists(DataFlow::TypeTracker t2 | result = vulnerableHostnameRef(t2, hostname).track(t2, t))
}
/** Gets a reference to a hostname that can be used to bind to all interfaces. */
@@ -59,21 +47,7 @@ private DataFlow::LocalSourceNode vulnerableAddressTuple(DataFlow::TypeTracker t
t.start() and
result.asExpr() = any(Tuple tup | tup.getElt(0) = vulnerableHostnameRef(hostname).asExpr())
or
// Due to bad performance when using normal setup with `vulnerableAddressTuple(t2, hostname).track(t2, t)`
// we have inlined that code and forced a join
exists(DataFlow::TypeTracker t2 |
exists(DataFlow::StepSummary summary |
vulnerableAddressTuple_first_join(t2, hostname, result, summary) and
t = t2.append(summary)
)
)
}
pragma[nomagic]
private predicate vulnerableAddressTuple_first_join(
DataFlow::TypeTracker t2, string hostname, DataFlow::Node res, DataFlow::StepSummary summary
) {
DataFlow::StepSummary::step(vulnerableAddressTuple(t2, hostname), res, summary)
exists(DataFlow::TypeTracker t2 | result = vulnerableAddressTuple(t2, hostname).track(t2, t))
}
/** Gets a reference to a tuple for which the first element is a hostname that can be used to bind to all interfaces. */

View File

@@ -5,6 +5,7 @@
* @kind path-problem
* @precision low
* @problem.severity error
* @security-severity 5.9
* @tags security external/cwe/cwe-20
*/

View File

@@ -3,6 +3,7 @@
* @description Matching a URL or hostname against a regular expression that contains an unescaped dot as part of the hostname might match more hostnames than expected.
* @kind problem
* @problem.severity warning
* @security-severity 5.9
* @precision high
* @id py/incomplete-hostname-regexp
* @tags correctness

View File

@@ -3,6 +3,7 @@
* @description Security checks on the substrings of an unparsed URL are often vulnerable to bypassing.
* @kind problem
* @problem.severity warning
* @security-severity 5.9
* @precision high
* @id py/incomplete-url-substring-sanitization
* @tags correctness

View File

@@ -3,6 +3,7 @@
* @description Accessing paths influenced by users can allow an attacker to access unexpected resources.
* @kind path-problem
* @problem.severity error
* @security-severity 6.4
* @sub-severity high
* @precision high
* @id py/path-injection

View File

@@ -6,6 +6,7 @@
* @kind path-problem
* @id py/tarslip
* @problem.severity error
* @security-severity 6.4
* @precision medium
* @tags security
* external/cwe/cwe-022

View File

@@ -4,6 +4,7 @@
* user to change the meaning of the command.
* @kind path-problem
* @problem.severity error
* @security-severity 5.9
* @sub-severity high
* @precision high
* @id py/command-line-injection

View File

@@ -4,6 +4,7 @@
* cause a cross-site scripting vulnerability.
* @kind problem
* @problem.severity error
* @security-severity 2.9
* @precision medium
* @id py/jinja2/autoescape-false
* @tags security

View File

@@ -4,6 +4,7 @@
* allows for a cross-site scripting vulnerability.
* @kind path-problem
* @problem.severity error
* @security-severity 2.9
* @sub-severity high
* @precision high
* @id py/reflective-xss

View File

@@ -4,6 +4,7 @@
* malicious SQL code by the user.
* @kind path-problem
* @problem.severity error
* @security-severity 6.4
* @precision high
* @id py/sql-injection
* @tags security

View File

@@ -4,6 +4,7 @@
* code execution.
* @kind path-problem
* @problem.severity error
* @security-severity 10.0
* @sub-severity high
* @precision high
* @id py/code-injection

View File

@@ -5,6 +5,7 @@
* developing a subsequent exploit.
* @kind path-problem
* @problem.severity error
* @security-severity 3.6
* @precision high
* @id py/stack-trace-exposure
* @tags security

View File

@@ -3,6 +3,7 @@
* @description Running a Flask app in debug mode may allow an attacker to run arbitrary code through the Werkzeug debugger.
* @kind problem
* @problem.severity error
* @security-severity 6.4
* @precision high
* @id py/flask-debug
* @tags security

View File

@@ -3,6 +3,7 @@
* @description Accepting unknown host keys can allow man-in-the-middle attacks.
* @kind problem
* @problem.severity error
* @security-severity 5.2
* @precision high
* @id py/paramiko-missing-host-key-validation
* @tags security

View File

@@ -3,6 +3,7 @@
* @description Making a request without certificate validation can allow man-in-the-middle attacks.
* @kind problem
* @problem.severity error
* @security-severity 5.2
* @precision medium
* @id py/request-without-cert-validation
* @tags security

View File

@@ -4,6 +4,7 @@
* expose it to an attacker.
* @kind path-problem
* @problem.severity error
* @security-severity 5.9
* @precision high
* @id py/clear-text-logging-sensitive-data
* @tags security

View File

@@ -4,6 +4,7 @@
* attacker.
* @kind path-problem
* @problem.severity error
* @security-severity 5.9
* @precision high
* @id py/clear-text-storage-sensitive-data
* @tags security

View File

@@ -0,0 +1,25 @@
/**
* @name Use of weak cryptographic key
* @description Use of a cryptographic key that is too small may allow the encryption to be broken.
* @kind problem
* @problem.severity error
* @security-severity 5.2
* @precision high
* @id py/weak-crypto-key
* @tags security
* external/cwe/cwe-326
*/
import python
import semmle.python.Concepts
import semmle.python.dataflow.new.DataFlow
import semmle.python.filters.Tests
from Cryptography::PublicKey::KeyGeneration keyGen, int keySize, DataFlow::Node origin
where
keySize = keyGen.getKeySizeWithOrigin(origin) and
keySize < keyGen.minimumSecureKeySize() and
not origin.getScope().getScope*() instanceof TestScope
select keyGen,
"Creation of an " + keyGen.getName() + " key uses $@ bits, which is below " +
keyGen.minimumSecureKeySize() + " and considered breakable.", origin, keySize.toString()

View File

@@ -15,22 +15,28 @@
secure than it appears to be.
</p>
<p>
This query alerts on any use of a weak cryptographic algorithm, that is
not a hashing algorithm. Use of broken or weak cryptographic hash
functions are handled by the
<code>py/weak-sensitive-data-hashing</code> query.
</p>
</overview>
<recommendation>
<p>
Ensure that you use a strong, modern cryptographic
algorithm. Use at least AES-128 or RSA-2048 for
encryption, and SHA-2 or SHA-3 for secure hashing.
algorithm, such as AES-128 or RSA-2048.
</p>
</recommendation>
<example>
<p>
The following code uses the <code>pycrypto</code>
The following code uses the <code>pycryptodome</code>
library to encrypt some secret data. When you create a cipher using
<code>pycrypto</code> you must specify the encryption
<code>pycryptodome</code> you must specify the encryption
algorithm to use. The first example uses DES, which is an
older algorithm that is now considered weak. The second
example uses AES, which is a stronger modern algorithm.
@@ -39,8 +45,12 @@
<sample src="examples/broken_crypto.py" />
<p>
WARNING: Although the second example above is more robust,
pycrypto is no longer actively maintained so we recommend using <code>cryptography</code> instead.
NOTICE: the original
<code><a href="https://pypi.org/project/pycrypto/">pycrypto</a></code>
PyPI package that provided the <code>Crypto</code> module is not longer
actively maintained, so you should use the
<code><a href="https://pypi.org/project/pycryptodome/">pycryptodome</a></code>
PyPI package instead (which has a compatible API).
</p>
</example>

View File

@@ -1,8 +1,9 @@
/**
* @name Use of a broken or weak cryptographic algorithm
* @description Using broken or weak cryptographic algorithms can compromise security.
* @kind path-problem
* @kind problem
* @problem.severity warning
* @security-severity 5.2
* @precision high
* @id py/weak-cryptographic-algorithm
* @tags security
@@ -10,21 +11,15 @@
*/
import python
import semmle.python.security.Paths
import semmle.python.security.SensitiveData
import semmle.python.security.Crypto
import semmle.python.Concepts
class BrokenCryptoConfiguration extends TaintTracking::Configuration {
BrokenCryptoConfiguration() { this = "Broken crypto configuration" }
override predicate isSource(TaintTracking::Source source) {
source instanceof SensitiveDataSource
}
override predicate isSink(TaintTracking::Sink sink) { sink instanceof WeakCryptoSink }
}
from BrokenCryptoConfiguration config, TaintedPathSource src, TaintedPathSink sink
where config.hasFlowPath(src, sink)
select sink.getSink(), src, sink, "$@ is used in a broken or weak cryptographic algorithm.",
src.getSource(), "Sensitive data"
from Cryptography::CryptographicOperation operation, Cryptography::CryptographicAlgorithm algorithm
where
algorithm = operation.getAlgorithm() and
algorithm.isWeak() and
// `Cryptography::HashingAlgorithm` and `Cryptography::PasswordHashingAlgorithm` are
// handled by `py/weak-sensitive-data-hashing`
algorithm instanceof Cryptography::EncryptionAlgorithm
select operation,
"The cryptographic algorithm " + algorithm.getName() +
" is broken or weak, and should not be used."

View File

@@ -0,0 +1,103 @@
private import python
private import semmle.python.dataflow.new.DataFlow
import TlsLibraryModel
/**
* Configuration to determine the state of a context being used to create
* a connection. There is one configuration for each pair of `TlsLibrary` and `ProtocolVersion`,
* such that a single configuration only tracks contexts where a specific `ProtocolVersion` is allowed.
*
* The state is in terms of whether a specific protocol is allowed. This is
* either true or false when the context is created and can then be modified
* later by either restricting or unrestricting the protocol (see the predicates
* `isRestriction` and `isUnrestriction`).
*
* Since we are interested in the final state, we want the flow to start from
* the last unrestriction, so we disallow flow into unrestrictions. We also
* model the creation as an unrestriction of everything it allows, to account
* for the common case where the creation plays the role of "last unrestriction".
*
* Since we really want "the last unrestriction, not nullified by a restriction",
* we also disallow flow into restrictions.
*/
class InsecureContextConfiguration extends DataFlow::Configuration {
TlsLibrary library;
ProtocolVersion tracked_version;
InsecureContextConfiguration() {
this = library + "Allows" + tracked_version and
tracked_version.isInsecure()
}
ProtocolVersion getTrackedVersion() { result = tracked_version }
override predicate isSource(DataFlow::Node source) { this.isUnrestriction(source) }
override predicate isSink(DataFlow::Node sink) {
sink = library.connection_creation().getContext()
}
override predicate isBarrierIn(DataFlow::Node node) {
this.isRestriction(node)
or
this.isUnrestriction(node)
}
private predicate isRestriction(DataFlow::Node node) {
exists(ProtocolRestriction r |
r = library.protocol_restriction() and
r.getRestriction() = tracked_version
|
node = r.getContext()
)
}
private predicate isUnrestriction(DataFlow::Node node) {
exists(ProtocolUnrestriction pu |
pu = library.protocol_unrestriction() and
pu.getUnrestriction() = tracked_version
|
node = pu.getContext()
)
}
}
/**
* Holds if `conectionCreation` marks the creation of a connetion based on the contex
* found at `contextOrigin` and allowing `insecure_version`.
*
* `specific` is true iff the context is configured for a specific protocol version (`ssl.PROTOCOL_TLSv1_2`) rather
* than for a family of protocols (`ssl.PROTOCOL_TLS`).
*/
predicate unsafe_connection_creation_with_context(
DataFlow::Node connectionCreation, ProtocolVersion insecure_version, DataFlow::Node contextOrigin,
boolean specific
) {
// Connection created from a context allowing `insecure_version`.
exists(InsecureContextConfiguration c | c.hasFlow(contextOrigin, connectionCreation) |
insecure_version = c.getTrackedVersion() and
specific = false
)
or
// Connection created from a context specifying `insecure_version`.
exists(TlsLibrary l |
connectionCreation = l.insecure_connection_creation(insecure_version) and
contextOrigin = connectionCreation and
specific = true
)
}
/**
* Holds if `conectionCreation` marks the creation of a connetion witout reference to a context
* and allowing `insecure_version`.
*/
predicate unsafe_connection_creation_without_context(
DataFlow::CallCfgNode connectionCreation, string insecure_version
) {
exists(TlsLibrary l | connectionCreation = l.insecure_connection_creation(insecure_version))
}
/** Holds if `contextCreation` is creating a context tied to a specific insecure version. */
predicate unsafe_context_creation(DataFlow::CallCfgNode contextCreation, string insecure_version) {
exists(TlsLibrary l | contextCreation = l.insecure_context_creation(insecure_version))
}

View File

@@ -5,6 +5,7 @@
* @id py/insecure-default-protocol
* @kind problem
* @problem.severity warning
* @security-severity 5.2
* @precision high
* @tags security
* external/cwe/cwe-327

View File

@@ -13,8 +13,8 @@
<p>
Ensure that a modern, strong protocol is used. All versions of SSL,
and TLS 1.0 are known to be vulnerable to attacks. Using TLS 1.1 or
above is strongly recommended.
and TLS versions 1.0 and 1.1 are known to be vulnerable to attacks.
Using TLS 1.2 or above is strongly recommended.
</p>
</recommendation>
@@ -30,20 +30,35 @@
<p>
All cases should be updated to use a secure protocol, such as
<code>PROTOCOL_TLSv1_1</code>.
<code>PROTOCOL_TLSv1_2</code>.
</p>
<p>
Note that <code>ssl.wrap_socket</code> has been deprecated in
Python 3.7. A preferred alternative is to use
<code>ssl.SSLContext</code>, which is supported in Python 2.7.9 and
3.2 and later versions.
Python 3.7. The recommended alternatives are:
</p>
<ul>
<li><code>ssl.SSLContext</code> - supported in Python 2.7.9,
3.2, and later versions</li>
<li><code>ssl.create_default_context</code> - a convenience function,
supported in Python 3.4 and later versions.</li>
</ul>
<p>
Even when you use these alternatives, you should
ensure that a safe protocol is used. The following code illustrates
how to use flags (available since Python 3.2) or the `minimum_version`
field (favored since Python 3.7) to restrict the protocols accepted when
creating a connection.
</p>
<sample src="examples/secure_default_protocol.py" />
</example>
<references>
<li>Wikipedia: <a href="https://en.wikipedia.org/wiki/Transport_Layer_Security"> Transport Layer Security</a>.</li>
<li>Python 3 documentation: <a href="https://docs.python.org/3/library/ssl.html#ssl.SSLContext"> class ssl.SSLContext</a>.</li>
<li>Python 3 documentation: <a href="https://docs.python.org/3/library/ssl.html#ssl.wrap_socket"> ssl.wrap_socket</a>.</li>
<li>Python 3 documentation: <a href="https://docs.python.org/3/library/ssl.html#functions-constants-and-exceptions"> notes on context creation</a>.</li>
<li>Python 3 documentation: <a href="https://docs.python.org/3/library/ssl.html#ssl-security"> notes on security considerations</a>.</li>
<li>pyOpenSSL documentation: <a href="https://pyopenssl.org/en/stable/api/ssl.html"> An interface to the SSL-specific parts of OpenSSL</a>.</li>
</references>

View File

@@ -4,92 +4,84 @@
* @id py/insecure-protocol
* @kind problem
* @problem.severity warning
* @security-severity 5.2
* @precision high
* @tags security
* external/cwe/cwe-327
*/
import python
import semmle.python.dataflow.new.DataFlow
import FluentApiModel
private ModuleValue the_ssl_module() { result = Module::named("ssl") }
FunctionValue ssl_wrap_socket() { result = the_ssl_module().attr("wrap_socket") }
ClassValue ssl_Context_class() { result = the_ssl_module().attr("SSLContext") }
private ModuleValue the_pyOpenSSL_module() { result = Value::named("pyOpenSSL.SSL") }
ClassValue the_pyOpenSSL_Context_class() { result = Value::named("pyOpenSSL.SSL.Context") }
string insecure_version_name() {
// For `pyOpenSSL.SSL`
result = "SSLv2_METHOD" or
result = "SSLv23_METHOD" or
result = "SSLv3_METHOD" or
result = "TLSv1_METHOD" or
// For the `ssl` module
result = "PROTOCOL_SSLv2" or
result = "PROTOCOL_SSLv3" or
result = "PROTOCOL_SSLv23" or
result = "PROTOCOL_TLS" or
result = "PROTOCOL_TLSv1"
}
/*
* A syntactic check for cases where points-to analysis cannot infer the presence of
* a protocol constant, e.g. if it has been removed in later versions of the `ssl`
* library.
*/
bindingset[named_argument]
predicate probable_insecure_ssl_constant(
CallNode call, string insecure_version, string named_argument
) {
exists(ControlFlowNode arg |
arg = call.getArgByName(named_argument) or
arg = call.getArg(0)
|
arg.(AttrNode).getObject(insecure_version).pointsTo(the_ssl_module())
// Helper for pretty printer `configName`.
// This is a consequence of missing pretty priting.
// We do not want to evaluate our bespoke pretty printer
// for all `DataFlow::Node`s so we define a sub class of interesting ones.
class ProtocolConfiguration extends DataFlow::Node {
ProtocolConfiguration() {
unsafe_connection_creation_with_context(_, _, this, _)
or
arg.(NameNode).getId() = insecure_version and
exists(Import imp |
imp.getAnImportedModuleName() = "ssl" and
imp.getAName().getAsname().(Name).getId() = insecure_version
)
)
}
predicate unsafe_ssl_wrap_socket_call(
CallNode call, string method_name, string insecure_version, string named_argument
) {
(
call = ssl_wrap_socket().getACall() and
method_name = "deprecated method ssl.wrap_socket" and
named_argument = "ssl_version"
unsafe_connection_creation_without_context(this, _)
or
call = ssl_Context_class().getACall() and
named_argument = "protocol" and
method_name = "ssl.SSLContext"
) and
insecure_version = insecure_version_name() and
(
call.getArgByName(named_argument).pointsTo(the_ssl_module().attr(insecure_version))
unsafe_context_creation(this, _)
}
AstNode getNode() { result = this.asCfgNode().(CallNode).getFunction().getNode() }
}
// Helper for pretty printer `callName`.
// This is a consequence of missing pretty priting.
// We do not want to evaluate our bespoke pretty printer
// for all `AstNode`s so we define a sub class of interesting ones.
//
// Note that AstNode is abstract and AstNode_ is a library class, so
// we have to extend @py_ast_node.
class Nameable extends @py_ast_node {
Nameable() {
this = any(ProtocolConfiguration pc).getNode()
or
probable_insecure_ssl_constant(call, insecure_version, named_argument)
)
exists(Nameable attr | this = attr.(Attribute).getObject())
}
string toString() { result = "AstNode" }
}
predicate unsafe_pyOpenSSL_Context_call(CallNode call, string insecure_version) {
call = the_pyOpenSSL_Context_class().getACall() and
insecure_version = insecure_version_name() and
call.getArg(0).pointsTo(the_pyOpenSSL_module().attr(insecure_version))
}
from CallNode call, string method_name, string insecure_version
where
unsafe_ssl_wrap_socket_call(call, method_name, insecure_version, _)
string callName(Nameable call) {
result = call.(Name).getId()
or
unsafe_pyOpenSSL_Context_call(call, insecure_version) and method_name = "pyOpenSSL.SSL.Context"
select call,
"Insecure SSL/TLS protocol version " + insecure_version + " specified in call to " + method_name +
"."
exists(Attribute a | a = call | result = callName(a.getObject()) + "." + a.getName())
}
string configName(ProtocolConfiguration protocolConfiguration) {
result =
"call to " + callName(protocolConfiguration.asCfgNode().(CallNode).getFunction().getNode())
or
not protocolConfiguration.asCfgNode() instanceof CallNode and
not protocolConfiguration instanceof ContextCreation and
result = "context modification"
}
string verb(boolean specific) {
specific = true and result = "specified"
or
specific = false and result = "allowed"
}
from
DataFlow::Node connectionCreation, string insecure_version, DataFlow::Node protocolConfiguration,
boolean specific
where
unsafe_connection_creation_with_context(connectionCreation, insecure_version,
protocolConfiguration, specific)
or
unsafe_connection_creation_without_context(connectionCreation, insecure_version) and
protocolConfiguration = connectionCreation and
specific = true
or
unsafe_context_creation(protocolConfiguration, insecure_version) and
connectionCreation = protocolConfiguration and
specific = true
select connectionCreation,
"Insecure SSL/TLS protocol version " + insecure_version + " " + verb(specific) + " by $@ ",
protocolConfiguration, configName(protocolConfiguration)

View File

@@ -0,0 +1,83 @@
/**
* Provides modeling of SSL/TLS functionality of the `OpenSSL` module from the `pyOpenSSL` PyPI package.
* See https://www.pyopenssl.org/en/stable/
*/
private import python
private import semmle.python.ApiGraphs
import TlsLibraryModel
class PyOpenSSLContextCreation extends ContextCreation, DataFlow::CallCfgNode {
PyOpenSSLContextCreation() {
this = API::moduleImport("OpenSSL").getMember("SSL").getMember("Context").getACall()
}
override string getProtocol() {
exists(ControlFlowNode protocolArg, PyOpenSSL pyo |
protocolArg in [node.getArg(0), node.getArgByName("method")]
|
protocolArg =
[pyo.specific_version(result).getAUse(), pyo.unspecific_version(result).getAUse()]
.asCfgNode()
)
}
}
class ConnectionCall extends ConnectionCreation, DataFlow::CallCfgNode {
ConnectionCall() {
this = API::moduleImport("OpenSSL").getMember("SSL").getMember("Connection").getACall()
}
override DataFlow::CfgNode getContext() {
result.getNode() in [node.getArg(0), node.getArgByName("context")]
}
}
// This cannot be used to unrestrict,
// see https://www.pyopenssl.org/en/stable/api/ssl.html#OpenSSL.SSL.Context.set_options
class SetOptionsCall extends ProtocolRestriction, DataFlow::CallCfgNode {
SetOptionsCall() { node.getFunction().(AttrNode).getName() = "set_options" }
override DataFlow::CfgNode getContext() {
result.getNode() = node.getFunction().(AttrNode).getObject()
}
override ProtocolVersion getRestriction() {
API::moduleImport("OpenSSL").getMember("SSL").getMember("OP_NO_" + result).getAUse().asCfgNode() in [
node.getArg(0), node.getArgByName("options")
]
}
}
class UnspecificPyOpenSSLContextCreation extends PyOpenSSLContextCreation, UnspecificContextCreation {
UnspecificPyOpenSSLContextCreation() { library instanceof PyOpenSSL }
}
class PyOpenSSL extends TlsLibrary {
PyOpenSSL() { this = "pyOpenSSL" }
override string specific_version_name(ProtocolVersion version) { result = version + "_METHOD" }
override string unspecific_version_name(ProtocolFamily family) {
// `"TLS_METHOD"` is not actually available in pyOpenSSL yet, but should be coming soon..
result = family + "_METHOD"
}
override API::Node version_constants() { result = API::moduleImport("OpenSSL").getMember("SSL") }
override ContextCreation default_context_creation() { none() }
override ContextCreation specific_context_creation() {
result instanceof PyOpenSSLContextCreation
}
override DataFlow::Node insecure_connection_creation(ProtocolVersion version) { none() }
override ConnectionCreation connection_creation() { result instanceof ConnectionCall }
override ProtocolRestriction protocol_restriction() { result instanceof SetOptionsCall }
override ProtocolUnrestriction protocol_unrestriction() {
result instanceof UnspecificPyOpenSSLContextCreation
}
}

View File

@@ -0,0 +1,24 @@
# Current status (Feb 2021)
This should be kept up to date; the world is moving fast and protocols are being broken.
## Protocols
- All versions of SSL are insecure
- TLS 1.0 and TLS 1.1 are insecure
- TLS 1.2 have some issues. but TLS 1.3 is not widely supported
## Conection methods
- `ssl.wrap_socket` is creating insecure connections, use `SSLContext.wrap_socket` instead. [link](https://docs.python.org/3/library/ssl.html#ssl.wrap_socket)
> Deprecated since version 3.7: Since Python 3.2 and 2.7.9, it is recommended to use the `SSLContext.wrap_socket()` instead of `wrap_socket()`. The top-level function is limited and creates an insecure client socket without server name indication or hostname matching.
- Default constructors are fine, a fluent API is used to constrain possible protocols later.
## Current recomendation
TLS 1.2 or TLS 1.3
## Queries
- `InsecureProtocol` detects uses of insecure protocols.
- `InsecureDefaultProtocol` detect default constructions, this is no longer unsafe.

View File

@@ -0,0 +1,214 @@
/**
* Provides modeling of SSL/TLS functionality of the `ssl` module from the standard library.
* See https://docs.python.org/3.9/library/ssl.html
*/
private import python
private import semmle.python.ApiGraphs
import TlsLibraryModel
class SSLContextCreation extends ContextCreation, DataFlow::CallCfgNode {
SSLContextCreation() { this = API::moduleImport("ssl").getMember("SSLContext").getACall() }
override string getProtocol() {
exists(ControlFlowNode protocolArg, Ssl ssl |
protocolArg in [node.getArg(0), node.getArgByName("protocol")]
|
protocolArg =
[ssl.specific_version(result).getAUse(), ssl.unspecific_version(result).getAUse()]
.asCfgNode()
)
or
not exists(node.getAnArg()) and
result = "TLS"
}
}
class SSLDefaultContextCreation extends ContextCreation {
SSLDefaultContextCreation() {
this = API::moduleImport("ssl").getMember("create_default_context").getACall()
}
// Allowed insecure versions are "TLSv1" and "TLSv1_1"
// see https://docs.python.org/3/library/ssl.html#context-creation
override string getProtocol() { result = "TLS" }
}
/** Gets a reference to an `ssl.Context` instance. */
API::Node sslContextInstance() {
result = API::moduleImport("ssl").getMember(["SSLContext", "create_default_context"]).getReturn()
}
class WrapSocketCall extends ConnectionCreation, DataFlow::CallCfgNode {
WrapSocketCall() { this = sslContextInstance().getMember("wrap_socket").getACall() }
override DataFlow::Node getContext() {
result = this.getFunction().(DataFlow::AttrRead).getObject()
}
}
class OptionsAugOr extends ProtocolRestriction, DataFlow::CfgNode {
ProtocolVersion restriction;
OptionsAugOr() {
exists(AugAssign aa, AttrNode attr, Expr flag |
aa.getOperation().getOp() instanceof BitOr and
aa.getTarget() = attr.getNode() and
attr.getName() = "options" and
attr.getObject() = node and
flag = API::moduleImport("ssl").getMember("OP_NO_" + restriction).getAUse().asExpr() and
(
aa.getValue() = flag
or
impliesBitSet(aa.getValue(), flag, false, false)
)
)
}
override DataFlow::Node getContext() { result = this }
override ProtocolVersion getRestriction() { result = restriction }
}
class OptionsAugAndNot extends ProtocolUnrestriction, DataFlow::CfgNode {
ProtocolVersion restriction;
OptionsAugAndNot() {
exists(AugAssign aa, AttrNode attr, Expr flag, UnaryExpr notFlag |
aa.getOperation().getOp() instanceof BitAnd and
aa.getTarget() = attr.getNode() and
attr.getName() = "options" and
attr.getObject() = node and
notFlag.getOp() instanceof Invert and
notFlag.getOperand() = flag and
flag = API::moduleImport("ssl").getMember("OP_NO_" + restriction).getAUse().asExpr() and
(
aa.getValue() = notFlag
or
impliesBitSet(aa.getValue(), notFlag, true, true)
)
)
}
override DataFlow::Node getContext() { result = this }
override ProtocolVersion getUnrestriction() { result = restriction }
}
/**
* Holds if
* for every bit, _b_:
* `wholeHasBitSet` represents that _b_ is set in `whole`
* implies
* `partHasBitSet` represents that _b_ is set in `part`
*
* As an example take `whole` = `part1 & part2`. Then
* `impliesBitSet(whole, part1, true, true)` holds
* because for any bit in `whole`, if that bit is set it must also be set in `part1`.
*
* Similarly for `whole` = `part1 | part2`. Here
* `impliesBitSet(whole, part1, false, false)` holds
* because for any bit in `whole`, if that bit is not set, it cannot be set in `part1`.
*/
predicate impliesBitSet(BinaryExpr whole, Expr part, boolean partHasBitSet, boolean wholeHasBitSet) {
whole.getOp() instanceof BitAnd and
(
wholeHasBitSet = true and partHasBitSet = true and part in [whole.getLeft(), whole.getRight()]
or
wholeHasBitSet = true and
impliesBitSet([whole.getLeft(), whole.getRight()], part, partHasBitSet, wholeHasBitSet)
)
or
whole.getOp() instanceof BitOr and
(
wholeHasBitSet = false and partHasBitSet = false and part in [whole.getLeft(), whole.getRight()]
or
wholeHasBitSet = false and
impliesBitSet([whole.getLeft(), whole.getRight()], part, partHasBitSet, wholeHasBitSet)
)
}
class ContextSetVersion extends ProtocolRestriction, ProtocolUnrestriction, DataFlow::CfgNode {
ProtocolVersion restriction;
ContextSetVersion() {
exists(DataFlow::AttrWrite aw |
aw.getObject().asCfgNode() = node and
aw.getAttributeName() = "minimum_version" and
aw.getValue() =
API::moduleImport("ssl").getMember("TLSVersion").getMember(restriction).getAUse()
)
}
override DataFlow::Node getContext() { result = this }
override ProtocolVersion getRestriction() { result.lessThan(restriction) }
override ProtocolVersion getUnrestriction() {
restriction = result or restriction.lessThan(result)
}
}
class UnspecificSSLContextCreation extends SSLContextCreation, UnspecificContextCreation {
UnspecificSSLContextCreation() { library instanceof Ssl }
override ProtocolVersion getUnrestriction() {
result = UnspecificContextCreation.super.getUnrestriction() and
// These are turned off by default since Python 3.6
// see https://docs.python.org/3.6/library/ssl.html#ssl.SSLContext
not result in ["SSLv2", "SSLv3"]
}
}
class UnspecificSSLDefaultContextCreation extends SSLDefaultContextCreation, ProtocolUnrestriction {
override DataFlow::Node getContext() { result = this }
// see https://docs.python.org/3/library/ssl.html#ssl.create_default_context
override ProtocolVersion getUnrestriction() {
result in ["TLSv1", "TLSv1_1", "TLSv1_2", "TLSv1_3"]
}
}
class Ssl extends TlsLibrary {
Ssl() { this = "ssl" }
override string specific_version_name(ProtocolVersion version) { result = "PROTOCOL_" + version }
override string unspecific_version_name(ProtocolFamily family) {
family = "SSLv23" and result = "PROTOCOL_" + family
or
family = "TLS" and result = "PROTOCOL_" + family + ["", "_CLIENT", "_SERVER"]
}
override API::Node version_constants() { result = API::moduleImport("ssl") }
override ContextCreation default_context_creation() {
result instanceof SSLDefaultContextCreation
}
override ContextCreation specific_context_creation() { result instanceof SSLContextCreation }
override DataFlow::CallCfgNode insecure_connection_creation(ProtocolVersion version) {
result = API::moduleImport("ssl").getMember("wrap_socket").getACall() and
this.specific_version(version).getAUse() = result.getArgByName("ssl_version") and
version.isInsecure()
}
override ConnectionCreation connection_creation() { result instanceof WrapSocketCall }
override ProtocolRestriction protocol_restriction() {
result instanceof OptionsAugOr
or
result instanceof ContextSetVersion
}
override ProtocolUnrestriction protocol_unrestriction() {
result instanceof OptionsAugAndNot
or
result instanceof ContextSetVersion
or
result instanceof UnspecificSSLContextCreation
or
result instanceof UnspecificSSLDefaultContextCreation
}
}

View File

@@ -0,0 +1,137 @@
private import python
private import semmle.python.ApiGraphs
import Ssl
import PyOpenSSL
/**
* A specific protocol version of SSL or TLS.
*/
class ProtocolVersion extends string {
ProtocolVersion() { this in ["SSLv2", "SSLv3", "TLSv1", "TLSv1_1", "TLSv1_2", "TLSv1_3"] }
/** Gets a `ProtocolVersion` that is less than this `ProtocolVersion`, if any. */
predicate lessThan(ProtocolVersion version) {
this = "SSLv2" and version = "SSLv3"
or
this = "TLSv1" and version = ["TLSv1_1", "TLSv1_2", "TLSv1_3"]
or
this = ["TLSv1", "TLSv1_1"] and version = ["TLSv1_2", "TLSv1_3"]
or
this = ["TLSv1", "TLSv1_1", "TLSv1_2"] and version = "TLSv1_3"
}
/** Holds if this protocol version is known to be insecure. */
predicate isInsecure() { this in ["SSLv2", "SSLv3", "TLSv1", "TLSv1_1"] }
}
/** An unspecific protocol version */
class ProtocolFamily extends string {
ProtocolFamily() { this in ["SSLv23", "TLS"] }
}
/** The creation of a context. */
abstract class ContextCreation extends DataFlow::Node {
/** Gets the protocol version or family for this context. */
abstract string getProtocol();
}
/** The creation of a connection from a context. */
abstract class ConnectionCreation extends DataFlow::Node {
/** Gets the context used to create the connection. */
abstract DataFlow::Node getContext();
}
/** A context is being restricted on which protocols it can accepts. */
abstract class ProtocolRestriction extends DataFlow::Node {
/** Gets the context being restricted. */
abstract DataFlow::Node getContext();
/** Gets the protocol version being disallowed. */
abstract ProtocolVersion getRestriction();
}
/** A context is being relaxed on which protocols it can accepts. */
abstract class ProtocolUnrestriction extends DataFlow::Node {
/** Gets the context being relaxed. */
abstract DataFlow::Node getContext();
/** Gets the protocol version being allowed. */
abstract ProtocolVersion getUnrestriction();
}
/**
* A context is being created with a range of allowed protocols.
* This also serves as unrestricting these protocols.
*/
abstract class UnspecificContextCreation extends ContextCreation, ProtocolUnrestriction {
TlsLibrary library;
ProtocolFamily family;
UnspecificContextCreation() { this.getProtocol() = family }
override DataFlow::CfgNode getContext() { result = this }
override ProtocolVersion getUnrestriction() {
// There is only one family, the two names are aliases in OpenSSL.
// see https://github.com/openssl/openssl/blob/13888e797c5a3193e91d71e5f5a196a2d68d266f/include/openssl/ssl.h.in#L1953-L1955
family in ["SSLv23", "TLS"] and
// see https://docs.python.org/3/library/ssl.html#ssl-contexts
result in ["SSLv2", "SSLv3", "TLSv1", "TLSv1_1", "TLSv1_2", "TLSv1_3"]
}
}
/** A model of a SSL/TLS library. */
abstract class TlsLibrary extends string {
bindingset[this]
TlsLibrary() { any() }
/** The name of a specific protocol version. */
abstract string specific_version_name(ProtocolVersion version);
/** Gets a name, which is a member of `version_constants`, that can be used to specify the protocol family `family`. */
abstract string unspecific_version_name(ProtocolFamily family);
/** Gets an API node representing the module or class holding the version constants. */
abstract API::Node version_constants();
/** Gets an API node representing a specific protocol version. */
API::Node specific_version(ProtocolVersion version) {
result = version_constants().getMember(specific_version_name(version))
}
/** Gets an API node representing the protocol family `family`. */
API::Node unspecific_version(ProtocolFamily family) {
result = version_constants().getMember(unspecific_version_name(family))
}
/** Gets a creation of a context with a default protocol. */
abstract ContextCreation default_context_creation();
/** Gets a creation of a context with a specific protocol. */
abstract ContextCreation specific_context_creation();
/** Gets a creation of a context with a specific protocol version, known to be insecure. */
ContextCreation insecure_context_creation(ProtocolVersion version) {
result in [specific_context_creation(), default_context_creation()] and
result.getProtocol() = version and
version.isInsecure()
}
/** Gets a context that was created using `family`, known to have insecure instances. */
ContextCreation unspecific_context_creation(ProtocolFamily family) {
result in [specific_context_creation(), default_context_creation()] and
result.getProtocol() = family
}
/** Gets a dataflow node representing a connection being created in an insecure manner, not from a context. */
abstract DataFlow::Node insecure_connection_creation(ProtocolVersion version);
/** Gets a dataflow node representing a connection being created from a context. */
abstract ConnectionCreation connection_creation();
/** Gets a dataflow node representing a context being restricted on which protocols it can accepts. */
abstract ProtocolRestriction protocol_restriction();
/** Gets a dataflow node representing a context being relaxed on which protocols it can accepts. */
abstract ProtocolUnrestriction protocol_unrestriction();
}

View File

@@ -0,0 +1,104 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
Using a broken or weak cryptographic hash function can leave data
vulnerable, and should not be used in security related code.
</p>
<p>
A strong cryptographic hash function should be resistant to:
</p>
<ul>
<li>
pre-image attacks: if you know a hash value <code>h(x)</code>,
you should not be able to easily find the input <code>x</code>.
</li>
<li>
collision attacks: if you know a hash value <code>h(x)</code>,
you should not be able to easily find a different input <code>y</code>
with the same hash value <code>h(x) = h(y)</code>.
</li>
</ul>
<p>
In cases with a limited input space, such as for passwords, the hash
function also needs to be computationally expensive to be resistant to
brute-force attacks. Passwords should also have an unique salt applied
before hashing, but that is not considered by this query.
</p>
<p>
As an example, both MD5 and SHA-1 are known to be vulnerable to collision attacks.
</p>
<p>
Since it's OK to use a weak cryptographic hash function in a non-security
context, this query only alerts when these are used to hash sensitive
data (such as passwords, certificates, usernames).
</p>
<p>
Use of broken or weak cryptographic algorithms that are not hashing algorithms, is
handled by the <code>py/weak-cryptographic-algorithm</code> query.
</p>
</overview>
<recommendation>
<p>
Ensure that you use a strong, modern cryptographic hash function:
</p>
<ul>
<li>
such as Argon2, scrypt, bcrypt, or PBKDF2 for passwords and other data with limited input space.
</li>
<li>
such as SHA-2, or SHA-3 in other cases.
</li>
</ul>
</recommendation>
<example>
<p>
The following example shows two functions for checking whether the hash
of a certificate matches a known value -- to prevent tampering.
The first function uses MD5 that is known to be vulnerable to collision attacks.
The second function uses SHA-256 that is a strong cryptographic hashing function.
</p>
<sample src="examples/weak_certificate_hashing.py" />
</example>
<example>
<p>
The following example shows two functions for hashing passwords.
The first function uses SHA-256 to hash passwords. Although SHA-256 is a
strong cryptographic hash function, it is not suitable for password
hashing since it is not computationally expensive.
</p>
<sample src="examples/weak_password_hashing_bad.py" />
<p>
The second function uses Argon2 (through the <code>argon2-cffi</code>
PyPI package), which is a strong password hashing algorithm (and
includes a per-password salt by default).
</p>
<sample src="examples/weak_password_hashing_good.py" />
</example>
<references>
<li>OWASP: <a href="https://cheatsheetseries.owasp.org/cheatsheets/Password_Storage_Cheat_Sheet.html">Password Storage Cheat Sheet</a></li>
</references>
</qhelp>

View File

@@ -0,0 +1,48 @@
/**
* @name Use of a broken or weak cryptographic hashing algorithm on sensitive data
* @description Using broken or weak cryptographic hashing algorithms can compromise security.
* @kind path-problem
* @problem.severity warning
* @security-severity 5.9
* @precision high
* @id py/weak-sensitive-data-hashing
* @tags security
* external/cwe/cwe-327
* external/cwe/cwe-916
*/
import python
import semmle.python.security.dataflow.WeakSensitiveDataHashing
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import DataFlow::PathGraph
from
DataFlow::PathNode source, DataFlow::PathNode sink, string ending, string algorithmName,
string classification
where
exists(NormalHashFunction::Configuration config |
config.hasFlowPath(source, sink) and
algorithmName = sink.getNode().(NormalHashFunction::Sink).getAlgorithmName() and
classification = source.getNode().(NormalHashFunction::Source).getClassification() and
ending = "."
)
or
exists(ComputationallyExpensiveHashFunction::Configuration config |
config.hasFlowPath(source, sink) and
algorithmName = sink.getNode().(ComputationallyExpensiveHashFunction::Sink).getAlgorithmName() and
classification =
source.getNode().(ComputationallyExpensiveHashFunction::Source).getClassification() and
(
sink.getNode().(ComputationallyExpensiveHashFunction::Sink).isComputationallyExpensive() and
ending = "."
or
not sink.getNode().(ComputationallyExpensiveHashFunction::Sink).isComputationallyExpensive() and
ending =
" for " + classification +
" hashing, since it is not a computationally expensive hash function."
)
)
select sink.getNode(), source, sink,
"$@ is used in a hashing algorithm (" + algorithmName + ") that is insecure" + ending,
source.getNode(), "Sensitive data (" + classification + ")"

View File

@@ -0,0 +1,9 @@
import hashlib
def certificate_matches_known_hash_bad(certificate, known_hash):
hash = hashlib.md5(certificate).hexdigest() # BAD
return hash == known_hash
def certificate_matches_known_hash_good(certificate, known_hash):
hash = hashlib.sha256(certificate).hexdigest() # GOOD
return hash == known_hash

View File

@@ -0,0 +1,4 @@
import hashlib
def get_password_hash(password: str, salt: str):
return hashlib.sha256(password + salt).hexdigest() # BAD

View File

@@ -0,0 +1,9 @@
from argon2 import PasswordHasher
def get_initial_hash(password: str):
ph = PasswordHasher()
return ph.hash(password) # GOOD
def check_password(password: str, known_hash):
ph = PasswordHasher()
return ph.verify(known_hash, password) # GOOD

View File

@@ -4,6 +4,7 @@
* @kind problem
* @id py/insecure-temporary-file
* @problem.severity error
* @security-severity 5.9
* @sub-severity high
* @precision high
* @tags external/cwe/cwe-377

View File

@@ -4,6 +4,7 @@
* @kind path-problem
* @id py/unsafe-deserialization
* @problem.severity error
* @security-severity 5.9
* @sub-severity high
* @precision high
* @tags external/cwe/cwe-502

View File

@@ -4,6 +4,7 @@
* may cause redirection to malicious web sites.
* @kind path-problem
* @problem.severity error
* @security-severity 2.7
* @sub-severity low
* @id py/url-redirection
* @tags security

View File

@@ -4,6 +4,7 @@
* @kind problem
* @id py/overly-permissive-file
* @problem.severity warning
* @security-severity 5.9
* @sub-severity high
* @precision medium
* @tags external/cwe/cwe-732

View File

@@ -3,6 +3,7 @@
* @description Credentials are hard coded in the source code of the application.
* @kind path-problem
* @problem.severity error
* @security-severity 5.9
* @precision medium
* @id py/hardcoded-credentials
* @tags security

View File

@@ -5,6 +5,7 @@
* @tags security
* correctness
* @problem.severity error
* @security-severity 4.2
* @sub-severity high
* @precision low
* @id py/use-of-exec

View File

@@ -0,0 +1,13 @@
/**
* @name Total lines of Python code in the database
* @description The total number of lines of Python code across all files, including
* external libraries and auto-generated files. This is a useful metric of the size of a
* database. This query counts the lines of code, excluding whitespace or comments.
* @kind metric
* @tags summary
* @id py/summary/lines-of-code
*/
import python
select sum(Module m | | m.getMetrics().getNumberOfLinesOfCode())

View File

@@ -0,0 +1,22 @@
/**
* @name Total lines of user written Python code in the database
* @description The total number of lines of Python code from the source code directory,
* excluding auto-generated files. This query counts the lines of code, excluding
* whitespace or comments. Note: If external libraries are included in the codebase
* either in a checked-in virtual environment or as vendored code, that will currently
* be counted as user written code.
* @kind metric
* @tags summary
* lines-of-code
* @id py/summary/lines-of-user-code
*/
import python
import semmle.python.filters.GeneratedCode
select sum(Module m |
exists(m.getFile().getRelativePath()) and
not m.getFile() instanceof GeneratedFile
|
m.getMetrics().getNumberOfLinesOfCode()
)

View File

@@ -26,9 +26,8 @@ variable should be renamed to make the code easier to interpret.</p>
</example>
<references>
<li>J. Lusth, <i>The Art and Craft of Programming - Python Edition</i>, Section: Scope. University of Alabama, 2012. (<a href="http://troll.cs.ua.edu/ACP-PY/index_13.html">Published online</a>).</li>
<li>New Mexico Tech Computer Center: <a href="http://infohost.nmt.edu/tcc/help/pubs/python/web/global-statement.html">The global
statement: Declare access to a global name</a>.</li>
<li>J. Lusth, <i>The Art and Craft of Programming - Python Edition</i>, Section: Scope. University of Alabama, 2012. (<a href="https://web.archive.org/web/20190919091129/http://troll.cs.ua.edu/ACP-PY/index_13.html">Published online</a>).</li>
<li>Python Language Reference: <a href="http://docs.python.org/reference/simple_stmts.html#the-global-statement">The global statement</a>.</li>

View File

@@ -0,0 +1,30 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>A class name that begins with a lowercase letter does not follow standard
naming conventions. This decreases code readability. For example, <code>class background</code>.
</p>
</overview>
<recommendation>
<p>
Write the class name beginning with an uppercase letter. For example, <code>class Background</code>.
</p>
</recommendation>
<references>
<li>
Guido van Rossum, Barry Warsaw, Nick Coghlan <em>PEP 8 -- Style Guide for Python Code</em>
<a href="https://www.python.org/dev/peps/pep-0008/#class-names">Python Class Names</a>
</li>
</references>
</qhelp>

View File

@@ -0,0 +1,28 @@
/**
* @name Misnamed class
* @description A class name that begins with a lowercase letter decreases readability.
* @kind problem
* @problem.severity recommendation
* @id py/misnamed-class
* @tags maintainability
*/
import python
predicate lower_case_class(Class c) {
exists(string first_char |
first_char = c.getName().prefix(1) and
not first_char = first_char.toUpperCase()
)
}
from Class c
where
c.inSource() and
lower_case_class(c) and
not exists(Class c1 |
c1 != c and
c1.getLocation().getFile() = c.getLocation().getFile() and
lower_case_class(c1)
)
select c, "Class names should start in uppercase."

View File

@@ -0,0 +1,30 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>A function name that begins with an uppercase letter does not follow standard
naming conventions. This decreases code readability. For example, <code>Jump</code>.
</p>
</overview>
<recommendation>
<p>
Write the function name beginning with an lowercase letter. For example, <code>jump</code>.
</p>
</recommendation>
<references>
<li>
Guido van Rossum, Barry Warsaw, Nick Coghlan <em>PEP 8 -- Style Guide for Python Code</em>
<a href="https://www.python.org/dev/peps/pep-0008/#function-and-variable-names">Python Function and Variable Names</a>
</li>
</references>
</qhelp>

View File

@@ -0,0 +1,28 @@
/**
* @name Misnamed function
* @description A function name that begins with an uppercase letter decreases readability.
* @kind problem
* @problem.severity recommendation
* @id py/misnamed-function
* @tags maintainability
*/
import python
predicate upper_case_function(Function func) {
exists(string first_char |
first_char = func.getName().prefix(1) and
not first_char = first_char.toLowerCase()
)
}
from Function func
where
func.inSource() and
upper_case_function(func) and
not exists(Function func1 |
func1 != func and
func1.getLocation().getFile() = func.getLocation().getFile() and
upper_case_function(func1)
)
select func, "Function names should start in lowercase."

View File

@@ -3,6 +3,8 @@
* @description Binding a socket to all interfaces opens it up to traffic from any IPv4 address
* and is therefore associated with security risks.
* @kind problem
* @id py/old/bind-socket-all-network-interfaces
* @problem.severity error
*/
import python

View File

@@ -2,6 +2,8 @@
* @name OLD QUERY: Uncontrolled data used in path expression
* @description Accessing paths influenced by users can allow an attacker to access unexpected resources.
* @kind path-problem
* @problem.severity error
* @id py/old/path-injection
*/
import python

View File

@@ -3,6 +3,8 @@
* @description Using externally controlled strings in a command line may allow a malicious
* user to change the meaning of the command.
* @kind path-problem
* @problem.severity error
* @id py/old/command-line-injection
*/
import python

View File

@@ -3,6 +3,8 @@
* @description Writing user input directly to a web page
* allows for a cross-site scripting vulnerability.
* @kind path-problem
* @problem.severity error
* @id py/old/reflective-xss
*/
import python

View File

@@ -3,6 +3,8 @@
* @description Building a SQL query from user-controlled sources is vulnerable to insertion of
* malicious SQL code by the user.
* @kind path-problem
* @problem.severity error
* @id py/old/sql-injection
*/
import python

View File

@@ -1,8 +1,10 @@
/**
* @name Code injection
* @description Interpreting unsanitized user input as code allows a malicious user arbitrary
* @description OLD QUERY: Interpreting unsanitized user input as code allows a malicious user arbitrary
* code execution.
* @kind path-problem
* @problem.severity error
* @id py/old/code-injection
*/
import python

View File

@@ -1,12 +1,9 @@
/**
* @name Use of weak cryptographic key
* @name OLD QUERY: Use of weak cryptographic key
* @description Use of a cryptographic key that is too small may allow the encryption to be broken.
* @kind problem
* @problem.severity error
* @precision high
* @id py/weak-crypto-key
* @tags security
* external/cwe/cwe-326
* @id py/old/weak-crypto-key
*/
import python

View File

@@ -0,0 +1,28 @@
/**
* @name OLD QUERY: Use of a broken or weak cryptographic algorithm
* @description Using broken or weak cryptographic algorithms can compromise security.
* @kind path-problem
* @problem.severity warning
* @id py/old/weak-cryptographic-algorithm
* @deprecated
*/
import python
import semmle.python.security.Paths
import semmle.python.security.SensitiveData
import semmle.python.security.Crypto
class BrokenCryptoConfiguration extends TaintTracking::Configuration {
BrokenCryptoConfiguration() { this = "Broken crypto configuration" }
override predicate isSource(TaintTracking::Source source) {
source instanceof SensitiveDataSource
}
override predicate isSink(TaintTracking::Sink sink) { sink instanceof WeakCryptoSink }
}
from BrokenCryptoConfiguration config, TaintedPathSource src, TaintedPathSink sink
where config.hasFlowPath(src, sink)
select sink.getSink(), src, sink, "$@ is used in a broken or weak cryptographic algorithm.",
src.getSource(), "Sensitive data"

View File

@@ -2,6 +2,8 @@
* @name OLD QUERY: Deserializing untrusted input
* @description Deserializing user-controlled data may allow attackers to execute arbitrary code.
* @kind path-problem
* @id py/old/unsafe-deserialization
* @problem.severity error
*/
import python

View File

@@ -3,6 +3,8 @@
* @description URL redirection based on unvalidated user input
* may cause redirection to malicious web sites.
* @kind path-problem
* @problem.severity error
* @id py/old/url-redirection
*/
import python

View File

@@ -0,0 +1,28 @@
from django.conf.urls import url
from clickhouse_driver import Client
from clickhouse_driver import connect
from aioch import Client as aiochClient
class MyClient(Client):
def dummy(self):
return None
def show_user(request, username):
# BAD -- Untrusted user input is directly injected into the sql query using async library 'aioch'
aclient = aiochClient("localhost")
progress = await aclient.execute_with_progress("SELECT * FROM users WHERE username = '%s'" % username)
# BAD -- Untrusted user input is directly injected into the sql query using native client of library 'clickhouse_driver'
Client('localhost').execute("SELECT * FROM users WHERE username = '%s'" % username)
# GOOD -- query uses prepared statements
query = "SELECT * FROM users WHERE username = %(username)s"
Client('localhost').execute(query, {"username": username})
# BAD -- PEP249 interface
conn = connect('clickhouse://localhost')
cursor = conn.cursor()
cursor.execute("SELECT * FROM users WHERE username = '%s'" % username)
urlpatterns = [url(r'^users/(?P<username>[^/]+)$', show_user)]

View File

@@ -0,0 +1,59 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
If a database query (such as a SQL or NoSQL query) is built from
user-provided data without sufficient sanitization, a user
may be able to run malicious database queries.
</p>
</overview>
<recommendation>
<p>
Most database connector libraries offer a way of safely
embedding untrusted data into a query by means of query parameters
or prepared statements.
</p>
</recommendation>
<example>
<p>
In the following snippet, a user is fetched from a <code>ClickHouse</code> database
using five different queries. In the "BAD" cases the query is built directly from user-controlled data.
In the "GOOD" case the user-controlled data is safely embedded into the query by using query parameters.
</p>
<p>
In the first case, the query executed via aioch Client. aioch - is a module
for asynchronous queries to database.
</p>
<p>
In the second and third cases, the connection is established via `Client` class.
This class implement different method to execute a query.
</p>
<p>
In the forth case, good pattern is presented. Query parameters are send through
second dict-like argument.
</p>
<p>
In the fifth case, there is example of PEP249 interface usage.
</p>
<p>
In the sixth case, there is custom Class usge which is a subclass of default Client.
</p>
<sample src="ClickHouseSQLInjection.py" />
</example>
<references>
<li>Wikipedia: <a href="https://en.wikipedia.org/wiki/SQL_injection">SQL injection</a>.</li>
<li>OWASP: <a href="https://cheatsheetseries.owasp.org/cheatsheets/SQL_Injection_Prevention_Cheat_Sheet.html">SQL Injection Prevention Cheat Sheet</a>.</li>
</references>
</qhelp>

View File

@@ -0,0 +1,22 @@
/**
* @id py/yandex/clickhouse-sql-injection
* @name Clickhouse SQL query built from user-controlled sources
* @description Building a SQL query from user-controlled sources is vulnerable to insertion of
* malicious SQL code by the user.
* @kind path-problem
* @problem.severity error
* @precision high
* @tags security
* external/cwe/cwe-089
* external/owasp/owasp-a1
*/
import python
import experimental.semmle.python.frameworks.ClickHouseDriver
import semmle.python.security.dataflow.SqlInjection
import DataFlow::PathGraph
from SQLInjectionConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "This SQL query depends on $@.", source.getNode(),
"a user-provided value"

View File

@@ -0,0 +1,50 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>If an LDAP query or DN is built using string concatenation or string formatting, and the
components of the concatenation include user input without any proper sanitization, a user
is likely to be able to run malicious LDAP queries.</p>
</overview>
<recommendation>
<p>If user input must be included in an LDAP query or DN, it should be escaped to
avoid a malicious user providing special characters that change the meaning
of the query. In Python2, user input should be escaped with <code>ldap.dn.escape_dn_chars</code>
or <code>ldap.filter.escape_filter_chars</code>, while in Python3, user input should be escaped with
<code>ldap3.utils.dn.escape_rdn</code> or <code>ldap3.utils.conv.escape_filter_chars</code>
depending on the component tainted by the user. A good practice is to escape filter characters
that could change the meaning of the query (https://tools.ietf.org/search/rfc4515#section-3).</p>
</recommendation>
<example>
<p>In the following examples, the code accepts both <code>username</code> and <code>dc</code> from the user,
which it then uses to build a LDAP query and DN.</p>
<p>The first and the second example uses the unsanitized user input directly
in the search filter and DN for the LDAP query.
A malicious user could provide special characters to change the meaning of these
components, and search for a completely different set of values.</p>
<sample src="examples/example_bad1.py" />
<sample src="examples/example_bad2.py" />
<p>In the third and four example, the input provided by the user is sanitized before it is included in the search filter or DN.
This ensures the meaning of the query cannot be changed by a malicious user.</p>
<sample src="examples/example_good1.py" />
<sample src="examples/example_good2.py" />
</example>
<references>
<li>OWASP: <a href="https://cheatsheetseries.owasp.org/cheatsheets/LDAP_Injection_Prevention_Cheat_Sheet.html">LDAP Injection Prevention Cheat Sheet</a>.</li>
<li>OWASP: <a href="https://owasp.org/www-community/attacks/LDAP_Injection">LDAP Injection</a>.</li>
<li>SonarSource: <a href="https://rules.sonarsource.com/python/RSPEC-2078">RSPEC-2078</a>.</li>
<li>Python2: <a href="https://www.python-ldap.org/en/python-ldap-3.3.0/reference/ldap.html">LDAP Documentation</a>.</li>
<li>Python3: <a href="https://ldap3.readthedocs.io/en/latest/">LDAP Documentation</a>.</li>
<li>Wikipedia: <a href="https://en.wikipedia.org/wiki/LDAP_injection">LDAP injection</a>.</li>
<li>BlackHat: <a href="https://www.blackhat.com/presentations/bh-europe-08/Alonso-Parada/Whitepaper/bh-eu-08-alonso-parada-WP.pdf">LDAP Injection and Blind LDAP Injection</a>.</li>
<li>LDAP: <a href="https://ldap.com/2018/05/04/understanding-and-defending-against-ldap-injection-attacks/">Understanding and Defending Against LDAP Injection Attacks</a>.</li>
</references>
</qhelp>

View File

@@ -0,0 +1,21 @@
/**
* @name LDAP query built from user-controlled sources
* @description Building an LDAP query from user-controlled sources is vulnerable to insertion of
* malicious LDAP code by the user.
* @kind path-problem
* @problem.severity error
* @id py/ldap-injection
* @tags experimental
* security
* external/cwe/cwe-090
*/
// Determine precision above
import python
import experimental.semmle.python.security.injection.LDAP
import DataFlow::PathGraph
from LDAPInjectionFlowConfig config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "$@ LDAP query parameter comes from $@.", sink.getNode(),
"This", source.getNode(), "a user-provided value"

View File

@@ -0,0 +1,15 @@
from flask import request, Flask
import ldap
@app.route("/normal")
def normal():
unsafe_dc = request.args['dc']
unsafe_filter = request.args['username']
dn = "dc={}".format(unsafe_dc)
search_filter = "(user={})".format(unsafe_filter)
ldap_connection = ldap.initialize("ldap://127.0.0.1")
user = ldap_connection.search_s(
dn, ldap.SCOPE_SUBTREE, search_filter)

View File

@@ -0,0 +1,15 @@
from flask import request, Flask
import ldap3
@app.route("/normal")
def normal():
unsafe_dc = request.args['dc']
unsafe_filter = request.args['username']
dn = "dc={}".format(unsafe_dc)
search_filter = "(user={})".format(unsafe_filter)
srv = ldap3.Server('ldap://127.0.0.1')
conn = ldap3.Connection(srv, user=dn, auto_bind=True)
conn.search(dn, search_filter)

View File

@@ -0,0 +1,20 @@
from flask import request, Flask
import ldap
import ldap.filter
import ldap.dn
@app.route("/normal")
def normal():
unsafe_dc = request.args['dc']
unsafe_filter = request.args['username']
safe_dc = ldap.dn.escape_dn_chars(unsafe_dc)
safe_filter = ldap.filter.escape_filter_chars(unsafe_filter)
dn = "dc={}".format(safe_dc)
search_filter = "(user={})".format(safe_filter)
ldap_connection = ldap.initialize("ldap://127.0.0.1")
user = ldap_connection.search_s(
dn, ldap.SCOPE_SUBTREE, search_filter)

View File

@@ -0,0 +1,20 @@
from flask import request, Flask
import ldap3
from ldap3.utils.dn import escape_rdn
from ldap3.utils.conv import escape_filter_chars
@app.route("/normal")
def normal():
unsafe_dc = request.args['dc']
unsafe_filter = request.args['username']
safe_dc = escape_rdn(unsafe_dc)
safe_filter = escape_filter_chars(unsafe_filter)
dn = "dc={}".format(safe_dc)
search_filter = "(user={})".format(safe_filter)
srv = ldap3.Server('ldap://127.0.0.1')
conn = ldap3.Connection(srv, user=dn, auto_bind=True)
conn.search(dn, search_filter)

View File

@@ -0,0 +1,31 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>If an LDAP query doesn't carry any kind of authentication, anonymous binds causes an empty or None-set password
to result in a successful authentication.</p>
</overview>
<recommendation>
<p>Use a non-empty password while establishing an LDAP connection.</p>
</recommendation>
<example>
<p>In the following examples, the code builds a LDAP query whose execution carries no authentication or binds anonymously.</p>
<sample src="examples/auth_bad_2.py" />
<sample src="examples/auth_bad_3.py" />
<p>In the third and fourth examples, the authentication is established using a password.</p>
<sample src="examples/auth_good_2.py" />
<sample src="examples/auth_good_3.py" />
</example>
<references>
<li>SonarSource: <a href="https://rules.sonarsource.com/python/type/Vulnerability/RSPEC-4433">RSPEC-4433</a>.</li>
<li>Python2: <a href="https://www.python-ldap.org/en/python-ldap-3.3.0/reference/ldap.html">LDAP Documentation</a>.</li>
<li>Python3: <a href="https://ldap3.readthedocs.io/en/latest/">LDAP Documentation</a>.</li>
</references>
</qhelp>

View File

@@ -0,0 +1,31 @@
/**
* @name Improper LDAP Authentication
* @description A user-controlled query carries no authentication
* @kind problem
* @problem.severity warning
* @id py/improper-ldap-auth
* @tags experimental
* security
* external/cwe/cwe-287
*/
// Determine precision above
import python
import experimental.semmle.python.Concepts
import semmle.python.dataflow.new.DataFlow
predicate authenticatesImproperly(LDAPBind ldapBind) {
(
DataFlow::localFlow(DataFlow::exprNode(any(None noneName)), ldapBind.getPassword()) or
not exists(ldapBind.getPassword())
)
or
exists(StrConst emptyString |
emptyString.getText() = "" and
DataFlow::localFlow(DataFlow::exprNode(emptyString), ldapBind.getPassword())
)
}
from LDAPBind ldapBind
where authenticatesImproperly(ldapBind)
select ldapBind, "The following LDAP bind operation is executed without authentication"

View File

@@ -0,0 +1,14 @@
from flask import request, Flask
import ldap
import ldap.filter
import ldap.dn
@app.route("/bind_example")
def bind_example():
dn = "dc={}".format(ldap.dn.escape_dn_chars(request.args['dc']))
search_filter = "(user={})".format(ldap.filter.escape_filter_chars(request.args['search']))
ldap_connection = ldap.initialize("ldap://127.0.0.1:1337")
ldap_connection.bind('cn=root', "")
user = ldap_connection.search_s(dn, ldap.SCOPE_SUBTREE, search_filter)

View File

@@ -0,0 +1,13 @@
from ldap3 import Server, Connection, ALL
from flask import request, Flask
from ldap3.utils.dn import escape_rdn
from ldap3.utils.conv import escape_filter_chars
@app.route("/passwordNone")
def passwordNone():
dn = "dc={}".format(escape_rdn(request.args['dc']))
search_filter = "(user={})".format(escape_filter_chars(request.args['search']))
srv = Server('servername', get_info=ALL)
conn = Connection(srv, user='user_dn', password=None)
status, result, response, _ = conn.search(dn, search_filter)

View File

@@ -0,0 +1,14 @@
from flask import request, Flask
import ldap
import ldap.filter
import ldap.dn
@app.route("/bind_example")
def bind_example():
dn = "dc={}".format(ldap.dn.escape_dn_chars(request.args['dc']))
search_filter = "(user={})".format(ldap.filter.escape_filter_chars(request.args['search']))
ldap_connection = ldap.initialize("ldap://127.0.0.1:1337")
ldap_connection.bind('cn=root', "SecurePa$$!")
user = ldap_connection.search_s(dn, ldap.SCOPE_SUBTREE, search_filter)

View File

@@ -0,0 +1,14 @@
from ldap3 import Server, Connection, ALL
from flask import request, Flask
from ldap3.utils.dn import escape_rdn
from ldap3.utils.conv import escape_filter_chars
@app.route("/passwordFromEnv")
def passwordFromEnv():
dn = "dc={}".format(escape_rdn(request.args['dc']))
search_filter = "(user={})".format(escape_filter_chars(request.args['search']))
srv = Server('servername', get_info=ALL)
conn = Connection(srv, user='user_dn',
password="SecurePa$$!")
status, result, response, _ = conn.search(dn, search_filter)

View File

@@ -0,0 +1,45 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
Constructing a regular expression with unsanitized user input is dangerous as a malicious user may
be able to modify the meaning of the expression. In particular, such a user may be able to provide
a regular expression fragment that takes exponential time in the worst case, and use that to
perform a Denial of Service attack.
</p>
</overview>
<recommendation>
<p>
Before embedding user input into a regular expression, use a sanitization function such as
<code>re.escape</code> to escape meta-characters that have a special meaning regarding
regular expressions' syntax.
</p>
</recommendation>
<example>
<p>
The following examples are based on a simple Flask web server environment.
</p>
<p>
The following example shows a HTTP request parameter that is used to construct a regular expression
without sanitizing it first:
</p>
<sample src="re_bad.py" />
<p>
Instead, the request parameter should be sanitized first, for example using the function
<code>re.escape</code>. This ensures that the user cannot insert characters which have a
special meaning in regular expressions.
</p>
<sample src="re_good.py" />
</example>
<references>
<li>OWASP: <a href="https://www.owasp.org/index.php/Regular_expression_Denial_of_Service_-_ReDoS">Regular expression Denial of Service - ReDoS</a>.</li>
<li>Wikipedia: <a href="https://en.wikipedia.org/wiki/ReDoS">ReDoS</a>.</li>
<li>Python docs: <a href="https://docs.python.org/3/library/re.html">re</a>.</li>
<li>SonarSource: <a href="https://rules.sonarsource.com/python/type/Vulnerability/RSPEC-2631">RSPEC-2631</a>.</li>
</references>
</qhelp>

View File

@@ -0,0 +1,29 @@
/**
* @name Regular expression injection
* @description User input should not be used in regular expressions without first being escaped,
* otherwise a malicious user may be able to inject an expression that could require
* exponential time on certain inputs.
* @kind path-problem
* @problem.severity error
* @id py/regex-injection
* @tags security
* external/cwe/cwe-730
* external/cwe/cwe-400
*/
// determine precision above
import python
import experimental.semmle.python.security.injection.RegexInjection
import DataFlow::PathGraph
from
RegexInjectionFlowConfig config, DataFlow::PathNode source, DataFlow::PathNode sink,
RegexInjectionSink regexInjectionSink, Attribute methodAttribute
where
config.hasFlowPath(source, sink) and
regexInjectionSink = sink.getNode() and
methodAttribute = regexInjectionSink.getRegexMethod()
select sink.getNode(), source, sink,
"$@ regular expression is constructed from a $@ and executed by $@.", sink.getNode(), "This",
source.getNode(), "user-provided value", methodAttribute,
regexInjectionSink.getRegexModule() + "." + methodAttribute.getName()

View File

@@ -0,0 +1,15 @@
from flask import request, Flask
import re
@app.route("/direct")
def direct():
unsafe_pattern = request.args["pattern"]
re.search(unsafe_pattern, "")
@app.route("/compile")
def compile():
unsafe_pattern = request.args["pattern"]
compiled_pattern = re.compile(unsafe_pattern)
compiled_pattern.search("")

View File

@@ -0,0 +1,17 @@
from flask import request, Flask
import re
@app.route("/direct")
def direct():
unsafe_pattern = request.args['pattern']
safe_pattern = re.escape(unsafe_pattern)
re.search(safe_pattern, "")
@app.route("/compile")
def compile():
unsafe_pattern = request.args['pattern']
safe_pattern = re.escape(unsafe_pattern)
compiled_pattern = re.compile(safe_pattern)
compiled_pattern.search("")

View File

@@ -0,0 +1,178 @@
/**
* This version resides in the experimental area and provides a space for
* external contributors to place new concepts, keeping to our preferred
* structure while remaining in the experimental area.
*
* Provides abstract classes representing generic concepts such as file system
* access or system command execution, for which individual framework libraries
* provide concrete subclasses.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.TaintTracking
private import experimental.semmle.python.Frameworks
/** Provides classes for modeling Regular Expression-related APIs. */
module RegexExecution {
/**
* A data-flow node that executes a regular expression.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `RegexExecution` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets the argument containing the executed expression.
*/
abstract DataFlow::Node getRegexNode();
/**
* Gets the library used to execute the regular expression.
*/
abstract string getRegexModule();
}
}
/**
* A data-flow node that executes a regular expression.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `RegexExecution::Range` instead.
*/
class RegexExecution extends DataFlow::Node {
RegexExecution::Range range;
RegexExecution() { this = range }
DataFlow::Node getRegexNode() { result = range.getRegexNode() }
string getRegexModule() { result = range.getRegexModule() }
}
/** Provides classes for modeling Regular Expression escape-related APIs. */
module RegexEscape {
/**
* A data-flow node that escapes a regular expression.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `RegexEscape` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets the argument containing the escaped expression.
*/
abstract DataFlow::Node getRegexNode();
}
}
/**
* A data-flow node that escapes a regular expression.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `RegexEscape::Range` instead.
*/
class RegexEscape extends DataFlow::Node {
RegexEscape::Range range;
RegexEscape() { this = range }
DataFlow::Node getRegexNode() { result = range.getRegexNode() }
}
/** Provides classes for modeling LDAP query execution-related APIs. */
module LDAPQuery {
/**
* A data-flow node that collects methods executing a LDAP query.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `LDAPQuery` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets the argument containing the executed expression.
*/
abstract DataFlow::Node getQuery();
}
}
/**
* A data-flow node that collect methods executing a LDAP query.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `LDAPQuery::Range` instead.
*/
class LDAPQuery extends DataFlow::Node {
LDAPQuery::Range range;
LDAPQuery() { this = range }
/**
* Gets the argument containing the executed expression.
*/
DataFlow::Node getQuery() { result = range.getQuery() }
}
/** Provides classes for modeling LDAP components escape-related APIs. */
module LDAPEscape {
/**
* A data-flow node that collects functions escaping LDAP components.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `LDAPEscape` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets the argument containing the escaped expression.
*/
abstract DataFlow::Node getAnInput();
}
}
/**
* A data-flow node that collects functions escaping LDAP components.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `LDAPEscape::Range` instead.
*/
class LDAPEscape extends DataFlow::Node {
LDAPEscape::Range range;
LDAPEscape() { this = range }
/**
* Gets the argument containing the escaped expression.
*/
DataFlow::Node getAnInput() { result = range.getAnInput() }
}
/** Provides classes for modeling LDAP bind-related APIs. */
module LDAPBind {
/**
* A data-flow node that collects methods binding a LDAP connection.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `LDAPBind` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets the argument containing the binding expression.
*/
abstract DataFlow::Node getPassword();
}
}
/**
* A data-flow node that collects methods binding a LDAP connection.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `LDAPBind::Range` instead.
*/
class LDAPBind extends DataFlow::Node {
LDAPBind::Range range;
LDAPBind() { this = range }
DataFlow::Node getPassword() { result = range.getPassword() }
}

View File

@@ -0,0 +1,6 @@
/**
* Helper file that imports all framework modeling.
*/
private import experimental.semmle.python.frameworks.Stdlib
private import experimental.semmle.python.frameworks.LDAP

View File

@@ -0,0 +1,85 @@
/**
* Provides classes modeling security-relevant aspects of `clickhouse-driver` and `aioch` PyPI packages.
* See
* - https://pypi.org/project/clickhouse-driver/
* - https://pypi.org/project/aioch/
* - https://clickhouse-driver.readthedocs.io/en/latest/
*/
private import python
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.frameworks.PEP249
/**
* Provides models for `clickhouse-driver` and `aioch` PyPI packages.
* See
* - https://pypi.org/project/clickhouse-driver/
* - https://pypi.org/project/aioch/
* - https://clickhouse-driver.readthedocs.io/en/latest/
*/
module ClickHouseDriver {
/** Gets a reference to the `clickhouse_driver` module. */
API::Node clickhouse_driver() { result = API::moduleImport("clickhouse_driver") }
/** Gets a reference to the `aioch` module. This module allows to make async db queries. */
API::Node aioch() { result = API::moduleImport("aioch") }
/**
* `clickhouse_driver` implements PEP249,
* providing ways to execute SQL statements against a database.
*/
class ClickHouseDriverPEP249 extends PEP249ModuleApiNode {
ClickHouseDriverPEP249() { this = clickhouse_driver() }
}
module Client {
/** Gets a reference to a Client call. */
private DataFlow::Node client_ref() {
result = clickhouse_driver().getMember("Client").getASubclass*().getAUse()
or
result = aioch().getMember("Client").getASubclass*().getAUse()
}
/** A direct instantiation of `clickhouse_driver.Client`. */
private class ClientInstantiation extends DataFlow::CallCfgNode {
ClientInstantiation() { this.getFunction() = client_ref() }
}
/** Gets a reference to an instance of `clickhouse_driver.Client`. */
private DataFlow::LocalSourceNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof ClientInstantiation
or
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
}
/** Gets a reference to an instance of `clickhouse_driver.Client`. */
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
}
/** clickhouse_driver.Client execute methods */
private string execute_function() {
result in ["execute_with_progress", "execute", "execute_iter"]
}
/** Gets a reference to the `clickhouse_driver.Client.execute` method */
private DataFlow::LocalSourceNode clickhouse_execute(DataFlow::TypeTracker t) {
t.startInAttr(execute_function()) and
result = Client::instance()
or
exists(DataFlow::TypeTracker t2 | result = clickhouse_execute(t2).track(t2, t))
}
/** Gets a reference to the `clickhouse_driver.Client.execute` method */
DataFlow::Node clickhouse_execute() {
clickhouse_execute(DataFlow::TypeTracker::end()).flowsTo(result)
}
/** A call to the `clickhouse_driver.Client.execute` method */
private class ExecuteCall extends SqlExecution::Range, DataFlow::CallCfgNode {
ExecuteCall() { this.getFunction() = clickhouse_execute() }
override DataFlow::Node getSql() { result.asCfgNode() = node.getArg(0) }
}
}

View File

@@ -0,0 +1,195 @@
/**
* Provides classes modeling security-relevant aspects of the LDAP libraries.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.dataflow.new.RemoteFlowSources
private import experimental.semmle.python.Concepts
private import semmle.python.ApiGraphs
/**
* Provides models for Python's ldap-related libraries.
*/
private module LDAP {
/**
* Provides models for the `python-ldap` PyPI package (imported as `ldap`).
*
* See https://www.python-ldap.org/en/python-ldap-3.3.0/index.html
*/
private module LDAP2 {
/** Gets a reference to the `ldap` module. */
API::Node ldap() { result = API::moduleImport("ldap") }
/** Returns a `ldap` module instance */
API::Node ldapInitialize() { result = ldap().getMember("initialize") }
/** Gets a reference to a `ldap` operation. */
private DataFlow::LocalSourceNode ldapOperation(DataFlow::TypeTracker t) {
t.start() and
result.(DataFlow::AttrRead).getObject().getALocalSource() = ldapInitialize().getACall()
or
exists(DataFlow::TypeTracker t2 | result = ldapOperation(t2).track(t2, t))
}
/**
* List of `ldap` methods used to execute a query.
*
* See https://www.python-ldap.org/en/python-ldap-3.3.0/reference/ldap.html#functions
*/
private class LDAP2QueryMethods extends string {
LDAP2QueryMethods() {
this in ["search", "search_s", "search_st", "search_ext", "search_ext_s"]
}
}
/** Gets a reference to a `ldap` operation. */
private DataFlow::Node ldapOperation() {
ldapOperation(DataFlow::TypeTracker::end()).flowsTo(result)
}
/** Gets a reference to a `ldap` query. */
private DataFlow::Node ldapQuery() {
result = ldapOperation() and
result.(DataFlow::AttrRead).getAttributeName() instanceof LDAP2QueryMethods
}
/**
* A class to find `ldap` methods executing a query.
*
* See `LDAP2QueryMethods`
*/
private class LDAP2Query extends DataFlow::CallCfgNode, LDAPQuery::Range {
LDAP2Query() { this.getFunction() = ldapQuery() }
override DataFlow::Node getQuery() {
result in [this.getArg(0), this.getArg(2), this.getArgByName("filterstr")]
}
}
/**
* List of `ldap` methods used for binding.
*
* See https://www.python-ldap.org/en/python-ldap-3.3.0/reference/ldap.html#functions
*/
private class LDAP2BindMethods extends string {
LDAP2BindMethods() {
this in [
"bind", "bind_s", "simple_bind", "simple_bind_s", "sasl_interactive_bind_s",
"sasl_non_interactive_bind_s", "sasl_external_bind_s", "sasl_gssapi_bind_s"
]
}
}
/** Gets a reference to a `ldap` bind. */
private DataFlow::Node ldapBind() {
result = ldapOperation() and
result.(DataFlow::AttrRead).getAttributeName() instanceof LDAP2BindMethods
}
/**
* A class to find `ldap` methods binding a connection.
*
* See `LDAP2BindMethods`
*/
private class LDAP2Bind extends DataFlow::CallCfgNode, LDAPBind::Range {
LDAP2Bind() { this.getFunction() = ldapBind() }
override DataFlow::Node getPassword() {
result in [this.getArg(1), this.getArgByName("cred")]
}
}
/**
* A class to find calls to `ldap.dn.escape_dn_chars`.
*
* See https://github.com/python-ldap/python-ldap/blob/7ce471e238cdd9a4dd8d17baccd1c9e05e6f894a/Lib/ldap/dn.py#L17
*/
private class LDAP2EscapeDNCall extends DataFlow::CallCfgNode, LDAPEscape::Range {
LDAP2EscapeDNCall() { this = ldap().getMember("dn").getMember("escape_dn_chars").getACall() }
override DataFlow::Node getAnInput() { result = this.getArg(0) }
}
/**
* A class to find calls to `ldap.filter.escape_filter_chars`.
*
* See https://www.python-ldap.org/en/python-ldap-3.3.0/reference/ldap-filter.html#ldap.filter.escape_filter_chars
*/
private class LDAP2EscapeFilterCall extends DataFlow::CallCfgNode, LDAPEscape::Range {
LDAP2EscapeFilterCall() {
this = ldap().getMember("filter").getMember("escape_filter_chars").getACall()
}
override DataFlow::Node getAnInput() { result = this.getArg(0) }
}
}
/**
* Provides models for the `ldap3` PyPI package
*
* See https://pypi.org/project/ldap3/
*/
private module LDAP3 {
/** Gets a reference to the `ldap3` module. */
API::Node ldap3() { result = API::moduleImport("ldap3") }
/** Gets a reference to the `ldap3` `utils` module. */
API::Node ldap3Utils() { result = ldap3().getMember("utils") }
/** Returns a `ldap3` module `Server` instance */
API::Node ldap3Server() { result = ldap3().getMember("Server") }
/** Returns a `ldap3` module `Connection` instance */
API::Node ldap3Connection() { result = ldap3().getMember("Connection") }
/**
* A class to find `ldap3` methods executing a query.
*/
private class LDAP3Query extends DataFlow::CallCfgNode, LDAPQuery::Range {
LDAP3Query() {
this.getFunction().(DataFlow::AttrRead).getObject().getALocalSource() =
ldap3Connection().getACall() and
this.getFunction().(DataFlow::AttrRead).getAttributeName() = "search"
}
override DataFlow::Node getQuery() { result in [this.getArg(0), this.getArg(1)] }
}
/**
* A class to find `ldap3` methods binding a connection.
*/
class LDAP3Bind extends DataFlow::CallCfgNode, LDAPBind::Range {
LDAP3Bind() { this = ldap3Connection().getACall() }
override DataFlow::Node getPassword() {
result in [this.getArg(2), this.getArgByName("password")]
}
}
/**
* A class to find calls to `ldap3.utils.dn.escape_rdn`.
*
* See https://github.com/cannatag/ldap3/blob/4d33166f0869b929f59c6e6825a1b9505eb99967/ldap3/utils/dn.py#L390
*/
private class LDAP3EscapeDNCall extends DataFlow::CallCfgNode, LDAPEscape::Range {
LDAP3EscapeDNCall() { this = ldap3Utils().getMember("dn").getMember("escape_rdn").getACall() }
override DataFlow::Node getAnInput() { result = this.getArg(0) }
}
/**
* A class to find calls to `ldap3.utils.conv.escape_filter_chars`.
*
* See https://github.com/cannatag/ldap3/blob/4d33166f0869b929f59c6e6825a1b9505eb99967/ldap3/utils/conv.py#L91
*/
private class LDAP3EscapeFilterCall extends DataFlow::CallCfgNode, LDAPEscape::Range {
LDAP3EscapeFilterCall() {
this = ldap3Utils().getMember("conv").getMember("escape_filter_chars").getACall()
}
override DataFlow::Node getAnInput() { result = this.getArg(0) }
}
}
}

View File

@@ -0,0 +1,100 @@
/**
* Provides classes modeling security-relevant aspects of the standard libraries.
* Note: some modeling is done internally in the dataflow/taint tracking implementation.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.dataflow.new.RemoteFlowSources
private import experimental.semmle.python.Concepts
private import semmle.python.ApiGraphs
/**
* Provides models for Python's `re` library.
*
* See https://docs.python.org/3/library/re.html
*/
private module Re {
/**
* List of `re` methods immediately executing an expression.
*
* See https://docs.python.org/3/library/re.html#module-contents
*/
private class RegexExecutionMethods extends string {
RegexExecutionMethods() {
this in ["match", "fullmatch", "search", "split", "findall", "finditer", "sub", "subn"]
}
}
/**
* A class to find `re` methods immediately executing an expression.
*
* See `RegexExecutionMethods`
*/
private class DirectRegex extends DataFlow::CallCfgNode, RegexExecution::Range {
DataFlow::Node regexNode;
DirectRegex() {
this = API::moduleImport("re").getMember(any(RegexExecutionMethods m)).getACall() and
regexNode = this.getArg(0)
}
override DataFlow::Node getRegexNode() { result = regexNode }
override string getRegexModule() { result = "re" }
}
/**
* A class to find `re` methods immediately executing a compiled expression by `re.compile`.
*
* Given the following example:
*
* ```py
* pattern = re.compile(input)
* pattern.match(s)
* ```
*
* This class will identify that `re.compile` compiles `input` and afterwards
* executes `re`'s `match`. As a result, `this` will refer to `pattern.match(s)`
* and `this.getRegexNode()` will return the node for `input` (`re.compile`'s first argument)
*
*
* See `RegexExecutionMethods`
*
* See https://docs.python.org/3/library/re.html#regular-expression-objects
*/
private class CompiledRegex extends DataFlow::CallCfgNode, RegexExecution::Range {
DataFlow::Node regexNode;
CompiledRegex() {
exists(DataFlow::CallCfgNode patternCall, DataFlow::AttrRead reMethod |
this.getFunction() = reMethod and
patternCall = API::moduleImport("re").getMember("compile").getACall() and
patternCall.flowsTo(reMethod.getObject()) and
reMethod.getAttributeName() instanceof RegexExecutionMethods and
regexNode = patternCall.getArg(0)
)
}
override DataFlow::Node getRegexNode() { result = regexNode }
override string getRegexModule() { result = "re" }
}
/**
* A class to find `re` methods escaping an expression.
*
* See https://docs.python.org/3/library/re.html#re.escape
*/
class ReEscape extends DataFlow::CallCfgNode, RegexEscape::Range {
DataFlow::Node regexNode;
ReEscape() {
this = API::moduleImport("re").getMember("escape").getACall() and
regexNode = this.getArg(0)
}
override DataFlow::Node getRegexNode() { result = regexNode }
}
}

Some files were not shown because too many files have changed in this diff Show More