mirror of
https://github.com/github/codeql.git
synced 2026-04-30 11:15:13 +02:00
Merge branch 'main' into jorgectf/python/deserialization
This commit is contained in:
@@ -1,20 +0,0 @@
|
||||
/**
|
||||
* Contains customizations to the standard library.
|
||||
*
|
||||
* This module is imported by `python.qll`, so any customizations defined here automatically
|
||||
* apply to all queries.
|
||||
*
|
||||
* Typical examples of customizations include adding new subclasses of abstract classes such as
|
||||
* the `RemoteFlowSource::Range` and `AdditionalTaintStep` classes associated with the security
|
||||
* queries to model frameworks that are not covered by the standard library.
|
||||
*/
|
||||
|
||||
import python
|
||||
/* General import that is useful */
|
||||
// import semmle.python.dataflow.new.DataFlow
|
||||
//
|
||||
/* for extending `TaintTracking::AdditionalTaintStep` */
|
||||
// import semmle.python.dataflow.new.TaintTracking
|
||||
//
|
||||
/* for extending `RemoteFlowSource::Range` */
|
||||
// import semmle.python.dataflow.new.RemoteFlowSources
|
||||
@@ -4,18 +4,22 @@
|
||||
* @kind problem
|
||||
* @tags security
|
||||
* correctness
|
||||
* security/cwe/cwe-94
|
||||
* security/cwe/cwe-95
|
||||
* @problem.severity error
|
||||
* @security-severity 9.8
|
||||
* @sub-severity high
|
||||
* @precision high
|
||||
* @id py/use-of-input
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.dataflow.new.DataFlow
|
||||
import semmle.python.ApiGraphs
|
||||
|
||||
from CallNode call, Context context, ControlFlowNode func
|
||||
from DataFlow::CallCfgNode call
|
||||
where
|
||||
context.getAVersion().includes(2, _) and
|
||||
call.getFunction() = func and
|
||||
func.pointsTo(context, Value::named("input"), _) and
|
||||
not func.pointsTo(context, Value::named("raw_input"), _)
|
||||
major_version() = 2 and
|
||||
call = API::builtin("input").getACall() and
|
||||
call != API::builtin("raw_input").getACall()
|
||||
select call, "The unsafe built-in function 'input' is used in Python 2."
|
||||
|
||||
@@ -12,88 +12,12 @@
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.security.Paths
|
||||
import semmle.python.functions.ModificationOfParameterWithDefault
|
||||
import DataFlow::PathGraph
|
||||
|
||||
predicate safe_method(string name) {
|
||||
name = "count" or
|
||||
name = "index" or
|
||||
name = "copy" or
|
||||
name = "get" or
|
||||
name = "has_key" or
|
||||
name = "items" or
|
||||
name = "keys" or
|
||||
name = "values" or
|
||||
name = "iteritems" or
|
||||
name = "iterkeys" or
|
||||
name = "itervalues" or
|
||||
name = "__contains__" or
|
||||
name = "__getitem__" or
|
||||
name = "__getattribute__"
|
||||
}
|
||||
|
||||
/** Gets the truthiness (non emptyness) of the default of `p` if that value is mutable */
|
||||
private boolean mutableDefaultValue(Parameter p) {
|
||||
exists(Dict d | p.getDefault() = d |
|
||||
exists(d.getAKey()) and result = true
|
||||
or
|
||||
not exists(d.getAKey()) and result = false
|
||||
)
|
||||
or
|
||||
exists(List l | p.getDefault() = l |
|
||||
exists(l.getAnElt()) and result = true
|
||||
or
|
||||
not exists(l.getAnElt()) and result = false
|
||||
)
|
||||
}
|
||||
|
||||
class NonEmptyMutableValue extends TaintKind {
|
||||
NonEmptyMutableValue() { this = "non-empty mutable value" }
|
||||
}
|
||||
|
||||
class EmptyMutableValue extends TaintKind {
|
||||
EmptyMutableValue() { this = "empty mutable value" }
|
||||
|
||||
override boolean booleanValue() { result = false }
|
||||
}
|
||||
|
||||
class MutableDefaultValue extends TaintSource {
|
||||
boolean nonEmpty;
|
||||
|
||||
MutableDefaultValue() { nonEmpty = mutableDefaultValue(this.(NameNode).getNode()) }
|
||||
|
||||
override string toString() { result = "mutable default value" }
|
||||
|
||||
override predicate isSourceOf(TaintKind kind) {
|
||||
nonEmpty = false and kind instanceof EmptyMutableValue
|
||||
or
|
||||
nonEmpty = true and kind instanceof NonEmptyMutableValue
|
||||
}
|
||||
}
|
||||
|
||||
private ClassValue mutable_class() {
|
||||
result = Value::named("list") or
|
||||
result = Value::named("dict")
|
||||
}
|
||||
|
||||
class Mutation extends TaintSink {
|
||||
Mutation() {
|
||||
exists(AugAssign a | a.getTarget().getAFlowNode() = this)
|
||||
or
|
||||
exists(Call c, Attribute a | c.getFunc() = a |
|
||||
a.getObject().getAFlowNode() = this and
|
||||
not safe_method(a.getName()) and
|
||||
this.(ControlFlowNode).pointsTo().getClass() = mutable_class()
|
||||
)
|
||||
}
|
||||
|
||||
override predicate sinks(TaintKind kind) {
|
||||
kind instanceof EmptyMutableValue
|
||||
or
|
||||
kind instanceof NonEmptyMutableValue
|
||||
}
|
||||
}
|
||||
|
||||
from TaintedPathSource src, TaintedPathSink sink
|
||||
where src.flowsTo(sink)
|
||||
select sink.getSink(), src, sink, "$@ flows to here and is mutated.", src.getSource(),
|
||||
from
|
||||
ModificationOfParameterWithDefault::Configuration config, DataFlow::PathNode source,
|
||||
DataFlow::PathNode sink
|
||||
where config.hasFlowPath(source, sink)
|
||||
select sink.getNode(), source, sink, "$@ flows to here and is mutated.", source.getNode(),
|
||||
"Default value"
|
||||
|
||||
@@ -9,7 +9,7 @@ information being thrown away.</p>
|
||||
|
||||
<p>A return value is considered to be trivial if it is <code>None</code> or it is a parameter (parameters, usually <code>self</code> are often
|
||||
returned to assist with method chaining, but can be ignored).
|
||||
A return value is also assumed to be trivial if it is ignored for 75% or more of calls.
|
||||
A return value is also assumed to be trivial if it is ignored for more than 25% of calls.
|
||||
</p>
|
||||
|
||||
</overview>
|
||||
|
||||
@@ -4,7 +4,9 @@
|
||||
* and is therefore associated with security risks.
|
||||
* @kind problem
|
||||
* @tags security
|
||||
* external/cwe/cwe-200
|
||||
* @problem.severity error
|
||||
* @security-severity 6.5
|
||||
* @sub-severity low
|
||||
* @precision high
|
||||
* @id py/bind-socket-all-network-interfaces
|
||||
@@ -25,7 +27,7 @@ private string vulnerableHostname() {
|
||||
}
|
||||
|
||||
/** Gets a reference to a hostname that can be used to bind to all interfaces. */
|
||||
private DataFlow::LocalSourceNode vulnerableHostnameRef(DataFlow::TypeTracker t, string hostname) {
|
||||
private DataFlow::TypeTrackingNode vulnerableHostnameRef(DataFlow::TypeTracker t, string hostname) {
|
||||
t.start() and
|
||||
exists(StrConst allInterfacesStrConst | hostname = vulnerableHostname() |
|
||||
allInterfacesStrConst.getText() = hostname and
|
||||
@@ -41,7 +43,7 @@ DataFlow::Node vulnerableHostnameRef(string hostname) {
|
||||
}
|
||||
|
||||
/** Gets a reference to a tuple for which the first element is a hostname that can be used to bind to all interfaces. */
|
||||
private DataFlow::LocalSourceNode vulnerableAddressTuple(DataFlow::TypeTracker t, string hostname) {
|
||||
private DataFlow::TypeTrackingNode vulnerableAddressTuple(DataFlow::TypeTracker t, string hostname) {
|
||||
t.start() and
|
||||
result.asExpr() = any(Tuple tup | tup.getElt(0) = vulnerableHostnameRef(hostname).asExpr())
|
||||
or
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
* @kind path-problem
|
||||
* @precision low
|
||||
* @problem.severity error
|
||||
* @security-severity 7.8
|
||||
* @tags security external/cwe/cwe-20
|
||||
*/
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
* @description Matching a URL or hostname against a regular expression that contains an unescaped dot as part of the hostname might match more hostnames than expected.
|
||||
* @kind problem
|
||||
* @problem.severity warning
|
||||
* @security-severity 7.8
|
||||
* @precision high
|
||||
* @id py/incomplete-hostname-regexp
|
||||
* @tags correctness
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
* @description Security checks on the substrings of an unparsed URL are often vulnerable to bypassing.
|
||||
* @kind problem
|
||||
* @problem.severity warning
|
||||
* @security-severity 7.8
|
||||
* @precision high
|
||||
* @id py/incomplete-url-substring-sanitization
|
||||
* @tags correctness
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
* @description Accessing paths influenced by users can allow an attacker to access unexpected resources.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @security-severity 7.5
|
||||
* @sub-severity high
|
||||
* @precision high
|
||||
* @id py/path-injection
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
* @kind path-problem
|
||||
* @id py/tarslip
|
||||
* @problem.severity error
|
||||
* @security-severity 7.5
|
||||
* @precision medium
|
||||
* @tags security
|
||||
* external/cwe/cwe-022
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
* user to change the meaning of the command.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @security-severity 9.8
|
||||
* @sub-severity high
|
||||
* @precision high
|
||||
* @id py/command-line-injection
|
||||
@@ -18,7 +19,7 @@ import python
|
||||
import semmle.python.security.dataflow.CommandInjection
|
||||
import DataFlow::PathGraph
|
||||
|
||||
from CommandInjectionConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
|
||||
from CommandInjection::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
|
||||
where config.hasFlowPath(source, sink)
|
||||
select sink.getNode(), source, sink, "This command depends on $@.", source.getNode(),
|
||||
"a user-provided value"
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
* cause a cross-site scripting vulnerability.
|
||||
* @kind problem
|
||||
* @problem.severity error
|
||||
* @security-severity 6.1
|
||||
* @precision medium
|
||||
* @id py/jinja2/autoescape-false
|
||||
* @tags security
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
* allows for a cross-site scripting vulnerability.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @security-severity 6.1
|
||||
* @sub-severity high
|
||||
* @precision high
|
||||
* @id py/reflective-xss
|
||||
@@ -16,7 +17,7 @@ import python
|
||||
import semmle.python.security.dataflow.ReflectedXSS
|
||||
import DataFlow::PathGraph
|
||||
|
||||
from ReflectedXssConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
|
||||
from ReflectedXSS::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
|
||||
where config.hasFlowPath(source, sink)
|
||||
select sink.getNode(), source, sink, "Cross-site scripting vulnerability due to $@.",
|
||||
source.getNode(), "a user-provided value"
|
||||
|
||||
@@ -9,6 +9,13 @@ If a database query (such as a SQL or NoSQL query) is built from
|
||||
user-provided data without sufficient sanitization, a user
|
||||
may be able to run malicious database queries.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
This also includes using the <code>TextClause</code> class in the
|
||||
<code><a href="https://pypi.org/project/SQLAlchemy/">SQLAlchemy</a></code> PyPI package,
|
||||
which is used to represent a literal SQL fragment and is inserted directly into the
|
||||
final SQL when used in a query built using the ORM.
|
||||
</p>
|
||||
</overview>
|
||||
|
||||
<recommendation>
|
||||
@@ -52,5 +59,6 @@ vulnerable to SQL injection attacks. In this example, if <code>username</code> w
|
||||
<references>
|
||||
<li>Wikipedia: <a href="https://en.wikipedia.org/wiki/SQL_injection">SQL injection</a>.</li>
|
||||
<li>OWASP: <a href="https://cheatsheetseries.owasp.org/cheatsheets/SQL_Injection_Prevention_Cheat_Sheet.html">SQL Injection Prevention Cheat Sheet</a>.</li>
|
||||
<li><a href="https://docs.sqlalchemy.org/en/14/core/sqlelement.html#sqlalchemy.sql.expression.text.params.text">SQLAlchemy documentation for TextClause</a>.</li>
|
||||
</references>
|
||||
</qhelp>
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
* malicious SQL code by the user.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @security-severity 8.8
|
||||
* @precision high
|
||||
* @id py/sql-injection
|
||||
* @tags security
|
||||
@@ -15,7 +16,7 @@ import python
|
||||
import semmle.python.security.dataflow.SqlInjection
|
||||
import DataFlow::PathGraph
|
||||
|
||||
from SQLInjectionConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
|
||||
from SqlInjection::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
|
||||
where config.hasFlowPath(source, sink)
|
||||
select sink.getNode(), source, sink, "This SQL query depends on $@.", source.getNode(),
|
||||
"a user-provided value"
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
* code execution.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @security-severity 9.3
|
||||
* @sub-severity high
|
||||
* @precision high
|
||||
* @id py/code-injection
|
||||
@@ -18,7 +19,7 @@ import python
|
||||
import semmle.python.security.dataflow.CodeInjection
|
||||
import DataFlow::PathGraph
|
||||
|
||||
from CodeInjectionConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
|
||||
from CodeInjection::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
|
||||
where config.hasFlowPath(source, sink)
|
||||
select sink.getNode(), source, sink, "$@ flows to here and is interpreted as code.",
|
||||
source.getNode(), "A user-provided value"
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
* developing a subsequent exploit.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @security-severity 5.4
|
||||
* @precision high
|
||||
* @id py/stack-trace-exposure
|
||||
* @tags security
|
||||
@@ -16,7 +17,7 @@ import python
|
||||
import semmle.python.security.dataflow.StackTraceExposure
|
||||
import DataFlow::PathGraph
|
||||
|
||||
from StackTraceExposureConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
|
||||
from StackTraceExposure::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
|
||||
where config.hasFlowPath(source, sink)
|
||||
select sink.getNode(), source, sink, "$@ may be exposed to an external user", source.getNode(),
|
||||
"Error information"
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
* @description Running a Flask app in debug mode may allow an attacker to run arbitrary code through the Werkzeug debugger.
|
||||
* @kind problem
|
||||
* @problem.severity error
|
||||
* @security-severity 7.5
|
||||
* @precision high
|
||||
* @id py/flask-debug
|
||||
* @tags security
|
||||
@@ -16,7 +17,7 @@ import semmle.python.ApiGraphs
|
||||
import semmle.python.frameworks.Flask
|
||||
|
||||
/** Gets a reference to a truthy literal. */
|
||||
private DataFlow::LocalSourceNode truthyLiteral(DataFlow::TypeTracker t) {
|
||||
private DataFlow::TypeTrackingNode truthyLiteral(DataFlow::TypeTracker t) {
|
||||
t.start() and
|
||||
result.asExpr().(ImmutableLiteral).booleanValue() = true
|
||||
or
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
* @description Accepting unknown host keys can allow man-in-the-middle attacks.
|
||||
* @kind problem
|
||||
* @problem.severity error
|
||||
* @security-severity 7.5
|
||||
* @precision high
|
||||
* @id py/paramiko-missing-host-key-validation
|
||||
* @tags security
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
<p>
|
||||
Encryption is key to the security of most, if not all, online communication.
|
||||
Using Transport Layer Security (TLS) can ensure that communication cannot be interrupted by an interloper.
|
||||
For this reason, is is unwise to disable the verification that TLS provides.
|
||||
For this reason, it is unwise to disable the verification that TLS provides.
|
||||
Functions in the <code>requests</code> module provide verification by default, and it is only when
|
||||
explicitly turned off using <code>verify=False</code> that no verification occurs.
|
||||
</p>
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
* @description Making a request without certificate validation can allow man-in-the-middle attacks.
|
||||
* @kind problem
|
||||
* @problem.severity error
|
||||
* @security-severity 7.5
|
||||
* @precision medium
|
||||
* @id py/request-without-cert-validation
|
||||
* @tags security
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
* expose it to an attacker.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @security-severity 7.5
|
||||
* @precision high
|
||||
* @id py/clear-text-logging-sensitive-data
|
||||
* @tags security
|
||||
@@ -13,25 +14,13 @@
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.security.Paths
|
||||
import semmle.python.dataflow.TaintTracking
|
||||
import semmle.python.security.SensitiveData
|
||||
import semmle.python.security.ClearText
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
import DataFlow::PathGraph
|
||||
import semmle.python.security.dataflow.CleartextLogging::CleartextLogging
|
||||
|
||||
class CleartextLoggingConfiguration extends TaintTracking::Configuration {
|
||||
CleartextLoggingConfiguration() { this = "ClearTextLogging" }
|
||||
|
||||
override predicate isSource(DataFlow::Node src, TaintKind kind) {
|
||||
src.asCfgNode().(SensitiveData::Source).isSourceOf(kind)
|
||||
}
|
||||
|
||||
override predicate isSink(DataFlow::Node sink, TaintKind kind) {
|
||||
sink.asCfgNode() instanceof ClearTextLogging::Sink and
|
||||
kind instanceof SensitiveData
|
||||
}
|
||||
}
|
||||
|
||||
from CleartextLoggingConfiguration config, TaintedPathSource source, TaintedPathSink sink
|
||||
where config.hasFlowPath(source, sink)
|
||||
select sink.getSink(), source, sink, "Sensitive data returned by $@ is logged here.",
|
||||
source.getSource(), source.getCfgNode().(SensitiveData::Source).repr()
|
||||
from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink, string classification
|
||||
where
|
||||
config.hasFlowPath(source, sink) and
|
||||
classification = source.getNode().(Source).getClassification()
|
||||
select sink.getNode(), source, sink, "$@ is logged here.", source.getNode(),
|
||||
"Sensitive data (" + classification + ")"
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
* attacker.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @security-severity 7.5
|
||||
* @precision high
|
||||
* @id py/clear-text-storage-sensitive-data
|
||||
* @tags security
|
||||
@@ -13,25 +14,13 @@
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.security.Paths
|
||||
import semmle.python.dataflow.TaintTracking
|
||||
import semmle.python.security.SensitiveData
|
||||
import semmle.python.security.ClearText
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
import DataFlow::PathGraph
|
||||
import semmle.python.security.dataflow.CleartextStorage::CleartextStorage
|
||||
|
||||
class CleartextStorageConfiguration extends TaintTracking::Configuration {
|
||||
CleartextStorageConfiguration() { this = "ClearTextStorage" }
|
||||
|
||||
override predicate isSource(DataFlow::Node src, TaintKind kind) {
|
||||
src.asCfgNode().(SensitiveData::Source).isSourceOf(kind)
|
||||
}
|
||||
|
||||
override predicate isSink(DataFlow::Node sink, TaintKind kind) {
|
||||
sink.asCfgNode() instanceof ClearTextStorage::Sink and
|
||||
kind instanceof SensitiveData
|
||||
}
|
||||
}
|
||||
|
||||
from CleartextStorageConfiguration config, TaintedPathSource source, TaintedPathSink sink
|
||||
where config.hasFlowPath(source, sink)
|
||||
select sink.getSink(), source, sink, "Sensitive data from $@ is stored here.", source.getSource(),
|
||||
source.getCfgNode().(SensitiveData::Source).repr()
|
||||
from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink, string classification
|
||||
where
|
||||
config.hasFlowPath(source, sink) and
|
||||
classification = source.getNode().(Source).getClassification()
|
||||
select sink.getNode(), source, sink, "$@ is stored here.", source.getNode(),
|
||||
"Sensitive data (" + classification + ")"
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
* @description Use of a cryptographic key that is too small may allow the encryption to be broken.
|
||||
* @kind problem
|
||||
* @problem.severity error
|
||||
* @security-severity 7.5
|
||||
* @precision high
|
||||
* @id py/weak-crypto-key
|
||||
* @tags security
|
||||
|
||||
@@ -15,22 +15,28 @@
|
||||
secure than it appears to be.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
This query alerts on any use of a weak cryptographic algorithm, that is
|
||||
not a hashing algorithm. Use of broken or weak cryptographic hash
|
||||
functions are handled by the
|
||||
<code>py/weak-sensitive-data-hashing</code> query.
|
||||
</p>
|
||||
|
||||
</overview>
|
||||
<recommendation>
|
||||
|
||||
<p>
|
||||
Ensure that you use a strong, modern cryptographic
|
||||
algorithm. Use at least AES-128 or RSA-2048 for
|
||||
encryption, and SHA-2 or SHA-3 for secure hashing.
|
||||
algorithm, such as AES-128 or RSA-2048.
|
||||
</p>
|
||||
|
||||
</recommendation>
|
||||
<example>
|
||||
|
||||
<p>
|
||||
The following code uses the <code>pycrypto</code>
|
||||
The following code uses the <code>pycryptodome</code>
|
||||
library to encrypt some secret data. When you create a cipher using
|
||||
<code>pycrypto</code> you must specify the encryption
|
||||
<code>pycryptodome</code> you must specify the encryption
|
||||
algorithm to use. The first example uses DES, which is an
|
||||
older algorithm that is now considered weak. The second
|
||||
example uses AES, which is a stronger modern algorithm.
|
||||
@@ -39,8 +45,12 @@
|
||||
<sample src="examples/broken_crypto.py" />
|
||||
|
||||
<p>
|
||||
WARNING: Although the second example above is more robust,
|
||||
pycrypto is no longer actively maintained so we recommend using <code>cryptography</code> instead.
|
||||
NOTICE: the original
|
||||
<code><a href="https://pypi.org/project/pycrypto/">pycrypto</a></code>
|
||||
PyPI package that provided the <code>Crypto</code> module is not longer
|
||||
actively maintained, so you should use the
|
||||
<code><a href="https://pypi.org/project/pycryptodome/">pycryptodome</a></code>
|
||||
PyPI package instead (which has a compatible API).
|
||||
</p>
|
||||
|
||||
</example>
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
/**
|
||||
* @name Use of a broken or weak cryptographic algorithm
|
||||
* @description Using broken or weak cryptographic algorithms can compromise security.
|
||||
* @kind path-problem
|
||||
* @kind problem
|
||||
* @problem.severity warning
|
||||
* @security-severity 7.5
|
||||
* @precision high
|
||||
* @id py/weak-cryptographic-algorithm
|
||||
* @tags security
|
||||
@@ -10,21 +11,15 @@
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.security.Paths
|
||||
import semmle.python.security.SensitiveData
|
||||
import semmle.python.security.Crypto
|
||||
import semmle.python.Concepts
|
||||
|
||||
class BrokenCryptoConfiguration extends TaintTracking::Configuration {
|
||||
BrokenCryptoConfiguration() { this = "Broken crypto configuration" }
|
||||
|
||||
override predicate isSource(TaintTracking::Source source) {
|
||||
source instanceof SensitiveDataSource
|
||||
}
|
||||
|
||||
override predicate isSink(TaintTracking::Sink sink) { sink instanceof WeakCryptoSink }
|
||||
}
|
||||
|
||||
from BrokenCryptoConfiguration config, TaintedPathSource src, TaintedPathSink sink
|
||||
where config.hasFlowPath(src, sink)
|
||||
select sink.getSink(), src, sink, "$@ is used in a broken or weak cryptographic algorithm.",
|
||||
src.getSource(), "Sensitive data"
|
||||
from Cryptography::CryptographicOperation operation, Cryptography::CryptographicAlgorithm algorithm
|
||||
where
|
||||
algorithm = operation.getAlgorithm() and
|
||||
algorithm.isWeak() and
|
||||
// `Cryptography::HashingAlgorithm` and `Cryptography::PasswordHashingAlgorithm` are
|
||||
// handled by `py/weak-sensitive-data-hashing`
|
||||
algorithm instanceof Cryptography::EncryptionAlgorithm
|
||||
select operation,
|
||||
"The cryptographic algorithm " + algorithm.getName() +
|
||||
" is broken or weak, and should not be used."
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
* @id py/insecure-default-protocol
|
||||
* @kind problem
|
||||
* @problem.severity warning
|
||||
* @security-severity 7.5
|
||||
* @precision high
|
||||
* @tags security
|
||||
* external/cwe/cwe-327
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
* @id py/insecure-protocol
|
||||
* @kind problem
|
||||
* @problem.severity warning
|
||||
* @security-severity 7.5
|
||||
* @precision high
|
||||
* @tags security
|
||||
* external/cwe/cwe-327
|
||||
@@ -26,37 +27,33 @@ class ProtocolConfiguration extends DataFlow::Node {
|
||||
unsafe_context_creation(this, _)
|
||||
}
|
||||
|
||||
AstNode getNode() { result = this.asCfgNode().(CallNode).getFunction().getNode() }
|
||||
DataFlow::Node getNode() { result = this.(DataFlow::CallCfgNode).getFunction() }
|
||||
}
|
||||
|
||||
// Helper for pretty printer `callName`.
|
||||
// This is a consequence of missing pretty priting.
|
||||
// We do not want to evaluate our bespoke pretty printer
|
||||
// for all `AstNode`s so we define a sub class of interesting ones.
|
||||
//
|
||||
// Note that AstNode is abstract and AstNode_ is a library class, so
|
||||
// we have to extend @py_ast_node.
|
||||
class Nameable extends @py_ast_node {
|
||||
// for all `DataFlow::Node`s so we define a sub class of interesting ones.
|
||||
class Nameable extends DataFlow::Node {
|
||||
Nameable() {
|
||||
this = any(ProtocolConfiguration pc).getNode()
|
||||
or
|
||||
exists(Nameable attr | this = attr.(Attribute).getObject())
|
||||
this = any(Nameable attr).(DataFlow::AttrRef).getObject()
|
||||
}
|
||||
|
||||
string toString() { result = "AstNode" }
|
||||
}
|
||||
|
||||
string callName(Nameable call) {
|
||||
result = call.(Name).getId()
|
||||
result = call.asExpr().(Name).getId()
|
||||
or
|
||||
exists(Attribute a | a = call | result = callName(a.getObject()) + "." + a.getName())
|
||||
exists(DataFlow::AttrRef a | a = call |
|
||||
result = callName(a.getObject()) + "." + a.getAttributeName()
|
||||
)
|
||||
}
|
||||
|
||||
string configName(ProtocolConfiguration protocolConfiguration) {
|
||||
result =
|
||||
"call to " + callName(protocolConfiguration.asCfgNode().(CallNode).getFunction().getNode())
|
||||
result = "call to " + callName(protocolConfiguration.(DataFlow::CallCfgNode).getFunction())
|
||||
or
|
||||
not protocolConfiguration.asCfgNode() instanceof CallNode and
|
||||
not protocolConfiguration instanceof DataFlow::CallCfgNode and
|
||||
not protocolConfiguration instanceof ContextCreation and
|
||||
result = "context modification"
|
||||
}
|
||||
|
||||
@@ -13,12 +13,12 @@ class PyOpenSSLContextCreation extends ContextCreation, DataFlow::CallCfgNode {
|
||||
}
|
||||
|
||||
override string getProtocol() {
|
||||
exists(ControlFlowNode protocolArg, PyOpenSSL pyo |
|
||||
protocolArg in [node.getArg(0), node.getArgByName("method")]
|
||||
exists(DataFlow::Node protocolArg, PyOpenSSL pyo |
|
||||
protocolArg in [this.getArg(0), this.getArgByName("method")]
|
||||
|
|
||||
protocolArg =
|
||||
[pyo.specific_version(result).getAUse(), pyo.unspecific_version(result).getAUse()]
|
||||
.asCfgNode()
|
||||
protocolArg in [
|
||||
pyo.specific_version(result).getAUse(), pyo.unspecific_version(result).getAUse()
|
||||
]
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -29,7 +29,7 @@ class ConnectionCall extends ConnectionCreation, DataFlow::CallCfgNode {
|
||||
}
|
||||
|
||||
override DataFlow::CfgNode getContext() {
|
||||
result.getNode() in [node.getArg(0), node.getArgByName("context")]
|
||||
result in [this.getArg(0), this.getArgByName("context")]
|
||||
}
|
||||
}
|
||||
|
||||
@@ -43,8 +43,8 @@ class SetOptionsCall extends ProtocolRestriction, DataFlow::CallCfgNode {
|
||||
}
|
||||
|
||||
override ProtocolVersion getRestriction() {
|
||||
API::moduleImport("OpenSSL").getMember("SSL").getMember("OP_NO_" + result).getAUse().asCfgNode() in [
|
||||
node.getArg(0), node.getArgByName("options")
|
||||
API::moduleImport("OpenSSL").getMember("SSL").getMember("OP_NO_" + result).getAUse() in [
|
||||
this.getArg(0), this.getArgByName("options")
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,15 +11,15 @@ class SSLContextCreation extends ContextCreation, DataFlow::CallCfgNode {
|
||||
SSLContextCreation() { this = API::moduleImport("ssl").getMember("SSLContext").getACall() }
|
||||
|
||||
override string getProtocol() {
|
||||
exists(ControlFlowNode protocolArg, Ssl ssl |
|
||||
protocolArg in [node.getArg(0), node.getArgByName("protocol")]
|
||||
exists(DataFlow::Node protocolArg, Ssl ssl |
|
||||
protocolArg in [this.getArg(0), this.getArgByName("protocol")]
|
||||
|
|
||||
protocolArg =
|
||||
[ssl.specific_version(result).getAUse(), ssl.unspecific_version(result).getAUse()]
|
||||
.asCfgNode()
|
||||
)
|
||||
or
|
||||
not exists(node.getAnArg()) and
|
||||
not exists(this.getArg(_)) and
|
||||
not exists(this.getArgByName(_)) and
|
||||
result = "TLS"
|
||||
}
|
||||
}
|
||||
@@ -39,12 +39,10 @@ API::Node sslContextInstance() {
|
||||
result = API::moduleImport("ssl").getMember(["SSLContext", "create_default_context"]).getReturn()
|
||||
}
|
||||
|
||||
class WrapSocketCall extends ConnectionCreation, DataFlow::CallCfgNode {
|
||||
class WrapSocketCall extends ConnectionCreation, DataFlow::MethodCallNode {
|
||||
WrapSocketCall() { this = sslContextInstance().getMember("wrap_socket").getACall() }
|
||||
|
||||
override DataFlow::Node getContext() {
|
||||
result = this.getFunction().(DataFlow::AttrRead).getObject()
|
||||
}
|
||||
override DataFlow::Node getContext() { result = this.getObject() }
|
||||
}
|
||||
|
||||
class OptionsAugOr extends ProtocolRestriction, DataFlow::CfgNode {
|
||||
@@ -133,7 +131,7 @@ class ContextSetVersion extends ProtocolRestriction, ProtocolUnrestriction, Data
|
||||
|
||||
ContextSetVersion() {
|
||||
exists(DataFlow::AttrWrite aw |
|
||||
aw.getObject().asCfgNode() = node and
|
||||
this = aw.getObject() and
|
||||
aw.getAttributeName() = "minimum_version" and
|
||||
aw.getValue() =
|
||||
API::moduleImport("ssl").getMember("TLSVersion").getMember(restriction).getAUse()
|
||||
|
||||
104
python/ql/src/Security/CWE-327/WeakSensitiveDataHashing.qhelp
Normal file
104
python/ql/src/Security/CWE-327/WeakSensitiveDataHashing.qhelp
Normal file
@@ -0,0 +1,104 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
<qhelp>
|
||||
<overview>
|
||||
<p>
|
||||
Using a broken or weak cryptographic hash function can leave data
|
||||
vulnerable, and should not be used in security related code.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
A strong cryptographic hash function should be resistant to:
|
||||
</p>
|
||||
<ul>
|
||||
<li>
|
||||
pre-image attacks: if you know a hash value <code>h(x)</code>,
|
||||
you should not be able to easily find the input <code>x</code>.
|
||||
</li>
|
||||
<li>
|
||||
collision attacks: if you know a hash value <code>h(x)</code>,
|
||||
you should not be able to easily find a different input <code>y</code>
|
||||
with the same hash value <code>h(x) = h(y)</code>.
|
||||
</li>
|
||||
</ul>
|
||||
<p>
|
||||
In cases with a limited input space, such as for passwords, the hash
|
||||
function also needs to be computationally expensive to be resistant to
|
||||
brute-force attacks. Passwords should also have an unique salt applied
|
||||
before hashing, but that is not considered by this query.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
As an example, both MD5 and SHA-1 are known to be vulnerable to collision attacks.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
Since it's OK to use a weak cryptographic hash function in a non-security
|
||||
context, this query only alerts when these are used to hash sensitive
|
||||
data (such as passwords, certificates, usernames).
|
||||
</p>
|
||||
|
||||
<p>
|
||||
Use of broken or weak cryptographic algorithms that are not hashing algorithms, is
|
||||
handled by the <code>py/weak-cryptographic-algorithm</code> query.
|
||||
</p>
|
||||
|
||||
</overview>
|
||||
<recommendation>
|
||||
|
||||
<p>
|
||||
Ensure that you use a strong, modern cryptographic hash function:
|
||||
</p>
|
||||
|
||||
<ul>
|
||||
<li>
|
||||
such as Argon2, scrypt, bcrypt, or PBKDF2 for passwords and other data with limited input space.
|
||||
</li>
|
||||
<li>
|
||||
such as SHA-2, or SHA-3 in other cases.
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
</recommendation>
|
||||
<example>
|
||||
|
||||
<p>
|
||||
The following example shows two functions for checking whether the hash
|
||||
of a certificate matches a known value -- to prevent tampering.
|
||||
|
||||
The first function uses MD5 that is known to be vulnerable to collision attacks.
|
||||
|
||||
The second function uses SHA-256 that is a strong cryptographic hashing function.
|
||||
</p>
|
||||
|
||||
<sample src="examples/weak_certificate_hashing.py" />
|
||||
|
||||
</example>
|
||||
<example>
|
||||
<p>
|
||||
The following example shows two functions for hashing passwords.
|
||||
|
||||
The first function uses SHA-256 to hash passwords. Although SHA-256 is a
|
||||
strong cryptographic hash function, it is not suitable for password
|
||||
hashing since it is not computationally expensive.
|
||||
</p>
|
||||
|
||||
<sample src="examples/weak_password_hashing_bad.py" />
|
||||
|
||||
|
||||
<p>
|
||||
The second function uses Argon2 (through the <code>argon2-cffi</code>
|
||||
PyPI package), which is a strong password hashing algorithm (and
|
||||
includes a per-password salt by default).
|
||||
</p>
|
||||
|
||||
<sample src="examples/weak_password_hashing_good.py" />
|
||||
|
||||
</example>
|
||||
|
||||
<references>
|
||||
<li>OWASP: <a href="https://cheatsheetseries.owasp.org/cheatsheets/Password_Storage_Cheat_Sheet.html">Password Storage Cheat Sheet</a></li>
|
||||
</references>
|
||||
|
||||
</qhelp>
|
||||
49
python/ql/src/Security/CWE-327/WeakSensitiveDataHashing.ql
Normal file
49
python/ql/src/Security/CWE-327/WeakSensitiveDataHashing.ql
Normal file
@@ -0,0 +1,49 @@
|
||||
/**
|
||||
* @name Use of a broken or weak cryptographic hashing algorithm on sensitive data
|
||||
* @description Using broken or weak cryptographic hashing algorithms can compromise security.
|
||||
* @kind path-problem
|
||||
* @problem.severity warning
|
||||
* @security-severity 7.5
|
||||
* @precision high
|
||||
* @id py/weak-sensitive-data-hashing
|
||||
* @tags security
|
||||
* external/cwe/cwe-327
|
||||
* external/cwe/cwe-328
|
||||
* external/cwe/cwe-916
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.security.dataflow.WeakSensitiveDataHashing
|
||||
import semmle.python.dataflow.new.DataFlow
|
||||
import semmle.python.dataflow.new.TaintTracking
|
||||
import DataFlow::PathGraph
|
||||
|
||||
from
|
||||
DataFlow::PathNode source, DataFlow::PathNode sink, string ending, string algorithmName,
|
||||
string classification
|
||||
where
|
||||
exists(NormalHashFunction::Configuration config |
|
||||
config.hasFlowPath(source, sink) and
|
||||
algorithmName = sink.getNode().(NormalHashFunction::Sink).getAlgorithmName() and
|
||||
classification = source.getNode().(NormalHashFunction::Source).getClassification() and
|
||||
ending = "."
|
||||
)
|
||||
or
|
||||
exists(ComputationallyExpensiveHashFunction::Configuration config |
|
||||
config.hasFlowPath(source, sink) and
|
||||
algorithmName = sink.getNode().(ComputationallyExpensiveHashFunction::Sink).getAlgorithmName() and
|
||||
classification =
|
||||
source.getNode().(ComputationallyExpensiveHashFunction::Source).getClassification() and
|
||||
(
|
||||
sink.getNode().(ComputationallyExpensiveHashFunction::Sink).isComputationallyExpensive() and
|
||||
ending = "."
|
||||
or
|
||||
not sink.getNode().(ComputationallyExpensiveHashFunction::Sink).isComputationallyExpensive() and
|
||||
ending =
|
||||
" for " + classification +
|
||||
" hashing, since it is not a computationally expensive hash function."
|
||||
)
|
||||
)
|
||||
select sink.getNode(), source, sink,
|
||||
"$@ is used in a hashing algorithm (" + algorithmName + ") that is insecure" + ending,
|
||||
source.getNode(), "Sensitive data (" + classification + ")"
|
||||
@@ -0,0 +1,9 @@
|
||||
import hashlib
|
||||
|
||||
def certificate_matches_known_hash_bad(certificate, known_hash):
|
||||
hash = hashlib.md5(certificate).hexdigest() # BAD
|
||||
return hash == known_hash
|
||||
|
||||
def certificate_matches_known_hash_good(certificate, known_hash):
|
||||
hash = hashlib.sha256(certificate).hexdigest() # GOOD
|
||||
return hash == known_hash
|
||||
@@ -0,0 +1,4 @@
|
||||
import hashlib
|
||||
|
||||
def get_password_hash(password: str, salt: str):
|
||||
return hashlib.sha256(password + salt).hexdigest() # BAD
|
||||
@@ -0,0 +1,9 @@
|
||||
from argon2 import PasswordHasher
|
||||
|
||||
def get_initial_hash(password: str):
|
||||
ph = PasswordHasher()
|
||||
return ph.hash(password) # GOOD
|
||||
|
||||
def check_password(password: str, known_hash):
|
||||
ph = PasswordHasher()
|
||||
return ph.verify(known_hash, password) # GOOD
|
||||
@@ -4,6 +4,7 @@
|
||||
* @kind problem
|
||||
* @id py/insecure-temporary-file
|
||||
* @problem.severity error
|
||||
* @security-severity 7.0
|
||||
* @sub-severity high
|
||||
* @precision high
|
||||
* @tags external/cwe/cwe-377
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
* @kind path-problem
|
||||
* @id py/unsafe-deserialization
|
||||
* @problem.severity error
|
||||
* @security-severity 9.8
|
||||
* @sub-severity high
|
||||
* @precision high
|
||||
* @tags external/cwe/cwe-502
|
||||
@@ -15,6 +16,6 @@ import python
|
||||
import semmle.python.security.dataflow.UnsafeDeserialization
|
||||
import DataFlow::PathGraph
|
||||
|
||||
from UnsafeDeserializationConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
|
||||
from UnsafeDeserialization::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
|
||||
where config.hasFlowPath(source, sink)
|
||||
select sink.getNode(), source, sink, "Deserializing of $@.", source.getNode(), "untrusted input"
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
* may cause redirection to malicious web sites.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @security-severity 6.1
|
||||
* @sub-severity low
|
||||
* @id py/url-redirection
|
||||
* @tags security
|
||||
@@ -15,7 +16,7 @@ import python
|
||||
import semmle.python.security.dataflow.UrlRedirect
|
||||
import DataFlow::PathGraph
|
||||
|
||||
from UrlRedirectConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
|
||||
from UrlRedirect::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
|
||||
where config.hasFlowPath(source, sink)
|
||||
select sink.getNode(), source, sink, "Untrusted URL redirection due to $@.", source.getNode(),
|
||||
"A user-provided value"
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
* @kind problem
|
||||
* @id py/overly-permissive-file
|
||||
* @problem.severity warning
|
||||
* @security-severity 7.8
|
||||
* @sub-severity high
|
||||
* @precision medium
|
||||
* @tags external/cwe/cwe-732
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
* @description Credentials are hard coded in the source code of the application.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @security-severity 9.8
|
||||
* @precision medium
|
||||
* @id py/hardcoded-credentials
|
||||
* @tags security
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
* @tags security
|
||||
* correctness
|
||||
* @problem.severity error
|
||||
* @security-severity 4.2
|
||||
* @sub-severity high
|
||||
* @precision low
|
||||
* @id py/use-of-exec
|
||||
|
||||
@@ -30,5 +30,11 @@ predicate modification_of_locals(ControlFlowNode f) {
|
||||
}
|
||||
|
||||
from AstNode a, ControlFlowNode f
|
||||
where modification_of_locals(f) and a = f.getNode()
|
||||
where
|
||||
modification_of_locals(f) and
|
||||
a = f.getNode() and
|
||||
// in module level scope `locals() == globals()`
|
||||
// see https://docs.python.org/3/library/functions.html#locals
|
||||
// FP report in https://github.com/github/codeql/issues/6674
|
||||
not a.getScope() instanceof ModuleScope
|
||||
select a, "Modification of the locals() dictionary will have no effect on the local variables."
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
* be counted as user written code.
|
||||
* @kind metric
|
||||
* @tags summary
|
||||
* lines-of-code
|
||||
* @id py/summary/lines-of-user-code
|
||||
*/
|
||||
|
||||
|
||||
@@ -19,6 +19,7 @@ predicate unused_local(Name unused, LocalVariable v) {
|
||||
def.getVariable() = v and
|
||||
def.isUnused() and
|
||||
not exists(def.getARedef()) and
|
||||
not exists(annotation_without_assignment(v)) and
|
||||
def.isRelevant() and
|
||||
not v = any(Nonlocal n).getAVariable() and
|
||||
not exists(def.getNode().getParentNode().(FunctionDef).getDefinedFunction().getADecorator()) and
|
||||
@@ -26,6 +27,17 @@ predicate unused_local(Name unused, LocalVariable v) {
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets any annotation of the local variable `v` that does not also reassign its value.
|
||||
*
|
||||
* TODO: This predicate should not be needed. Rather, annotated "assignments" that do not actually
|
||||
* assign a value should not result in the creation of an SSA variable (which then goes unused).
|
||||
*/
|
||||
private AnnAssign annotation_without_assignment(LocalVariable v) {
|
||||
result.getTarget() = v.getAStore() and
|
||||
not exists(result.getValue())
|
||||
}
|
||||
|
||||
from Name unused, LocalVariable v
|
||||
where
|
||||
unused_local(unused, v) and
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
- description: Standard Code Scanning queries for Python
|
||||
- qlpack: codeql-python
|
||||
- queries: .
|
||||
- apply: code-scanning-selectors.yml
|
||||
from: codeql-suite-helpers
|
||||
from: codeql/suite-helpers
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
- description: Standard LGTM queries for Python, including ones not displayed by default
|
||||
- qlpack: codeql-python
|
||||
- queries: .
|
||||
- apply: lgtm-selectors.yml
|
||||
from: codeql-suite-helpers
|
||||
from: codeql/suite-helpers
|
||||
# These are only for IDE use.
|
||||
- exclude:
|
||||
tags contain:
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
- description: Standard LGTM queries for Python
|
||||
- apply: codeql-suites/python-lgtm-full.qls
|
||||
- apply: lgtm-displayed-only.yml
|
||||
from: codeql-suite-helpers
|
||||
from: codeql/suite-helpers
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
- description: Security-and-quality queries for Python
|
||||
- qlpack: codeql-python
|
||||
- queries: .
|
||||
- apply: security-and-quality-selectors.yml
|
||||
from: codeql-suite-helpers
|
||||
from: codeql/suite-helpers
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
- description: Security-extended queries for Python
|
||||
- qlpack: codeql-python
|
||||
- queries: .
|
||||
- apply: security-extended-selectors.yml
|
||||
from: codeql-suite-helpers
|
||||
from: codeql/suite-helpers
|
||||
|
||||
@@ -1,6 +0,0 @@
|
||||
/**
|
||||
* WARNING: Use of this module is DEPRECATED.
|
||||
* All new queries should use `import python`.
|
||||
*/
|
||||
|
||||
import python
|
||||
@@ -0,0 +1,33 @@
|
||||
/**
|
||||
* @name Clear-text logging of sensitive information
|
||||
* @description OLD QUERY: Logging sensitive information without encryption or hashing can
|
||||
* expose it to an attacker.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @id py/old/clear-text-logging-sensitive-data
|
||||
* @deprecated
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.security.Paths
|
||||
import semmle.python.dataflow.TaintTracking
|
||||
import semmle.python.security.SensitiveData
|
||||
import semmle.python.security.ClearText
|
||||
|
||||
class CleartextLoggingConfiguration extends TaintTracking::Configuration {
|
||||
CleartextLoggingConfiguration() { this = "ClearTextLogging" }
|
||||
|
||||
override predicate isSource(DataFlow::Node src, TaintKind kind) {
|
||||
src.asCfgNode().(SensitiveData::Source).isSourceOf(kind)
|
||||
}
|
||||
|
||||
override predicate isSink(DataFlow::Node sink, TaintKind kind) {
|
||||
sink.asCfgNode() instanceof ClearTextLogging::Sink and
|
||||
kind instanceof SensitiveData
|
||||
}
|
||||
}
|
||||
|
||||
from CleartextLoggingConfiguration config, TaintedPathSource source, TaintedPathSink sink
|
||||
where config.hasFlowPath(source, sink)
|
||||
select sink.getSink(), source, sink, "Sensitive data returned by $@ is logged here.",
|
||||
source.getSource(), source.getCfgNode().(SensitiveData::Source).repr()
|
||||
@@ -0,0 +1,33 @@
|
||||
/**
|
||||
* @name Clear-text storage of sensitive information
|
||||
* @description OLD QUERY: Sensitive information stored without encryption or hashing can expose it to an
|
||||
* attacker.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @id py/old/clear-text-storage-sensitive-data
|
||||
* @deprecated
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.security.Paths
|
||||
import semmle.python.dataflow.TaintTracking
|
||||
import semmle.python.security.SensitiveData
|
||||
import semmle.python.security.ClearText
|
||||
|
||||
class CleartextStorageConfiguration extends TaintTracking::Configuration {
|
||||
CleartextStorageConfiguration() { this = "ClearTextStorage" }
|
||||
|
||||
override predicate isSource(DataFlow::Node src, TaintKind kind) {
|
||||
src.asCfgNode().(SensitiveData::Source).isSourceOf(kind)
|
||||
}
|
||||
|
||||
override predicate isSink(DataFlow::Node sink, TaintKind kind) {
|
||||
sink.asCfgNode() instanceof ClearTextStorage::Sink and
|
||||
kind instanceof SensitiveData
|
||||
}
|
||||
}
|
||||
|
||||
from CleartextStorageConfiguration config, TaintedPathSource source, TaintedPathSink sink
|
||||
where config.hasFlowPath(source, sink)
|
||||
select sink.getSink(), source, sink, "Sensitive data from $@ is stored here.", source.getSource(),
|
||||
source.getCfgNode().(SensitiveData::Source).repr()
|
||||
@@ -0,0 +1,28 @@
|
||||
/**
|
||||
* @name OLD QUERY: Use of a broken or weak cryptographic algorithm
|
||||
* @description Using broken or weak cryptographic algorithms can compromise security.
|
||||
* @kind path-problem
|
||||
* @problem.severity warning
|
||||
* @id py/old/weak-cryptographic-algorithm
|
||||
* @deprecated
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.security.Paths
|
||||
import semmle.python.security.SensitiveData
|
||||
import semmle.python.security.Crypto
|
||||
|
||||
class BrokenCryptoConfiguration extends TaintTracking::Configuration {
|
||||
BrokenCryptoConfiguration() { this = "Broken crypto configuration" }
|
||||
|
||||
override predicate isSource(TaintTracking::Source source) {
|
||||
source instanceof SensitiveDataSource
|
||||
}
|
||||
|
||||
override predicate isSink(TaintTracking::Sink sink) { sink instanceof WeakCryptoSink }
|
||||
}
|
||||
|
||||
from BrokenCryptoConfiguration config, TaintedPathSource src, TaintedPathSink sink
|
||||
where config.hasFlowPath(src, sink)
|
||||
select sink.getSink(), src, sink, "$@ is used in a broken or weak cryptographic algorithm.",
|
||||
src.getSource(), "Sensitive data"
|
||||
@@ -0,0 +1,50 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
<qhelp>
|
||||
<overview>
|
||||
<p>If an LDAP query or DN is built using string concatenation or string formatting, and the
|
||||
components of the concatenation include user input without any proper sanitization, a user
|
||||
is likely to be able to run malicious LDAP queries.</p>
|
||||
</overview>
|
||||
|
||||
<recommendation>
|
||||
<p>If user input must be included in an LDAP query or DN, it should be escaped to
|
||||
avoid a malicious user providing special characters that change the meaning
|
||||
of the query. In Python2, user input should be escaped with <code>ldap.dn.escape_dn_chars</code>
|
||||
or <code>ldap.filter.escape_filter_chars</code>, while in Python3, user input should be escaped with
|
||||
<code>ldap3.utils.dn.escape_rdn</code> or <code>ldap3.utils.conv.escape_filter_chars</code>
|
||||
depending on the component tainted by the user. A good practice is to escape filter characters
|
||||
that could change the meaning of the query (https://tools.ietf.org/search/rfc4515#section-3).</p>
|
||||
</recommendation>
|
||||
|
||||
<example>
|
||||
<p>In the following examples, the code accepts both <code>username</code> and <code>dc</code> from the user,
|
||||
which it then uses to build a LDAP query and DN.</p>
|
||||
|
||||
<p>The first and the second example uses the unsanitized user input directly
|
||||
in the search filter and DN for the LDAP query.
|
||||
A malicious user could provide special characters to change the meaning of these
|
||||
components, and search for a completely different set of values.</p>
|
||||
|
||||
<sample src="examples/example_bad1.py" />
|
||||
<sample src="examples/example_bad2.py" />
|
||||
|
||||
<p>In the third and four example, the input provided by the user is sanitized before it is included in the search filter or DN.
|
||||
This ensures the meaning of the query cannot be changed by a malicious user.</p>
|
||||
|
||||
<sample src="examples/example_good1.py" />
|
||||
<sample src="examples/example_good2.py" />
|
||||
</example>
|
||||
|
||||
<references>
|
||||
<li>OWASP: <a href="https://cheatsheetseries.owasp.org/cheatsheets/LDAP_Injection_Prevention_Cheat_Sheet.html">LDAP Injection Prevention Cheat Sheet</a>.</li>
|
||||
<li>OWASP: <a href="https://owasp.org/www-community/attacks/LDAP_Injection">LDAP Injection</a>.</li>
|
||||
<li>SonarSource: <a href="https://rules.sonarsource.com/python/RSPEC-2078">RSPEC-2078</a>.</li>
|
||||
<li>Python2: <a href="https://www.python-ldap.org/en/python-ldap-3.3.0/reference/ldap.html">LDAP Documentation</a>.</li>
|
||||
<li>Python3: <a href="https://ldap3.readthedocs.io/en/latest/">LDAP Documentation</a>.</li>
|
||||
<li>Wikipedia: <a href="https://en.wikipedia.org/wiki/LDAP_injection">LDAP injection</a>.</li>
|
||||
<li>BlackHat: <a href="https://www.blackhat.com/presentations/bh-europe-08/Alonso-Parada/Whitepaper/bh-eu-08-alonso-parada-WP.pdf">LDAP Injection and Blind LDAP Injection</a>.</li>
|
||||
<li>LDAP: <a href="https://ldap.com/2018/05/04/understanding-and-defending-against-ldap-injection-attacks/">Understanding and Defending Against LDAP Injection Attacks</a>.</li>
|
||||
</references>
|
||||
</qhelp>
|
||||
21
python/ql/src/experimental/Security/CWE-090/LDAPInjection.ql
Normal file
21
python/ql/src/experimental/Security/CWE-090/LDAPInjection.ql
Normal file
@@ -0,0 +1,21 @@
|
||||
/**
|
||||
* @name LDAP query built from user-controlled sources
|
||||
* @description Building an LDAP query from user-controlled sources is vulnerable to insertion of
|
||||
* malicious LDAP code by the user.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @id py/ldap-injection
|
||||
* @tags experimental
|
||||
* security
|
||||
* external/cwe/cwe-090
|
||||
*/
|
||||
|
||||
// Determine precision above
|
||||
import python
|
||||
import experimental.semmle.python.security.injection.LDAP
|
||||
import DataFlow::PathGraph
|
||||
|
||||
from LDAPInjectionFlowConfig config, DataFlow::PathNode source, DataFlow::PathNode sink
|
||||
where config.hasFlowPath(source, sink)
|
||||
select sink.getNode(), source, sink, "$@ LDAP query parameter comes from $@.", sink.getNode(),
|
||||
"This", source.getNode(), "a user-provided value"
|
||||
@@ -0,0 +1,15 @@
|
||||
from flask import request, Flask
|
||||
import ldap
|
||||
|
||||
|
||||
@app.route("/normal")
|
||||
def normal():
|
||||
unsafe_dc = request.args['dc']
|
||||
unsafe_filter = request.args['username']
|
||||
|
||||
dn = "dc={}".format(unsafe_dc)
|
||||
search_filter = "(user={})".format(unsafe_filter)
|
||||
|
||||
ldap_connection = ldap.initialize("ldap://127.0.0.1")
|
||||
user = ldap_connection.search_s(
|
||||
dn, ldap.SCOPE_SUBTREE, search_filter)
|
||||
@@ -0,0 +1,15 @@
|
||||
from flask import request, Flask
|
||||
import ldap3
|
||||
|
||||
|
||||
@app.route("/normal")
|
||||
def normal():
|
||||
unsafe_dc = request.args['dc']
|
||||
unsafe_filter = request.args['username']
|
||||
|
||||
dn = "dc={}".format(unsafe_dc)
|
||||
search_filter = "(user={})".format(unsafe_filter)
|
||||
|
||||
srv = ldap3.Server('ldap://127.0.0.1')
|
||||
conn = ldap3.Connection(srv, user=dn, auto_bind=True)
|
||||
conn.search(dn, search_filter)
|
||||
@@ -0,0 +1,20 @@
|
||||
from flask import request, Flask
|
||||
import ldap
|
||||
import ldap.filter
|
||||
import ldap.dn
|
||||
|
||||
|
||||
@app.route("/normal")
|
||||
def normal():
|
||||
unsafe_dc = request.args['dc']
|
||||
unsafe_filter = request.args['username']
|
||||
|
||||
safe_dc = ldap.dn.escape_dn_chars(unsafe_dc)
|
||||
safe_filter = ldap.filter.escape_filter_chars(unsafe_filter)
|
||||
|
||||
dn = "dc={}".format(safe_dc)
|
||||
search_filter = "(user={})".format(safe_filter)
|
||||
|
||||
ldap_connection = ldap.initialize("ldap://127.0.0.1")
|
||||
user = ldap_connection.search_s(
|
||||
dn, ldap.SCOPE_SUBTREE, search_filter)
|
||||
@@ -0,0 +1,20 @@
|
||||
from flask import request, Flask
|
||||
import ldap3
|
||||
from ldap3.utils.dn import escape_rdn
|
||||
from ldap3.utils.conv import escape_filter_chars
|
||||
|
||||
|
||||
@app.route("/normal")
|
||||
def normal():
|
||||
unsafe_dc = request.args['dc']
|
||||
unsafe_filter = request.args['username']
|
||||
|
||||
safe_dc = escape_rdn(unsafe_dc)
|
||||
safe_filter = escape_filter_chars(unsafe_filter)
|
||||
|
||||
dn = "dc={}".format(safe_dc)
|
||||
search_filter = "(user={})".format(safe_filter)
|
||||
|
||||
srv = ldap3.Server('ldap://127.0.0.1')
|
||||
conn = ldap3.Connection(srv, user=dn, auto_bind=True)
|
||||
conn.search(dn, search_filter)
|
||||
@@ -0,0 +1,31 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
<qhelp>
|
||||
<overview>
|
||||
<p>If an LDAP query doesn't carry any kind of authentication, anonymous binds causes an empty or None-set password
|
||||
to result in a successful authentication.</p>
|
||||
</overview>
|
||||
|
||||
<recommendation>
|
||||
<p>Use a non-empty password while establishing an LDAP connection.</p>
|
||||
</recommendation>
|
||||
|
||||
<example>
|
||||
<p>In the following examples, the code builds a LDAP query whose execution carries no authentication or binds anonymously.</p>
|
||||
|
||||
<sample src="examples/auth_bad_2.py" />
|
||||
<sample src="examples/auth_bad_3.py" />
|
||||
|
||||
<p>In the third and fourth examples, the authentication is established using a password.</p>
|
||||
|
||||
<sample src="examples/auth_good_2.py" />
|
||||
<sample src="examples/auth_good_3.py" />
|
||||
</example>
|
||||
|
||||
<references>
|
||||
<li>SonarSource: <a href="https://rules.sonarsource.com/python/type/Vulnerability/RSPEC-4433">RSPEC-4433</a>.</li>
|
||||
<li>Python2: <a href="https://www.python-ldap.org/en/python-ldap-3.3.0/reference/ldap.html">LDAP Documentation</a>.</li>
|
||||
<li>Python3: <a href="https://ldap3.readthedocs.io/en/latest/">LDAP Documentation</a>.</li>
|
||||
</references>
|
||||
</qhelp>
|
||||
@@ -0,0 +1,31 @@
|
||||
/**
|
||||
* @name Improper LDAP Authentication
|
||||
* @description A user-controlled query carries no authentication
|
||||
* @kind problem
|
||||
* @problem.severity warning
|
||||
* @id py/improper-ldap-auth
|
||||
* @tags experimental
|
||||
* security
|
||||
* external/cwe/cwe-287
|
||||
*/
|
||||
|
||||
// Determine precision above
|
||||
import python
|
||||
import experimental.semmle.python.Concepts
|
||||
import semmle.python.dataflow.new.DataFlow
|
||||
|
||||
predicate authenticatesImproperly(LDAPBind ldapBind) {
|
||||
(
|
||||
DataFlow::localFlow(DataFlow::exprNode(any(None noneName)), ldapBind.getPassword()) or
|
||||
not exists(ldapBind.getPassword())
|
||||
)
|
||||
or
|
||||
exists(StrConst emptyString |
|
||||
emptyString.getText() = "" and
|
||||
DataFlow::localFlow(DataFlow::exprNode(emptyString), ldapBind.getPassword())
|
||||
)
|
||||
}
|
||||
|
||||
from LDAPBind ldapBind
|
||||
where authenticatesImproperly(ldapBind)
|
||||
select ldapBind, "The following LDAP bind operation is executed without authentication"
|
||||
@@ -0,0 +1,14 @@
|
||||
from flask import request, Flask
|
||||
import ldap
|
||||
import ldap.filter
|
||||
import ldap.dn
|
||||
|
||||
|
||||
@app.route("/bind_example")
|
||||
def bind_example():
|
||||
dn = "dc={}".format(ldap.dn.escape_dn_chars(request.args['dc']))
|
||||
search_filter = "(user={})".format(ldap.filter.escape_filter_chars(request.args['search']))
|
||||
|
||||
ldap_connection = ldap.initialize("ldap://127.0.0.1:1337")
|
||||
ldap_connection.bind('cn=root', "")
|
||||
user = ldap_connection.search_s(dn, ldap.SCOPE_SUBTREE, search_filter)
|
||||
@@ -0,0 +1,13 @@
|
||||
from ldap3 import Server, Connection, ALL
|
||||
from flask import request, Flask
|
||||
from ldap3.utils.dn import escape_rdn
|
||||
from ldap3.utils.conv import escape_filter_chars
|
||||
|
||||
@app.route("/passwordNone")
|
||||
def passwordNone():
|
||||
dn = "dc={}".format(escape_rdn(request.args['dc']))
|
||||
search_filter = "(user={})".format(escape_filter_chars(request.args['search']))
|
||||
|
||||
srv = Server('servername', get_info=ALL)
|
||||
conn = Connection(srv, user='user_dn', password=None)
|
||||
status, result, response, _ = conn.search(dn, search_filter)
|
||||
@@ -0,0 +1,14 @@
|
||||
from flask import request, Flask
|
||||
import ldap
|
||||
import ldap.filter
|
||||
import ldap.dn
|
||||
|
||||
|
||||
@app.route("/bind_example")
|
||||
def bind_example():
|
||||
dn = "dc={}".format(ldap.dn.escape_dn_chars(request.args['dc']))
|
||||
search_filter = "(user={})".format(ldap.filter.escape_filter_chars(request.args['search']))
|
||||
|
||||
ldap_connection = ldap.initialize("ldap://127.0.0.1:1337")
|
||||
ldap_connection.bind('cn=root', "SecurePa$$!")
|
||||
user = ldap_connection.search_s(dn, ldap.SCOPE_SUBTREE, search_filter)
|
||||
@@ -0,0 +1,14 @@
|
||||
from ldap3 import Server, Connection, ALL
|
||||
from flask import request, Flask
|
||||
from ldap3.utils.dn import escape_rdn
|
||||
from ldap3.utils.conv import escape_filter_chars
|
||||
|
||||
@app.route("/passwordFromEnv")
|
||||
def passwordFromEnv():
|
||||
dn = "dc={}".format(escape_rdn(request.args['dc']))
|
||||
search_filter = "(user={})".format(escape_filter_chars(request.args['search']))
|
||||
|
||||
srv = Server('servername', get_info=ALL)
|
||||
conn = Connection(srv, user='user_dn',
|
||||
password="SecurePa$$!")
|
||||
status, result, response, _ = conn.search(dn, search_filter)
|
||||
@@ -0,0 +1,23 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
<qhelp>
|
||||
|
||||
<overview>
|
||||
<p>Failing to ensure the utilization of SSL in an LDAP connection can cause the entire communication
|
||||
to be sent in cleartext making it easier for an attacker to intercept it.</p>
|
||||
</overview>
|
||||
|
||||
<recommendation>
|
||||
<p>Always set <code>use_SSL</code> to <code>True</code>, call <code>start_tls_s()</code> or set a proper option flag (<code>ldap.OPT_X_TLS_XXXXXX</code>).</p>
|
||||
</recommendation>
|
||||
|
||||
<example>
|
||||
<p>This example shows both good and bad ways to deal with this issue under Python 3.</p>
|
||||
|
||||
<p>The first one sets <code>use_SSL</code> to true as a keyword argument whereas the second one fails to provide a value for it, so
|
||||
the default one is used (<code>False</code>).</p>
|
||||
<sample src="examples/LDAPInsecureAuth.py" />
|
||||
</example>
|
||||
|
||||
</qhelp>
|
||||
@@ -0,0 +1,21 @@
|
||||
/**
|
||||
* @name Python Insecure LDAP Authentication
|
||||
* @description Python LDAP Insecure LDAP Authentication
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @id py/insecure-ldap-auth
|
||||
* @tags experimental
|
||||
* security
|
||||
* external/cwe/cwe-522
|
||||
* external/cwe/cwe-523
|
||||
*/
|
||||
|
||||
// determine precision above
|
||||
import python
|
||||
import DataFlow::PathGraph
|
||||
import experimental.semmle.python.security.LDAPInsecureAuth
|
||||
|
||||
from LDAPInsecureAuthConfig config, DataFlow::PathNode source, DataFlow::PathNode sink
|
||||
where config.hasFlowPath(source, sink)
|
||||
select sink.getNode(), source, sink, "$@ is authenticated insecurely.", sink.getNode(),
|
||||
"This LDAP host"
|
||||
@@ -0,0 +1,20 @@
|
||||
from ldap3 import Server, Connection, ALL
|
||||
from flask import request, Flask
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
|
||||
@app.route("/good")
|
||||
def good():
|
||||
srv = Server(host, port, use_ssl=True)
|
||||
conn = Connection(srv, dn, password)
|
||||
conn.search(dn, search_filter)
|
||||
return conn.response
|
||||
|
||||
|
||||
@app.route("/bad")
|
||||
def bad():
|
||||
srv = Server(host, port)
|
||||
conn = Connection(srv, dn, password)
|
||||
conn.search(dn, search_filter)
|
||||
return conn.response
|
||||
@@ -0,0 +1,33 @@
|
||||
/**
|
||||
* @name XPath query built from user-controlled sources
|
||||
* @description Building a XPath query from user-controlled sources is vulnerable to insertion of
|
||||
* malicious Xpath code by the user.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @precision high
|
||||
* @id py/xpath-injection
|
||||
* @tags security
|
||||
* external/cwe/cwe-643
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.Concepts
|
||||
private import semmle.python.dataflow.new.TaintTracking
|
||||
private import semmle.python.Concepts
|
||||
private import semmle.python.ApiGraphs
|
||||
private import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import semmle.python.dataflow.new.BarrierGuards
|
||||
import XpathInjection::XpathInjection
|
||||
import DataFlow::PathGraph
|
||||
|
||||
class XpathInjectionConfiguration extends TaintTracking::Configuration {
|
||||
XpathInjectionConfiguration() { this = "PathNotNormalizedConfiguration" }
|
||||
|
||||
override predicate isSource(DataFlow::Node source) { source instanceof Source }
|
||||
|
||||
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
|
||||
}
|
||||
|
||||
from XpathInjectionConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
|
||||
where config.hasFlowPath(source, sink)
|
||||
select sink, source, sink, "This Xpath query depends on $@.", source, "a user-provided value"
|
||||
@@ -0,0 +1,35 @@
|
||||
/**
|
||||
* Provides a taint-tracking configuration for detecting "Xpath Injection" vulnerabilities.
|
||||
*
|
||||
* Note, for performance reasons: only import this file if
|
||||
* `XpathInjection::Configuration` is needed, otherwise
|
||||
* `XpathInjectionCustomizations` should be imported instead.
|
||||
*/
|
||||
|
||||
private import python
|
||||
import semmle.python.dataflow.new.DataFlow
|
||||
import semmle.python.dataflow.new.TaintTracking
|
||||
|
||||
/**
|
||||
* Provides a taint-tracking configuration for detecting "Xpath Injection" vulnerabilities.
|
||||
*/
|
||||
module XpathInjection {
|
||||
import XpathInjectionCustomizations::XpathInjection
|
||||
|
||||
/**
|
||||
* A taint-tracking configuration for detecting "Xpath Injection" vulnerabilities.
|
||||
*/
|
||||
class Configuration extends TaintTracking::Configuration {
|
||||
Configuration() { this = "Xpath Injection" }
|
||||
|
||||
override predicate isSource(DataFlow::Node source) { source instanceof Source }
|
||||
|
||||
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
|
||||
|
||||
override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer }
|
||||
|
||||
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
|
||||
guard instanceof SanitizerGuard
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,105 @@
|
||||
/**
|
||||
* Provides class and predicates to track external data that
|
||||
* may represent malicious xpath query objects.
|
||||
*
|
||||
* This module is intended to be imported into a taint-tracking query.
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.Concepts
|
||||
private import semmle.python.dataflow.new.TaintTracking
|
||||
private import semmle.python.Concepts
|
||||
private import semmle.python.ApiGraphs
|
||||
private import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import semmle.python.dataflow.new.BarrierGuards
|
||||
|
||||
/** Models Xpath Injection related classes and functions */
|
||||
module XpathInjection {
|
||||
/**
|
||||
* A data flow source for "XPath injection" vulnerabilities.
|
||||
*/
|
||||
abstract class Source extends DataFlow::Node { }
|
||||
|
||||
/**
|
||||
* A data flow sink for "XPath injection" vulnerabilities.
|
||||
*/
|
||||
abstract class Sink extends DataFlow::Node { }
|
||||
|
||||
/**
|
||||
* A sanitizer for "XPath injection" vulnerabilities.
|
||||
*/
|
||||
abstract class Sanitizer extends DataFlow::Node { }
|
||||
|
||||
/**
|
||||
* A sanitizer guard for "XPath injection" vulnerabilities.
|
||||
*/
|
||||
abstract class SanitizerGuard extends DataFlow::BarrierGuard { }
|
||||
|
||||
/**
|
||||
* A source of remote user input, considered as a flow source.
|
||||
*/
|
||||
class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
|
||||
|
||||
/** Returns an API node referring to `lxml.etree` */
|
||||
API::Node etree() { result = API::moduleImport("lxml").getMember("etree") }
|
||||
|
||||
/** Returns an API node referring to `lxml.etree` */
|
||||
API::Node etreeFromString() { result = etree().getMember("fromstring") }
|
||||
|
||||
/** Returns an API node referring to `lxml.etree.parse` */
|
||||
API::Node etreeParse() { result = etree().getMember("parse") }
|
||||
|
||||
/** Returns an API node referring to `lxml.etree.parse` */
|
||||
API::Node libxml2parseFile() { result = API::moduleImport("libxml2").getMember("parseFile") }
|
||||
|
||||
/**
|
||||
* A Sink representing an argument to `etree.XPath` or `etree.ETXPath` call.
|
||||
*
|
||||
* from lxml import etree
|
||||
* root = etree.XML("<xmlContent>")
|
||||
* find_text = etree.XPath("`sink`")
|
||||
* find_text = etree.ETXPath("`sink`")
|
||||
*/
|
||||
private class EtreeXpathArgument extends Sink {
|
||||
EtreeXpathArgument() { this = etree().getMember(["XPath", "ETXPath"]).getACall().getArg(0) }
|
||||
}
|
||||
|
||||
/**
|
||||
* A Sink representing an argument to the `etree.XPath` call.
|
||||
*
|
||||
* from lxml import etree
|
||||
* root = etree.fromstring(file(XML_DB).read(), XMLParser())
|
||||
* find_text = root.xpath("`sink`")
|
||||
*/
|
||||
private class EtreeFromstringXpathArgument extends Sink {
|
||||
EtreeFromstringXpathArgument() {
|
||||
this = etreeFromString().getReturn().getMember("xpath").getACall().getArg(0)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A Sink representing an argument to the `xpath` call to a parsed xml document.
|
||||
*
|
||||
* from lxml import etree
|
||||
* from io import StringIO
|
||||
* f = StringIO('<foo><bar></bar></foo>')
|
||||
* tree = etree.parse(f)
|
||||
* r = tree.xpath('`sink`')
|
||||
*/
|
||||
private class ParseXpathArgument extends Sink {
|
||||
ParseXpathArgument() { this = etreeParse().getReturn().getMember("xpath").getACall().getArg(0) }
|
||||
}
|
||||
|
||||
/**
|
||||
* A Sink representing an argument to the `xpathEval` call to a parsed libxml2 document.
|
||||
*
|
||||
* import libxml2
|
||||
* tree = libxml2.parseFile("file.xml")
|
||||
* r = tree.xpathEval('`sink`')
|
||||
*/
|
||||
private class ParseFileXpathEvalArgument extends Sink {
|
||||
ParseFileXpathEvalArgument() {
|
||||
this = libxml2parseFile().getReturn().getMember("xpathEval").getACall().getArg(0)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,36 +0,0 @@
|
||||
/**
|
||||
* @name XPath query built from user-controlled sources
|
||||
* @description Building a XPath query from user-controlled sources is vulnerable to insertion of
|
||||
* malicious Xpath code by the user.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @precision high
|
||||
* @id py/xpath-injection
|
||||
* @tags security
|
||||
* external/cwe/cwe-643
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.security.Paths
|
||||
import semmle.python.security.strings.Untrusted
|
||||
/* Sources */
|
||||
import semmle.python.web.HttpRequest
|
||||
/* Sinks */
|
||||
import experimental.semmle.python.security.injection.Xpath
|
||||
|
||||
class XpathInjectionConfiguration extends TaintTracking::Configuration {
|
||||
XpathInjectionConfiguration() { this = "Xpath injection configuration" }
|
||||
|
||||
override predicate isSource(TaintTracking::Source source) {
|
||||
source instanceof HttpRequestTaintSource
|
||||
}
|
||||
|
||||
override predicate isSink(TaintTracking::Sink sink) {
|
||||
sink instanceof XpathInjection::XpathInjectionSink
|
||||
}
|
||||
}
|
||||
|
||||
from XpathInjectionConfiguration config, TaintedPathSource src, TaintedPathSink sink
|
||||
where config.hasFlowPath(src, sink)
|
||||
select sink.getSink(), src, sink, "This Xpath query depends on $@.", src.getSource(),
|
||||
"a user-provided value"
|
||||
@@ -0,0 +1,6 @@
|
||||
import python
|
||||
import semmle.python.security.performance.SuperlinearBackTracking
|
||||
|
||||
from PolynomialBackTrackingTerm t
|
||||
where t.getLocation().getFile().getBaseName() = "KnownCVEs.py"
|
||||
select t.getRegex(), t, t.getReason()
|
||||
@@ -0,0 +1,108 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
|
||||
<qhelp>
|
||||
|
||||
<include src="ReDoSIntroduction.inc.qhelp" />
|
||||
|
||||
<example>
|
||||
<p>
|
||||
|
||||
Consider this use of a regular expression, which removes
|
||||
all leading and trailing whitespace in a string:
|
||||
|
||||
</p>
|
||||
|
||||
<sample language="python">
|
||||
re.sub(r"^\s+|\s+$", "", text) # BAD
|
||||
</sample>
|
||||
|
||||
<p>
|
||||
|
||||
The sub-expression <code>"\s+$"</code> will match the
|
||||
whitespace characters in <code>text</code> from left to right, but it
|
||||
can start matching anywhere within a whitespace sequence. This is
|
||||
problematic for strings that do <strong>not</strong> end with a whitespace
|
||||
character. Such a string will force the regular expression engine to
|
||||
process each whitespace sequence once per whitespace character in the
|
||||
sequence.
|
||||
|
||||
</p>
|
||||
|
||||
<p>
|
||||
|
||||
This ultimately means that the time cost of trimming a
|
||||
string is quadratic in the length of the string. So a string like
|
||||
<code>"a b"</code> will take milliseconds to process, but a similar
|
||||
string with a million spaces instead of just one will take several
|
||||
minutes.
|
||||
|
||||
</p>
|
||||
|
||||
<p>
|
||||
|
||||
Avoid this problem by rewriting the regular expression to
|
||||
not contain the ambiguity about when to start matching whitespace
|
||||
sequences. For instance, by using a negative look-behind
|
||||
(<code>^\s+|(?<!\s)\s+$</code>), or just by using the built-in strip
|
||||
method (<code>text.strip()</code>).
|
||||
|
||||
</p>
|
||||
|
||||
<p>
|
||||
|
||||
Note that the sub-expression <code>"^\s+"</code> is
|
||||
<strong>not</strong> problematic as the <code>^</code> anchor restricts
|
||||
when that sub-expression can start matching, and as the regular
|
||||
expression engine matches from left to right.
|
||||
|
||||
</p>
|
||||
|
||||
</example>
|
||||
|
||||
<example>
|
||||
|
||||
<p>
|
||||
|
||||
As a similar, but slightly subtler problem, consider the
|
||||
regular expression that matches lines with numbers, possibly written
|
||||
using scientific notation:
|
||||
</p>
|
||||
|
||||
<sample language="python">
|
||||
^0\.\d+E?\d+$ # BAD
|
||||
</sample>
|
||||
|
||||
<p>
|
||||
|
||||
The problem with this regular expression is in the
|
||||
sub-expression <code>\d+E?\d+</code> because the second
|
||||
<code>\d+</code> can start matching digits anywhere after the first
|
||||
match of the first <code>\d+</code> if there is no <code>E</code> in
|
||||
the input string.
|
||||
|
||||
</p>
|
||||
|
||||
<p>
|
||||
|
||||
This is problematic for strings that do <strong>not</strong>
|
||||
end with a digit. Such a string will force the regular expression
|
||||
engine to process each digit sequence once per digit in the sequence,
|
||||
again leading to a quadratic time complexity.
|
||||
|
||||
</p>
|
||||
|
||||
<p>
|
||||
|
||||
To make the processing faster, the regular expression
|
||||
should be rewritten such that the two <code>\d+</code> sub-expressions
|
||||
do not have overlapping matches: <code>^0\.\d+(E\d+)?$</code>.
|
||||
|
||||
</p>
|
||||
|
||||
</example>
|
||||
|
||||
<include src="ReDoSReferences.inc.qhelp"/>
|
||||
|
||||
</qhelp>
|
||||
@@ -0,0 +1,33 @@
|
||||
/**
|
||||
* @name Polynomial regular expression used on uncontrolled data
|
||||
* @description A regular expression that can require polynomial time
|
||||
* to match may be vulnerable to denial-of-service attacks.
|
||||
* @kind path-problem
|
||||
* @problem.severity warning
|
||||
* @precision high
|
||||
* @id py/polynomial-redos
|
||||
* @tags security
|
||||
* external/cwe/cwe-730
|
||||
* external/cwe/cwe-400
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.security.performance.SuperlinearBackTracking
|
||||
import semmle.python.security.dataflow.PolynomialReDoS
|
||||
import DataFlow::PathGraph
|
||||
|
||||
from
|
||||
PolynomialReDoS::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink,
|
||||
PolynomialReDoS::Sink sinkNode, PolynomialBackTrackingTerm regexp
|
||||
where
|
||||
config.hasFlowPath(source, sink) and
|
||||
sinkNode = sink.getNode() and
|
||||
regexp.getRootTerm() = sinkNode.getRegExp()
|
||||
// not (
|
||||
// source.getNode().(Source).getKind() = "url" and
|
||||
// regexp.isAtEndLine()
|
||||
// )
|
||||
select sinkNode.getHighlight(), source, sink,
|
||||
"This $@ that depends on $@ may run slow on strings " + regexp.getPrefixMessage() +
|
||||
"with many repetitions of '" + regexp.getPumpString() + "'.", regexp, "regular expression",
|
||||
source.getNode(), "a user-provided value"
|
||||
34
python/ql/src/experimental/Security/CWE-730/ReDoS.qhelp
Normal file
34
python/ql/src/experimental/Security/CWE-730/ReDoS.qhelp
Normal file
@@ -0,0 +1,34 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
|
||||
<qhelp>
|
||||
|
||||
<include src="ReDoSIntroduction.inc.qhelp" />
|
||||
|
||||
<example>
|
||||
<p>
|
||||
Consider this regular expression:
|
||||
</p>
|
||||
<sample language="python">
|
||||
^_(__|.)+_$
|
||||
</sample>
|
||||
<p>
|
||||
Its sub-expression <code>"(__|.)+?"</code> can match the string <code>"__"</code> either by the
|
||||
first alternative <code>"__"</code> to the left of the <code>"|"</code> operator, or by two
|
||||
repetitions of the second alternative <code>"."</code> to the right. Thus, a string consisting
|
||||
of an odd number of underscores followed by some other character will cause the regular
|
||||
expression engine to run for an exponential amount of time before rejecting the input.
|
||||
</p>
|
||||
<p>
|
||||
This problem can be avoided by rewriting the regular expression to remove the ambiguity between
|
||||
the two branches of the alternative inside the repetition:
|
||||
</p>
|
||||
<sample language="python">
|
||||
^_(__|[^_])+_$
|
||||
</sample>
|
||||
</example>
|
||||
|
||||
<include src="ReDoSReferences.inc.qhelp"/>
|
||||
|
||||
</qhelp>
|
||||
25
python/ql/src/experimental/Security/CWE-730/ReDoS.ql
Normal file
25
python/ql/src/experimental/Security/CWE-730/ReDoS.ql
Normal file
@@ -0,0 +1,25 @@
|
||||
/**
|
||||
* @name Inefficient regular expression
|
||||
* @description A regular expression that requires exponential time to match certain inputs
|
||||
* can be a performance bottleneck, and may be vulnerable to denial-of-service
|
||||
* attacks.
|
||||
* @kind problem
|
||||
* @problem.severity error
|
||||
* @precision high
|
||||
* @id py/redos
|
||||
* @tags security
|
||||
* external/cwe/cwe-730
|
||||
* external/cwe/cwe-400
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.security.performance.ExponentialBackTracking
|
||||
|
||||
from RegExpTerm t, string pump, State s, string prefixMsg
|
||||
where
|
||||
hasReDoSResult(t, pump, s, prefixMsg) and
|
||||
// exclude verbose mode regexes for now
|
||||
not t.getRegex().getAMode() = "VERBOSE"
|
||||
select t,
|
||||
"This part of the regular expression may cause exponential backtracking on strings " + prefixMsg +
|
||||
"containing many repetitions of '" + pump + "'."
|
||||
@@ -0,0 +1,54 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
<qhelp>
|
||||
<overview>
|
||||
<p>
|
||||
|
||||
Some regular expressions take a long time to match certain
|
||||
input strings to the point where the time it takes to match a string
|
||||
of length <i>n</i> is proportional to <i>n<sup>k</sup></i> or even
|
||||
<i>2<sup>n</sup></i>. Such regular expressions can negatively affect
|
||||
performance, or even allow a malicious user to perform a Denial of
|
||||
Service ("DoS") attack by crafting an expensive input string for the
|
||||
regular expression to match.
|
||||
|
||||
</p>
|
||||
|
||||
<p>
|
||||
|
||||
The regular expression engine provided by Python uses a backtracking non-deterministic finite
|
||||
automata to implement regular expression matching. While this approach
|
||||
is space-efficient and allows supporting advanced features like
|
||||
capture groups, it is not time-efficient in general. The worst-case
|
||||
time complexity of such an automaton can be polynomial or even
|
||||
exponential, meaning that for strings of a certain shape, increasing
|
||||
the input length by ten characters may make the automaton about 1000
|
||||
times slower.
|
||||
|
||||
</p>
|
||||
|
||||
<p>
|
||||
|
||||
Typically, a regular expression is affected by this
|
||||
problem if it contains a repetition of the form <code>r*</code> or
|
||||
<code>r+</code> where the sub-expression <code>r</code> is ambiguous
|
||||
in the sense that it can match some string in multiple ways. More
|
||||
information about the precise circumstances can be found in the
|
||||
references.
|
||||
|
||||
</p>
|
||||
</overview>
|
||||
|
||||
<recommendation>
|
||||
|
||||
<p>
|
||||
|
||||
Modify the regular expression to remove the ambiguity, or
|
||||
ensure that the strings matched with the regular expression are short
|
||||
enough that the time-complexity does not matter.
|
||||
|
||||
</p>
|
||||
|
||||
</recommendation>
|
||||
</qhelp>
|
||||
@@ -0,0 +1,16 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
<qhelp>
|
||||
<references>
|
||||
<li>
|
||||
OWASP:
|
||||
<a href="https://www.owasp.org/index.php/Regular_expression_Denial_of_Service_-_ReDoS">Regular expression Denial of Service - ReDoS</a>.
|
||||
</li>
|
||||
<li>Wikipedia: <a href="https://en.wikipedia.org/wiki/ReDoS">ReDoS</a>.</li>
|
||||
<li>Wikipedia: <a href="https://en.wikipedia.org/wiki/Time_complexity">Time complexity</a>.</li>
|
||||
<li>James Kirrage, Asiri Rathnayake, Hayo Thielecke:
|
||||
<a href="http://www.cs.bham.ac.uk/~hxt/research/reg-exp-sec.pdf">Static Analysis for Regular Expression Denial-of-Service Attack</a>.
|
||||
</li>
|
||||
</references>
|
||||
</qhelp>
|
||||
@@ -0,0 +1,45 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
<qhelp>
|
||||
<overview>
|
||||
<p>
|
||||
Constructing a regular expression with unsanitized user input is dangerous as a malicious user may
|
||||
be able to modify the meaning of the expression. In particular, such a user may be able to provide
|
||||
a regular expression fragment that takes exponential time in the worst case, and use that to
|
||||
perform a Denial of Service attack.
|
||||
</p>
|
||||
</overview>
|
||||
|
||||
<recommendation>
|
||||
<p>
|
||||
Before embedding user input into a regular expression, use a sanitization function such as
|
||||
<code>re.escape</code> to escape meta-characters that have a special meaning regarding
|
||||
regular expressions' syntax.
|
||||
</p>
|
||||
</recommendation>
|
||||
|
||||
<example>
|
||||
<p>
|
||||
The following examples are based on a simple Flask web server environment.
|
||||
</p>
|
||||
<p>
|
||||
The following example shows a HTTP request parameter that is used to construct a regular expression
|
||||
without sanitizing it first:
|
||||
</p>
|
||||
<sample src="re_bad.py" />
|
||||
<p>
|
||||
Instead, the request parameter should be sanitized first, for example using the function
|
||||
<code>re.escape</code>. This ensures that the user cannot insert characters which have a
|
||||
special meaning in regular expressions.
|
||||
</p>
|
||||
<sample src="re_good.py" />
|
||||
</example>
|
||||
|
||||
<references>
|
||||
<li>OWASP: <a href="https://www.owasp.org/index.php/Regular_expression_Denial_of_Service_-_ReDoS">Regular expression Denial of Service - ReDoS</a>.</li>
|
||||
<li>Wikipedia: <a href="https://en.wikipedia.org/wiki/ReDoS">ReDoS</a>.</li>
|
||||
<li>Python docs: <a href="https://docs.python.org/3/library/re.html">re</a>.</li>
|
||||
<li>SonarSource: <a href="https://rules.sonarsource.com/python/type/Vulnerability/RSPEC-2631">RSPEC-2631</a>.</li>
|
||||
</references>
|
||||
</qhelp>
|
||||
@@ -0,0 +1,29 @@
|
||||
/**
|
||||
* @name Regular expression injection
|
||||
* @description User input should not be used in regular expressions without first being escaped,
|
||||
* otherwise a malicious user may be able to inject an expression that could require
|
||||
* exponential time on certain inputs.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @id py/regex-injection
|
||||
* @tags security
|
||||
* external/cwe/cwe-730
|
||||
* external/cwe/cwe-400
|
||||
*/
|
||||
|
||||
// determine precision above
|
||||
import python
|
||||
import experimental.semmle.python.security.injection.RegexInjection
|
||||
import DataFlow::PathGraph
|
||||
|
||||
from
|
||||
RegexInjectionFlowConfig config, DataFlow::PathNode source, DataFlow::PathNode sink,
|
||||
RegexInjectionSink regexInjectionSink, Attribute methodAttribute
|
||||
where
|
||||
config.hasFlowPath(source, sink) and
|
||||
regexInjectionSink = sink.getNode() and
|
||||
methodAttribute = regexInjectionSink.getRegexMethod()
|
||||
select sink.getNode(), source, sink,
|
||||
"$@ regular expression is constructed from a $@ and executed by $@.", sink.getNode(), "This",
|
||||
source.getNode(), "user-provided value", methodAttribute,
|
||||
regexInjectionSink.getRegexModule() + "." + methodAttribute.getName()
|
||||
15
python/ql/src/experimental/Security/CWE-730/re_bad.py
Normal file
15
python/ql/src/experimental/Security/CWE-730/re_bad.py
Normal file
@@ -0,0 +1,15 @@
|
||||
from flask import request, Flask
|
||||
import re
|
||||
|
||||
|
||||
@app.route("/direct")
|
||||
def direct():
|
||||
unsafe_pattern = request.args["pattern"]
|
||||
re.search(unsafe_pattern, "")
|
||||
|
||||
|
||||
@app.route("/compile")
|
||||
def compile():
|
||||
unsafe_pattern = request.args["pattern"]
|
||||
compiled_pattern = re.compile(unsafe_pattern)
|
||||
compiled_pattern.search("")
|
||||
17
python/ql/src/experimental/Security/CWE-730/re_good.py
Normal file
17
python/ql/src/experimental/Security/CWE-730/re_good.py
Normal file
@@ -0,0 +1,17 @@
|
||||
from flask import request, Flask
|
||||
import re
|
||||
|
||||
|
||||
@app.route("/direct")
|
||||
def direct():
|
||||
unsafe_pattern = request.args['pattern']
|
||||
safe_pattern = re.escape(unsafe_pattern)
|
||||
re.search(safe_pattern, "")
|
||||
|
||||
|
||||
@app.route("/compile")
|
||||
def compile():
|
||||
unsafe_pattern = request.args['pattern']
|
||||
safe_pattern = re.escape(unsafe_pattern)
|
||||
compiled_pattern = re.compile(safe_pattern)
|
||||
compiled_pattern.search("")
|
||||
@@ -0,0 +1,40 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
<qhelp>
|
||||
|
||||
<overview>
|
||||
<p>
|
||||
Passing user-controlled sources into NoSQL queries can result in a NoSQL injection flaw.
|
||||
This tainted NoSQL query containing a user-controlled source can then execute a malicious query in a NoSQL database such as MongoDB.
|
||||
In order for the user-controlled source to taint the NoSQL query, the user-controller source must be converted into a Python object using something like <code>json.loads</code> or <code>xmltodict.parse</code>.
|
||||
</p>
|
||||
<p>
|
||||
Because a user-controlled source is passed into the query, the malicious user can have complete control over the query itself.
|
||||
When the tainted query is executed, the malicious user can commit malicious actions such as bypassing role restrictions or accessing and modifying restricted data in the NoSQL database.
|
||||
</p>
|
||||
</overview>
|
||||
|
||||
<recommendation>
|
||||
<p>
|
||||
NoSQL injections can be prevented by escaping user-input's special characters that are passed into the NoSQL query from the user-supplied source.
|
||||
Alternatively, using a sanitize library such as MongoSanitizer will ensure that user-supplied sources can not act as a malicious query.
|
||||
</p>
|
||||
</recommendation>
|
||||
|
||||
<example>
|
||||
<p>In the example below, the user-supplied source is passed to a MongoDB function that queries the MongoDB database.</p>
|
||||
<sample src="examples/NoSQLInjection-bad.py" />
|
||||
<p> This can be fixed by using a sanitizer library like MongoSanitizer as shown in this annotated code version below.</p>
|
||||
<sample src="examples/NoSQLInjection-good.py" />
|
||||
</example>
|
||||
|
||||
<references>
|
||||
<li>Mongoengine: <a href="http://mongoengine.org/">Documentation</a>.</li>
|
||||
<li>Flask-Mongoengine: <a href="http://docs.mongoengine.org/projects/flask-mongoengine/en/latest/">Documentation</a>.</li>
|
||||
<li>PyMongo: <a href="https://pypi.org/project/pymongo/">Documentation</a>.</li>
|
||||
<li>Flask-PyMongo: <a href="https://flask-pymongo.readthedocs.io/en/latest/">Documentation</a>.</li>
|
||||
<li>OWASP: <a href="https://owasp.org/www-pdf-archive/GOD16-NOSQL.pdf">NoSQL Injection</a>.</li>
|
||||
<li>Security Stack Exchange Discussion: <a href="https://security.stackexchange.com/questions/83231/mongodb-nosql-injection-in-python-code">Question 83231</a>.</li>
|
||||
</references>
|
||||
</qhelp>
|
||||
@@ -0,0 +1,19 @@
|
||||
/**
|
||||
* @name NoSQL Injection
|
||||
* @description Building a NoSQL query from user-controlled sources is vulnerable to insertion of
|
||||
* malicious NoSQL code by the user.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @id py/nosql-injection
|
||||
* @tags experimental
|
||||
* security
|
||||
* external/cwe/cwe-943
|
||||
*/
|
||||
|
||||
import python
|
||||
import experimental.semmle.python.security.injection.NoSQLInjection
|
||||
|
||||
from CustomPathNode source, CustomPathNode sink
|
||||
where noSQLInjectionFlow(source, sink)
|
||||
select sink, source, sink, "$@ NoSQL query contains an unsanitized $@", sink, "This", source,
|
||||
"user-provided value"
|
||||
@@ -0,0 +1,13 @@
|
||||
from flask import Flask, request
|
||||
from flask_pymongo import PyMongo
|
||||
import json
|
||||
|
||||
mongo = PyMongo(app)
|
||||
|
||||
|
||||
@app.route("/")
|
||||
def home_page():
|
||||
unsanitized_search = request.args['search']
|
||||
json_search = json.loads(unsanitized_search)
|
||||
|
||||
result = mongo.db.user.find({'name': json_search})
|
||||
@@ -0,0 +1,15 @@
|
||||
from flask import Flask, request
|
||||
from flask_pymongo import PyMongo
|
||||
from mongosanitizer.sanitizer import sanitize
|
||||
import json
|
||||
|
||||
mongo = PyMongo(app)
|
||||
|
||||
|
||||
@app.route("/")
|
||||
def home_page():
|
||||
unsafe_search = request.args['search']
|
||||
json_search = json.loads(unsafe_search)
|
||||
safe_search = sanitize(unsanitized_search)
|
||||
|
||||
result = client.db.collection.find_one({'data': safe_search})
|
||||
@@ -14,6 +14,73 @@ private import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import semmle.python.dataflow.new.TaintTracking
|
||||
private import experimental.semmle.python.Frameworks
|
||||
|
||||
/** Provides classes for modeling Regular Expression-related APIs. */
|
||||
module RegexExecution {
|
||||
/**
|
||||
* A data-flow node that executes a regular expression.
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `RegexExecution` instead.
|
||||
*/
|
||||
abstract class Range extends DataFlow::Node {
|
||||
/**
|
||||
* Gets the argument containing the executed expression.
|
||||
*/
|
||||
abstract DataFlow::Node getRegexNode();
|
||||
|
||||
/**
|
||||
* Gets the library used to execute the regular expression.
|
||||
*/
|
||||
abstract string getRegexModule();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A data-flow node that executes a regular expression.
|
||||
*
|
||||
* Extend this class to refine existing API models. If you want to model new APIs,
|
||||
* extend `RegexExecution::Range` instead.
|
||||
*/
|
||||
class RegexExecution extends DataFlow::Node {
|
||||
RegexExecution::Range range;
|
||||
|
||||
RegexExecution() { this = range }
|
||||
|
||||
DataFlow::Node getRegexNode() { result = range.getRegexNode() }
|
||||
|
||||
string getRegexModule() { result = range.getRegexModule() }
|
||||
}
|
||||
|
||||
/** Provides classes for modeling Regular Expression escape-related APIs. */
|
||||
module RegexEscape {
|
||||
/**
|
||||
* A data-flow node that escapes a regular expression.
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `RegexEscape` instead.
|
||||
*/
|
||||
abstract class Range extends DataFlow::Node {
|
||||
/**
|
||||
* Gets the argument containing the escaped expression.
|
||||
*/
|
||||
abstract DataFlow::Node getRegexNode();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A data-flow node that escapes a regular expression.
|
||||
*
|
||||
* Extend this class to refine existing API models. If you want to model new APIs,
|
||||
* extend `RegexEscape::Range` instead.
|
||||
*/
|
||||
class RegexEscape extends DataFlow::Node {
|
||||
RegexEscape::Range range;
|
||||
|
||||
RegexEscape() { this = range }
|
||||
|
||||
DataFlow::Node getRegexNode() { result = range.getRegexNode() }
|
||||
}
|
||||
|
||||
/** Provides classes for modeling XML parsing APIs. */
|
||||
module XMLParsing {
|
||||
/**
|
||||
@@ -99,3 +166,213 @@ class XMLParser extends DataFlow::Node {
|
||||
*/
|
||||
predicate mayBeDangerous() { range.mayBeDangerous() }
|
||||
}
|
||||
|
||||
/** Provides classes for modeling LDAP query execution-related APIs. */
|
||||
module LDAPQuery {
|
||||
/**
|
||||
* A data-flow node that collects methods executing a LDAP query.
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `LDAPQuery` instead.
|
||||
*/
|
||||
abstract class Range extends DataFlow::Node {
|
||||
/**
|
||||
* Gets the argument containing the executed expression.
|
||||
*/
|
||||
abstract DataFlow::Node getQuery();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A data-flow node that collect methods executing a LDAP query.
|
||||
*
|
||||
* Extend this class to refine existing API models. If you want to model new APIs,
|
||||
* extend `LDAPQuery::Range` instead.
|
||||
*/
|
||||
class LDAPQuery extends DataFlow::Node {
|
||||
LDAPQuery::Range range;
|
||||
|
||||
LDAPQuery() { this = range }
|
||||
|
||||
/**
|
||||
* Gets the argument containing the executed expression.
|
||||
*/
|
||||
DataFlow::Node getQuery() { result = range.getQuery() }
|
||||
}
|
||||
|
||||
/** Provides classes for modeling LDAP components escape-related APIs. */
|
||||
module LDAPEscape {
|
||||
/**
|
||||
* A data-flow node that collects functions escaping LDAP components.
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `LDAPEscape` instead.
|
||||
*/
|
||||
abstract class Range extends DataFlow::Node {
|
||||
/**
|
||||
* Gets the argument containing the escaped expression.
|
||||
*/
|
||||
abstract DataFlow::Node getAnInput();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A data-flow node that collects functions escaping LDAP components.
|
||||
*
|
||||
* Extend this class to refine existing API models. If you want to model new APIs,
|
||||
* extend `LDAPEscape::Range` instead.
|
||||
*/
|
||||
class LDAPEscape extends DataFlow::Node {
|
||||
LDAPEscape::Range range;
|
||||
|
||||
LDAPEscape() { this = range }
|
||||
|
||||
/**
|
||||
* Gets the argument containing the escaped expression.
|
||||
*/
|
||||
DataFlow::Node getAnInput() { result = range.getAnInput() }
|
||||
}
|
||||
|
||||
/** Provides classes for modeling LDAP bind-related APIs. */
|
||||
module LDAPBind {
|
||||
/**
|
||||
* A data-flow node that collects methods binding a LDAP connection.
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `LDAPBind` instead.
|
||||
*/
|
||||
abstract class Range extends DataFlow::Node {
|
||||
/**
|
||||
* Gets the argument containing the binding host.
|
||||
*/
|
||||
abstract DataFlow::Node getHost();
|
||||
|
||||
/**
|
||||
* Gets the argument containing the binding expression.
|
||||
*/
|
||||
abstract DataFlow::Node getPassword();
|
||||
|
||||
/**
|
||||
* Holds if the binding process use SSL.
|
||||
*/
|
||||
abstract predicate useSSL();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A data-flow node that collects methods binding a LDAP connection.
|
||||
*
|
||||
* Extend this class to refine existing API models. If you want to model new APIs,
|
||||
* extend `LDAPBind::Range` instead.
|
||||
*/
|
||||
class LDAPBind extends DataFlow::Node {
|
||||
LDAPBind::Range range;
|
||||
|
||||
LDAPBind() { this = range }
|
||||
|
||||
/**
|
||||
* Gets the argument containing the binding host.
|
||||
*/
|
||||
DataFlow::Node getHost() { result = range.getHost() }
|
||||
|
||||
/**
|
||||
* Gets the argument containing the binding expression.
|
||||
*/
|
||||
DataFlow::Node getPassword() { result = range.getPassword() }
|
||||
|
||||
/**
|
||||
* Holds if the binding process use SSL.
|
||||
*/
|
||||
predicate useSSL() { range.useSSL() }
|
||||
}
|
||||
|
||||
/** Provides classes for modeling SQL sanitization libraries. */
|
||||
module SQLEscape {
|
||||
/**
|
||||
* A data-flow node that collects functions that escape SQL statements.
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `SQLEscape` instead.
|
||||
*/
|
||||
abstract class Range extends DataFlow::Node {
|
||||
/**
|
||||
* Gets the argument containing the raw SQL statement.
|
||||
*/
|
||||
abstract DataFlow::Node getAnInput();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A data-flow node that collects functions escaping SQL statements.
|
||||
*
|
||||
* Extend this class to refine existing API models. If you want to model new APIs,
|
||||
* extend `SQLEscape::Range` instead.
|
||||
*/
|
||||
class SQLEscape extends DataFlow::Node {
|
||||
SQLEscape::Range range;
|
||||
|
||||
SQLEscape() { this = range }
|
||||
|
||||
/**
|
||||
* Gets the argument containing the raw SQL statement.
|
||||
*/
|
||||
DataFlow::Node getAnInput() { result = range.getAnInput() }
|
||||
}
|
||||
|
||||
/** Provides a class for modeling NoSQL execution APIs. */
|
||||
module NoSQLQuery {
|
||||
/**
|
||||
* A data-flow node that executes NoSQL queries.
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `NoSQLQuery` instead.
|
||||
*/
|
||||
abstract class Range extends DataFlow::Node {
|
||||
/** Gets the argument that specifies the NoSQL query to be executed. */
|
||||
abstract DataFlow::Node getQuery();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A data-flow node that executes NoSQL queries.
|
||||
*
|
||||
* Extend this class to refine existing API models. If you want to model new APIs,
|
||||
* extend `NoSQLQuery::Range` instead.
|
||||
*/
|
||||
class NoSQLQuery extends DataFlow::Node {
|
||||
NoSQLQuery::Range range;
|
||||
|
||||
NoSQLQuery() { this = range }
|
||||
|
||||
/** Gets the argument that specifies the NoSQL query to be executed. */
|
||||
DataFlow::Node getQuery() { result = range.getQuery() }
|
||||
}
|
||||
|
||||
/** Provides classes for modeling NoSQL sanitization-related APIs. */
|
||||
module NoSQLSanitizer {
|
||||
/**
|
||||
* A data-flow node that collects functions sanitizing NoSQL queries.
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `NoSQLSanitizer` instead.
|
||||
*/
|
||||
abstract class Range extends DataFlow::Node {
|
||||
/** Gets the argument that specifies the NoSQL query to be sanitized. */
|
||||
abstract DataFlow::Node getAnInput();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A data-flow node that collects functions sanitizing NoSQL queries.
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `NoSQLSanitizer::Range` instead.
|
||||
*/
|
||||
class NoSQLSanitizer extends DataFlow::Node {
|
||||
NoSQLSanitizer::Range range;
|
||||
|
||||
NoSQLSanitizer() { this = range }
|
||||
|
||||
/** Gets the argument that specifies the NoSQL query to be sanitized. */
|
||||
DataFlow::Node getAnInput() { result = range.getAnInput() }
|
||||
}
|
||||
|
||||
@@ -4,3 +4,5 @@
|
||||
|
||||
private import experimental.semmle.python.frameworks.Stdlib
|
||||
private import experimental.semmle.python.frameworks.XML
|
||||
private import experimental.semmle.python.frameworks.LDAP
|
||||
private import experimental.semmle.python.frameworks.NoSQL
|
||||
|
||||
263
python/ql/src/experimental/semmle/python/frameworks/LDAP.qll
Normal file
263
python/ql/src/experimental/semmle/python/frameworks/LDAP.qll
Normal file
@@ -0,0 +1,263 @@
|
||||
/**
|
||||
* Provides classes modeling security-relevant aspects of the LDAP libraries.
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.dataflow.new.TaintTracking
|
||||
private import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import experimental.semmle.python.Concepts
|
||||
private import semmle.python.ApiGraphs
|
||||
|
||||
/**
|
||||
* Provides models for Python's ldap-related libraries.
|
||||
*/
|
||||
private module LDAP {
|
||||
/**
|
||||
* Provides models for the `python-ldap` PyPI package (imported as `ldap`).
|
||||
*
|
||||
* See https://www.python-ldap.org/en/python-ldap-3.3.0/index.html
|
||||
*/
|
||||
private module LDAP2 {
|
||||
/** Gets a reference to the `ldap` module. */
|
||||
API::Node ldap() { result = API::moduleImport("ldap") }
|
||||
|
||||
/** Returns a `ldap` module instance */
|
||||
API::Node ldapInitialize() { result = ldap().getMember("initialize") }
|
||||
|
||||
/** Gets a reference to a `ldap` operation. */
|
||||
private DataFlow::TypeTrackingNode ldapOperation(DataFlow::TypeTracker t) {
|
||||
t.start() and
|
||||
result.(DataFlow::AttrRead).getObject().getALocalSource() = ldapInitialize().getACall()
|
||||
or
|
||||
exists(DataFlow::TypeTracker t2 | result = ldapOperation(t2).track(t2, t))
|
||||
}
|
||||
|
||||
/**
|
||||
* List of `ldap` methods used to execute a query.
|
||||
*
|
||||
* See https://www.python-ldap.org/en/python-ldap-3.3.0/reference/ldap.html#functions
|
||||
*/
|
||||
private class LDAP2QueryMethods extends string {
|
||||
LDAP2QueryMethods() {
|
||||
this in ["search", "search_s", "search_st", "search_ext", "search_ext_s"]
|
||||
}
|
||||
}
|
||||
|
||||
/** Gets a reference to a `ldap` operation. */
|
||||
private DataFlow::Node ldapOperation() {
|
||||
ldapOperation(DataFlow::TypeTracker::end()).flowsTo(result)
|
||||
}
|
||||
|
||||
/** Gets a reference to a `ldap` query. */
|
||||
private DataFlow::Node ldapQuery() {
|
||||
result = ldapOperation() and
|
||||
result.(DataFlow::AttrRead).getAttributeName() instanceof LDAP2QueryMethods
|
||||
}
|
||||
|
||||
/**
|
||||
* A class to find `ldap` methods executing a query.
|
||||
*
|
||||
* See `LDAP2QueryMethods`
|
||||
*/
|
||||
private class LDAP2Query extends DataFlow::CallCfgNode, LDAPQuery::Range {
|
||||
LDAP2Query() { this.getFunction() = ldapQuery() }
|
||||
|
||||
override DataFlow::Node getQuery() {
|
||||
result in [this.getArg(0), this.getArg(2), this.getArgByName("filterstr")]
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* List of `ldap` methods used for binding.
|
||||
*
|
||||
* See https://www.python-ldap.org/en/python-ldap-3.3.0/reference/ldap.html#functions
|
||||
*/
|
||||
private class LDAP2BindMethods extends string {
|
||||
LDAP2BindMethods() {
|
||||
this in [
|
||||
"bind", "bind_s", "simple_bind", "simple_bind_s", "sasl_interactive_bind_s",
|
||||
"sasl_non_interactive_bind_s", "sasl_external_bind_s", "sasl_gssapi_bind_s"
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
/** Gets a reference to a `ldap` bind. */
|
||||
private DataFlow::Node ldapBind() {
|
||||
result = ldapOperation() and
|
||||
result.(DataFlow::AttrRead).getAttributeName() instanceof LDAP2BindMethods
|
||||
}
|
||||
|
||||
/**List of SSL-demanding options */
|
||||
private class LDAPSSLOptions extends DataFlow::Node {
|
||||
LDAPSSLOptions() { this = ldap().getMember("OPT_X_TLS_" + ["DEMAND", "HARD"]).getAUse() }
|
||||
}
|
||||
|
||||
/**
|
||||
* A class to find `ldap` methods binding a connection.
|
||||
*
|
||||
* See `LDAP2BindMethods`
|
||||
*/
|
||||
private class LDAP2Bind extends DataFlow::CallCfgNode, LDAPBind::Range {
|
||||
LDAP2Bind() { this.getFunction() = ldapBind() }
|
||||
|
||||
override DataFlow::Node getPassword() {
|
||||
result in [this.getArg(1), this.getArgByName("cred")]
|
||||
}
|
||||
|
||||
override DataFlow::Node getHost() {
|
||||
exists(DataFlow::CallCfgNode initialize |
|
||||
this.getFunction().(DataFlow::AttrRead).getObject().getALocalSource() = initialize and
|
||||
initialize = ldapInitialize().getACall() and
|
||||
result = initialize.getArg(0)
|
||||
)
|
||||
}
|
||||
|
||||
override predicate useSSL() {
|
||||
// use initialize to correlate `this` and so avoid FP in several instances
|
||||
exists(DataFlow::CallCfgNode initialize |
|
||||
// ldap.set_option(ldap.OPT_X_TLS_%s)
|
||||
ldap().getMember("set_option").getACall().getArg(_) instanceof LDAPSSLOptions
|
||||
or
|
||||
this.getFunction().(DataFlow::AttrRead).getObject().getALocalSource() = initialize and
|
||||
initialize = ldapInitialize().getACall() and
|
||||
(
|
||||
// ldap_connection.start_tls_s()
|
||||
// see https://www.python-ldap.org/en/python-ldap-3.3.0/reference/ldap.html#ldap.LDAPObject.start_tls_s
|
||||
exists(DataFlow::MethodCallNode startTLS |
|
||||
startTLS.getObject().getALocalSource() = initialize and
|
||||
startTLS.getMethodName() = "start_tls_s"
|
||||
)
|
||||
or
|
||||
// ldap_connection.set_option(ldap.OPT_X_TLS_%s, True)
|
||||
exists(DataFlow::CallCfgNode setOption |
|
||||
setOption.getFunction().(DataFlow::AttrRead).getObject().getALocalSource() =
|
||||
initialize and
|
||||
setOption.getFunction().(DataFlow::AttrRead).getAttributeName() = "set_option" and
|
||||
setOption.getArg(0) instanceof LDAPSSLOptions and
|
||||
not DataFlow::exprNode(any(False falseExpr))
|
||||
.(DataFlow::LocalSourceNode)
|
||||
.flowsTo(setOption.getArg(1))
|
||||
)
|
||||
)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A class to find calls to `ldap.dn.escape_dn_chars`.
|
||||
*
|
||||
* See https://github.com/python-ldap/python-ldap/blob/7ce471e238cdd9a4dd8d17baccd1c9e05e6f894a/Lib/ldap/dn.py#L17
|
||||
*/
|
||||
private class LDAP2EscapeDNCall extends DataFlow::CallCfgNode, LDAPEscape::Range {
|
||||
LDAP2EscapeDNCall() { this = ldap().getMember("dn").getMember("escape_dn_chars").getACall() }
|
||||
|
||||
override DataFlow::Node getAnInput() { result = this.getArg(0) }
|
||||
}
|
||||
|
||||
/**
|
||||
* A class to find calls to `ldap.filter.escape_filter_chars`.
|
||||
*
|
||||
* See https://www.python-ldap.org/en/python-ldap-3.3.0/reference/ldap-filter.html#ldap.filter.escape_filter_chars
|
||||
*/
|
||||
private class LDAP2EscapeFilterCall extends DataFlow::CallCfgNode, LDAPEscape::Range {
|
||||
LDAP2EscapeFilterCall() {
|
||||
this = ldap().getMember("filter").getMember("escape_filter_chars").getACall()
|
||||
}
|
||||
|
||||
override DataFlow::Node getAnInput() { result = this.getArg(0) }
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Provides models for the `ldap3` PyPI package
|
||||
*
|
||||
* See https://pypi.org/project/ldap3/
|
||||
*/
|
||||
private module LDAP3 {
|
||||
/** Gets a reference to the `ldap3` module. */
|
||||
API::Node ldap3() { result = API::moduleImport("ldap3") }
|
||||
|
||||
/** Gets a reference to the `ldap3` `utils` module. */
|
||||
API::Node ldap3Utils() { result = ldap3().getMember("utils") }
|
||||
|
||||
/** Returns a `ldap3` module `Server` instance */
|
||||
API::Node ldap3Server() { result = ldap3().getMember("Server") }
|
||||
|
||||
/** Returns a `ldap3` module `Connection` instance */
|
||||
API::Node ldap3Connection() { result = ldap3().getMember("Connection") }
|
||||
|
||||
/**
|
||||
* A class to find `ldap3` methods executing a query.
|
||||
*/
|
||||
private class LDAP3Query extends DataFlow::CallCfgNode, LDAPQuery::Range {
|
||||
LDAP3Query() {
|
||||
this.getFunction().(DataFlow::AttrRead).getObject().getALocalSource() =
|
||||
ldap3Connection().getACall() and
|
||||
this.getFunction().(DataFlow::AttrRead).getAttributeName() = "search"
|
||||
}
|
||||
|
||||
override DataFlow::Node getQuery() { result in [this.getArg(0), this.getArg(1)] }
|
||||
}
|
||||
|
||||
/**
|
||||
* A class to find `ldap3` methods binding a connection.
|
||||
*/
|
||||
class LDAP3Bind extends DataFlow::CallCfgNode, LDAPBind::Range {
|
||||
LDAP3Bind() { this = ldap3Connection().getACall() }
|
||||
|
||||
override DataFlow::Node getPassword() {
|
||||
result in [this.getArg(2), this.getArgByName("password")]
|
||||
}
|
||||
|
||||
override DataFlow::Node getHost() {
|
||||
exists(DataFlow::CallCfgNode serverCall |
|
||||
serverCall = ldap3Server().getACall() and
|
||||
this.getArg(0).getALocalSource() = serverCall and
|
||||
result = serverCall.getArg(0)
|
||||
)
|
||||
}
|
||||
|
||||
override predicate useSSL() {
|
||||
exists(DataFlow::CallCfgNode serverCall |
|
||||
serverCall = ldap3Server().getACall() and
|
||||
this.getArg(0).getALocalSource() = serverCall and
|
||||
DataFlow::exprNode(any(True trueExpr))
|
||||
.(DataFlow::LocalSourceNode)
|
||||
.flowsTo([serverCall.getArg(2), serverCall.getArgByName("use_ssl")])
|
||||
)
|
||||
or
|
||||
// ldap_connection.start_tls_s()
|
||||
// see https://www.python-ldap.org/en/python-ldap-3.3.0/reference/ldap.html#ldap.LDAPObject.start_tls_s
|
||||
exists(DataFlow::MethodCallNode startTLS |
|
||||
startTLS.getMethodName() = "start_tls_s" and
|
||||
startTLS.getObject().getALocalSource() = this
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A class to find calls to `ldap3.utils.dn.escape_rdn`.
|
||||
*
|
||||
* See https://github.com/cannatag/ldap3/blob/4d33166f0869b929f59c6e6825a1b9505eb99967/ldap3/utils/dn.py#L390
|
||||
*/
|
||||
private class LDAP3EscapeDNCall extends DataFlow::CallCfgNode, LDAPEscape::Range {
|
||||
LDAP3EscapeDNCall() { this = ldap3Utils().getMember("dn").getMember("escape_rdn").getACall() }
|
||||
|
||||
override DataFlow::Node getAnInput() { result = this.getArg(0) }
|
||||
}
|
||||
|
||||
/**
|
||||
* A class to find calls to `ldap3.utils.conv.escape_filter_chars`.
|
||||
*
|
||||
* See https://github.com/cannatag/ldap3/blob/4d33166f0869b929f59c6e6825a1b9505eb99967/ldap3/utils/conv.py#L91
|
||||
*/
|
||||
private class LDAP3EscapeFilterCall extends DataFlow::CallCfgNode, LDAPEscape::Range {
|
||||
LDAP3EscapeFilterCall() {
|
||||
this = ldap3Utils().getMember("conv").getMember("escape_filter_chars").getACall()
|
||||
}
|
||||
|
||||
override DataFlow::Node getAnInput() { result = this.getArg(0) }
|
||||
}
|
||||
}
|
||||
}
|
||||
215
python/ql/src/experimental/semmle/python/frameworks/NoSQL.qll
Normal file
215
python/ql/src/experimental/semmle/python/frameworks/NoSQL.qll
Normal file
@@ -0,0 +1,215 @@
|
||||
/**
|
||||
* Provides classes modeling security-relevant aspects of the standard libraries.
|
||||
* Note: some modeling is done internally in the dataflow/taint tracking implementation.
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.dataflow.new.TaintTracking
|
||||
private import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import experimental.semmle.python.Concepts
|
||||
private import semmle.python.ApiGraphs
|
||||
|
||||
private module NoSQL {
|
||||
// API Nodes returning `Mongo` instances.
|
||||
/** Gets a reference to `pymongo.MongoClient` */
|
||||
private API::Node pyMongo() {
|
||||
result = API::moduleImport("pymongo").getMember("MongoClient").getReturn()
|
||||
}
|
||||
|
||||
/** Gets a reference to `flask_pymongo.PyMongo` */
|
||||
private API::Node flask_PyMongo() {
|
||||
result = API::moduleImport("flask_pymongo").getMember("PyMongo").getReturn()
|
||||
}
|
||||
|
||||
/** Gets a reference to `mongoengine` */
|
||||
private API::Node mongoEngine() { result = API::moduleImport("mongoengine") }
|
||||
|
||||
/** Gets a reference to `flask_mongoengine.MongoEngine` */
|
||||
private API::Node flask_MongoEngine() {
|
||||
result = API::moduleImport("flask_mongoengine").getMember("MongoEngine").getReturn()
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a reference to an initialized `Mongo` instance.
|
||||
* See `pyMongo()`, `flask_PyMongo()`
|
||||
*/
|
||||
private API::Node mongoInstance() {
|
||||
result = pyMongo() or
|
||||
result = flask_PyMongo()
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a reference to an initialized `Mongo` DB instance.
|
||||
* See `mongoEngine()`, `flask_MongoEngine()`
|
||||
*/
|
||||
private API::Node mongoDBInstance() {
|
||||
result = mongoEngine().getMember(["get_db", "connect"]).getReturn() or
|
||||
result = mongoEngine().getMember("connection").getMember(["get_db", "connect"]).getReturn() or
|
||||
result = flask_MongoEngine().getMember("get_db").getReturn()
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a reference to a `Mongo` DB use.
|
||||
*
|
||||
* See `mongoInstance()`, `mongoDBInstance()`.
|
||||
*/
|
||||
private DataFlow::LocalSourceNode mongoDB(DataFlow::TypeTracker t) {
|
||||
t.start() and
|
||||
(
|
||||
exists(SubscriptNode subscript |
|
||||
subscript.getObject() = mongoInstance().getAUse().asCfgNode() and
|
||||
result.asCfgNode() = subscript
|
||||
)
|
||||
or
|
||||
result.(DataFlow::AttrRead).getObject() = mongoInstance().getAUse()
|
||||
or
|
||||
result = mongoDBInstance().getAUse()
|
||||
)
|
||||
or
|
||||
exists(DataFlow::TypeTracker t2 | result = mongoDB(t2).track(t2, t))
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a reference to a `Mongo` DB use.
|
||||
*
|
||||
* ```py
|
||||
* from flask_pymongo import PyMongo
|
||||
* mongo = PyMongo(app)
|
||||
* mongo.db.user.find({'name': safe_search})
|
||||
* ```
|
||||
*
|
||||
* `mongo.db` would be a use of a `Mongo` instance, and so the result.
|
||||
*/
|
||||
private DataFlow::Node mongoDB() { mongoDB(DataFlow::TypeTracker::end()).flowsTo(result) }
|
||||
|
||||
/**
|
||||
* Gets a reference to a `Mongo` collection use.
|
||||
*
|
||||
* See `mongoDB()`.
|
||||
*/
|
||||
private DataFlow::LocalSourceNode mongoCollection(DataFlow::TypeTracker t) {
|
||||
t.start() and
|
||||
(
|
||||
exists(SubscriptNode subscript | result.asCfgNode() = subscript |
|
||||
subscript.getObject() = mongoDB().asCfgNode()
|
||||
)
|
||||
or
|
||||
result.(DataFlow::AttrRead).getObject() = mongoDB()
|
||||
)
|
||||
or
|
||||
exists(DataFlow::TypeTracker t2 | result = mongoCollection(t2).track(t2, t))
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a reference to a `Mongo` collection use.
|
||||
*
|
||||
* ```py
|
||||
* from flask_pymongo import PyMongo
|
||||
* mongo = PyMongo(app)
|
||||
* mongo.db.user.find({'name': safe_search})
|
||||
* ```
|
||||
*
|
||||
* `mongo.db.user` would be a use of a `Mongo` collection, and so the result.
|
||||
*/
|
||||
private DataFlow::Node mongoCollection() {
|
||||
mongoCollection(DataFlow::TypeTracker::end()).flowsTo(result)
|
||||
}
|
||||
|
||||
/** This class represents names of find_* relevant `Mongo` collection-level operation methods. */
|
||||
private class MongoCollectionMethodNames extends string {
|
||||
MongoCollectionMethodNames() {
|
||||
this in [
|
||||
"find", "find_raw_batches", "find_one", "find_one_and_delete", "find_and_modify",
|
||||
"find_one_and_replace", "find_one_and_update", "find_one_or_404"
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a reference to a `Mongo` collection method.
|
||||
*
|
||||
* ```py
|
||||
* from flask_pymongo import PyMongo
|
||||
* mongo = PyMongo(app)
|
||||
* mongo.db.user.find({'name': safe_search})
|
||||
* ```
|
||||
*
|
||||
* `mongo.db.user.find` would be a collection method, and so the result.
|
||||
*/
|
||||
private DataFlow::Node mongoCollectionMethod() {
|
||||
mongoCollection() = result.(DataFlow::AttrRead).getObject() and
|
||||
result.(DataFlow::AttrRead).getAttributeName() instanceof MongoCollectionMethodNames
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a reference to a `Mongo` collection method call
|
||||
*
|
||||
* ```py
|
||||
* from flask_pymongo import PyMongo
|
||||
* mongo = PyMongo(app)
|
||||
* mongo.db.user.find({'name': safe_search})
|
||||
* ```
|
||||
*
|
||||
* `mongo.db.user.find({'name': safe_search})` would be a collection method call, and so the result.
|
||||
*/
|
||||
private class MongoCollectionCall extends DataFlow::CallCfgNode, NoSQLQuery::Range {
|
||||
MongoCollectionCall() { this.getFunction() = mongoCollectionMethod() }
|
||||
|
||||
override DataFlow::Node getQuery() { result = this.getArg(0) }
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a reference to a call from a class whose base is a reference to `mongoEngine()` or `flask_MongoEngine()`'s
|
||||
* `Document` or `EmbeddedDocument` objects and its attribute is `objects`.
|
||||
*
|
||||
* ```py
|
||||
* from flask_mongoengine import MongoEngine
|
||||
* db = MongoEngine(app)
|
||||
* class Movie(db.Document):
|
||||
* title = db.StringField(required=True)
|
||||
*
|
||||
* Movie.objects(__raw__=json_search)
|
||||
* ```
|
||||
*
|
||||
* `Movie.objects(__raw__=json_search)` would be the result.
|
||||
*/
|
||||
private class MongoEngineObjectsCall extends DataFlow::CallCfgNode, NoSQLQuery::Range {
|
||||
MongoEngineObjectsCall() {
|
||||
this =
|
||||
[mongoEngine(), flask_MongoEngine()]
|
||||
.getMember(["Document", "EmbeddedDocument"])
|
||||
.getASubclass()
|
||||
.getMember("objects")
|
||||
.getACall()
|
||||
}
|
||||
|
||||
override DataFlow::Node getQuery() { result = this.getArgByName(_) }
|
||||
}
|
||||
|
||||
/** Gets a reference to `mongosanitizer.sanitizer.sanitize` */
|
||||
private class MongoSanitizerCall extends DataFlow::CallCfgNode, NoSQLSanitizer::Range {
|
||||
MongoSanitizerCall() {
|
||||
this =
|
||||
API::moduleImport("mongosanitizer").getMember("sanitizer").getMember("sanitize").getACall()
|
||||
}
|
||||
|
||||
override DataFlow::Node getAnInput() { result = this.getArg(0) }
|
||||
}
|
||||
|
||||
/**
|
||||
* ObjectId returns a string representing an id.
|
||||
* If at any time ObjectId can't parse it's input (like when a tainted dict in passed in),
|
||||
* then ObjectId will throw an error preventing the query from running.
|
||||
*/
|
||||
private class BsonObjectIdCall extends DataFlow::CallCfgNode, NoSQLSanitizer::Range {
|
||||
BsonObjectIdCall() {
|
||||
this =
|
||||
API::moduleImport(["bson", "bson.objectid", "bson.json_util"])
|
||||
.getMember("ObjectId")
|
||||
.getACall()
|
||||
}
|
||||
|
||||
override DataFlow::Node getAnInput() { result = this.getArg(0) }
|
||||
}
|
||||
}
|
||||
@@ -9,3 +9,91 @@ private import semmle.python.dataflow.new.TaintTracking
|
||||
private import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import experimental.semmle.python.Concepts
|
||||
private import semmle.python.ApiGraphs
|
||||
|
||||
/**
|
||||
* Provides models for Python's `re` library.
|
||||
*
|
||||
* See https://docs.python.org/3/library/re.html
|
||||
*/
|
||||
private module Re {
|
||||
/**
|
||||
* List of `re` methods immediately executing an expression.
|
||||
*
|
||||
* See https://docs.python.org/3/library/re.html#module-contents
|
||||
*/
|
||||
private class RegexExecutionMethods extends string {
|
||||
RegexExecutionMethods() {
|
||||
this in ["match", "fullmatch", "search", "split", "findall", "finditer", "sub", "subn"]
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A class to find `re` methods immediately executing an expression.
|
||||
*
|
||||
* See `RegexExecutionMethods`
|
||||
*/
|
||||
private class DirectRegex extends DataFlow::CallCfgNode, RegexExecution::Range {
|
||||
DataFlow::Node regexNode;
|
||||
|
||||
DirectRegex() {
|
||||
this = API::moduleImport("re").getMember(any(RegexExecutionMethods m)).getACall() and
|
||||
regexNode = this.getArg(0)
|
||||
}
|
||||
|
||||
override DataFlow::Node getRegexNode() { result = regexNode }
|
||||
|
||||
override string getRegexModule() { result = "re" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A class to find `re` methods immediately executing a compiled expression by `re.compile`.
|
||||
*
|
||||
* Given the following example:
|
||||
*
|
||||
* ```py
|
||||
* pattern = re.compile(input)
|
||||
* pattern.match(s)
|
||||
* ```
|
||||
*
|
||||
* This class will identify that `re.compile` compiles `input` and afterwards
|
||||
* executes `re`'s `match`. As a result, `this` will refer to `pattern.match(s)`
|
||||
* and `this.getRegexNode()` will return the node for `input` (`re.compile`'s first argument)
|
||||
*
|
||||
*
|
||||
* See `RegexExecutionMethods`
|
||||
*
|
||||
* See https://docs.python.org/3/library/re.html#regular-expression-objects
|
||||
*/
|
||||
private class CompiledRegex extends DataFlow::MethodCallNode, RegexExecution::Range {
|
||||
DataFlow::Node regexNode;
|
||||
|
||||
CompiledRegex() {
|
||||
exists(DataFlow::MethodCallNode patternCall |
|
||||
patternCall = API::moduleImport("re").getMember("compile").getACall() and
|
||||
patternCall.flowsTo(this.getObject()) and
|
||||
this.getMethodName() instanceof RegexExecutionMethods and
|
||||
regexNode = patternCall.getArg(0)
|
||||
)
|
||||
}
|
||||
|
||||
override DataFlow::Node getRegexNode() { result = regexNode }
|
||||
|
||||
override string getRegexModule() { result = "re" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A class to find `re` methods escaping an expression.
|
||||
*
|
||||
* See https://docs.python.org/3/library/re.html#re.escape
|
||||
*/
|
||||
class ReEscape extends DataFlow::CallCfgNode, RegexEscape::Range {
|
||||
DataFlow::Node regexNode;
|
||||
|
||||
ReEscape() {
|
||||
this = API::moduleImport("re").getMember("escape").getACall() and
|
||||
regexNode = this.getArg(0)
|
||||
}
|
||||
|
||||
override DataFlow::Node getRegexNode() { result = regexNode }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,34 @@
|
||||
/**
|
||||
* Provides classes modeling security-relevant aspects of the `xmltodict` PyPI package.
|
||||
* See https://pypi.org/project/xmltodict/
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.Concepts
|
||||
private import semmle.python.ApiGraphs
|
||||
|
||||
/**
|
||||
* Provides models for the `xmltodict` PyPI package.
|
||||
* See https://pypi.org/project/xmltodict/
|
||||
*/
|
||||
private module XmlToDictModel {
|
||||
/** Gets a reference to the `xmltodict` module. */
|
||||
API::Node xmltodict() { result = API::moduleImport("xmltodict") }
|
||||
|
||||
/**
|
||||
* A call to `xmltodict.parse`
|
||||
* See https://github.com/martinblech/xmltodict/blob/ae19c452ca000bf243bfc16274c060bf3bf7cf51/xmltodict.py#L198
|
||||
*/
|
||||
private class XmlToDictParseCall extends Decoding::Range, DataFlow::CallCfgNode {
|
||||
XmlToDictParseCall() { this = xmltodict().getMember("parse").getACall() }
|
||||
|
||||
override predicate mayExecuteInput() { none() }
|
||||
|
||||
override DataFlow::Node getAnInput() { result = this.getArg(0) }
|
||||
|
||||
override DataFlow::Node getOutput() { result = this }
|
||||
|
||||
override string getFormat() { result = "XML" }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,106 @@
|
||||
/**
|
||||
* Provides a taint-tracking configuration for detecting LDAP injection vulnerabilities
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.dataflow.new.DataFlow
|
||||
import semmle.python.dataflow.new.TaintTracking
|
||||
import semmle.python.dataflow.new.RemoteFlowSources
|
||||
import experimental.semmle.python.Concepts
|
||||
|
||||
string getFullHostRegex() { result = "(?i)ldap://.+" }
|
||||
|
||||
string getSchemaRegex() { result = "(?i)ldap(://)?" }
|
||||
|
||||
string getPrivateHostRegex() {
|
||||
result =
|
||||
"(?i)localhost(?:[:/?#].*)?|127\\.0\\.0\\.1(?:[:/?#].*)?|10(?:\\.[0-9]+){3}(?:[:/?#].*)?|172\\.16(?:\\.[0-9]+){2}(?:[:/?#].*)?|192.168(?:\\.[0-9]+){2}(?:[:/?#].*)?|\\[?0:0:0:0:0:0:0:1\\]?(?:[:/?#].*)?|\\[?::1\\]?(?:[:/?#].*)?"
|
||||
}
|
||||
|
||||
// "ldap://somethingon.theinternet.com"
|
||||
class LDAPFullHost extends StrConst {
|
||||
LDAPFullHost() {
|
||||
exists(string s |
|
||||
s = this.getText() and
|
||||
s.regexpMatch(getFullHostRegex()) and
|
||||
// check what comes after the `ldap://` prefix
|
||||
not s.substring(7, s.length()).regexpMatch(getPrivateHostRegex())
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
class LDAPSchema extends StrConst {
|
||||
LDAPSchema() { this.getText().regexpMatch(getSchemaRegex()) }
|
||||
}
|
||||
|
||||
class LDAPPrivateHost extends StrConst {
|
||||
LDAPPrivateHost() { this.getText().regexpMatch(getPrivateHostRegex()) }
|
||||
}
|
||||
|
||||
predicate concatAndCompareAgainstFullHostRegex(LDAPSchema schema, StrConst host) {
|
||||
not host instanceof LDAPPrivateHost and
|
||||
(schema.getText() + host.getText()).regexpMatch(getFullHostRegex())
|
||||
}
|
||||
|
||||
// "ldap://" + "somethingon.theinternet.com"
|
||||
class LDAPBothStrings extends BinaryExpr {
|
||||
LDAPBothStrings() { concatAndCompareAgainstFullHostRegex(this.getLeft(), this.getRight()) }
|
||||
}
|
||||
|
||||
// schema + host
|
||||
class LDAPBothVar extends BinaryExpr {
|
||||
LDAPBothVar() {
|
||||
exists(SsaVariable schemaVar, SsaVariable hostVar |
|
||||
this.getLeft() = schemaVar.getVariable().getALoad() and // getAUse is incompatible with Expr
|
||||
this.getRight() = hostVar.getVariable().getALoad() and
|
||||
concatAndCompareAgainstFullHostRegex(schemaVar
|
||||
.getDefinition()
|
||||
.getImmediateDominator()
|
||||
.getNode(), hostVar.getDefinition().getImmediateDominator().getNode())
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
// schema + "somethingon.theinternet.com"
|
||||
class LDAPVarString extends BinaryExpr {
|
||||
LDAPVarString() {
|
||||
exists(SsaVariable schemaVar |
|
||||
this.getLeft() = schemaVar.getVariable().getALoad() and
|
||||
concatAndCompareAgainstFullHostRegex(schemaVar
|
||||
.getDefinition()
|
||||
.getImmediateDominator()
|
||||
.getNode(), this.getRight())
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
// "ldap://" + host
|
||||
class LDAPStringVar extends BinaryExpr {
|
||||
LDAPStringVar() {
|
||||
exists(SsaVariable hostVar |
|
||||
this.getRight() = hostVar.getVariable().getALoad() and
|
||||
concatAndCompareAgainstFullHostRegex(this.getLeft(),
|
||||
hostVar.getDefinition().getImmediateDominator().getNode())
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A taint-tracking configuration for detecting LDAP insecure authentications.
|
||||
*/
|
||||
class LDAPInsecureAuthConfig extends TaintTracking::Configuration {
|
||||
LDAPInsecureAuthConfig() { this = "LDAPInsecureAuthConfig" }
|
||||
|
||||
override predicate isSource(DataFlow::Node source) {
|
||||
source instanceof RemoteFlowSource or
|
||||
source.asExpr() instanceof LDAPFullHost or
|
||||
source.asExpr() instanceof LDAPBothStrings or
|
||||
source.asExpr() instanceof LDAPBothVar or
|
||||
source.asExpr() instanceof LDAPVarString or
|
||||
source.asExpr() instanceof LDAPStringVar
|
||||
}
|
||||
|
||||
override predicate isSink(DataFlow::Node sink) {
|
||||
exists(LDAPBind ldapBind | not ldapBind.useSSL() and sink = ldapBind.getHost())
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,24 @@
|
||||
/**
|
||||
* Provides a taint-tracking configuration for detecting LDAP injection vulnerabilities
|
||||
*/
|
||||
|
||||
import python
|
||||
import experimental.semmle.python.Concepts
|
||||
import semmle.python.dataflow.new.DataFlow
|
||||
import semmle.python.dataflow.new.TaintTracking
|
||||
import semmle.python.dataflow.new.RemoteFlowSources
|
||||
|
||||
/**
|
||||
* A taint-tracking configuration for detecting LDAP injections.
|
||||
*/
|
||||
class LDAPInjectionFlowConfig extends TaintTracking::Configuration {
|
||||
LDAPInjectionFlowConfig() { this = "LDAPInjectionFlowConfig" }
|
||||
|
||||
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
|
||||
|
||||
override predicate isSink(DataFlow::Node sink) { sink = any(LDAPQuery ldapQuery).getQuery() }
|
||||
|
||||
override predicate isSanitizer(DataFlow::Node sanitizer) {
|
||||
sanitizer = any(LDAPEscape ldapEsc).getAnInput()
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,57 @@
|
||||
import python
|
||||
import semmle.python.dataflow.new.DataFlow
|
||||
import semmle.python.dataflow.new.DataFlow2
|
||||
import semmle.python.dataflow.new.TaintTracking
|
||||
import semmle.python.dataflow.new.TaintTracking2
|
||||
import semmle.python.dataflow.new.RemoteFlowSources
|
||||
import semmle.python.security.dataflow.ChainedConfigs12
|
||||
import experimental.semmle.python.Concepts
|
||||
import semmle.python.Concepts
|
||||
|
||||
/**
|
||||
* A taint-tracking configuration for detecting string-to-dict conversions.
|
||||
*/
|
||||
class RFSToDictConfig extends TaintTracking::Configuration {
|
||||
RFSToDictConfig() { this = "RFSToDictConfig" }
|
||||
|
||||
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
|
||||
|
||||
override predicate isSink(DataFlow::Node sink) {
|
||||
exists(Decoding decoding | decoding.getFormat() = "JSON" and sink = decoding.getOutput())
|
||||
}
|
||||
|
||||
override predicate isSanitizer(DataFlow::Node sanitizer) {
|
||||
sanitizer = any(NoSQLSanitizer noSQLSanitizer).getAnInput()
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A taint-tracking configuration for detecting NoSQL injections (previously converted to a dict).
|
||||
*/
|
||||
class FromDataDictToSink extends TaintTracking2::Configuration {
|
||||
FromDataDictToSink() { this = "FromDataDictToSink" }
|
||||
|
||||
override predicate isSource(DataFlow::Node source) {
|
||||
exists(Decoding decoding | decoding.getFormat() = "JSON" and source = decoding.getOutput())
|
||||
}
|
||||
|
||||
override predicate isSink(DataFlow::Node sink) { sink = any(NoSQLQuery noSQLQuery).getQuery() }
|
||||
|
||||
override predicate isSanitizer(DataFlow::Node sanitizer) {
|
||||
sanitizer = any(NoSQLSanitizer noSQLSanitizer).getAnInput()
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A predicate checking string-to-dict conversion and its arrival to a NoSQL injection sink.
|
||||
*/
|
||||
predicate noSQLInjectionFlow(CustomPathNode source, CustomPathNode sink) {
|
||||
exists(
|
||||
RFSToDictConfig config, DataFlow::PathNode mid1, DataFlow2::PathNode mid2,
|
||||
FromDataDictToSink config2
|
||||
|
|
||||
config.hasFlowPath(source.asNode1(), mid1) and
|
||||
config2.hasFlowPath(mid2, sink.asNode2()) and
|
||||
mid1.getNode().asCfgNode() = mid2.getNode().asCfgNode()
|
||||
)
|
||||
}
|
||||
@@ -0,0 +1,53 @@
|
||||
/**
|
||||
* Provides a taint-tracking configuration for detecting regular expression injection
|
||||
* vulnerabilities.
|
||||
*/
|
||||
|
||||
import python
|
||||
import experimental.semmle.python.Concepts
|
||||
import semmle.python.dataflow.new.DataFlow
|
||||
import semmle.python.dataflow.new.TaintTracking
|
||||
import semmle.python.dataflow.new.RemoteFlowSources
|
||||
|
||||
/**
|
||||
* A class to find methods executing regular expressions.
|
||||
*
|
||||
* See `RegexExecution`
|
||||
*/
|
||||
class RegexInjectionSink extends DataFlow::Node {
|
||||
string regexModule;
|
||||
Attribute regexMethod;
|
||||
|
||||
RegexInjectionSink() {
|
||||
exists(RegexExecution reExec |
|
||||
this = reExec.getRegexNode() and
|
||||
regexModule = reExec.getRegexModule() and
|
||||
regexMethod = reExec.(DataFlow::CallCfgNode).getFunction().asExpr().(Attribute)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the argument containing the executed expression.
|
||||
*/
|
||||
string getRegexModule() { result = regexModule }
|
||||
|
||||
/**
|
||||
* Gets the method used to execute the regular expression.
|
||||
*/
|
||||
Attribute getRegexMethod() { result = regexMethod }
|
||||
}
|
||||
|
||||
/**
|
||||
* A taint-tracking configuration for detecting regular expression injections.
|
||||
*/
|
||||
class RegexInjectionFlowConfig extends TaintTracking::Configuration {
|
||||
RegexInjectionFlowConfig() { this = "RegexInjectionFlowConfig" }
|
||||
|
||||
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
|
||||
|
||||
override predicate isSink(DataFlow::Node sink) { sink instanceof RegexInjectionSink }
|
||||
|
||||
override predicate isSanitizer(DataFlow::Node sanitizer) {
|
||||
sanitizer = any(RegexEscape reEscape).getRegexNode()
|
||||
}
|
||||
}
|
||||
@@ -1,115 +0,0 @@
|
||||
/**
|
||||
* Provides class and predicates to track external data that
|
||||
* may represent malicious xpath query objects.
|
||||
*
|
||||
* This module is intended to be imported into a taint-tracking query
|
||||
* to extend `TaintKind` and `TaintSink`.
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.dataflow.TaintTracking
|
||||
import semmle.python.web.HttpRequest
|
||||
|
||||
/** Models Xpath Injection related classes and functions */
|
||||
module XpathInjection {
|
||||
/** Returns a class value which refers to `lxml.etree` */
|
||||
Value etree() { result = Value::named("lxml.etree") }
|
||||
|
||||
/** Returns a class value which refers to `lxml.etree` */
|
||||
Value libxml2parseFile() { result = Value::named("libxml2.parseFile") }
|
||||
|
||||
/** A generic taint sink that is vulnerable to Xpath injection. */
|
||||
abstract class XpathInjectionSink extends TaintSink { }
|
||||
|
||||
/**
|
||||
* A Sink representing an argument to the `etree.XPath` call.
|
||||
*
|
||||
* from lxml import etree
|
||||
* root = etree.XML("<xmlContent>")
|
||||
* find_text = etree.XPath("`sink`")
|
||||
*/
|
||||
private class EtreeXpathArgument extends XpathInjectionSink {
|
||||
override string toString() { result = "lxml.etree.XPath" }
|
||||
|
||||
EtreeXpathArgument() {
|
||||
exists(CallNode call | call.getFunction().(AttrNode).getObject("XPath").pointsTo(etree()) |
|
||||
call.getArg(0) = this
|
||||
)
|
||||
}
|
||||
|
||||
override predicate sinks(TaintKind kind) { kind instanceof ExternalStringKind }
|
||||
}
|
||||
|
||||
/**
|
||||
* A Sink representing an argument to the `etree.EtXpath` call.
|
||||
*
|
||||
* from lxml import etree
|
||||
* root = etree.XML("<xmlContent>")
|
||||
* find_text = etree.EtXPath("`sink`")
|
||||
*/
|
||||
private class EtreeETXpathArgument extends XpathInjectionSink {
|
||||
override string toString() { result = "lxml.etree.ETXpath" }
|
||||
|
||||
EtreeETXpathArgument() {
|
||||
exists(CallNode call | call.getFunction().(AttrNode).getObject("ETXPath").pointsTo(etree()) |
|
||||
call.getArg(0) = this
|
||||
)
|
||||
}
|
||||
|
||||
override predicate sinks(TaintKind kind) { kind instanceof ExternalStringKind }
|
||||
}
|
||||
|
||||
/**
|
||||
* A Sink representing an argument to the `xpath` call to a parsed xml document.
|
||||
*
|
||||
* from lxml import etree
|
||||
* from io import StringIO
|
||||
* f = StringIO('<foo><bar></bar></foo>')
|
||||
* tree = etree.parse(f)
|
||||
* r = tree.xpath('`sink`')
|
||||
*/
|
||||
private class ParseXpathArgument extends XpathInjectionSink {
|
||||
override string toString() { result = "lxml.etree.parse.xpath" }
|
||||
|
||||
ParseXpathArgument() {
|
||||
exists(
|
||||
CallNode parseCall, CallNode xpathCall, ControlFlowNode obj, Variable var, AssignStmt assign
|
||||
|
|
||||
parseCall.getFunction().(AttrNode).getObject("parse").pointsTo(etree()) and
|
||||
assign.getValue().(Call).getAFlowNode() = parseCall and
|
||||
xpathCall.getFunction().(AttrNode).getObject("xpath") = obj and
|
||||
var.getAUse() = obj and
|
||||
assign.getATarget() = var.getAStore() and
|
||||
xpathCall.getArg(0) = this
|
||||
)
|
||||
}
|
||||
|
||||
override predicate sinks(TaintKind kind) { kind instanceof ExternalStringKind }
|
||||
}
|
||||
|
||||
/**
|
||||
* A Sink representing an argument to the `xpathEval` call to a parsed libxml2 document.
|
||||
*
|
||||
* import libxml2
|
||||
* tree = libxml2.parseFile("file.xml")
|
||||
* r = tree.xpathEval('`sink`')
|
||||
*/
|
||||
private class ParseFileXpathEvalArgument extends XpathInjectionSink {
|
||||
override string toString() { result = "libxml2.parseFile.xpathEval" }
|
||||
|
||||
ParseFileXpathEvalArgument() {
|
||||
exists(
|
||||
CallNode parseCall, CallNode xpathCall, ControlFlowNode obj, Variable var, AssignStmt assign
|
||||
|
|
||||
parseCall.getFunction().(AttrNode).pointsTo(libxml2parseFile()) and
|
||||
assign.getValue().(Call).getAFlowNode() = parseCall and
|
||||
xpathCall.getFunction().(AttrNode).getObject("xpathEval") = obj and
|
||||
var.getAUse() = obj and
|
||||
assign.getATarget() = var.getAStore() and
|
||||
xpathCall.getArg(0) = this
|
||||
)
|
||||
}
|
||||
|
||||
override predicate sinks(TaintKind kind) { kind instanceof ExternalStringKind }
|
||||
}
|
||||
}
|
||||
@@ -1,420 +0,0 @@
|
||||
/** Step Summaries and Type Tracking */
|
||||
|
||||
private import TypeTrackerSpecific
|
||||
|
||||
/**
|
||||
* Any string that may appear as the name of a piece of content. This will usually include things like:
|
||||
* - Attribute names (in Python)
|
||||
* - Property names (in JavaScript)
|
||||
*
|
||||
* In general, this can also be used to model things like stores to specific list indices. To ensure
|
||||
* correctness, it is important that
|
||||
*
|
||||
* - different types of content do not have overlapping names, and
|
||||
* - the empty string `""` is not a valid piece of content, as it is used to indicate the absence of
|
||||
* content instead.
|
||||
*/
|
||||
class ContentName extends string {
|
||||
ContentName() { this = getPossibleContentName() }
|
||||
}
|
||||
|
||||
/** Either a content name, or the empty string (representing no content). */
|
||||
class OptionalContentName extends string {
|
||||
OptionalContentName() { this instanceof ContentName or this = "" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A description of a step on an inter-procedural data flow path.
|
||||
*/
|
||||
private newtype TStepSummary =
|
||||
LevelStep() or
|
||||
CallStep() or
|
||||
ReturnStep() or
|
||||
StoreStep(ContentName content) or
|
||||
LoadStep(ContentName content)
|
||||
|
||||
/**
|
||||
* INTERNAL: Use `TypeTracker` or `TypeBackTracker` instead.
|
||||
*
|
||||
* A description of a step on an inter-procedural data flow path.
|
||||
*/
|
||||
class StepSummary extends TStepSummary {
|
||||
/** Gets a textual representation of this step summary. */
|
||||
string toString() {
|
||||
this instanceof LevelStep and result = "level"
|
||||
or
|
||||
this instanceof CallStep and result = "call"
|
||||
or
|
||||
this instanceof ReturnStep and result = "return"
|
||||
or
|
||||
exists(string content | this = StoreStep(content) | result = "store " + content)
|
||||
or
|
||||
exists(string content | this = LoadStep(content) | result = "load " + content)
|
||||
}
|
||||
}
|
||||
|
||||
/** Provides predicates for updating step summaries (`StepSummary`s). */
|
||||
module StepSummary {
|
||||
/**
|
||||
* Gets the summary that corresponds to having taken a forwards
|
||||
* heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
|
||||
*/
|
||||
cached
|
||||
predicate step(LocalSourceNode nodeFrom, LocalSourceNode nodeTo, StepSummary summary) {
|
||||
exists(Node mid | nodeFrom.flowsTo(mid) and smallstep(mid, nodeTo, summary))
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the summary that corresponds to having taken a forwards
|
||||
* local, heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
|
||||
*
|
||||
* Unlike `StepSummary::step`, this predicate does not compress
|
||||
* type-preserving steps.
|
||||
*/
|
||||
predicate smallstep(Node nodeFrom, LocalSourceNode nodeTo, StepSummary summary) {
|
||||
jumpStep(nodeFrom, nodeTo) and
|
||||
summary = LevelStep()
|
||||
or
|
||||
callStep(nodeFrom, nodeTo) and summary = CallStep()
|
||||
or
|
||||
returnStep(nodeFrom, nodeTo) and
|
||||
summary = ReturnStep()
|
||||
or
|
||||
exists(string content |
|
||||
localSourceStoreStep(nodeFrom, nodeTo, content) and
|
||||
summary = StoreStep(content)
|
||||
or
|
||||
basicLoadStep(nodeFrom, nodeTo, content) and summary = LoadStep(content)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `nodeFrom` is being written to the `content` content of the object in `nodeTo`.
|
||||
*
|
||||
* Note that `nodeTo` will always be a local source node that flows to the place where the content
|
||||
* is written in `basicStoreStep`. This may lead to the flow of information going "back in time"
|
||||
* from the point of view of the execution of the program.
|
||||
*
|
||||
* For instance, if we interpret attribute writes in Python as writing to content with the same
|
||||
* name as the attribute and consider the following snippet
|
||||
*
|
||||
* ```python
|
||||
* def foo(y):
|
||||
* x = Foo()
|
||||
* bar(x)
|
||||
* x.attr = y
|
||||
* baz(x)
|
||||
*
|
||||
* def bar(x):
|
||||
* z = x.attr
|
||||
* ```
|
||||
* for the attribute write `x.attr = y`, we will have `content` being the literal string `"attr"`,
|
||||
* `nodeFrom` will be `y`, and `nodeTo` will be the object `Foo()` created on the first line of the
|
||||
* function. This means we will track the fact that `x.attr` can have the type of `y` into the
|
||||
* assignment to `z` inside `bar`, even though this attribute write happens _after_ `bar` is called.
|
||||
*/
|
||||
predicate localSourceStoreStep(Node nodeFrom, LocalSourceNode nodeTo, string content) {
|
||||
exists(Node obj | nodeTo.flowsTo(obj) and basicStoreStep(nodeFrom, obj, content))
|
||||
}
|
||||
}
|
||||
|
||||
private newtype TTypeTracker = MkTypeTracker(Boolean hasCall, OptionalContentName content)
|
||||
|
||||
/**
|
||||
* Summary of the steps needed to track a value to a given dataflow node.
|
||||
*
|
||||
* This can be used to track objects that implement a certain API in order to
|
||||
* recognize calls to that API. Note that type-tracking does not by itself provide a
|
||||
* source/sink relation, that is, it may determine that a node has a given type,
|
||||
* but it won't determine where that type came from.
|
||||
*
|
||||
* It is recommended that all uses of this type are written in the following form,
|
||||
* for tracking some type `myType`:
|
||||
* ```ql
|
||||
* DataFlow::LocalSourceNode myType(DataFlow::TypeTracker t) {
|
||||
* t.start() and
|
||||
* result = < source of myType >
|
||||
* or
|
||||
* exists (DataFlow::TypeTracker t2 |
|
||||
* result = myType(t2).track(t2, t)
|
||||
* )
|
||||
* }
|
||||
*
|
||||
* DataFlow::Node myType() { myType(DataFlow::TypeTracker::end()).flowsTo(result) }
|
||||
* ```
|
||||
*
|
||||
* Instead of `result = myType(t2).track(t2, t)`, you can also use the equivalent
|
||||
* `t = t2.step(myType(t2), result)`. If you additionally want to track individual
|
||||
* intra-procedural steps, use `t = t2.smallstep(myCallback(t2), result)`.
|
||||
*/
|
||||
class TypeTracker extends TTypeTracker {
|
||||
Boolean hasCall;
|
||||
OptionalContentName content;
|
||||
|
||||
TypeTracker() { this = MkTypeTracker(hasCall, content) }
|
||||
|
||||
/** Gets the summary resulting from appending `step` to this type-tracking summary. */
|
||||
cached
|
||||
TypeTracker append(StepSummary step) {
|
||||
step = LevelStep() and result = this
|
||||
or
|
||||
step = CallStep() and result = MkTypeTracker(true, content)
|
||||
or
|
||||
step = ReturnStep() and hasCall = false and result = this
|
||||
or
|
||||
step = LoadStep(content) and result = MkTypeTracker(hasCall, "")
|
||||
or
|
||||
exists(string p | step = StoreStep(p) and content = "" and result = MkTypeTracker(hasCall, p))
|
||||
}
|
||||
|
||||
/** Gets a textual representation of this summary. */
|
||||
string toString() {
|
||||
exists(string withCall, string withContent |
|
||||
(if hasCall = true then withCall = "with" else withCall = "without") and
|
||||
(if content != "" then withContent = " with content " + content else withContent = "") and
|
||||
result = "type tracker " + withCall + " call steps" + withContent
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if this is the starting point of type tracking.
|
||||
*/
|
||||
predicate start() { hasCall = false and content = "" }
|
||||
|
||||
/**
|
||||
* Holds if this is the starting point of type tracking, and the value starts in the content named `contentName`.
|
||||
* The type tracking only ends after the content has been loaded.
|
||||
*/
|
||||
predicate startInContent(ContentName contentName) { hasCall = false and content = contentName }
|
||||
|
||||
/**
|
||||
* Holds if this is the starting point of type tracking
|
||||
* when tracking a parameter into a call, but not out of it.
|
||||
*/
|
||||
predicate call() { hasCall = true and content = "" }
|
||||
|
||||
/**
|
||||
* Holds if this is the end point of type tracking.
|
||||
*/
|
||||
predicate end() { content = "" }
|
||||
|
||||
/**
|
||||
* INTERNAL. DO NOT USE.
|
||||
*
|
||||
* Holds if this type has been tracked into a call.
|
||||
*/
|
||||
boolean hasCall() { result = hasCall }
|
||||
|
||||
/**
|
||||
* INTERNAL. DO NOT USE.
|
||||
*
|
||||
* Gets the content associated with this type tracker.
|
||||
*/
|
||||
string getContent() { result = content }
|
||||
|
||||
/**
|
||||
* Gets a type tracker that starts where this one has left off to allow continued
|
||||
* tracking.
|
||||
*
|
||||
* This predicate is only defined if the type is not associated to a piece of content.
|
||||
*/
|
||||
TypeTracker continue() { content = "" and result = this }
|
||||
|
||||
/**
|
||||
* Gets the summary that corresponds to having taken a forwards
|
||||
* heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
|
||||
*/
|
||||
pragma[inline]
|
||||
TypeTracker step(LocalSourceNode nodeFrom, LocalSourceNode nodeTo) {
|
||||
exists(StepSummary summary |
|
||||
StepSummary::step(nodeFrom, pragma[only_bind_out](nodeTo), pragma[only_bind_into](summary)) and
|
||||
result = this.append(pragma[only_bind_into](summary))
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the summary that corresponds to having taken a forwards
|
||||
* local, heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
|
||||
*
|
||||
* Unlike `TypeTracker::step`, this predicate exposes all edges
|
||||
* in the flow graph, and not just the edges between `Node`s.
|
||||
* It may therefore be less performant.
|
||||
*
|
||||
* Type tracking predicates using small steps typically take the following form:
|
||||
* ```ql
|
||||
* DataFlow::Node myType(DataFlow::TypeTracker t) {
|
||||
* t.start() and
|
||||
* result = < source of myType >
|
||||
* or
|
||||
* exists (DataFlow::TypeTracker t2 |
|
||||
* t = t2.smallstep(myType(t2), result)
|
||||
* )
|
||||
* }
|
||||
*
|
||||
* DataFlow::Node myType() {
|
||||
* result = myType(DataFlow::TypeTracker::end())
|
||||
* }
|
||||
* ```
|
||||
*/
|
||||
pragma[inline]
|
||||
TypeTracker smallstep(Node nodeFrom, Node nodeTo) {
|
||||
exists(StepSummary summary |
|
||||
StepSummary::smallstep(nodeFrom, nodeTo, summary) and
|
||||
result = this.append(summary)
|
||||
)
|
||||
or
|
||||
simpleLocalFlowStep(nodeFrom, nodeTo) and
|
||||
result = this
|
||||
}
|
||||
}
|
||||
|
||||
/** Provides predicates for implementing custom `TypeTracker`s. */
|
||||
module TypeTracker {
|
||||
/**
|
||||
* Gets a valid end point of type tracking.
|
||||
*/
|
||||
TypeTracker end() { result.end() }
|
||||
}
|
||||
|
||||
private newtype TTypeBackTracker = MkTypeBackTracker(Boolean hasReturn, OptionalContentName content)
|
||||
|
||||
/**
|
||||
* Summary of the steps needed to back-track a use of a value to a given dataflow node.
|
||||
*
|
||||
* This can for example be used to track callbacks that are passed to a certain API,
|
||||
* so we can model specific parameters of that callback as having a certain type.
|
||||
*
|
||||
* Note that type back-tracking does not provide a source/sink relation, that is,
|
||||
* it may determine that a node will be used in an API call somewhere, but it won't
|
||||
* determine exactly where that use was, or the path that led to the use.
|
||||
*
|
||||
* It is recommended that all uses of this type are written in the following form,
|
||||
* for back-tracking some callback type `myCallback`:
|
||||
*
|
||||
* ```ql
|
||||
* DataFlow::LocalSourceNode myCallback(DataFlow::TypeBackTracker t) {
|
||||
* t.start() and
|
||||
* result = (< some API call >).getArgument(< n >).getALocalSource()
|
||||
* or
|
||||
* exists (DataFlow::TypeBackTracker t2 |
|
||||
* result = myCallback(t2).backtrack(t2, t)
|
||||
* )
|
||||
* }
|
||||
*
|
||||
* DataFlow::LocalSourceNode myCallback() { result = myCallback(DataFlow::TypeBackTracker::end()) }
|
||||
* ```
|
||||
*
|
||||
* Instead of `result = myCallback(t2).backtrack(t2, t)`, you can also use the equivalent
|
||||
* `t2 = t.step(result, myCallback(t2))`. If you additionally want to track individual
|
||||
* intra-procedural steps, use `t2 = t.smallstep(result, myCallback(t2))`.
|
||||
*/
|
||||
class TypeBackTracker extends TTypeBackTracker {
|
||||
Boolean hasReturn;
|
||||
string content;
|
||||
|
||||
TypeBackTracker() { this = MkTypeBackTracker(hasReturn, content) }
|
||||
|
||||
/** Gets the summary resulting from prepending `step` to this type-tracking summary. */
|
||||
TypeBackTracker prepend(StepSummary step) {
|
||||
step = LevelStep() and result = this
|
||||
or
|
||||
step = CallStep() and hasReturn = false and result = this
|
||||
or
|
||||
step = ReturnStep() and result = MkTypeBackTracker(true, content)
|
||||
or
|
||||
exists(string p |
|
||||
step = LoadStep(p) and content = "" and result = MkTypeBackTracker(hasReturn, p)
|
||||
)
|
||||
or
|
||||
step = StoreStep(content) and result = MkTypeBackTracker(hasReturn, "")
|
||||
}
|
||||
|
||||
/** Gets a textual representation of this summary. */
|
||||
string toString() {
|
||||
exists(string withReturn, string withContent |
|
||||
(if hasReturn = true then withReturn = "with" else withReturn = "without") and
|
||||
(if content != "" then withContent = " with content " + content else withContent = "") and
|
||||
result = "type back-tracker " + withReturn + " return steps" + withContent
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if this is the starting point of type tracking.
|
||||
*/
|
||||
predicate start() { hasReturn = false and content = "" }
|
||||
|
||||
/**
|
||||
* Holds if this is the end point of type tracking.
|
||||
*/
|
||||
predicate end() { content = "" }
|
||||
|
||||
/**
|
||||
* INTERNAL. DO NOT USE.
|
||||
*
|
||||
* Holds if this type has been back-tracked into a call through return edge.
|
||||
*/
|
||||
boolean hasReturn() { result = hasReturn }
|
||||
|
||||
/**
|
||||
* Gets a type tracker that starts where this one has left off to allow continued
|
||||
* tracking.
|
||||
*
|
||||
* This predicate is only defined if the type has not been tracked into a piece of content.
|
||||
*/
|
||||
TypeBackTracker continue() { content = "" and result = this }
|
||||
|
||||
/**
|
||||
* Gets the summary that corresponds to having taken a backwards
|
||||
* heap and/or inter-procedural step from `nodeTo` to `nodeFrom`.
|
||||
*/
|
||||
pragma[inline]
|
||||
TypeBackTracker step(LocalSourceNode nodeFrom, LocalSourceNode nodeTo) {
|
||||
exists(StepSummary summary |
|
||||
StepSummary::step(pragma[only_bind_out](nodeFrom), nodeTo, pragma[only_bind_into](summary)) and
|
||||
this = result.prepend(pragma[only_bind_into](summary))
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the summary that corresponds to having taken a backwards
|
||||
* local, heap and/or inter-procedural step from `nodeTo` to `nodeFrom`.
|
||||
*
|
||||
* Unlike `TypeBackTracker::step`, this predicate exposes all edges
|
||||
* in the flowgraph, and not just the edges between
|
||||
* `LocalSourceNode`s. It may therefore be less performant.
|
||||
*
|
||||
* Type tracking predicates using small steps typically take the following form:
|
||||
* ```ql
|
||||
* DataFlow::Node myType(DataFlow::TypeBackTracker t) {
|
||||
* t.start() and
|
||||
* result = < some API call >.getArgument(< n >)
|
||||
* or
|
||||
* exists (DataFlow::TypeBackTracker t2 |
|
||||
* t = t2.smallstep(result, myType(t2))
|
||||
* )
|
||||
* }
|
||||
*
|
||||
* DataFlow::Node myType() {
|
||||
* result = myType(DataFlow::TypeBackTracker::end())
|
||||
* }
|
||||
* ```
|
||||
*/
|
||||
pragma[inline]
|
||||
TypeBackTracker smallstep(Node nodeFrom, Node nodeTo) {
|
||||
exists(StepSummary summary |
|
||||
StepSummary::smallstep(nodeFrom, nodeTo, summary) and
|
||||
this = result.prepend(summary)
|
||||
)
|
||||
or
|
||||
simpleLocalFlowStep(nodeFrom, nodeTo) and
|
||||
this = result
|
||||
}
|
||||
}
|
||||
|
||||
/** Provides predicates for implementing custom `TypeBackTracker`s. */
|
||||
module TypeBackTracker {
|
||||
/**
|
||||
* Gets a valid end point of type back-tracking.
|
||||
*/
|
||||
TypeBackTracker end() { result.end() }
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user