Merge branch 'main' into jorgectf/python/deserialization

This commit is contained in:
Rasmus Wriedt Larsen
2021-09-28 16:49:33 +02:00
7130 changed files with 597514 additions and 224228 deletions

View File

@@ -1,20 +0,0 @@
/**
* Contains customizations to the standard library.
*
* This module is imported by `python.qll`, so any customizations defined here automatically
* apply to all queries.
*
* Typical examples of customizations include adding new subclasses of abstract classes such as
* the `RemoteFlowSource::Range` and `AdditionalTaintStep` classes associated with the security
* queries to model frameworks that are not covered by the standard library.
*/
import python
/* General import that is useful */
// import semmle.python.dataflow.new.DataFlow
//
/* for extending `TaintTracking::AdditionalTaintStep` */
// import semmle.python.dataflow.new.TaintTracking
//
/* for extending `RemoteFlowSource::Range` */
// import semmle.python.dataflow.new.RemoteFlowSources

View File

@@ -4,18 +4,22 @@
* @kind problem
* @tags security
* correctness
* security/cwe/cwe-94
* security/cwe/cwe-95
* @problem.severity error
* @security-severity 9.8
* @sub-severity high
* @precision high
* @id py/use-of-input
*/
import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.ApiGraphs
from CallNode call, Context context, ControlFlowNode func
from DataFlow::CallCfgNode call
where
context.getAVersion().includes(2, _) and
call.getFunction() = func and
func.pointsTo(context, Value::named("input"), _) and
not func.pointsTo(context, Value::named("raw_input"), _)
major_version() = 2 and
call = API::builtin("input").getACall() and
call != API::builtin("raw_input").getACall()
select call, "The unsafe built-in function 'input' is used in Python 2."

View File

@@ -12,88 +12,12 @@
*/
import python
import semmle.python.security.Paths
import semmle.python.functions.ModificationOfParameterWithDefault
import DataFlow::PathGraph
predicate safe_method(string name) {
name = "count" or
name = "index" or
name = "copy" or
name = "get" or
name = "has_key" or
name = "items" or
name = "keys" or
name = "values" or
name = "iteritems" or
name = "iterkeys" or
name = "itervalues" or
name = "__contains__" or
name = "__getitem__" or
name = "__getattribute__"
}
/** Gets the truthiness (non emptyness) of the default of `p` if that value is mutable */
private boolean mutableDefaultValue(Parameter p) {
exists(Dict d | p.getDefault() = d |
exists(d.getAKey()) and result = true
or
not exists(d.getAKey()) and result = false
)
or
exists(List l | p.getDefault() = l |
exists(l.getAnElt()) and result = true
or
not exists(l.getAnElt()) and result = false
)
}
class NonEmptyMutableValue extends TaintKind {
NonEmptyMutableValue() { this = "non-empty mutable value" }
}
class EmptyMutableValue extends TaintKind {
EmptyMutableValue() { this = "empty mutable value" }
override boolean booleanValue() { result = false }
}
class MutableDefaultValue extends TaintSource {
boolean nonEmpty;
MutableDefaultValue() { nonEmpty = mutableDefaultValue(this.(NameNode).getNode()) }
override string toString() { result = "mutable default value" }
override predicate isSourceOf(TaintKind kind) {
nonEmpty = false and kind instanceof EmptyMutableValue
or
nonEmpty = true and kind instanceof NonEmptyMutableValue
}
}
private ClassValue mutable_class() {
result = Value::named("list") or
result = Value::named("dict")
}
class Mutation extends TaintSink {
Mutation() {
exists(AugAssign a | a.getTarget().getAFlowNode() = this)
or
exists(Call c, Attribute a | c.getFunc() = a |
a.getObject().getAFlowNode() = this and
not safe_method(a.getName()) and
this.(ControlFlowNode).pointsTo().getClass() = mutable_class()
)
}
override predicate sinks(TaintKind kind) {
kind instanceof EmptyMutableValue
or
kind instanceof NonEmptyMutableValue
}
}
from TaintedPathSource src, TaintedPathSink sink
where src.flowsTo(sink)
select sink.getSink(), src, sink, "$@ flows to here and is mutated.", src.getSource(),
from
ModificationOfParameterWithDefault::Configuration config, DataFlow::PathNode source,
DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "$@ flows to here and is mutated.", source.getNode(),
"Default value"

View File

@@ -9,7 +9,7 @@ information being thrown away.</p>
<p>A return value is considered to be trivial if it is <code>None</code> or it is a parameter (parameters, usually <code>self</code> are often
returned to assist with method chaining, but can be ignored).
A return value is also assumed to be trivial if it is ignored for 75% or more of calls.
A return value is also assumed to be trivial if it is ignored for more than 25% of calls.
</p>
</overview>

View File

@@ -4,7 +4,9 @@
* and is therefore associated with security risks.
* @kind problem
* @tags security
* external/cwe/cwe-200
* @problem.severity error
* @security-severity 6.5
* @sub-severity low
* @precision high
* @id py/bind-socket-all-network-interfaces
@@ -25,7 +27,7 @@ private string vulnerableHostname() {
}
/** Gets a reference to a hostname that can be used to bind to all interfaces. */
private DataFlow::LocalSourceNode vulnerableHostnameRef(DataFlow::TypeTracker t, string hostname) {
private DataFlow::TypeTrackingNode vulnerableHostnameRef(DataFlow::TypeTracker t, string hostname) {
t.start() and
exists(StrConst allInterfacesStrConst | hostname = vulnerableHostname() |
allInterfacesStrConst.getText() = hostname and
@@ -41,7 +43,7 @@ DataFlow::Node vulnerableHostnameRef(string hostname) {
}
/** Gets a reference to a tuple for which the first element is a hostname that can be used to bind to all interfaces. */
private DataFlow::LocalSourceNode vulnerableAddressTuple(DataFlow::TypeTracker t, string hostname) {
private DataFlow::TypeTrackingNode vulnerableAddressTuple(DataFlow::TypeTracker t, string hostname) {
t.start() and
result.asExpr() = any(Tuple tup | tup.getElt(0) = vulnerableHostnameRef(hostname).asExpr())
or

View File

@@ -5,6 +5,7 @@
* @kind path-problem
* @precision low
* @problem.severity error
* @security-severity 7.8
* @tags security external/cwe/cwe-20
*/

View File

@@ -3,6 +3,7 @@
* @description Matching a URL or hostname against a regular expression that contains an unescaped dot as part of the hostname might match more hostnames than expected.
* @kind problem
* @problem.severity warning
* @security-severity 7.8
* @precision high
* @id py/incomplete-hostname-regexp
* @tags correctness

View File

@@ -3,6 +3,7 @@
* @description Security checks on the substrings of an unparsed URL are often vulnerable to bypassing.
* @kind problem
* @problem.severity warning
* @security-severity 7.8
* @precision high
* @id py/incomplete-url-substring-sanitization
* @tags correctness

View File

@@ -3,6 +3,7 @@
* @description Accessing paths influenced by users can allow an attacker to access unexpected resources.
* @kind path-problem
* @problem.severity error
* @security-severity 7.5
* @sub-severity high
* @precision high
* @id py/path-injection

View File

@@ -6,6 +6,7 @@
* @kind path-problem
* @id py/tarslip
* @problem.severity error
* @security-severity 7.5
* @precision medium
* @tags security
* external/cwe/cwe-022

View File

@@ -4,6 +4,7 @@
* user to change the meaning of the command.
* @kind path-problem
* @problem.severity error
* @security-severity 9.8
* @sub-severity high
* @precision high
* @id py/command-line-injection
@@ -18,7 +19,7 @@ import python
import semmle.python.security.dataflow.CommandInjection
import DataFlow::PathGraph
from CommandInjectionConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
from CommandInjection::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "This command depends on $@.", source.getNode(),
"a user-provided value"

View File

@@ -4,6 +4,7 @@
* cause a cross-site scripting vulnerability.
* @kind problem
* @problem.severity error
* @security-severity 6.1
* @precision medium
* @id py/jinja2/autoescape-false
* @tags security

View File

@@ -4,6 +4,7 @@
* allows for a cross-site scripting vulnerability.
* @kind path-problem
* @problem.severity error
* @security-severity 6.1
* @sub-severity high
* @precision high
* @id py/reflective-xss
@@ -16,7 +17,7 @@ import python
import semmle.python.security.dataflow.ReflectedXSS
import DataFlow::PathGraph
from ReflectedXssConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
from ReflectedXSS::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "Cross-site scripting vulnerability due to $@.",
source.getNode(), "a user-provided value"

View File

@@ -9,6 +9,13 @@ If a database query (such as a SQL or NoSQL query) is built from
user-provided data without sufficient sanitization, a user
may be able to run malicious database queries.
</p>
<p>
This also includes using the <code>TextClause</code> class in the
<code><a href="https://pypi.org/project/SQLAlchemy/">SQLAlchemy</a></code> PyPI package,
which is used to represent a literal SQL fragment and is inserted directly into the
final SQL when used in a query built using the ORM.
</p>
</overview>
<recommendation>
@@ -52,5 +59,6 @@ vulnerable to SQL injection attacks. In this example, if <code>username</code> w
<references>
<li>Wikipedia: <a href="https://en.wikipedia.org/wiki/SQL_injection">SQL injection</a>.</li>
<li>OWASP: <a href="https://cheatsheetseries.owasp.org/cheatsheets/SQL_Injection_Prevention_Cheat_Sheet.html">SQL Injection Prevention Cheat Sheet</a>.</li>
<li><a href="https://docs.sqlalchemy.org/en/14/core/sqlelement.html#sqlalchemy.sql.expression.text.params.text">SQLAlchemy documentation for TextClause</a>.</li>
</references>
</qhelp>

View File

@@ -4,6 +4,7 @@
* malicious SQL code by the user.
* @kind path-problem
* @problem.severity error
* @security-severity 8.8
* @precision high
* @id py/sql-injection
* @tags security
@@ -15,7 +16,7 @@ import python
import semmle.python.security.dataflow.SqlInjection
import DataFlow::PathGraph
from SQLInjectionConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
from SqlInjection::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "This SQL query depends on $@.", source.getNode(),
"a user-provided value"

View File

@@ -4,6 +4,7 @@
* code execution.
* @kind path-problem
* @problem.severity error
* @security-severity 9.3
* @sub-severity high
* @precision high
* @id py/code-injection
@@ -18,7 +19,7 @@ import python
import semmle.python.security.dataflow.CodeInjection
import DataFlow::PathGraph
from CodeInjectionConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
from CodeInjection::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "$@ flows to here and is interpreted as code.",
source.getNode(), "A user-provided value"

View File

@@ -5,6 +5,7 @@
* developing a subsequent exploit.
* @kind path-problem
* @problem.severity error
* @security-severity 5.4
* @precision high
* @id py/stack-trace-exposure
* @tags security
@@ -16,7 +17,7 @@ import python
import semmle.python.security.dataflow.StackTraceExposure
import DataFlow::PathGraph
from StackTraceExposureConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
from StackTraceExposure::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "$@ may be exposed to an external user", source.getNode(),
"Error information"

View File

@@ -3,6 +3,7 @@
* @description Running a Flask app in debug mode may allow an attacker to run arbitrary code through the Werkzeug debugger.
* @kind problem
* @problem.severity error
* @security-severity 7.5
* @precision high
* @id py/flask-debug
* @tags security
@@ -16,7 +17,7 @@ import semmle.python.ApiGraphs
import semmle.python.frameworks.Flask
/** Gets a reference to a truthy literal. */
private DataFlow::LocalSourceNode truthyLiteral(DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode truthyLiteral(DataFlow::TypeTracker t) {
t.start() and
result.asExpr().(ImmutableLiteral).booleanValue() = true
or

View File

@@ -3,6 +3,7 @@
* @description Accepting unknown host keys can allow man-in-the-middle attacks.
* @kind problem
* @problem.severity error
* @security-severity 7.5
* @precision high
* @id py/paramiko-missing-host-key-validation
* @tags security

View File

@@ -7,7 +7,7 @@
<p>
Encryption is key to the security of most, if not all, online communication.
Using Transport Layer Security (TLS) can ensure that communication cannot be interrupted by an interloper.
For this reason, is is unwise to disable the verification that TLS provides.
For this reason, it is unwise to disable the verification that TLS provides.
Functions in the <code>requests</code> module provide verification by default, and it is only when
explicitly turned off using <code>verify=False</code> that no verification occurs.
</p>

View File

@@ -3,6 +3,7 @@
* @description Making a request without certificate validation can allow man-in-the-middle attacks.
* @kind problem
* @problem.severity error
* @security-severity 7.5
* @precision medium
* @id py/request-without-cert-validation
* @tags security

View File

@@ -4,6 +4,7 @@
* expose it to an attacker.
* @kind path-problem
* @problem.severity error
* @security-severity 7.5
* @precision high
* @id py/clear-text-logging-sensitive-data
* @tags security
@@ -13,25 +14,13 @@
*/
import python
import semmle.python.security.Paths
import semmle.python.dataflow.TaintTracking
import semmle.python.security.SensitiveData
import semmle.python.security.ClearText
private import semmle.python.dataflow.new.DataFlow
import DataFlow::PathGraph
import semmle.python.security.dataflow.CleartextLogging::CleartextLogging
class CleartextLoggingConfiguration extends TaintTracking::Configuration {
CleartextLoggingConfiguration() { this = "ClearTextLogging" }
override predicate isSource(DataFlow::Node src, TaintKind kind) {
src.asCfgNode().(SensitiveData::Source).isSourceOf(kind)
}
override predicate isSink(DataFlow::Node sink, TaintKind kind) {
sink.asCfgNode() instanceof ClearTextLogging::Sink and
kind instanceof SensitiveData
}
}
from CleartextLoggingConfiguration config, TaintedPathSource source, TaintedPathSink sink
where config.hasFlowPath(source, sink)
select sink.getSink(), source, sink, "Sensitive data returned by $@ is logged here.",
source.getSource(), source.getCfgNode().(SensitiveData::Source).repr()
from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink, string classification
where
config.hasFlowPath(source, sink) and
classification = source.getNode().(Source).getClassification()
select sink.getNode(), source, sink, "$@ is logged here.", source.getNode(),
"Sensitive data (" + classification + ")"

View File

@@ -4,6 +4,7 @@
* attacker.
* @kind path-problem
* @problem.severity error
* @security-severity 7.5
* @precision high
* @id py/clear-text-storage-sensitive-data
* @tags security
@@ -13,25 +14,13 @@
*/
import python
import semmle.python.security.Paths
import semmle.python.dataflow.TaintTracking
import semmle.python.security.SensitiveData
import semmle.python.security.ClearText
private import semmle.python.dataflow.new.DataFlow
import DataFlow::PathGraph
import semmle.python.security.dataflow.CleartextStorage::CleartextStorage
class CleartextStorageConfiguration extends TaintTracking::Configuration {
CleartextStorageConfiguration() { this = "ClearTextStorage" }
override predicate isSource(DataFlow::Node src, TaintKind kind) {
src.asCfgNode().(SensitiveData::Source).isSourceOf(kind)
}
override predicate isSink(DataFlow::Node sink, TaintKind kind) {
sink.asCfgNode() instanceof ClearTextStorage::Sink and
kind instanceof SensitiveData
}
}
from CleartextStorageConfiguration config, TaintedPathSource source, TaintedPathSink sink
where config.hasFlowPath(source, sink)
select sink.getSink(), source, sink, "Sensitive data from $@ is stored here.", source.getSource(),
source.getCfgNode().(SensitiveData::Source).repr()
from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink, string classification
where
config.hasFlowPath(source, sink) and
classification = source.getNode().(Source).getClassification()
select sink.getNode(), source, sink, "$@ is stored here.", source.getNode(),
"Sensitive data (" + classification + ")"

View File

@@ -3,6 +3,7 @@
* @description Use of a cryptographic key that is too small may allow the encryption to be broken.
* @kind problem
* @problem.severity error
* @security-severity 7.5
* @precision high
* @id py/weak-crypto-key
* @tags security

View File

@@ -15,22 +15,28 @@
secure than it appears to be.
</p>
<p>
This query alerts on any use of a weak cryptographic algorithm, that is
not a hashing algorithm. Use of broken or weak cryptographic hash
functions are handled by the
<code>py/weak-sensitive-data-hashing</code> query.
</p>
</overview>
<recommendation>
<p>
Ensure that you use a strong, modern cryptographic
algorithm. Use at least AES-128 or RSA-2048 for
encryption, and SHA-2 or SHA-3 for secure hashing.
algorithm, such as AES-128 or RSA-2048.
</p>
</recommendation>
<example>
<p>
The following code uses the <code>pycrypto</code>
The following code uses the <code>pycryptodome</code>
library to encrypt some secret data. When you create a cipher using
<code>pycrypto</code> you must specify the encryption
<code>pycryptodome</code> you must specify the encryption
algorithm to use. The first example uses DES, which is an
older algorithm that is now considered weak. The second
example uses AES, which is a stronger modern algorithm.
@@ -39,8 +45,12 @@
<sample src="examples/broken_crypto.py" />
<p>
WARNING: Although the second example above is more robust,
pycrypto is no longer actively maintained so we recommend using <code>cryptography</code> instead.
NOTICE: the original
<code><a href="https://pypi.org/project/pycrypto/">pycrypto</a></code>
PyPI package that provided the <code>Crypto</code> module is not longer
actively maintained, so you should use the
<code><a href="https://pypi.org/project/pycryptodome/">pycryptodome</a></code>
PyPI package instead (which has a compatible API).
</p>
</example>

View File

@@ -1,8 +1,9 @@
/**
* @name Use of a broken or weak cryptographic algorithm
* @description Using broken or weak cryptographic algorithms can compromise security.
* @kind path-problem
* @kind problem
* @problem.severity warning
* @security-severity 7.5
* @precision high
* @id py/weak-cryptographic-algorithm
* @tags security
@@ -10,21 +11,15 @@
*/
import python
import semmle.python.security.Paths
import semmle.python.security.SensitiveData
import semmle.python.security.Crypto
import semmle.python.Concepts
class BrokenCryptoConfiguration extends TaintTracking::Configuration {
BrokenCryptoConfiguration() { this = "Broken crypto configuration" }
override predicate isSource(TaintTracking::Source source) {
source instanceof SensitiveDataSource
}
override predicate isSink(TaintTracking::Sink sink) { sink instanceof WeakCryptoSink }
}
from BrokenCryptoConfiguration config, TaintedPathSource src, TaintedPathSink sink
where config.hasFlowPath(src, sink)
select sink.getSink(), src, sink, "$@ is used in a broken or weak cryptographic algorithm.",
src.getSource(), "Sensitive data"
from Cryptography::CryptographicOperation operation, Cryptography::CryptographicAlgorithm algorithm
where
algorithm = operation.getAlgorithm() and
algorithm.isWeak() and
// `Cryptography::HashingAlgorithm` and `Cryptography::PasswordHashingAlgorithm` are
// handled by `py/weak-sensitive-data-hashing`
algorithm instanceof Cryptography::EncryptionAlgorithm
select operation,
"The cryptographic algorithm " + algorithm.getName() +
" is broken or weak, and should not be used."

View File

@@ -5,6 +5,7 @@
* @id py/insecure-default-protocol
* @kind problem
* @problem.severity warning
* @security-severity 7.5
* @precision high
* @tags security
* external/cwe/cwe-327

View File

@@ -4,6 +4,7 @@
* @id py/insecure-protocol
* @kind problem
* @problem.severity warning
* @security-severity 7.5
* @precision high
* @tags security
* external/cwe/cwe-327
@@ -26,37 +27,33 @@ class ProtocolConfiguration extends DataFlow::Node {
unsafe_context_creation(this, _)
}
AstNode getNode() { result = this.asCfgNode().(CallNode).getFunction().getNode() }
DataFlow::Node getNode() { result = this.(DataFlow::CallCfgNode).getFunction() }
}
// Helper for pretty printer `callName`.
// This is a consequence of missing pretty priting.
// We do not want to evaluate our bespoke pretty printer
// for all `AstNode`s so we define a sub class of interesting ones.
//
// Note that AstNode is abstract and AstNode_ is a library class, so
// we have to extend @py_ast_node.
class Nameable extends @py_ast_node {
// for all `DataFlow::Node`s so we define a sub class of interesting ones.
class Nameable extends DataFlow::Node {
Nameable() {
this = any(ProtocolConfiguration pc).getNode()
or
exists(Nameable attr | this = attr.(Attribute).getObject())
this = any(Nameable attr).(DataFlow::AttrRef).getObject()
}
string toString() { result = "AstNode" }
}
string callName(Nameable call) {
result = call.(Name).getId()
result = call.asExpr().(Name).getId()
or
exists(Attribute a | a = call | result = callName(a.getObject()) + "." + a.getName())
exists(DataFlow::AttrRef a | a = call |
result = callName(a.getObject()) + "." + a.getAttributeName()
)
}
string configName(ProtocolConfiguration protocolConfiguration) {
result =
"call to " + callName(protocolConfiguration.asCfgNode().(CallNode).getFunction().getNode())
result = "call to " + callName(protocolConfiguration.(DataFlow::CallCfgNode).getFunction())
or
not protocolConfiguration.asCfgNode() instanceof CallNode and
not protocolConfiguration instanceof DataFlow::CallCfgNode and
not protocolConfiguration instanceof ContextCreation and
result = "context modification"
}

View File

@@ -13,12 +13,12 @@ class PyOpenSSLContextCreation extends ContextCreation, DataFlow::CallCfgNode {
}
override string getProtocol() {
exists(ControlFlowNode protocolArg, PyOpenSSL pyo |
protocolArg in [node.getArg(0), node.getArgByName("method")]
exists(DataFlow::Node protocolArg, PyOpenSSL pyo |
protocolArg in [this.getArg(0), this.getArgByName("method")]
|
protocolArg =
[pyo.specific_version(result).getAUse(), pyo.unspecific_version(result).getAUse()]
.asCfgNode()
protocolArg in [
pyo.specific_version(result).getAUse(), pyo.unspecific_version(result).getAUse()
]
)
}
}
@@ -29,7 +29,7 @@ class ConnectionCall extends ConnectionCreation, DataFlow::CallCfgNode {
}
override DataFlow::CfgNode getContext() {
result.getNode() in [node.getArg(0), node.getArgByName("context")]
result in [this.getArg(0), this.getArgByName("context")]
}
}
@@ -43,8 +43,8 @@ class SetOptionsCall extends ProtocolRestriction, DataFlow::CallCfgNode {
}
override ProtocolVersion getRestriction() {
API::moduleImport("OpenSSL").getMember("SSL").getMember("OP_NO_" + result).getAUse().asCfgNode() in [
node.getArg(0), node.getArgByName("options")
API::moduleImport("OpenSSL").getMember("SSL").getMember("OP_NO_" + result).getAUse() in [
this.getArg(0), this.getArgByName("options")
]
}
}

View File

@@ -11,15 +11,15 @@ class SSLContextCreation extends ContextCreation, DataFlow::CallCfgNode {
SSLContextCreation() { this = API::moduleImport("ssl").getMember("SSLContext").getACall() }
override string getProtocol() {
exists(ControlFlowNode protocolArg, Ssl ssl |
protocolArg in [node.getArg(0), node.getArgByName("protocol")]
exists(DataFlow::Node protocolArg, Ssl ssl |
protocolArg in [this.getArg(0), this.getArgByName("protocol")]
|
protocolArg =
[ssl.specific_version(result).getAUse(), ssl.unspecific_version(result).getAUse()]
.asCfgNode()
)
or
not exists(node.getAnArg()) and
not exists(this.getArg(_)) and
not exists(this.getArgByName(_)) and
result = "TLS"
}
}
@@ -39,12 +39,10 @@ API::Node sslContextInstance() {
result = API::moduleImport("ssl").getMember(["SSLContext", "create_default_context"]).getReturn()
}
class WrapSocketCall extends ConnectionCreation, DataFlow::CallCfgNode {
class WrapSocketCall extends ConnectionCreation, DataFlow::MethodCallNode {
WrapSocketCall() { this = sslContextInstance().getMember("wrap_socket").getACall() }
override DataFlow::Node getContext() {
result = this.getFunction().(DataFlow::AttrRead).getObject()
}
override DataFlow::Node getContext() { result = this.getObject() }
}
class OptionsAugOr extends ProtocolRestriction, DataFlow::CfgNode {
@@ -133,7 +131,7 @@ class ContextSetVersion extends ProtocolRestriction, ProtocolUnrestriction, Data
ContextSetVersion() {
exists(DataFlow::AttrWrite aw |
aw.getObject().asCfgNode() = node and
this = aw.getObject() and
aw.getAttributeName() = "minimum_version" and
aw.getValue() =
API::moduleImport("ssl").getMember("TLSVersion").getMember(restriction).getAUse()

View File

@@ -0,0 +1,104 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
Using a broken or weak cryptographic hash function can leave data
vulnerable, and should not be used in security related code.
</p>
<p>
A strong cryptographic hash function should be resistant to:
</p>
<ul>
<li>
pre-image attacks: if you know a hash value <code>h(x)</code>,
you should not be able to easily find the input <code>x</code>.
</li>
<li>
collision attacks: if you know a hash value <code>h(x)</code>,
you should not be able to easily find a different input <code>y</code>
with the same hash value <code>h(x) = h(y)</code>.
</li>
</ul>
<p>
In cases with a limited input space, such as for passwords, the hash
function also needs to be computationally expensive to be resistant to
brute-force attacks. Passwords should also have an unique salt applied
before hashing, but that is not considered by this query.
</p>
<p>
As an example, both MD5 and SHA-1 are known to be vulnerable to collision attacks.
</p>
<p>
Since it's OK to use a weak cryptographic hash function in a non-security
context, this query only alerts when these are used to hash sensitive
data (such as passwords, certificates, usernames).
</p>
<p>
Use of broken or weak cryptographic algorithms that are not hashing algorithms, is
handled by the <code>py/weak-cryptographic-algorithm</code> query.
</p>
</overview>
<recommendation>
<p>
Ensure that you use a strong, modern cryptographic hash function:
</p>
<ul>
<li>
such as Argon2, scrypt, bcrypt, or PBKDF2 for passwords and other data with limited input space.
</li>
<li>
such as SHA-2, or SHA-3 in other cases.
</li>
</ul>
</recommendation>
<example>
<p>
The following example shows two functions for checking whether the hash
of a certificate matches a known value -- to prevent tampering.
The first function uses MD5 that is known to be vulnerable to collision attacks.
The second function uses SHA-256 that is a strong cryptographic hashing function.
</p>
<sample src="examples/weak_certificate_hashing.py" />
</example>
<example>
<p>
The following example shows two functions for hashing passwords.
The first function uses SHA-256 to hash passwords. Although SHA-256 is a
strong cryptographic hash function, it is not suitable for password
hashing since it is not computationally expensive.
</p>
<sample src="examples/weak_password_hashing_bad.py" />
<p>
The second function uses Argon2 (through the <code>argon2-cffi</code>
PyPI package), which is a strong password hashing algorithm (and
includes a per-password salt by default).
</p>
<sample src="examples/weak_password_hashing_good.py" />
</example>
<references>
<li>OWASP: <a href="https://cheatsheetseries.owasp.org/cheatsheets/Password_Storage_Cheat_Sheet.html">Password Storage Cheat Sheet</a></li>
</references>
</qhelp>

View File

@@ -0,0 +1,49 @@
/**
* @name Use of a broken or weak cryptographic hashing algorithm on sensitive data
* @description Using broken or weak cryptographic hashing algorithms can compromise security.
* @kind path-problem
* @problem.severity warning
* @security-severity 7.5
* @precision high
* @id py/weak-sensitive-data-hashing
* @tags security
* external/cwe/cwe-327
* external/cwe/cwe-328
* external/cwe/cwe-916
*/
import python
import semmle.python.security.dataflow.WeakSensitiveDataHashing
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import DataFlow::PathGraph
from
DataFlow::PathNode source, DataFlow::PathNode sink, string ending, string algorithmName,
string classification
where
exists(NormalHashFunction::Configuration config |
config.hasFlowPath(source, sink) and
algorithmName = sink.getNode().(NormalHashFunction::Sink).getAlgorithmName() and
classification = source.getNode().(NormalHashFunction::Source).getClassification() and
ending = "."
)
or
exists(ComputationallyExpensiveHashFunction::Configuration config |
config.hasFlowPath(source, sink) and
algorithmName = sink.getNode().(ComputationallyExpensiveHashFunction::Sink).getAlgorithmName() and
classification =
source.getNode().(ComputationallyExpensiveHashFunction::Source).getClassification() and
(
sink.getNode().(ComputationallyExpensiveHashFunction::Sink).isComputationallyExpensive() and
ending = "."
or
not sink.getNode().(ComputationallyExpensiveHashFunction::Sink).isComputationallyExpensive() and
ending =
" for " + classification +
" hashing, since it is not a computationally expensive hash function."
)
)
select sink.getNode(), source, sink,
"$@ is used in a hashing algorithm (" + algorithmName + ") that is insecure" + ending,
source.getNode(), "Sensitive data (" + classification + ")"

View File

@@ -0,0 +1,9 @@
import hashlib
def certificate_matches_known_hash_bad(certificate, known_hash):
hash = hashlib.md5(certificate).hexdigest() # BAD
return hash == known_hash
def certificate_matches_known_hash_good(certificate, known_hash):
hash = hashlib.sha256(certificate).hexdigest() # GOOD
return hash == known_hash

View File

@@ -0,0 +1,4 @@
import hashlib
def get_password_hash(password: str, salt: str):
return hashlib.sha256(password + salt).hexdigest() # BAD

View File

@@ -0,0 +1,9 @@
from argon2 import PasswordHasher
def get_initial_hash(password: str):
ph = PasswordHasher()
return ph.hash(password) # GOOD
def check_password(password: str, known_hash):
ph = PasswordHasher()
return ph.verify(known_hash, password) # GOOD

View File

@@ -4,6 +4,7 @@
* @kind problem
* @id py/insecure-temporary-file
* @problem.severity error
* @security-severity 7.0
* @sub-severity high
* @precision high
* @tags external/cwe/cwe-377

View File

@@ -4,6 +4,7 @@
* @kind path-problem
* @id py/unsafe-deserialization
* @problem.severity error
* @security-severity 9.8
* @sub-severity high
* @precision high
* @tags external/cwe/cwe-502
@@ -15,6 +16,6 @@ import python
import semmle.python.security.dataflow.UnsafeDeserialization
import DataFlow::PathGraph
from UnsafeDeserializationConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
from UnsafeDeserialization::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "Deserializing of $@.", source.getNode(), "untrusted input"

View File

@@ -4,6 +4,7 @@
* may cause redirection to malicious web sites.
* @kind path-problem
* @problem.severity error
* @security-severity 6.1
* @sub-severity low
* @id py/url-redirection
* @tags security
@@ -15,7 +16,7 @@ import python
import semmle.python.security.dataflow.UrlRedirect
import DataFlow::PathGraph
from UrlRedirectConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
from UrlRedirect::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "Untrusted URL redirection due to $@.", source.getNode(),
"A user-provided value"

View File

@@ -4,6 +4,7 @@
* @kind problem
* @id py/overly-permissive-file
* @problem.severity warning
* @security-severity 7.8
* @sub-severity high
* @precision medium
* @tags external/cwe/cwe-732

View File

@@ -3,6 +3,7 @@
* @description Credentials are hard coded in the source code of the application.
* @kind path-problem
* @problem.severity error
* @security-severity 9.8
* @precision medium
* @id py/hardcoded-credentials
* @tags security

View File

@@ -5,6 +5,7 @@
* @tags security
* correctness
* @problem.severity error
* @security-severity 4.2
* @sub-severity high
* @precision low
* @id py/use-of-exec

View File

@@ -30,5 +30,11 @@ predicate modification_of_locals(ControlFlowNode f) {
}
from AstNode a, ControlFlowNode f
where modification_of_locals(f) and a = f.getNode()
where
modification_of_locals(f) and
a = f.getNode() and
// in module level scope `locals() == globals()`
// see https://docs.python.org/3/library/functions.html#locals
// FP report in https://github.com/github/codeql/issues/6674
not a.getScope() instanceof ModuleScope
select a, "Modification of the locals() dictionary will have no effect on the local variables."

View File

@@ -7,6 +7,7 @@
* be counted as user written code.
* @kind metric
* @tags summary
* lines-of-code
* @id py/summary/lines-of-user-code
*/

View File

@@ -19,6 +19,7 @@ predicate unused_local(Name unused, LocalVariable v) {
def.getVariable() = v and
def.isUnused() and
not exists(def.getARedef()) and
not exists(annotation_without_assignment(v)) and
def.isRelevant() and
not v = any(Nonlocal n).getAVariable() and
not exists(def.getNode().getParentNode().(FunctionDef).getDefinedFunction().getADecorator()) and
@@ -26,6 +27,17 @@ predicate unused_local(Name unused, LocalVariable v) {
)
}
/**
* Gets any annotation of the local variable `v` that does not also reassign its value.
*
* TODO: This predicate should not be needed. Rather, annotated "assignments" that do not actually
* assign a value should not result in the creation of an SSA variable (which then goes unused).
*/
private AnnAssign annotation_without_assignment(LocalVariable v) {
result.getTarget() = v.getAStore() and
not exists(result.getValue())
}
from Name unused, LocalVariable v
where
unused_local(unused, v) and

View File

@@ -1,4 +1,4 @@
- description: Standard Code Scanning queries for Python
- qlpack: codeql-python
- queries: .
- apply: code-scanning-selectors.yml
from: codeql-suite-helpers
from: codeql/suite-helpers

View File

@@ -1,7 +1,7 @@
- description: Standard LGTM queries for Python, including ones not displayed by default
- qlpack: codeql-python
- queries: .
- apply: lgtm-selectors.yml
from: codeql-suite-helpers
from: codeql/suite-helpers
# These are only for IDE use.
- exclude:
tags contain:

View File

@@ -1,4 +1,4 @@
- description: Standard LGTM queries for Python
- apply: codeql-suites/python-lgtm-full.qls
- apply: lgtm-displayed-only.yml
from: codeql-suite-helpers
from: codeql/suite-helpers

View File

@@ -1,4 +1,4 @@
- description: Security-and-quality queries for Python
- qlpack: codeql-python
- queries: .
- apply: security-and-quality-selectors.yml
from: codeql-suite-helpers
from: codeql/suite-helpers

View File

@@ -1,4 +1,4 @@
- description: Security-extended queries for Python
- qlpack: codeql-python
- queries: .
- apply: security-extended-selectors.yml
from: codeql-suite-helpers
from: codeql/suite-helpers

View File

@@ -1,6 +0,0 @@
/**
* WARNING: Use of this module is DEPRECATED.
* All new queries should use `import python`.
*/
import python

View File

@@ -0,0 +1,33 @@
/**
* @name Clear-text logging of sensitive information
* @description OLD QUERY: Logging sensitive information without encryption or hashing can
* expose it to an attacker.
* @kind path-problem
* @problem.severity error
* @id py/old/clear-text-logging-sensitive-data
* @deprecated
*/
import python
import semmle.python.security.Paths
import semmle.python.dataflow.TaintTracking
import semmle.python.security.SensitiveData
import semmle.python.security.ClearText
class CleartextLoggingConfiguration extends TaintTracking::Configuration {
CleartextLoggingConfiguration() { this = "ClearTextLogging" }
override predicate isSource(DataFlow::Node src, TaintKind kind) {
src.asCfgNode().(SensitiveData::Source).isSourceOf(kind)
}
override predicate isSink(DataFlow::Node sink, TaintKind kind) {
sink.asCfgNode() instanceof ClearTextLogging::Sink and
kind instanceof SensitiveData
}
}
from CleartextLoggingConfiguration config, TaintedPathSource source, TaintedPathSink sink
where config.hasFlowPath(source, sink)
select sink.getSink(), source, sink, "Sensitive data returned by $@ is logged here.",
source.getSource(), source.getCfgNode().(SensitiveData::Source).repr()

View File

@@ -0,0 +1,33 @@
/**
* @name Clear-text storage of sensitive information
* @description OLD QUERY: Sensitive information stored without encryption or hashing can expose it to an
* attacker.
* @kind path-problem
* @problem.severity error
* @id py/old/clear-text-storage-sensitive-data
* @deprecated
*/
import python
import semmle.python.security.Paths
import semmle.python.dataflow.TaintTracking
import semmle.python.security.SensitiveData
import semmle.python.security.ClearText
class CleartextStorageConfiguration extends TaintTracking::Configuration {
CleartextStorageConfiguration() { this = "ClearTextStorage" }
override predicate isSource(DataFlow::Node src, TaintKind kind) {
src.asCfgNode().(SensitiveData::Source).isSourceOf(kind)
}
override predicate isSink(DataFlow::Node sink, TaintKind kind) {
sink.asCfgNode() instanceof ClearTextStorage::Sink and
kind instanceof SensitiveData
}
}
from CleartextStorageConfiguration config, TaintedPathSource source, TaintedPathSink sink
where config.hasFlowPath(source, sink)
select sink.getSink(), source, sink, "Sensitive data from $@ is stored here.", source.getSource(),
source.getCfgNode().(SensitiveData::Source).repr()

View File

@@ -0,0 +1,28 @@
/**
* @name OLD QUERY: Use of a broken or weak cryptographic algorithm
* @description Using broken or weak cryptographic algorithms can compromise security.
* @kind path-problem
* @problem.severity warning
* @id py/old/weak-cryptographic-algorithm
* @deprecated
*/
import python
import semmle.python.security.Paths
import semmle.python.security.SensitiveData
import semmle.python.security.Crypto
class BrokenCryptoConfiguration extends TaintTracking::Configuration {
BrokenCryptoConfiguration() { this = "Broken crypto configuration" }
override predicate isSource(TaintTracking::Source source) {
source instanceof SensitiveDataSource
}
override predicate isSink(TaintTracking::Sink sink) { sink instanceof WeakCryptoSink }
}
from BrokenCryptoConfiguration config, TaintedPathSource src, TaintedPathSink sink
where config.hasFlowPath(src, sink)
select sink.getSink(), src, sink, "$@ is used in a broken or weak cryptographic algorithm.",
src.getSource(), "Sensitive data"

View File

@@ -0,0 +1,50 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>If an LDAP query or DN is built using string concatenation or string formatting, and the
components of the concatenation include user input without any proper sanitization, a user
is likely to be able to run malicious LDAP queries.</p>
</overview>
<recommendation>
<p>If user input must be included in an LDAP query or DN, it should be escaped to
avoid a malicious user providing special characters that change the meaning
of the query. In Python2, user input should be escaped with <code>ldap.dn.escape_dn_chars</code>
or <code>ldap.filter.escape_filter_chars</code>, while in Python3, user input should be escaped with
<code>ldap3.utils.dn.escape_rdn</code> or <code>ldap3.utils.conv.escape_filter_chars</code>
depending on the component tainted by the user. A good practice is to escape filter characters
that could change the meaning of the query (https://tools.ietf.org/search/rfc4515#section-3).</p>
</recommendation>
<example>
<p>In the following examples, the code accepts both <code>username</code> and <code>dc</code> from the user,
which it then uses to build a LDAP query and DN.</p>
<p>The first and the second example uses the unsanitized user input directly
in the search filter and DN for the LDAP query.
A malicious user could provide special characters to change the meaning of these
components, and search for a completely different set of values.</p>
<sample src="examples/example_bad1.py" />
<sample src="examples/example_bad2.py" />
<p>In the third and four example, the input provided by the user is sanitized before it is included in the search filter or DN.
This ensures the meaning of the query cannot be changed by a malicious user.</p>
<sample src="examples/example_good1.py" />
<sample src="examples/example_good2.py" />
</example>
<references>
<li>OWASP: <a href="https://cheatsheetseries.owasp.org/cheatsheets/LDAP_Injection_Prevention_Cheat_Sheet.html">LDAP Injection Prevention Cheat Sheet</a>.</li>
<li>OWASP: <a href="https://owasp.org/www-community/attacks/LDAP_Injection">LDAP Injection</a>.</li>
<li>SonarSource: <a href="https://rules.sonarsource.com/python/RSPEC-2078">RSPEC-2078</a>.</li>
<li>Python2: <a href="https://www.python-ldap.org/en/python-ldap-3.3.0/reference/ldap.html">LDAP Documentation</a>.</li>
<li>Python3: <a href="https://ldap3.readthedocs.io/en/latest/">LDAP Documentation</a>.</li>
<li>Wikipedia: <a href="https://en.wikipedia.org/wiki/LDAP_injection">LDAP injection</a>.</li>
<li>BlackHat: <a href="https://www.blackhat.com/presentations/bh-europe-08/Alonso-Parada/Whitepaper/bh-eu-08-alonso-parada-WP.pdf">LDAP Injection and Blind LDAP Injection</a>.</li>
<li>LDAP: <a href="https://ldap.com/2018/05/04/understanding-and-defending-against-ldap-injection-attacks/">Understanding and Defending Against LDAP Injection Attacks</a>.</li>
</references>
</qhelp>

View File

@@ -0,0 +1,21 @@
/**
* @name LDAP query built from user-controlled sources
* @description Building an LDAP query from user-controlled sources is vulnerable to insertion of
* malicious LDAP code by the user.
* @kind path-problem
* @problem.severity error
* @id py/ldap-injection
* @tags experimental
* security
* external/cwe/cwe-090
*/
// Determine precision above
import python
import experimental.semmle.python.security.injection.LDAP
import DataFlow::PathGraph
from LDAPInjectionFlowConfig config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "$@ LDAP query parameter comes from $@.", sink.getNode(),
"This", source.getNode(), "a user-provided value"

View File

@@ -0,0 +1,15 @@
from flask import request, Flask
import ldap
@app.route("/normal")
def normal():
unsafe_dc = request.args['dc']
unsafe_filter = request.args['username']
dn = "dc={}".format(unsafe_dc)
search_filter = "(user={})".format(unsafe_filter)
ldap_connection = ldap.initialize("ldap://127.0.0.1")
user = ldap_connection.search_s(
dn, ldap.SCOPE_SUBTREE, search_filter)

View File

@@ -0,0 +1,15 @@
from flask import request, Flask
import ldap3
@app.route("/normal")
def normal():
unsafe_dc = request.args['dc']
unsafe_filter = request.args['username']
dn = "dc={}".format(unsafe_dc)
search_filter = "(user={})".format(unsafe_filter)
srv = ldap3.Server('ldap://127.0.0.1')
conn = ldap3.Connection(srv, user=dn, auto_bind=True)
conn.search(dn, search_filter)

View File

@@ -0,0 +1,20 @@
from flask import request, Flask
import ldap
import ldap.filter
import ldap.dn
@app.route("/normal")
def normal():
unsafe_dc = request.args['dc']
unsafe_filter = request.args['username']
safe_dc = ldap.dn.escape_dn_chars(unsafe_dc)
safe_filter = ldap.filter.escape_filter_chars(unsafe_filter)
dn = "dc={}".format(safe_dc)
search_filter = "(user={})".format(safe_filter)
ldap_connection = ldap.initialize("ldap://127.0.0.1")
user = ldap_connection.search_s(
dn, ldap.SCOPE_SUBTREE, search_filter)

View File

@@ -0,0 +1,20 @@
from flask import request, Flask
import ldap3
from ldap3.utils.dn import escape_rdn
from ldap3.utils.conv import escape_filter_chars
@app.route("/normal")
def normal():
unsafe_dc = request.args['dc']
unsafe_filter = request.args['username']
safe_dc = escape_rdn(unsafe_dc)
safe_filter = escape_filter_chars(unsafe_filter)
dn = "dc={}".format(safe_dc)
search_filter = "(user={})".format(safe_filter)
srv = ldap3.Server('ldap://127.0.0.1')
conn = ldap3.Connection(srv, user=dn, auto_bind=True)
conn.search(dn, search_filter)

View File

@@ -0,0 +1,31 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>If an LDAP query doesn't carry any kind of authentication, anonymous binds causes an empty or None-set password
to result in a successful authentication.</p>
</overview>
<recommendation>
<p>Use a non-empty password while establishing an LDAP connection.</p>
</recommendation>
<example>
<p>In the following examples, the code builds a LDAP query whose execution carries no authentication or binds anonymously.</p>
<sample src="examples/auth_bad_2.py" />
<sample src="examples/auth_bad_3.py" />
<p>In the third and fourth examples, the authentication is established using a password.</p>
<sample src="examples/auth_good_2.py" />
<sample src="examples/auth_good_3.py" />
</example>
<references>
<li>SonarSource: <a href="https://rules.sonarsource.com/python/type/Vulnerability/RSPEC-4433">RSPEC-4433</a>.</li>
<li>Python2: <a href="https://www.python-ldap.org/en/python-ldap-3.3.0/reference/ldap.html">LDAP Documentation</a>.</li>
<li>Python3: <a href="https://ldap3.readthedocs.io/en/latest/">LDAP Documentation</a>.</li>
</references>
</qhelp>

View File

@@ -0,0 +1,31 @@
/**
* @name Improper LDAP Authentication
* @description A user-controlled query carries no authentication
* @kind problem
* @problem.severity warning
* @id py/improper-ldap-auth
* @tags experimental
* security
* external/cwe/cwe-287
*/
// Determine precision above
import python
import experimental.semmle.python.Concepts
import semmle.python.dataflow.new.DataFlow
predicate authenticatesImproperly(LDAPBind ldapBind) {
(
DataFlow::localFlow(DataFlow::exprNode(any(None noneName)), ldapBind.getPassword()) or
not exists(ldapBind.getPassword())
)
or
exists(StrConst emptyString |
emptyString.getText() = "" and
DataFlow::localFlow(DataFlow::exprNode(emptyString), ldapBind.getPassword())
)
}
from LDAPBind ldapBind
where authenticatesImproperly(ldapBind)
select ldapBind, "The following LDAP bind operation is executed without authentication"

View File

@@ -0,0 +1,14 @@
from flask import request, Flask
import ldap
import ldap.filter
import ldap.dn
@app.route("/bind_example")
def bind_example():
dn = "dc={}".format(ldap.dn.escape_dn_chars(request.args['dc']))
search_filter = "(user={})".format(ldap.filter.escape_filter_chars(request.args['search']))
ldap_connection = ldap.initialize("ldap://127.0.0.1:1337")
ldap_connection.bind('cn=root', "")
user = ldap_connection.search_s(dn, ldap.SCOPE_SUBTREE, search_filter)

View File

@@ -0,0 +1,13 @@
from ldap3 import Server, Connection, ALL
from flask import request, Flask
from ldap3.utils.dn import escape_rdn
from ldap3.utils.conv import escape_filter_chars
@app.route("/passwordNone")
def passwordNone():
dn = "dc={}".format(escape_rdn(request.args['dc']))
search_filter = "(user={})".format(escape_filter_chars(request.args['search']))
srv = Server('servername', get_info=ALL)
conn = Connection(srv, user='user_dn', password=None)
status, result, response, _ = conn.search(dn, search_filter)

View File

@@ -0,0 +1,14 @@
from flask import request, Flask
import ldap
import ldap.filter
import ldap.dn
@app.route("/bind_example")
def bind_example():
dn = "dc={}".format(ldap.dn.escape_dn_chars(request.args['dc']))
search_filter = "(user={})".format(ldap.filter.escape_filter_chars(request.args['search']))
ldap_connection = ldap.initialize("ldap://127.0.0.1:1337")
ldap_connection.bind('cn=root', "SecurePa$$!")
user = ldap_connection.search_s(dn, ldap.SCOPE_SUBTREE, search_filter)

View File

@@ -0,0 +1,14 @@
from ldap3 import Server, Connection, ALL
from flask import request, Flask
from ldap3.utils.dn import escape_rdn
from ldap3.utils.conv import escape_filter_chars
@app.route("/passwordFromEnv")
def passwordFromEnv():
dn = "dc={}".format(escape_rdn(request.args['dc']))
search_filter = "(user={})".format(escape_filter_chars(request.args['search']))
srv = Server('servername', get_info=ALL)
conn = Connection(srv, user='user_dn',
password="SecurePa$$!")
status, result, response, _ = conn.search(dn, search_filter)

View File

@@ -0,0 +1,23 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>Failing to ensure the utilization of SSL in an LDAP connection can cause the entire communication
to be sent in cleartext making it easier for an attacker to intercept it.</p>
</overview>
<recommendation>
<p>Always set <code>use_SSL</code> to <code>True</code>, call <code>start_tls_s()</code> or set a proper option flag (<code>ldap.OPT_X_TLS_XXXXXX</code>).</p>
</recommendation>
<example>
<p>This example shows both good and bad ways to deal with this issue under Python 3.</p>
<p>The first one sets <code>use_SSL</code> to true as a keyword argument whereas the second one fails to provide a value for it, so
the default one is used (<code>False</code>).</p>
<sample src="examples/LDAPInsecureAuth.py" />
</example>
</qhelp>

View File

@@ -0,0 +1,21 @@
/**
* @name Python Insecure LDAP Authentication
* @description Python LDAP Insecure LDAP Authentication
* @kind path-problem
* @problem.severity error
* @id py/insecure-ldap-auth
* @tags experimental
* security
* external/cwe/cwe-522
* external/cwe/cwe-523
*/
// determine precision above
import python
import DataFlow::PathGraph
import experimental.semmle.python.security.LDAPInsecureAuth
from LDAPInsecureAuthConfig config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "$@ is authenticated insecurely.", sink.getNode(),
"This LDAP host"

View File

@@ -0,0 +1,20 @@
from ldap3 import Server, Connection, ALL
from flask import request, Flask
app = Flask(__name__)
@app.route("/good")
def good():
srv = Server(host, port, use_ssl=True)
conn = Connection(srv, dn, password)
conn.search(dn, search_filter)
return conn.response
@app.route("/bad")
def bad():
srv = Server(host, port)
conn = Connection(srv, dn, password)
conn.search(dn, search_filter)
return conn.response

View File

@@ -0,0 +1,33 @@
/**
* @name XPath query built from user-controlled sources
* @description Building a XPath query from user-controlled sources is vulnerable to insertion of
* malicious Xpath code by the user.
* @kind path-problem
* @problem.severity error
* @precision high
* @id py/xpath-injection
* @tags security
* external/cwe/cwe-643
*/
private import python
private import semmle.python.Concepts
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
import XpathInjection::XpathInjection
import DataFlow::PathGraph
class XpathInjectionConfiguration extends TaintTracking::Configuration {
XpathInjectionConfiguration() { this = "PathNotNormalizedConfiguration" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
}
from XpathInjectionConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink, source, sink, "This Xpath query depends on $@.", source, "a user-provided value"

View File

@@ -0,0 +1,35 @@
/**
* Provides a taint-tracking configuration for detecting "Xpath Injection" vulnerabilities.
*
* Note, for performance reasons: only import this file if
* `XpathInjection::Configuration` is needed, otherwise
* `XpathInjectionCustomizations` should be imported instead.
*/
private import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
/**
* Provides a taint-tracking configuration for detecting "Xpath Injection" vulnerabilities.
*/
module XpathInjection {
import XpathInjectionCustomizations::XpathInjection
/**
* A taint-tracking configuration for detecting "Xpath Injection" vulnerabilities.
*/
class Configuration extends TaintTracking::Configuration {
Configuration() { this = "Xpath Injection" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer }
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof SanitizerGuard
}
}
}

View File

@@ -0,0 +1,105 @@
/**
* Provides class and predicates to track external data that
* may represent malicious xpath query objects.
*
* This module is intended to be imported into a taint-tracking query.
*/
private import python
private import semmle.python.Concepts
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
/** Models Xpath Injection related classes and functions */
module XpathInjection {
/**
* A data flow source for "XPath injection" vulnerabilities.
*/
abstract class Source extends DataFlow::Node { }
/**
* A data flow sink for "XPath injection" vulnerabilities.
*/
abstract class Sink extends DataFlow::Node { }
/**
* A sanitizer for "XPath injection" vulnerabilities.
*/
abstract class Sanitizer extends DataFlow::Node { }
/**
* A sanitizer guard for "XPath injection" vulnerabilities.
*/
abstract class SanitizerGuard extends DataFlow::BarrierGuard { }
/**
* A source of remote user input, considered as a flow source.
*/
class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
/** Returns an API node referring to `lxml.etree` */
API::Node etree() { result = API::moduleImport("lxml").getMember("etree") }
/** Returns an API node referring to `lxml.etree` */
API::Node etreeFromString() { result = etree().getMember("fromstring") }
/** Returns an API node referring to `lxml.etree.parse` */
API::Node etreeParse() { result = etree().getMember("parse") }
/** Returns an API node referring to `lxml.etree.parse` */
API::Node libxml2parseFile() { result = API::moduleImport("libxml2").getMember("parseFile") }
/**
* A Sink representing an argument to `etree.XPath` or `etree.ETXPath` call.
*
* from lxml import etree
* root = etree.XML("<xmlContent>")
* find_text = etree.XPath("`sink`")
* find_text = etree.ETXPath("`sink`")
*/
private class EtreeXpathArgument extends Sink {
EtreeXpathArgument() { this = etree().getMember(["XPath", "ETXPath"]).getACall().getArg(0) }
}
/**
* A Sink representing an argument to the `etree.XPath` call.
*
* from lxml import etree
* root = etree.fromstring(file(XML_DB).read(), XMLParser())
* find_text = root.xpath("`sink`")
*/
private class EtreeFromstringXpathArgument extends Sink {
EtreeFromstringXpathArgument() {
this = etreeFromString().getReturn().getMember("xpath").getACall().getArg(0)
}
}
/**
* A Sink representing an argument to the `xpath` call to a parsed xml document.
*
* from lxml import etree
* from io import StringIO
* f = StringIO('<foo><bar></bar></foo>')
* tree = etree.parse(f)
* r = tree.xpath('`sink`')
*/
private class ParseXpathArgument extends Sink {
ParseXpathArgument() { this = etreeParse().getReturn().getMember("xpath").getACall().getArg(0) }
}
/**
* A Sink representing an argument to the `xpathEval` call to a parsed libxml2 document.
*
* import libxml2
* tree = libxml2.parseFile("file.xml")
* r = tree.xpathEval('`sink`')
*/
private class ParseFileXpathEvalArgument extends Sink {
ParseFileXpathEvalArgument() {
this = libxml2parseFile().getReturn().getMember("xpathEval").getACall().getArg(0)
}
}
}

View File

@@ -1,36 +0,0 @@
/**
* @name XPath query built from user-controlled sources
* @description Building a XPath query from user-controlled sources is vulnerable to insertion of
* malicious Xpath code by the user.
* @kind path-problem
* @problem.severity error
* @precision high
* @id py/xpath-injection
* @tags security
* external/cwe/cwe-643
*/
import python
import semmle.python.security.Paths
import semmle.python.security.strings.Untrusted
/* Sources */
import semmle.python.web.HttpRequest
/* Sinks */
import experimental.semmle.python.security.injection.Xpath
class XpathInjectionConfiguration extends TaintTracking::Configuration {
XpathInjectionConfiguration() { this = "Xpath injection configuration" }
override predicate isSource(TaintTracking::Source source) {
source instanceof HttpRequestTaintSource
}
override predicate isSink(TaintTracking::Sink sink) {
sink instanceof XpathInjection::XpathInjectionSink
}
}
from XpathInjectionConfiguration config, TaintedPathSource src, TaintedPathSink sink
where config.hasFlowPath(src, sink)
select sink.getSink(), src, sink, "This Xpath query depends on $@.", src.getSource(),
"a user-provided value"

View File

@@ -0,0 +1,6 @@
import python
import semmle.python.security.performance.SuperlinearBackTracking
from PolynomialBackTrackingTerm t
where t.getLocation().getFile().getBaseName() = "KnownCVEs.py"
select t.getRegex(), t, t.getReason()

View File

@@ -0,0 +1,108 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<include src="ReDoSIntroduction.inc.qhelp" />
<example>
<p>
Consider this use of a regular expression, which removes
all leading and trailing whitespace in a string:
</p>
<sample language="python">
re.sub(r"^\s+|\s+$", "", text) # BAD
</sample>
<p>
The sub-expression <code>"\s+$"</code> will match the
whitespace characters in <code>text</code> from left to right, but it
can start matching anywhere within a whitespace sequence. This is
problematic for strings that do <strong>not</strong> end with a whitespace
character. Such a string will force the regular expression engine to
process each whitespace sequence once per whitespace character in the
sequence.
</p>
<p>
This ultimately means that the time cost of trimming a
string is quadratic in the length of the string. So a string like
<code>"a b"</code> will take milliseconds to process, but a similar
string with a million spaces instead of just one will take several
minutes.
</p>
<p>
Avoid this problem by rewriting the regular expression to
not contain the ambiguity about when to start matching whitespace
sequences. For instance, by using a negative look-behind
(<code>^\s+|(?&lt;!\s)\s+$</code>), or just by using the built-in strip
method (<code>text.strip()</code>).
</p>
<p>
Note that the sub-expression <code>"^\s+"</code> is
<strong>not</strong> problematic as the <code>^</code> anchor restricts
when that sub-expression can start matching, and as the regular
expression engine matches from left to right.
</p>
</example>
<example>
<p>
As a similar, but slightly subtler problem, consider the
regular expression that matches lines with numbers, possibly written
using scientific notation:
</p>
<sample language="python">
^0\.\d+E?\d+$ # BAD
</sample>
<p>
The problem with this regular expression is in the
sub-expression <code>\d+E?\d+</code> because the second
<code>\d+</code> can start matching digits anywhere after the first
match of the first <code>\d+</code> if there is no <code>E</code> in
the input string.
</p>
<p>
This is problematic for strings that do <strong>not</strong>
end with a digit. Such a string will force the regular expression
engine to process each digit sequence once per digit in the sequence,
again leading to a quadratic time complexity.
</p>
<p>
To make the processing faster, the regular expression
should be rewritten such that the two <code>\d+</code> sub-expressions
do not have overlapping matches: <code>^0\.\d+(E\d+)?$</code>.
</p>
</example>
<include src="ReDoSReferences.inc.qhelp"/>
</qhelp>

View File

@@ -0,0 +1,33 @@
/**
* @name Polynomial regular expression used on uncontrolled data
* @description A regular expression that can require polynomial time
* to match may be vulnerable to denial-of-service attacks.
* @kind path-problem
* @problem.severity warning
* @precision high
* @id py/polynomial-redos
* @tags security
* external/cwe/cwe-730
* external/cwe/cwe-400
*/
import python
import semmle.python.security.performance.SuperlinearBackTracking
import semmle.python.security.dataflow.PolynomialReDoS
import DataFlow::PathGraph
from
PolynomialReDoS::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink,
PolynomialReDoS::Sink sinkNode, PolynomialBackTrackingTerm regexp
where
config.hasFlowPath(source, sink) and
sinkNode = sink.getNode() and
regexp.getRootTerm() = sinkNode.getRegExp()
// not (
// source.getNode().(Source).getKind() = "url" and
// regexp.isAtEndLine()
// )
select sinkNode.getHighlight(), source, sink,
"This $@ that depends on $@ may run slow on strings " + regexp.getPrefixMessage() +
"with many repetitions of '" + regexp.getPumpString() + "'.", regexp, "regular expression",
source.getNode(), "a user-provided value"

View File

@@ -0,0 +1,34 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<include src="ReDoSIntroduction.inc.qhelp" />
<example>
<p>
Consider this regular expression:
</p>
<sample language="python">
^_(__|.)+_$
</sample>
<p>
Its sub-expression <code>"(__|.)+?"</code> can match the string <code>"__"</code> either by the
first alternative <code>"__"</code> to the left of the <code>"|"</code> operator, or by two
repetitions of the second alternative <code>"."</code> to the right. Thus, a string consisting
of an odd number of underscores followed by some other character will cause the regular
expression engine to run for an exponential amount of time before rejecting the input.
</p>
<p>
This problem can be avoided by rewriting the regular expression to remove the ambiguity between
the two branches of the alternative inside the repetition:
</p>
<sample language="python">
^_(__|[^_])+_$
</sample>
</example>
<include src="ReDoSReferences.inc.qhelp"/>
</qhelp>

View File

@@ -0,0 +1,25 @@
/**
* @name Inefficient regular expression
* @description A regular expression that requires exponential time to match certain inputs
* can be a performance bottleneck, and may be vulnerable to denial-of-service
* attacks.
* @kind problem
* @problem.severity error
* @precision high
* @id py/redos
* @tags security
* external/cwe/cwe-730
* external/cwe/cwe-400
*/
import python
import semmle.python.security.performance.ExponentialBackTracking
from RegExpTerm t, string pump, State s, string prefixMsg
where
hasReDoSResult(t, pump, s, prefixMsg) and
// exclude verbose mode regexes for now
not t.getRegex().getAMode() = "VERBOSE"
select t,
"This part of the regular expression may cause exponential backtracking on strings " + prefixMsg +
"containing many repetitions of '" + pump + "'."

View File

@@ -0,0 +1,54 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
Some regular expressions take a long time to match certain
input strings to the point where the time it takes to match a string
of length <i>n</i> is proportional to <i>n<sup>k</sup></i> or even
<i>2<sup>n</sup></i>. Such regular expressions can negatively affect
performance, or even allow a malicious user to perform a Denial of
Service ("DoS") attack by crafting an expensive input string for the
regular expression to match.
</p>
<p>
The regular expression engine provided by Python uses a backtracking non-deterministic finite
automata to implement regular expression matching. While this approach
is space-efficient and allows supporting advanced features like
capture groups, it is not time-efficient in general. The worst-case
time complexity of such an automaton can be polynomial or even
exponential, meaning that for strings of a certain shape, increasing
the input length by ten characters may make the automaton about 1000
times slower.
</p>
<p>
Typically, a regular expression is affected by this
problem if it contains a repetition of the form <code>r*</code> or
<code>r+</code> where the sub-expression <code>r</code> is ambiguous
in the sense that it can match some string in multiple ways. More
information about the precise circumstances can be found in the
references.
</p>
</overview>
<recommendation>
<p>
Modify the regular expression to remove the ambiguity, or
ensure that the strings matched with the regular expression are short
enough that the time-complexity does not matter.
</p>
</recommendation>
</qhelp>

View File

@@ -0,0 +1,16 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<references>
<li>
OWASP:
<a href="https://www.owasp.org/index.php/Regular_expression_Denial_of_Service_-_ReDoS">Regular expression Denial of Service - ReDoS</a>.
</li>
<li>Wikipedia: <a href="https://en.wikipedia.org/wiki/ReDoS">ReDoS</a>.</li>
<li>Wikipedia: <a href="https://en.wikipedia.org/wiki/Time_complexity">Time complexity</a>.</li>
<li>James Kirrage, Asiri Rathnayake, Hayo Thielecke:
<a href="http://www.cs.bham.ac.uk/~hxt/research/reg-exp-sec.pdf">Static Analysis for Regular Expression Denial-of-Service Attack</a>.
</li>
</references>
</qhelp>

View File

@@ -0,0 +1,45 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
Constructing a regular expression with unsanitized user input is dangerous as a malicious user may
be able to modify the meaning of the expression. In particular, such a user may be able to provide
a regular expression fragment that takes exponential time in the worst case, and use that to
perform a Denial of Service attack.
</p>
</overview>
<recommendation>
<p>
Before embedding user input into a regular expression, use a sanitization function such as
<code>re.escape</code> to escape meta-characters that have a special meaning regarding
regular expressions' syntax.
</p>
</recommendation>
<example>
<p>
The following examples are based on a simple Flask web server environment.
</p>
<p>
The following example shows a HTTP request parameter that is used to construct a regular expression
without sanitizing it first:
</p>
<sample src="re_bad.py" />
<p>
Instead, the request parameter should be sanitized first, for example using the function
<code>re.escape</code>. This ensures that the user cannot insert characters which have a
special meaning in regular expressions.
</p>
<sample src="re_good.py" />
</example>
<references>
<li>OWASP: <a href="https://www.owasp.org/index.php/Regular_expression_Denial_of_Service_-_ReDoS">Regular expression Denial of Service - ReDoS</a>.</li>
<li>Wikipedia: <a href="https://en.wikipedia.org/wiki/ReDoS">ReDoS</a>.</li>
<li>Python docs: <a href="https://docs.python.org/3/library/re.html">re</a>.</li>
<li>SonarSource: <a href="https://rules.sonarsource.com/python/type/Vulnerability/RSPEC-2631">RSPEC-2631</a>.</li>
</references>
</qhelp>

View File

@@ -0,0 +1,29 @@
/**
* @name Regular expression injection
* @description User input should not be used in regular expressions without first being escaped,
* otherwise a malicious user may be able to inject an expression that could require
* exponential time on certain inputs.
* @kind path-problem
* @problem.severity error
* @id py/regex-injection
* @tags security
* external/cwe/cwe-730
* external/cwe/cwe-400
*/
// determine precision above
import python
import experimental.semmle.python.security.injection.RegexInjection
import DataFlow::PathGraph
from
RegexInjectionFlowConfig config, DataFlow::PathNode source, DataFlow::PathNode sink,
RegexInjectionSink regexInjectionSink, Attribute methodAttribute
where
config.hasFlowPath(source, sink) and
regexInjectionSink = sink.getNode() and
methodAttribute = regexInjectionSink.getRegexMethod()
select sink.getNode(), source, sink,
"$@ regular expression is constructed from a $@ and executed by $@.", sink.getNode(), "This",
source.getNode(), "user-provided value", methodAttribute,
regexInjectionSink.getRegexModule() + "." + methodAttribute.getName()

View File

@@ -0,0 +1,15 @@
from flask import request, Flask
import re
@app.route("/direct")
def direct():
unsafe_pattern = request.args["pattern"]
re.search(unsafe_pattern, "")
@app.route("/compile")
def compile():
unsafe_pattern = request.args["pattern"]
compiled_pattern = re.compile(unsafe_pattern)
compiled_pattern.search("")

View File

@@ -0,0 +1,17 @@
from flask import request, Flask
import re
@app.route("/direct")
def direct():
unsafe_pattern = request.args['pattern']
safe_pattern = re.escape(unsafe_pattern)
re.search(safe_pattern, "")
@app.route("/compile")
def compile():
unsafe_pattern = request.args['pattern']
safe_pattern = re.escape(unsafe_pattern)
compiled_pattern = re.compile(safe_pattern)
compiled_pattern.search("")

View File

@@ -0,0 +1,40 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
Passing user-controlled sources into NoSQL queries can result in a NoSQL injection flaw.
This tainted NoSQL query containing a user-controlled source can then execute a malicious query in a NoSQL database such as MongoDB.
In order for the user-controlled source to taint the NoSQL query, the user-controller source must be converted into a Python object using something like <code>json.loads</code> or <code>xmltodict.parse</code>.
</p>
<p>
Because a user-controlled source is passed into the query, the malicious user can have complete control over the query itself.
When the tainted query is executed, the malicious user can commit malicious actions such as bypassing role restrictions or accessing and modifying restricted data in the NoSQL database.
</p>
</overview>
<recommendation>
<p>
NoSQL injections can be prevented by escaping user-input's special characters that are passed into the NoSQL query from the user-supplied source.
Alternatively, using a sanitize library such as MongoSanitizer will ensure that user-supplied sources can not act as a malicious query.
</p>
</recommendation>
<example>
<p>In the example below, the user-supplied source is passed to a MongoDB function that queries the MongoDB database.</p>
<sample src="examples/NoSQLInjection-bad.py" />
<p> This can be fixed by using a sanitizer library like MongoSanitizer as shown in this annotated code version below.</p>
<sample src="examples/NoSQLInjection-good.py" />
</example>
<references>
<li>Mongoengine: <a href="http://mongoengine.org/">Documentation</a>.</li>
<li>Flask-Mongoengine: <a href="http://docs.mongoengine.org/projects/flask-mongoengine/en/latest/">Documentation</a>.</li>
<li>PyMongo: <a href="https://pypi.org/project/pymongo/">Documentation</a>.</li>
<li>Flask-PyMongo: <a href="https://flask-pymongo.readthedocs.io/en/latest/">Documentation</a>.</li>
<li>OWASP: <a href="https://owasp.org/www-pdf-archive/GOD16-NOSQL.pdf">NoSQL Injection</a>.</li>
<li>Security Stack Exchange Discussion: <a href="https://security.stackexchange.com/questions/83231/mongodb-nosql-injection-in-python-code">Question 83231</a>.</li>
</references>
</qhelp>

View File

@@ -0,0 +1,19 @@
/**
* @name NoSQL Injection
* @description Building a NoSQL query from user-controlled sources is vulnerable to insertion of
* malicious NoSQL code by the user.
* @kind path-problem
* @problem.severity error
* @id py/nosql-injection
* @tags experimental
* security
* external/cwe/cwe-943
*/
import python
import experimental.semmle.python.security.injection.NoSQLInjection
from CustomPathNode source, CustomPathNode sink
where noSQLInjectionFlow(source, sink)
select sink, source, sink, "$@ NoSQL query contains an unsanitized $@", sink, "This", source,
"user-provided value"

View File

@@ -0,0 +1,13 @@
from flask import Flask, request
from flask_pymongo import PyMongo
import json
mongo = PyMongo(app)
@app.route("/")
def home_page():
unsanitized_search = request.args['search']
json_search = json.loads(unsanitized_search)
result = mongo.db.user.find({'name': json_search})

View File

@@ -0,0 +1,15 @@
from flask import Flask, request
from flask_pymongo import PyMongo
from mongosanitizer.sanitizer import sanitize
import json
mongo = PyMongo(app)
@app.route("/")
def home_page():
unsafe_search = request.args['search']
json_search = json.loads(unsafe_search)
safe_search = sanitize(unsanitized_search)
result = client.db.collection.find_one({'data': safe_search})

View File

@@ -14,6 +14,73 @@ private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.TaintTracking
private import experimental.semmle.python.Frameworks
/** Provides classes for modeling Regular Expression-related APIs. */
module RegexExecution {
/**
* A data-flow node that executes a regular expression.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `RegexExecution` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets the argument containing the executed expression.
*/
abstract DataFlow::Node getRegexNode();
/**
* Gets the library used to execute the regular expression.
*/
abstract string getRegexModule();
}
}
/**
* A data-flow node that executes a regular expression.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `RegexExecution::Range` instead.
*/
class RegexExecution extends DataFlow::Node {
RegexExecution::Range range;
RegexExecution() { this = range }
DataFlow::Node getRegexNode() { result = range.getRegexNode() }
string getRegexModule() { result = range.getRegexModule() }
}
/** Provides classes for modeling Regular Expression escape-related APIs. */
module RegexEscape {
/**
* A data-flow node that escapes a regular expression.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `RegexEscape` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets the argument containing the escaped expression.
*/
abstract DataFlow::Node getRegexNode();
}
}
/**
* A data-flow node that escapes a regular expression.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `RegexEscape::Range` instead.
*/
class RegexEscape extends DataFlow::Node {
RegexEscape::Range range;
RegexEscape() { this = range }
DataFlow::Node getRegexNode() { result = range.getRegexNode() }
}
/** Provides classes for modeling XML parsing APIs. */
module XMLParsing {
/**
@@ -99,3 +166,213 @@ class XMLParser extends DataFlow::Node {
*/
predicate mayBeDangerous() { range.mayBeDangerous() }
}
/** Provides classes for modeling LDAP query execution-related APIs. */
module LDAPQuery {
/**
* A data-flow node that collects methods executing a LDAP query.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `LDAPQuery` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets the argument containing the executed expression.
*/
abstract DataFlow::Node getQuery();
}
}
/**
* A data-flow node that collect methods executing a LDAP query.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `LDAPQuery::Range` instead.
*/
class LDAPQuery extends DataFlow::Node {
LDAPQuery::Range range;
LDAPQuery() { this = range }
/**
* Gets the argument containing the executed expression.
*/
DataFlow::Node getQuery() { result = range.getQuery() }
}
/** Provides classes for modeling LDAP components escape-related APIs. */
module LDAPEscape {
/**
* A data-flow node that collects functions escaping LDAP components.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `LDAPEscape` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets the argument containing the escaped expression.
*/
abstract DataFlow::Node getAnInput();
}
}
/**
* A data-flow node that collects functions escaping LDAP components.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `LDAPEscape::Range` instead.
*/
class LDAPEscape extends DataFlow::Node {
LDAPEscape::Range range;
LDAPEscape() { this = range }
/**
* Gets the argument containing the escaped expression.
*/
DataFlow::Node getAnInput() { result = range.getAnInput() }
}
/** Provides classes for modeling LDAP bind-related APIs. */
module LDAPBind {
/**
* A data-flow node that collects methods binding a LDAP connection.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `LDAPBind` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets the argument containing the binding host.
*/
abstract DataFlow::Node getHost();
/**
* Gets the argument containing the binding expression.
*/
abstract DataFlow::Node getPassword();
/**
* Holds if the binding process use SSL.
*/
abstract predicate useSSL();
}
}
/**
* A data-flow node that collects methods binding a LDAP connection.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `LDAPBind::Range` instead.
*/
class LDAPBind extends DataFlow::Node {
LDAPBind::Range range;
LDAPBind() { this = range }
/**
* Gets the argument containing the binding host.
*/
DataFlow::Node getHost() { result = range.getHost() }
/**
* Gets the argument containing the binding expression.
*/
DataFlow::Node getPassword() { result = range.getPassword() }
/**
* Holds if the binding process use SSL.
*/
predicate useSSL() { range.useSSL() }
}
/** Provides classes for modeling SQL sanitization libraries. */
module SQLEscape {
/**
* A data-flow node that collects functions that escape SQL statements.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `SQLEscape` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets the argument containing the raw SQL statement.
*/
abstract DataFlow::Node getAnInput();
}
}
/**
* A data-flow node that collects functions escaping SQL statements.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `SQLEscape::Range` instead.
*/
class SQLEscape extends DataFlow::Node {
SQLEscape::Range range;
SQLEscape() { this = range }
/**
* Gets the argument containing the raw SQL statement.
*/
DataFlow::Node getAnInput() { result = range.getAnInput() }
}
/** Provides a class for modeling NoSQL execution APIs. */
module NoSQLQuery {
/**
* A data-flow node that executes NoSQL queries.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `NoSQLQuery` instead.
*/
abstract class Range extends DataFlow::Node {
/** Gets the argument that specifies the NoSQL query to be executed. */
abstract DataFlow::Node getQuery();
}
}
/**
* A data-flow node that executes NoSQL queries.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `NoSQLQuery::Range` instead.
*/
class NoSQLQuery extends DataFlow::Node {
NoSQLQuery::Range range;
NoSQLQuery() { this = range }
/** Gets the argument that specifies the NoSQL query to be executed. */
DataFlow::Node getQuery() { result = range.getQuery() }
}
/** Provides classes for modeling NoSQL sanitization-related APIs. */
module NoSQLSanitizer {
/**
* A data-flow node that collects functions sanitizing NoSQL queries.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `NoSQLSanitizer` instead.
*/
abstract class Range extends DataFlow::Node {
/** Gets the argument that specifies the NoSQL query to be sanitized. */
abstract DataFlow::Node getAnInput();
}
}
/**
* A data-flow node that collects functions sanitizing NoSQL queries.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `NoSQLSanitizer::Range` instead.
*/
class NoSQLSanitizer extends DataFlow::Node {
NoSQLSanitizer::Range range;
NoSQLSanitizer() { this = range }
/** Gets the argument that specifies the NoSQL query to be sanitized. */
DataFlow::Node getAnInput() { result = range.getAnInput() }
}

View File

@@ -4,3 +4,5 @@
private import experimental.semmle.python.frameworks.Stdlib
private import experimental.semmle.python.frameworks.XML
private import experimental.semmle.python.frameworks.LDAP
private import experimental.semmle.python.frameworks.NoSQL

View File

@@ -0,0 +1,263 @@
/**
* Provides classes modeling security-relevant aspects of the LDAP libraries.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.dataflow.new.RemoteFlowSources
private import experimental.semmle.python.Concepts
private import semmle.python.ApiGraphs
/**
* Provides models for Python's ldap-related libraries.
*/
private module LDAP {
/**
* Provides models for the `python-ldap` PyPI package (imported as `ldap`).
*
* See https://www.python-ldap.org/en/python-ldap-3.3.0/index.html
*/
private module LDAP2 {
/** Gets a reference to the `ldap` module. */
API::Node ldap() { result = API::moduleImport("ldap") }
/** Returns a `ldap` module instance */
API::Node ldapInitialize() { result = ldap().getMember("initialize") }
/** Gets a reference to a `ldap` operation. */
private DataFlow::TypeTrackingNode ldapOperation(DataFlow::TypeTracker t) {
t.start() and
result.(DataFlow::AttrRead).getObject().getALocalSource() = ldapInitialize().getACall()
or
exists(DataFlow::TypeTracker t2 | result = ldapOperation(t2).track(t2, t))
}
/**
* List of `ldap` methods used to execute a query.
*
* See https://www.python-ldap.org/en/python-ldap-3.3.0/reference/ldap.html#functions
*/
private class LDAP2QueryMethods extends string {
LDAP2QueryMethods() {
this in ["search", "search_s", "search_st", "search_ext", "search_ext_s"]
}
}
/** Gets a reference to a `ldap` operation. */
private DataFlow::Node ldapOperation() {
ldapOperation(DataFlow::TypeTracker::end()).flowsTo(result)
}
/** Gets a reference to a `ldap` query. */
private DataFlow::Node ldapQuery() {
result = ldapOperation() and
result.(DataFlow::AttrRead).getAttributeName() instanceof LDAP2QueryMethods
}
/**
* A class to find `ldap` methods executing a query.
*
* See `LDAP2QueryMethods`
*/
private class LDAP2Query extends DataFlow::CallCfgNode, LDAPQuery::Range {
LDAP2Query() { this.getFunction() = ldapQuery() }
override DataFlow::Node getQuery() {
result in [this.getArg(0), this.getArg(2), this.getArgByName("filterstr")]
}
}
/**
* List of `ldap` methods used for binding.
*
* See https://www.python-ldap.org/en/python-ldap-3.3.0/reference/ldap.html#functions
*/
private class LDAP2BindMethods extends string {
LDAP2BindMethods() {
this in [
"bind", "bind_s", "simple_bind", "simple_bind_s", "sasl_interactive_bind_s",
"sasl_non_interactive_bind_s", "sasl_external_bind_s", "sasl_gssapi_bind_s"
]
}
}
/** Gets a reference to a `ldap` bind. */
private DataFlow::Node ldapBind() {
result = ldapOperation() and
result.(DataFlow::AttrRead).getAttributeName() instanceof LDAP2BindMethods
}
/**List of SSL-demanding options */
private class LDAPSSLOptions extends DataFlow::Node {
LDAPSSLOptions() { this = ldap().getMember("OPT_X_TLS_" + ["DEMAND", "HARD"]).getAUse() }
}
/**
* A class to find `ldap` methods binding a connection.
*
* See `LDAP2BindMethods`
*/
private class LDAP2Bind extends DataFlow::CallCfgNode, LDAPBind::Range {
LDAP2Bind() { this.getFunction() = ldapBind() }
override DataFlow::Node getPassword() {
result in [this.getArg(1), this.getArgByName("cred")]
}
override DataFlow::Node getHost() {
exists(DataFlow::CallCfgNode initialize |
this.getFunction().(DataFlow::AttrRead).getObject().getALocalSource() = initialize and
initialize = ldapInitialize().getACall() and
result = initialize.getArg(0)
)
}
override predicate useSSL() {
// use initialize to correlate `this` and so avoid FP in several instances
exists(DataFlow::CallCfgNode initialize |
// ldap.set_option(ldap.OPT_X_TLS_%s)
ldap().getMember("set_option").getACall().getArg(_) instanceof LDAPSSLOptions
or
this.getFunction().(DataFlow::AttrRead).getObject().getALocalSource() = initialize and
initialize = ldapInitialize().getACall() and
(
// ldap_connection.start_tls_s()
// see https://www.python-ldap.org/en/python-ldap-3.3.0/reference/ldap.html#ldap.LDAPObject.start_tls_s
exists(DataFlow::MethodCallNode startTLS |
startTLS.getObject().getALocalSource() = initialize and
startTLS.getMethodName() = "start_tls_s"
)
or
// ldap_connection.set_option(ldap.OPT_X_TLS_%s, True)
exists(DataFlow::CallCfgNode setOption |
setOption.getFunction().(DataFlow::AttrRead).getObject().getALocalSource() =
initialize and
setOption.getFunction().(DataFlow::AttrRead).getAttributeName() = "set_option" and
setOption.getArg(0) instanceof LDAPSSLOptions and
not DataFlow::exprNode(any(False falseExpr))
.(DataFlow::LocalSourceNode)
.flowsTo(setOption.getArg(1))
)
)
)
}
}
/**
* A class to find calls to `ldap.dn.escape_dn_chars`.
*
* See https://github.com/python-ldap/python-ldap/blob/7ce471e238cdd9a4dd8d17baccd1c9e05e6f894a/Lib/ldap/dn.py#L17
*/
private class LDAP2EscapeDNCall extends DataFlow::CallCfgNode, LDAPEscape::Range {
LDAP2EscapeDNCall() { this = ldap().getMember("dn").getMember("escape_dn_chars").getACall() }
override DataFlow::Node getAnInput() { result = this.getArg(0) }
}
/**
* A class to find calls to `ldap.filter.escape_filter_chars`.
*
* See https://www.python-ldap.org/en/python-ldap-3.3.0/reference/ldap-filter.html#ldap.filter.escape_filter_chars
*/
private class LDAP2EscapeFilterCall extends DataFlow::CallCfgNode, LDAPEscape::Range {
LDAP2EscapeFilterCall() {
this = ldap().getMember("filter").getMember("escape_filter_chars").getACall()
}
override DataFlow::Node getAnInput() { result = this.getArg(0) }
}
}
/**
* Provides models for the `ldap3` PyPI package
*
* See https://pypi.org/project/ldap3/
*/
private module LDAP3 {
/** Gets a reference to the `ldap3` module. */
API::Node ldap3() { result = API::moduleImport("ldap3") }
/** Gets a reference to the `ldap3` `utils` module. */
API::Node ldap3Utils() { result = ldap3().getMember("utils") }
/** Returns a `ldap3` module `Server` instance */
API::Node ldap3Server() { result = ldap3().getMember("Server") }
/** Returns a `ldap3` module `Connection` instance */
API::Node ldap3Connection() { result = ldap3().getMember("Connection") }
/**
* A class to find `ldap3` methods executing a query.
*/
private class LDAP3Query extends DataFlow::CallCfgNode, LDAPQuery::Range {
LDAP3Query() {
this.getFunction().(DataFlow::AttrRead).getObject().getALocalSource() =
ldap3Connection().getACall() and
this.getFunction().(DataFlow::AttrRead).getAttributeName() = "search"
}
override DataFlow::Node getQuery() { result in [this.getArg(0), this.getArg(1)] }
}
/**
* A class to find `ldap3` methods binding a connection.
*/
class LDAP3Bind extends DataFlow::CallCfgNode, LDAPBind::Range {
LDAP3Bind() { this = ldap3Connection().getACall() }
override DataFlow::Node getPassword() {
result in [this.getArg(2), this.getArgByName("password")]
}
override DataFlow::Node getHost() {
exists(DataFlow::CallCfgNode serverCall |
serverCall = ldap3Server().getACall() and
this.getArg(0).getALocalSource() = serverCall and
result = serverCall.getArg(0)
)
}
override predicate useSSL() {
exists(DataFlow::CallCfgNode serverCall |
serverCall = ldap3Server().getACall() and
this.getArg(0).getALocalSource() = serverCall and
DataFlow::exprNode(any(True trueExpr))
.(DataFlow::LocalSourceNode)
.flowsTo([serverCall.getArg(2), serverCall.getArgByName("use_ssl")])
)
or
// ldap_connection.start_tls_s()
// see https://www.python-ldap.org/en/python-ldap-3.3.0/reference/ldap.html#ldap.LDAPObject.start_tls_s
exists(DataFlow::MethodCallNode startTLS |
startTLS.getMethodName() = "start_tls_s" and
startTLS.getObject().getALocalSource() = this
)
}
}
/**
* A class to find calls to `ldap3.utils.dn.escape_rdn`.
*
* See https://github.com/cannatag/ldap3/blob/4d33166f0869b929f59c6e6825a1b9505eb99967/ldap3/utils/dn.py#L390
*/
private class LDAP3EscapeDNCall extends DataFlow::CallCfgNode, LDAPEscape::Range {
LDAP3EscapeDNCall() { this = ldap3Utils().getMember("dn").getMember("escape_rdn").getACall() }
override DataFlow::Node getAnInput() { result = this.getArg(0) }
}
/**
* A class to find calls to `ldap3.utils.conv.escape_filter_chars`.
*
* See https://github.com/cannatag/ldap3/blob/4d33166f0869b929f59c6e6825a1b9505eb99967/ldap3/utils/conv.py#L91
*/
private class LDAP3EscapeFilterCall extends DataFlow::CallCfgNode, LDAPEscape::Range {
LDAP3EscapeFilterCall() {
this = ldap3Utils().getMember("conv").getMember("escape_filter_chars").getACall()
}
override DataFlow::Node getAnInput() { result = this.getArg(0) }
}
}
}

View File

@@ -0,0 +1,215 @@
/**
* Provides classes modeling security-relevant aspects of the standard libraries.
* Note: some modeling is done internally in the dataflow/taint tracking implementation.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.dataflow.new.RemoteFlowSources
private import experimental.semmle.python.Concepts
private import semmle.python.ApiGraphs
private module NoSQL {
// API Nodes returning `Mongo` instances.
/** Gets a reference to `pymongo.MongoClient` */
private API::Node pyMongo() {
result = API::moduleImport("pymongo").getMember("MongoClient").getReturn()
}
/** Gets a reference to `flask_pymongo.PyMongo` */
private API::Node flask_PyMongo() {
result = API::moduleImport("flask_pymongo").getMember("PyMongo").getReturn()
}
/** Gets a reference to `mongoengine` */
private API::Node mongoEngine() { result = API::moduleImport("mongoengine") }
/** Gets a reference to `flask_mongoengine.MongoEngine` */
private API::Node flask_MongoEngine() {
result = API::moduleImport("flask_mongoengine").getMember("MongoEngine").getReturn()
}
/**
* Gets a reference to an initialized `Mongo` instance.
* See `pyMongo()`, `flask_PyMongo()`
*/
private API::Node mongoInstance() {
result = pyMongo() or
result = flask_PyMongo()
}
/**
* Gets a reference to an initialized `Mongo` DB instance.
* See `mongoEngine()`, `flask_MongoEngine()`
*/
private API::Node mongoDBInstance() {
result = mongoEngine().getMember(["get_db", "connect"]).getReturn() or
result = mongoEngine().getMember("connection").getMember(["get_db", "connect"]).getReturn() or
result = flask_MongoEngine().getMember("get_db").getReturn()
}
/**
* Gets a reference to a `Mongo` DB use.
*
* See `mongoInstance()`, `mongoDBInstance()`.
*/
private DataFlow::LocalSourceNode mongoDB(DataFlow::TypeTracker t) {
t.start() and
(
exists(SubscriptNode subscript |
subscript.getObject() = mongoInstance().getAUse().asCfgNode() and
result.asCfgNode() = subscript
)
or
result.(DataFlow::AttrRead).getObject() = mongoInstance().getAUse()
or
result = mongoDBInstance().getAUse()
)
or
exists(DataFlow::TypeTracker t2 | result = mongoDB(t2).track(t2, t))
}
/**
* Gets a reference to a `Mongo` DB use.
*
* ```py
* from flask_pymongo import PyMongo
* mongo = PyMongo(app)
* mongo.db.user.find({'name': safe_search})
* ```
*
* `mongo.db` would be a use of a `Mongo` instance, and so the result.
*/
private DataFlow::Node mongoDB() { mongoDB(DataFlow::TypeTracker::end()).flowsTo(result) }
/**
* Gets a reference to a `Mongo` collection use.
*
* See `mongoDB()`.
*/
private DataFlow::LocalSourceNode mongoCollection(DataFlow::TypeTracker t) {
t.start() and
(
exists(SubscriptNode subscript | result.asCfgNode() = subscript |
subscript.getObject() = mongoDB().asCfgNode()
)
or
result.(DataFlow::AttrRead).getObject() = mongoDB()
)
or
exists(DataFlow::TypeTracker t2 | result = mongoCollection(t2).track(t2, t))
}
/**
* Gets a reference to a `Mongo` collection use.
*
* ```py
* from flask_pymongo import PyMongo
* mongo = PyMongo(app)
* mongo.db.user.find({'name': safe_search})
* ```
*
* `mongo.db.user` would be a use of a `Mongo` collection, and so the result.
*/
private DataFlow::Node mongoCollection() {
mongoCollection(DataFlow::TypeTracker::end()).flowsTo(result)
}
/** This class represents names of find_* relevant `Mongo` collection-level operation methods. */
private class MongoCollectionMethodNames extends string {
MongoCollectionMethodNames() {
this in [
"find", "find_raw_batches", "find_one", "find_one_and_delete", "find_and_modify",
"find_one_and_replace", "find_one_and_update", "find_one_or_404"
]
}
}
/**
* Gets a reference to a `Mongo` collection method.
*
* ```py
* from flask_pymongo import PyMongo
* mongo = PyMongo(app)
* mongo.db.user.find({'name': safe_search})
* ```
*
* `mongo.db.user.find` would be a collection method, and so the result.
*/
private DataFlow::Node mongoCollectionMethod() {
mongoCollection() = result.(DataFlow::AttrRead).getObject() and
result.(DataFlow::AttrRead).getAttributeName() instanceof MongoCollectionMethodNames
}
/**
* Gets a reference to a `Mongo` collection method call
*
* ```py
* from flask_pymongo import PyMongo
* mongo = PyMongo(app)
* mongo.db.user.find({'name': safe_search})
* ```
*
* `mongo.db.user.find({'name': safe_search})` would be a collection method call, and so the result.
*/
private class MongoCollectionCall extends DataFlow::CallCfgNode, NoSQLQuery::Range {
MongoCollectionCall() { this.getFunction() = mongoCollectionMethod() }
override DataFlow::Node getQuery() { result = this.getArg(0) }
}
/**
* Gets a reference to a call from a class whose base is a reference to `mongoEngine()` or `flask_MongoEngine()`'s
* `Document` or `EmbeddedDocument` objects and its attribute is `objects`.
*
* ```py
* from flask_mongoengine import MongoEngine
* db = MongoEngine(app)
* class Movie(db.Document):
* title = db.StringField(required=True)
*
* Movie.objects(__raw__=json_search)
* ```
*
* `Movie.objects(__raw__=json_search)` would be the result.
*/
private class MongoEngineObjectsCall extends DataFlow::CallCfgNode, NoSQLQuery::Range {
MongoEngineObjectsCall() {
this =
[mongoEngine(), flask_MongoEngine()]
.getMember(["Document", "EmbeddedDocument"])
.getASubclass()
.getMember("objects")
.getACall()
}
override DataFlow::Node getQuery() { result = this.getArgByName(_) }
}
/** Gets a reference to `mongosanitizer.sanitizer.sanitize` */
private class MongoSanitizerCall extends DataFlow::CallCfgNode, NoSQLSanitizer::Range {
MongoSanitizerCall() {
this =
API::moduleImport("mongosanitizer").getMember("sanitizer").getMember("sanitize").getACall()
}
override DataFlow::Node getAnInput() { result = this.getArg(0) }
}
/**
* ObjectId returns a string representing an id.
* If at any time ObjectId can't parse it's input (like when a tainted dict in passed in),
* then ObjectId will throw an error preventing the query from running.
*/
private class BsonObjectIdCall extends DataFlow::CallCfgNode, NoSQLSanitizer::Range {
BsonObjectIdCall() {
this =
API::moduleImport(["bson", "bson.objectid", "bson.json_util"])
.getMember("ObjectId")
.getACall()
}
override DataFlow::Node getAnInput() { result = this.getArg(0) }
}
}

View File

@@ -9,3 +9,91 @@ private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.dataflow.new.RemoteFlowSources
private import experimental.semmle.python.Concepts
private import semmle.python.ApiGraphs
/**
* Provides models for Python's `re` library.
*
* See https://docs.python.org/3/library/re.html
*/
private module Re {
/**
* List of `re` methods immediately executing an expression.
*
* See https://docs.python.org/3/library/re.html#module-contents
*/
private class RegexExecutionMethods extends string {
RegexExecutionMethods() {
this in ["match", "fullmatch", "search", "split", "findall", "finditer", "sub", "subn"]
}
}
/**
* A class to find `re` methods immediately executing an expression.
*
* See `RegexExecutionMethods`
*/
private class DirectRegex extends DataFlow::CallCfgNode, RegexExecution::Range {
DataFlow::Node regexNode;
DirectRegex() {
this = API::moduleImport("re").getMember(any(RegexExecutionMethods m)).getACall() and
regexNode = this.getArg(0)
}
override DataFlow::Node getRegexNode() { result = regexNode }
override string getRegexModule() { result = "re" }
}
/**
* A class to find `re` methods immediately executing a compiled expression by `re.compile`.
*
* Given the following example:
*
* ```py
* pattern = re.compile(input)
* pattern.match(s)
* ```
*
* This class will identify that `re.compile` compiles `input` and afterwards
* executes `re`'s `match`. As a result, `this` will refer to `pattern.match(s)`
* and `this.getRegexNode()` will return the node for `input` (`re.compile`'s first argument)
*
*
* See `RegexExecutionMethods`
*
* See https://docs.python.org/3/library/re.html#regular-expression-objects
*/
private class CompiledRegex extends DataFlow::MethodCallNode, RegexExecution::Range {
DataFlow::Node regexNode;
CompiledRegex() {
exists(DataFlow::MethodCallNode patternCall |
patternCall = API::moduleImport("re").getMember("compile").getACall() and
patternCall.flowsTo(this.getObject()) and
this.getMethodName() instanceof RegexExecutionMethods and
regexNode = patternCall.getArg(0)
)
}
override DataFlow::Node getRegexNode() { result = regexNode }
override string getRegexModule() { result = "re" }
}
/**
* A class to find `re` methods escaping an expression.
*
* See https://docs.python.org/3/library/re.html#re.escape
*/
class ReEscape extends DataFlow::CallCfgNode, RegexEscape::Range {
DataFlow::Node regexNode;
ReEscape() {
this = API::moduleImport("re").getMember("escape").getACall() and
regexNode = this.getArg(0)
}
override DataFlow::Node getRegexNode() { result = regexNode }
}
}

View File

@@ -0,0 +1,34 @@
/**
* Provides classes modeling security-relevant aspects of the `xmltodict` PyPI package.
* See https://pypi.org/project/xmltodict/
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
/**
* Provides models for the `xmltodict` PyPI package.
* See https://pypi.org/project/xmltodict/
*/
private module XmlToDictModel {
/** Gets a reference to the `xmltodict` module. */
API::Node xmltodict() { result = API::moduleImport("xmltodict") }
/**
* A call to `xmltodict.parse`
* See https://github.com/martinblech/xmltodict/blob/ae19c452ca000bf243bfc16274c060bf3bf7cf51/xmltodict.py#L198
*/
private class XmlToDictParseCall extends Decoding::Range, DataFlow::CallCfgNode {
XmlToDictParseCall() { this = xmltodict().getMember("parse").getACall() }
override predicate mayExecuteInput() { none() }
override DataFlow::Node getAnInput() { result = this.getArg(0) }
override DataFlow::Node getOutput() { result = this }
override string getFormat() { result = "XML" }
}
}

View File

@@ -0,0 +1,106 @@
/**
* Provides a taint-tracking configuration for detecting LDAP injection vulnerabilities
*/
import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.dataflow.new.RemoteFlowSources
import experimental.semmle.python.Concepts
string getFullHostRegex() { result = "(?i)ldap://.+" }
string getSchemaRegex() { result = "(?i)ldap(://)?" }
string getPrivateHostRegex() {
result =
"(?i)localhost(?:[:/?#].*)?|127\\.0\\.0\\.1(?:[:/?#].*)?|10(?:\\.[0-9]+){3}(?:[:/?#].*)?|172\\.16(?:\\.[0-9]+){2}(?:[:/?#].*)?|192.168(?:\\.[0-9]+){2}(?:[:/?#].*)?|\\[?0:0:0:0:0:0:0:1\\]?(?:[:/?#].*)?|\\[?::1\\]?(?:[:/?#].*)?"
}
// "ldap://somethingon.theinternet.com"
class LDAPFullHost extends StrConst {
LDAPFullHost() {
exists(string s |
s = this.getText() and
s.regexpMatch(getFullHostRegex()) and
// check what comes after the `ldap://` prefix
not s.substring(7, s.length()).regexpMatch(getPrivateHostRegex())
)
}
}
class LDAPSchema extends StrConst {
LDAPSchema() { this.getText().regexpMatch(getSchemaRegex()) }
}
class LDAPPrivateHost extends StrConst {
LDAPPrivateHost() { this.getText().regexpMatch(getPrivateHostRegex()) }
}
predicate concatAndCompareAgainstFullHostRegex(LDAPSchema schema, StrConst host) {
not host instanceof LDAPPrivateHost and
(schema.getText() + host.getText()).regexpMatch(getFullHostRegex())
}
// "ldap://" + "somethingon.theinternet.com"
class LDAPBothStrings extends BinaryExpr {
LDAPBothStrings() { concatAndCompareAgainstFullHostRegex(this.getLeft(), this.getRight()) }
}
// schema + host
class LDAPBothVar extends BinaryExpr {
LDAPBothVar() {
exists(SsaVariable schemaVar, SsaVariable hostVar |
this.getLeft() = schemaVar.getVariable().getALoad() and // getAUse is incompatible with Expr
this.getRight() = hostVar.getVariable().getALoad() and
concatAndCompareAgainstFullHostRegex(schemaVar
.getDefinition()
.getImmediateDominator()
.getNode(), hostVar.getDefinition().getImmediateDominator().getNode())
)
}
}
// schema + "somethingon.theinternet.com"
class LDAPVarString extends BinaryExpr {
LDAPVarString() {
exists(SsaVariable schemaVar |
this.getLeft() = schemaVar.getVariable().getALoad() and
concatAndCompareAgainstFullHostRegex(schemaVar
.getDefinition()
.getImmediateDominator()
.getNode(), this.getRight())
)
}
}
// "ldap://" + host
class LDAPStringVar extends BinaryExpr {
LDAPStringVar() {
exists(SsaVariable hostVar |
this.getRight() = hostVar.getVariable().getALoad() and
concatAndCompareAgainstFullHostRegex(this.getLeft(),
hostVar.getDefinition().getImmediateDominator().getNode())
)
}
}
/**
* A taint-tracking configuration for detecting LDAP insecure authentications.
*/
class LDAPInsecureAuthConfig extends TaintTracking::Configuration {
LDAPInsecureAuthConfig() { this = "LDAPInsecureAuthConfig" }
override predicate isSource(DataFlow::Node source) {
source instanceof RemoteFlowSource or
source.asExpr() instanceof LDAPFullHost or
source.asExpr() instanceof LDAPBothStrings or
source.asExpr() instanceof LDAPBothVar or
source.asExpr() instanceof LDAPVarString or
source.asExpr() instanceof LDAPStringVar
}
override predicate isSink(DataFlow::Node sink) {
exists(LDAPBind ldapBind | not ldapBind.useSSL() and sink = ldapBind.getHost())
}
}

View File

@@ -0,0 +1,24 @@
/**
* Provides a taint-tracking configuration for detecting LDAP injection vulnerabilities
*/
import python
import experimental.semmle.python.Concepts
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.dataflow.new.RemoteFlowSources
/**
* A taint-tracking configuration for detecting LDAP injections.
*/
class LDAPInjectionFlowConfig extends TaintTracking::Configuration {
LDAPInjectionFlowConfig() { this = "LDAPInjectionFlowConfig" }
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
override predicate isSink(DataFlow::Node sink) { sink = any(LDAPQuery ldapQuery).getQuery() }
override predicate isSanitizer(DataFlow::Node sanitizer) {
sanitizer = any(LDAPEscape ldapEsc).getAnInput()
}
}

View File

@@ -0,0 +1,57 @@
import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.DataFlow2
import semmle.python.dataflow.new.TaintTracking
import semmle.python.dataflow.new.TaintTracking2
import semmle.python.dataflow.new.RemoteFlowSources
import semmle.python.security.dataflow.ChainedConfigs12
import experimental.semmle.python.Concepts
import semmle.python.Concepts
/**
* A taint-tracking configuration for detecting string-to-dict conversions.
*/
class RFSToDictConfig extends TaintTracking::Configuration {
RFSToDictConfig() { this = "RFSToDictConfig" }
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
override predicate isSink(DataFlow::Node sink) {
exists(Decoding decoding | decoding.getFormat() = "JSON" and sink = decoding.getOutput())
}
override predicate isSanitizer(DataFlow::Node sanitizer) {
sanitizer = any(NoSQLSanitizer noSQLSanitizer).getAnInput()
}
}
/**
* A taint-tracking configuration for detecting NoSQL injections (previously converted to a dict).
*/
class FromDataDictToSink extends TaintTracking2::Configuration {
FromDataDictToSink() { this = "FromDataDictToSink" }
override predicate isSource(DataFlow::Node source) {
exists(Decoding decoding | decoding.getFormat() = "JSON" and source = decoding.getOutput())
}
override predicate isSink(DataFlow::Node sink) { sink = any(NoSQLQuery noSQLQuery).getQuery() }
override predicate isSanitizer(DataFlow::Node sanitizer) {
sanitizer = any(NoSQLSanitizer noSQLSanitizer).getAnInput()
}
}
/**
* A predicate checking string-to-dict conversion and its arrival to a NoSQL injection sink.
*/
predicate noSQLInjectionFlow(CustomPathNode source, CustomPathNode sink) {
exists(
RFSToDictConfig config, DataFlow::PathNode mid1, DataFlow2::PathNode mid2,
FromDataDictToSink config2
|
config.hasFlowPath(source.asNode1(), mid1) and
config2.hasFlowPath(mid2, sink.asNode2()) and
mid1.getNode().asCfgNode() = mid2.getNode().asCfgNode()
)
}

View File

@@ -0,0 +1,53 @@
/**
* Provides a taint-tracking configuration for detecting regular expression injection
* vulnerabilities.
*/
import python
import experimental.semmle.python.Concepts
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.dataflow.new.RemoteFlowSources
/**
* A class to find methods executing regular expressions.
*
* See `RegexExecution`
*/
class RegexInjectionSink extends DataFlow::Node {
string regexModule;
Attribute regexMethod;
RegexInjectionSink() {
exists(RegexExecution reExec |
this = reExec.getRegexNode() and
regexModule = reExec.getRegexModule() and
regexMethod = reExec.(DataFlow::CallCfgNode).getFunction().asExpr().(Attribute)
)
}
/**
* Gets the argument containing the executed expression.
*/
string getRegexModule() { result = regexModule }
/**
* Gets the method used to execute the regular expression.
*/
Attribute getRegexMethod() { result = regexMethod }
}
/**
* A taint-tracking configuration for detecting regular expression injections.
*/
class RegexInjectionFlowConfig extends TaintTracking::Configuration {
RegexInjectionFlowConfig() { this = "RegexInjectionFlowConfig" }
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
override predicate isSink(DataFlow::Node sink) { sink instanceof RegexInjectionSink }
override predicate isSanitizer(DataFlow::Node sanitizer) {
sanitizer = any(RegexEscape reEscape).getRegexNode()
}
}

View File

@@ -1,115 +0,0 @@
/**
* Provides class and predicates to track external data that
* may represent malicious xpath query objects.
*
* This module is intended to be imported into a taint-tracking query
* to extend `TaintKind` and `TaintSink`.
*/
import python
import semmle.python.dataflow.TaintTracking
import semmle.python.web.HttpRequest
/** Models Xpath Injection related classes and functions */
module XpathInjection {
/** Returns a class value which refers to `lxml.etree` */
Value etree() { result = Value::named("lxml.etree") }
/** Returns a class value which refers to `lxml.etree` */
Value libxml2parseFile() { result = Value::named("libxml2.parseFile") }
/** A generic taint sink that is vulnerable to Xpath injection. */
abstract class XpathInjectionSink extends TaintSink { }
/**
* A Sink representing an argument to the `etree.XPath` call.
*
* from lxml import etree
* root = etree.XML("<xmlContent>")
* find_text = etree.XPath("`sink`")
*/
private class EtreeXpathArgument extends XpathInjectionSink {
override string toString() { result = "lxml.etree.XPath" }
EtreeXpathArgument() {
exists(CallNode call | call.getFunction().(AttrNode).getObject("XPath").pointsTo(etree()) |
call.getArg(0) = this
)
}
override predicate sinks(TaintKind kind) { kind instanceof ExternalStringKind }
}
/**
* A Sink representing an argument to the `etree.EtXpath` call.
*
* from lxml import etree
* root = etree.XML("<xmlContent>")
* find_text = etree.EtXPath("`sink`")
*/
private class EtreeETXpathArgument extends XpathInjectionSink {
override string toString() { result = "lxml.etree.ETXpath" }
EtreeETXpathArgument() {
exists(CallNode call | call.getFunction().(AttrNode).getObject("ETXPath").pointsTo(etree()) |
call.getArg(0) = this
)
}
override predicate sinks(TaintKind kind) { kind instanceof ExternalStringKind }
}
/**
* A Sink representing an argument to the `xpath` call to a parsed xml document.
*
* from lxml import etree
* from io import StringIO
* f = StringIO('<foo><bar></bar></foo>')
* tree = etree.parse(f)
* r = tree.xpath('`sink`')
*/
private class ParseXpathArgument extends XpathInjectionSink {
override string toString() { result = "lxml.etree.parse.xpath" }
ParseXpathArgument() {
exists(
CallNode parseCall, CallNode xpathCall, ControlFlowNode obj, Variable var, AssignStmt assign
|
parseCall.getFunction().(AttrNode).getObject("parse").pointsTo(etree()) and
assign.getValue().(Call).getAFlowNode() = parseCall and
xpathCall.getFunction().(AttrNode).getObject("xpath") = obj and
var.getAUse() = obj and
assign.getATarget() = var.getAStore() and
xpathCall.getArg(0) = this
)
}
override predicate sinks(TaintKind kind) { kind instanceof ExternalStringKind }
}
/**
* A Sink representing an argument to the `xpathEval` call to a parsed libxml2 document.
*
* import libxml2
* tree = libxml2.parseFile("file.xml")
* r = tree.xpathEval('`sink`')
*/
private class ParseFileXpathEvalArgument extends XpathInjectionSink {
override string toString() { result = "libxml2.parseFile.xpathEval" }
ParseFileXpathEvalArgument() {
exists(
CallNode parseCall, CallNode xpathCall, ControlFlowNode obj, Variable var, AssignStmt assign
|
parseCall.getFunction().(AttrNode).pointsTo(libxml2parseFile()) and
assign.getValue().(Call).getAFlowNode() = parseCall and
xpathCall.getFunction().(AttrNode).getObject("xpathEval") = obj and
var.getAUse() = obj and
assign.getATarget() = var.getAStore() and
xpathCall.getArg(0) = this
)
}
override predicate sinks(TaintKind kind) { kind instanceof ExternalStringKind }
}
}

View File

@@ -1,420 +0,0 @@
/** Step Summaries and Type Tracking */
private import TypeTrackerSpecific
/**
* Any string that may appear as the name of a piece of content. This will usually include things like:
* - Attribute names (in Python)
* - Property names (in JavaScript)
*
* In general, this can also be used to model things like stores to specific list indices. To ensure
* correctness, it is important that
*
* - different types of content do not have overlapping names, and
* - the empty string `""` is not a valid piece of content, as it is used to indicate the absence of
* content instead.
*/
class ContentName extends string {
ContentName() { this = getPossibleContentName() }
}
/** Either a content name, or the empty string (representing no content). */
class OptionalContentName extends string {
OptionalContentName() { this instanceof ContentName or this = "" }
}
/**
* A description of a step on an inter-procedural data flow path.
*/
private newtype TStepSummary =
LevelStep() or
CallStep() or
ReturnStep() or
StoreStep(ContentName content) or
LoadStep(ContentName content)
/**
* INTERNAL: Use `TypeTracker` or `TypeBackTracker` instead.
*
* A description of a step on an inter-procedural data flow path.
*/
class StepSummary extends TStepSummary {
/** Gets a textual representation of this step summary. */
string toString() {
this instanceof LevelStep and result = "level"
or
this instanceof CallStep and result = "call"
or
this instanceof ReturnStep and result = "return"
or
exists(string content | this = StoreStep(content) | result = "store " + content)
or
exists(string content | this = LoadStep(content) | result = "load " + content)
}
}
/** Provides predicates for updating step summaries (`StepSummary`s). */
module StepSummary {
/**
* Gets the summary that corresponds to having taken a forwards
* heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
*/
cached
predicate step(LocalSourceNode nodeFrom, LocalSourceNode nodeTo, StepSummary summary) {
exists(Node mid | nodeFrom.flowsTo(mid) and smallstep(mid, nodeTo, summary))
}
/**
* Gets the summary that corresponds to having taken a forwards
* local, heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
*
* Unlike `StepSummary::step`, this predicate does not compress
* type-preserving steps.
*/
predicate smallstep(Node nodeFrom, LocalSourceNode nodeTo, StepSummary summary) {
jumpStep(nodeFrom, nodeTo) and
summary = LevelStep()
or
callStep(nodeFrom, nodeTo) and summary = CallStep()
or
returnStep(nodeFrom, nodeTo) and
summary = ReturnStep()
or
exists(string content |
localSourceStoreStep(nodeFrom, nodeTo, content) and
summary = StoreStep(content)
or
basicLoadStep(nodeFrom, nodeTo, content) and summary = LoadStep(content)
)
}
/**
* Holds if `nodeFrom` is being written to the `content` content of the object in `nodeTo`.
*
* Note that `nodeTo` will always be a local source node that flows to the place where the content
* is written in `basicStoreStep`. This may lead to the flow of information going "back in time"
* from the point of view of the execution of the program.
*
* For instance, if we interpret attribute writes in Python as writing to content with the same
* name as the attribute and consider the following snippet
*
* ```python
* def foo(y):
* x = Foo()
* bar(x)
* x.attr = y
* baz(x)
*
* def bar(x):
* z = x.attr
* ```
* for the attribute write `x.attr = y`, we will have `content` being the literal string `"attr"`,
* `nodeFrom` will be `y`, and `nodeTo` will be the object `Foo()` created on the first line of the
* function. This means we will track the fact that `x.attr` can have the type of `y` into the
* assignment to `z` inside `bar`, even though this attribute write happens _after_ `bar` is called.
*/
predicate localSourceStoreStep(Node nodeFrom, LocalSourceNode nodeTo, string content) {
exists(Node obj | nodeTo.flowsTo(obj) and basicStoreStep(nodeFrom, obj, content))
}
}
private newtype TTypeTracker = MkTypeTracker(Boolean hasCall, OptionalContentName content)
/**
* Summary of the steps needed to track a value to a given dataflow node.
*
* This can be used to track objects that implement a certain API in order to
* recognize calls to that API. Note that type-tracking does not by itself provide a
* source/sink relation, that is, it may determine that a node has a given type,
* but it won't determine where that type came from.
*
* It is recommended that all uses of this type are written in the following form,
* for tracking some type `myType`:
* ```ql
* DataFlow::LocalSourceNode myType(DataFlow::TypeTracker t) {
* t.start() and
* result = < source of myType >
* or
* exists (DataFlow::TypeTracker t2 |
* result = myType(t2).track(t2, t)
* )
* }
*
* DataFlow::Node myType() { myType(DataFlow::TypeTracker::end()).flowsTo(result) }
* ```
*
* Instead of `result = myType(t2).track(t2, t)`, you can also use the equivalent
* `t = t2.step(myType(t2), result)`. If you additionally want to track individual
* intra-procedural steps, use `t = t2.smallstep(myCallback(t2), result)`.
*/
class TypeTracker extends TTypeTracker {
Boolean hasCall;
OptionalContentName content;
TypeTracker() { this = MkTypeTracker(hasCall, content) }
/** Gets the summary resulting from appending `step` to this type-tracking summary. */
cached
TypeTracker append(StepSummary step) {
step = LevelStep() and result = this
or
step = CallStep() and result = MkTypeTracker(true, content)
or
step = ReturnStep() and hasCall = false and result = this
or
step = LoadStep(content) and result = MkTypeTracker(hasCall, "")
or
exists(string p | step = StoreStep(p) and content = "" and result = MkTypeTracker(hasCall, p))
}
/** Gets a textual representation of this summary. */
string toString() {
exists(string withCall, string withContent |
(if hasCall = true then withCall = "with" else withCall = "without") and
(if content != "" then withContent = " with content " + content else withContent = "") and
result = "type tracker " + withCall + " call steps" + withContent
)
}
/**
* Holds if this is the starting point of type tracking.
*/
predicate start() { hasCall = false and content = "" }
/**
* Holds if this is the starting point of type tracking, and the value starts in the content named `contentName`.
* The type tracking only ends after the content has been loaded.
*/
predicate startInContent(ContentName contentName) { hasCall = false and content = contentName }
/**
* Holds if this is the starting point of type tracking
* when tracking a parameter into a call, but not out of it.
*/
predicate call() { hasCall = true and content = "" }
/**
* Holds if this is the end point of type tracking.
*/
predicate end() { content = "" }
/**
* INTERNAL. DO NOT USE.
*
* Holds if this type has been tracked into a call.
*/
boolean hasCall() { result = hasCall }
/**
* INTERNAL. DO NOT USE.
*
* Gets the content associated with this type tracker.
*/
string getContent() { result = content }
/**
* Gets a type tracker that starts where this one has left off to allow continued
* tracking.
*
* This predicate is only defined if the type is not associated to a piece of content.
*/
TypeTracker continue() { content = "" and result = this }
/**
* Gets the summary that corresponds to having taken a forwards
* heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
*/
pragma[inline]
TypeTracker step(LocalSourceNode nodeFrom, LocalSourceNode nodeTo) {
exists(StepSummary summary |
StepSummary::step(nodeFrom, pragma[only_bind_out](nodeTo), pragma[only_bind_into](summary)) and
result = this.append(pragma[only_bind_into](summary))
)
}
/**
* Gets the summary that corresponds to having taken a forwards
* local, heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
*
* Unlike `TypeTracker::step`, this predicate exposes all edges
* in the flow graph, and not just the edges between `Node`s.
* It may therefore be less performant.
*
* Type tracking predicates using small steps typically take the following form:
* ```ql
* DataFlow::Node myType(DataFlow::TypeTracker t) {
* t.start() and
* result = < source of myType >
* or
* exists (DataFlow::TypeTracker t2 |
* t = t2.smallstep(myType(t2), result)
* )
* }
*
* DataFlow::Node myType() {
* result = myType(DataFlow::TypeTracker::end())
* }
* ```
*/
pragma[inline]
TypeTracker smallstep(Node nodeFrom, Node nodeTo) {
exists(StepSummary summary |
StepSummary::smallstep(nodeFrom, nodeTo, summary) and
result = this.append(summary)
)
or
simpleLocalFlowStep(nodeFrom, nodeTo) and
result = this
}
}
/** Provides predicates for implementing custom `TypeTracker`s. */
module TypeTracker {
/**
* Gets a valid end point of type tracking.
*/
TypeTracker end() { result.end() }
}
private newtype TTypeBackTracker = MkTypeBackTracker(Boolean hasReturn, OptionalContentName content)
/**
* Summary of the steps needed to back-track a use of a value to a given dataflow node.
*
* This can for example be used to track callbacks that are passed to a certain API,
* so we can model specific parameters of that callback as having a certain type.
*
* Note that type back-tracking does not provide a source/sink relation, that is,
* it may determine that a node will be used in an API call somewhere, but it won't
* determine exactly where that use was, or the path that led to the use.
*
* It is recommended that all uses of this type are written in the following form,
* for back-tracking some callback type `myCallback`:
*
* ```ql
* DataFlow::LocalSourceNode myCallback(DataFlow::TypeBackTracker t) {
* t.start() and
* result = (< some API call >).getArgument(< n >).getALocalSource()
* or
* exists (DataFlow::TypeBackTracker t2 |
* result = myCallback(t2).backtrack(t2, t)
* )
* }
*
* DataFlow::LocalSourceNode myCallback() { result = myCallback(DataFlow::TypeBackTracker::end()) }
* ```
*
* Instead of `result = myCallback(t2).backtrack(t2, t)`, you can also use the equivalent
* `t2 = t.step(result, myCallback(t2))`. If you additionally want to track individual
* intra-procedural steps, use `t2 = t.smallstep(result, myCallback(t2))`.
*/
class TypeBackTracker extends TTypeBackTracker {
Boolean hasReturn;
string content;
TypeBackTracker() { this = MkTypeBackTracker(hasReturn, content) }
/** Gets the summary resulting from prepending `step` to this type-tracking summary. */
TypeBackTracker prepend(StepSummary step) {
step = LevelStep() and result = this
or
step = CallStep() and hasReturn = false and result = this
or
step = ReturnStep() and result = MkTypeBackTracker(true, content)
or
exists(string p |
step = LoadStep(p) and content = "" and result = MkTypeBackTracker(hasReturn, p)
)
or
step = StoreStep(content) and result = MkTypeBackTracker(hasReturn, "")
}
/** Gets a textual representation of this summary. */
string toString() {
exists(string withReturn, string withContent |
(if hasReturn = true then withReturn = "with" else withReturn = "without") and
(if content != "" then withContent = " with content " + content else withContent = "") and
result = "type back-tracker " + withReturn + " return steps" + withContent
)
}
/**
* Holds if this is the starting point of type tracking.
*/
predicate start() { hasReturn = false and content = "" }
/**
* Holds if this is the end point of type tracking.
*/
predicate end() { content = "" }
/**
* INTERNAL. DO NOT USE.
*
* Holds if this type has been back-tracked into a call through return edge.
*/
boolean hasReturn() { result = hasReturn }
/**
* Gets a type tracker that starts where this one has left off to allow continued
* tracking.
*
* This predicate is only defined if the type has not been tracked into a piece of content.
*/
TypeBackTracker continue() { content = "" and result = this }
/**
* Gets the summary that corresponds to having taken a backwards
* heap and/or inter-procedural step from `nodeTo` to `nodeFrom`.
*/
pragma[inline]
TypeBackTracker step(LocalSourceNode nodeFrom, LocalSourceNode nodeTo) {
exists(StepSummary summary |
StepSummary::step(pragma[only_bind_out](nodeFrom), nodeTo, pragma[only_bind_into](summary)) and
this = result.prepend(pragma[only_bind_into](summary))
)
}
/**
* Gets the summary that corresponds to having taken a backwards
* local, heap and/or inter-procedural step from `nodeTo` to `nodeFrom`.
*
* Unlike `TypeBackTracker::step`, this predicate exposes all edges
* in the flowgraph, and not just the edges between
* `LocalSourceNode`s. It may therefore be less performant.
*
* Type tracking predicates using small steps typically take the following form:
* ```ql
* DataFlow::Node myType(DataFlow::TypeBackTracker t) {
* t.start() and
* result = < some API call >.getArgument(< n >)
* or
* exists (DataFlow::TypeBackTracker t2 |
* t = t2.smallstep(result, myType(t2))
* )
* }
*
* DataFlow::Node myType() {
* result = myType(DataFlow::TypeBackTracker::end())
* }
* ```
*/
pragma[inline]
TypeBackTracker smallstep(Node nodeFrom, Node nodeTo) {
exists(StepSummary summary |
StepSummary::smallstep(nodeFrom, nodeTo, summary) and
this = result.prepend(summary)
)
or
simpleLocalFlowStep(nodeFrom, nodeTo) and
this = result
}
}
/** Provides predicates for implementing custom `TypeBackTracker`s. */
module TypeBackTracker {
/**
* Gets a valid end point of type back-tracking.
*/
TypeBackTracker end() { result.end() }
}

Some files were not shown because too many files have changed in this diff Show More