Merge branch 'main' into peewee-modeling

This commit is contained in:
Rasmus Wriedt Larsen
2021-08-17 12:02:33 +02:00
2085 changed files with 77183 additions and 20087 deletions

View File

@@ -27,7 +27,7 @@ private string vulnerableHostname() {
}
/** Gets a reference to a hostname that can be used to bind to all interfaces. */
private DataFlow::LocalSourceNode vulnerableHostnameRef(DataFlow::TypeTracker t, string hostname) {
private DataFlow::TypeTrackingNode vulnerableHostnameRef(DataFlow::TypeTracker t, string hostname) {
t.start() and
exists(StrConst allInterfacesStrConst | hostname = vulnerableHostname() |
allInterfacesStrConst.getText() = hostname and
@@ -43,7 +43,7 @@ DataFlow::Node vulnerableHostnameRef(string hostname) {
}
/** Gets a reference to a tuple for which the first element is a hostname that can be used to bind to all interfaces. */
private DataFlow::LocalSourceNode vulnerableAddressTuple(DataFlow::TypeTracker t, string hostname) {
private DataFlow::TypeTrackingNode vulnerableAddressTuple(DataFlow::TypeTracker t, string hostname) {
t.start() and
result.asExpr() = any(Tuple tup | tup.getElt(0) = vulnerableHostnameRef(hostname).asExpr())
or

View File

@@ -19,7 +19,7 @@ import python
import semmle.python.security.dataflow.CommandInjection
import DataFlow::PathGraph
from CommandInjectionConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
from CommandInjection::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "This command depends on $@.", source.getNode(),
"a user-provided value"

View File

@@ -17,7 +17,7 @@ import python
import semmle.python.security.dataflow.ReflectedXSS
import DataFlow::PathGraph
from ReflectedXssConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
from ReflectedXSS::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "Cross-site scripting vulnerability due to $@.",
source.getNode(), "a user-provided value"

View File

@@ -16,7 +16,7 @@ import python
import semmle.python.security.dataflow.SqlInjection
import DataFlow::PathGraph
from SQLInjectionConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
from SqlInjection::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "This SQL query depends on $@.", source.getNode(),
"a user-provided value"

View File

@@ -19,7 +19,7 @@ import python
import semmle.python.security.dataflow.CodeInjection
import DataFlow::PathGraph
from CodeInjectionConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
from CodeInjection::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "$@ flows to here and is interpreted as code.",
source.getNode(), "A user-provided value"

View File

@@ -17,7 +17,7 @@ import python
import semmle.python.security.dataflow.StackTraceExposure
import DataFlow::PathGraph
from StackTraceExposureConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
from StackTraceExposure::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "$@ may be exposed to an external user", source.getNode(),
"Error information"

View File

@@ -17,7 +17,7 @@ import semmle.python.ApiGraphs
import semmle.python.frameworks.Flask
/** Gets a reference to a truthy literal. */
private DataFlow::LocalSourceNode truthyLiteral(DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode truthyLiteral(DataFlow::TypeTracker t) {
t.start() and
result.asExpr().(ImmutableLiteral).booleanValue() = true
or

View File

@@ -7,7 +7,7 @@
<p>
Encryption is key to the security of most, if not all, online communication.
Using Transport Layer Security (TLS) can ensure that communication cannot be interrupted by an interloper.
For this reason, is is unwise to disable the verification that TLS provides.
For this reason, it is unwise to disable the verification that TLS provides.
Functions in the <code>requests</code> module provide verification by default, and it is only when
explicitly turned off using <code>verify=False</code> that no verification occurs.
</p>

View File

@@ -14,25 +14,13 @@
*/
import python
import semmle.python.security.Paths
import semmle.python.dataflow.TaintTracking
import semmle.python.security.SensitiveData
import semmle.python.security.ClearText
private import semmle.python.dataflow.new.DataFlow
import DataFlow::PathGraph
import semmle.python.security.dataflow.CleartextLogging::CleartextLogging
class CleartextLoggingConfiguration extends TaintTracking::Configuration {
CleartextLoggingConfiguration() { this = "ClearTextLogging" }
override predicate isSource(DataFlow::Node src, TaintKind kind) {
src.asCfgNode().(SensitiveData::Source).isSourceOf(kind)
}
override predicate isSink(DataFlow::Node sink, TaintKind kind) {
sink.asCfgNode() instanceof ClearTextLogging::Sink and
kind instanceof SensitiveData
}
}
from CleartextLoggingConfiguration config, TaintedPathSource source, TaintedPathSink sink
where config.hasFlowPath(source, sink)
select sink.getSink(), source, sink, "Sensitive data returned by $@ is logged here.",
source.getSource(), source.getCfgNode().(SensitiveData::Source).repr()
from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink, string classification
where
config.hasFlowPath(source, sink) and
classification = source.getNode().(Source).getClassification()
select sink.getNode(), source, sink, "$@ is logged here.", source.getNode(),
"Sensitive data (" + classification + ")"

View File

@@ -14,25 +14,13 @@
*/
import python
import semmle.python.security.Paths
import semmle.python.dataflow.TaintTracking
import semmle.python.security.SensitiveData
import semmle.python.security.ClearText
private import semmle.python.dataflow.new.DataFlow
import DataFlow::PathGraph
import semmle.python.security.dataflow.CleartextStorage::CleartextStorage
class CleartextStorageConfiguration extends TaintTracking::Configuration {
CleartextStorageConfiguration() { this = "ClearTextStorage" }
override predicate isSource(DataFlow::Node src, TaintKind kind) {
src.asCfgNode().(SensitiveData::Source).isSourceOf(kind)
}
override predicate isSink(DataFlow::Node sink, TaintKind kind) {
sink.asCfgNode() instanceof ClearTextStorage::Sink and
kind instanceof SensitiveData
}
}
from CleartextStorageConfiguration config, TaintedPathSource source, TaintedPathSink sink
where config.hasFlowPath(source, sink)
select sink.getSink(), source, sink, "Sensitive data from $@ is stored here.", source.getSource(),
source.getCfgNode().(SensitiveData::Source).repr()
from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink, string classification
where
config.hasFlowPath(source, sink) and
classification = source.getNode().(Source).getClassification()
select sink.getNode(), source, sink, "$@ is stored here.", source.getNode(),
"Sensitive data (" + classification + ")"

View File

@@ -16,6 +16,6 @@ import python
import semmle.python.security.dataflow.UnsafeDeserialization
import DataFlow::PathGraph
from UnsafeDeserializationConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
from UnsafeDeserialization::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "Deserializing of $@.", source.getNode(), "untrusted input"

View File

@@ -16,7 +16,7 @@ import python
import semmle.python.security.dataflow.UrlRedirect
import DataFlow::PathGraph
from UrlRedirectConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
from UrlRedirect::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "Untrusted URL redirection due to $@.", source.getNode(),
"A user-provided value"

View File

@@ -0,0 +1,33 @@
/**
* @name Clear-text logging of sensitive information
* @description OLD QUERY: Logging sensitive information without encryption or hashing can
* expose it to an attacker.
* @kind path-problem
* @problem.severity error
* @id py/old/clear-text-logging-sensitive-data
* @deprecated
*/
import python
import semmle.python.security.Paths
import semmle.python.dataflow.TaintTracking
import semmle.python.security.SensitiveData
import semmle.python.security.ClearText
class CleartextLoggingConfiguration extends TaintTracking::Configuration {
CleartextLoggingConfiguration() { this = "ClearTextLogging" }
override predicate isSource(DataFlow::Node src, TaintKind kind) {
src.asCfgNode().(SensitiveData::Source).isSourceOf(kind)
}
override predicate isSink(DataFlow::Node sink, TaintKind kind) {
sink.asCfgNode() instanceof ClearTextLogging::Sink and
kind instanceof SensitiveData
}
}
from CleartextLoggingConfiguration config, TaintedPathSource source, TaintedPathSink sink
where config.hasFlowPath(source, sink)
select sink.getSink(), source, sink, "Sensitive data returned by $@ is logged here.",
source.getSource(), source.getCfgNode().(SensitiveData::Source).repr()

View File

@@ -0,0 +1,33 @@
/**
* @name Clear-text storage of sensitive information
* @description OLD QUERY: Sensitive information stored without encryption or hashing can expose it to an
* attacker.
* @kind path-problem
* @problem.severity error
* @id py/old/clear-text-storage-sensitive-data
* @deprecated
*/
import python
import semmle.python.security.Paths
import semmle.python.dataflow.TaintTracking
import semmle.python.security.SensitiveData
import semmle.python.security.ClearText
class CleartextStorageConfiguration extends TaintTracking::Configuration {
CleartextStorageConfiguration() { this = "ClearTextStorage" }
override predicate isSource(DataFlow::Node src, TaintKind kind) {
src.asCfgNode().(SensitiveData::Source).isSourceOf(kind)
}
override predicate isSink(DataFlow::Node sink, TaintKind kind) {
sink.asCfgNode() instanceof ClearTextStorage::Sink and
kind instanceof SensitiveData
}
}
from CleartextStorageConfiguration config, TaintedPathSource source, TaintedPathSink sink
where config.hasFlowPath(source, sink)
select sink.getSink(), source, sink, "Sensitive data from $@ is stored here.", source.getSource(),
source.getCfgNode().(SensitiveData::Source).repr()

View File

@@ -0,0 +1,31 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>If an LDAP query doesn't carry any kind of authentication, an anonymous bind causes an empty or None password
to result in a successful authentication.</p>
</overview>
<recommendation>
<p>Use a non-empty password while establishing an LDAP connection.</p>
</recommendation>
<example>
<p>In the first two examples, the code builds an LDAP query whose execution carries no authentication or binds anonymously.</p>
<sample src="examples/auth_bad_2.py" />
<sample src="examples/auth_bad_3.py" />
<p>In the third and fourth examples, the authentication is established using a password.</p>
<sample src="examples/auth_good_2.py" />
<sample src="examples/auth_good_3.py" />
</example>
<references>
<li>SonarSource: <a href="https://rules.sonarsource.com/python/type/Vulnerability/RSPEC-4433">RSPEC-4433</a>.</li>
<li>Python2: <a href="https://www.python-ldap.org/en/python-ldap-3.3.0/reference/ldap.html">LDAP Documentation</a>.</li>
<li>Python3: <a href="https://ldap3.readthedocs.io/en/latest/">LDAP Documentation</a>.</li>
</references>
</qhelp>

View File

@@ -0,0 +1,31 @@
/**
* @name Improper LDAP Authentication
* @description A user-controlled query carries no authentication
* @kind problem
* @problem.severity warning
* @id py/improper-ldap-auth
* @tags experimental
* security
* external/cwe/cwe-287
*/
// Determine precision above
import python
import experimental.semmle.python.Concepts
import semmle.python.dataflow.new.DataFlow
predicate authenticatesImproperly(LDAPBind ldapBind) {
(
DataFlow::localFlow(DataFlow::exprNode(any(None noneName)), ldapBind.getPassword()) or
not exists(ldapBind.getPassword())
)
or
exists(StrConst emptyString |
emptyString.getText() = "" and
DataFlow::localFlow(DataFlow::exprNode(emptyString), ldapBind.getPassword())
)
}
from LDAPBind ldapBind
where authenticatesImproperly(ldapBind)
select ldapBind, "The following LDAP bind operation is executed without authentication"

View File

@@ -0,0 +1,14 @@
from flask import request, Flask
import ldap
import ldap.filter
import ldap.dn
@app.route("/bind_example")
def bind_example():
dn = "dc={}".format(ldap.dn.escape_dn_chars(request.args['dc']))
search_filter = "(user={})".format(ldap.filter.escape_filter_chars(request.args['search']))
ldap_connection = ldap.initialize("ldap://127.0.0.1:1337")
ldap_connection.bind('cn=root', "")
user = ldap_connection.search_s(dn, ldap.SCOPE_SUBTREE, search_filter)

View File

@@ -0,0 +1,13 @@
from ldap3 import Server, Connection, ALL
from flask import request, Flask
from ldap3.utils.dn import escape_rdn
from ldap3.utils.conv import escape_filter_chars
@app.route("/passwordNone")
def passwordNone():
dn = "dc={}".format(escape_rdn(request.args['dc']))
search_filter = "(user={})".format(escape_filter_chars(request.args['search']))
srv = Server('servername', get_info=ALL)
conn = Connection(srv, user='user_dn', password=None)
status, result, response, _ = conn.search(dn, search_filter)

View File

@@ -0,0 +1,14 @@
from flask import request, Flask
import ldap
import ldap.filter
import ldap.dn
@app.route("/bind_example")
def bind_example():
dn = "dc={}".format(ldap.dn.escape_dn_chars(request.args['dc']))
search_filter = "(user={})".format(ldap.filter.escape_filter_chars(request.args['search']))
ldap_connection = ldap.initialize("ldap://127.0.0.1:1337")
ldap_connection.bind('cn=root', "SecurePa$$!")
user = ldap_connection.search_s(dn, ldap.SCOPE_SUBTREE, search_filter)

View File

@@ -0,0 +1,14 @@
from ldap3 import Server, Connection, ALL
from flask import request, Flask
from ldap3.utils.dn import escape_rdn
from ldap3.utils.conv import escape_filter_chars
@app.route("/passwordFromEnv")
def passwordFromEnv():
dn = "dc={}".format(escape_rdn(request.args['dc']))
search_filter = "(user={})".format(escape_filter_chars(request.args['search']))
srv = Server('servername', get_info=ALL)
conn = Connection(srv, user='user_dn',
password="SecurePa$$!")
status, result, response, _ = conn.search(dn, search_filter)

View File

@@ -0,0 +1,6 @@
import python
import semmle.python.security.performance.SuperlinearBackTracking
from PolynomialBackTrackingTerm t
where t.getLocation().getFile().getBaseName() = "KnownCVEs.py"
select t.getRegex(), t, t.getReason()

View File

@@ -0,0 +1,108 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<include src="ReDoSIntroduction.inc.qhelp" />
<example>
<p>
Consider this use of a regular expression, which removes
all leading and trailing whitespace in a string:
</p>
<sample language="python">
re.sub(r"^\s+|\s+$", "", text) # BAD
</sample>
<p>
The sub-expression <code>"\s+$"</code> will match the
whitespace characters in <code>text</code> from left to right, but it
can start matching anywhere within a whitespace sequence. This is
problematic for strings that do <strong>not</strong> end with a whitespace
character. Such a string will force the regular expression engine to
process each whitespace sequence once per whitespace character in the
sequence.
</p>
<p>
This ultimately means that the time cost of trimming a
string is quadratic in the length of the string. So a string like
<code>"a b"</code> will take milliseconds to process, but a similar
string with a million spaces instead of just one will take several
minutes.
</p>
<p>
Avoid this problem by rewriting the regular expression to
not contain the ambiguity about when to start matching whitespace
sequences. For instance, by using a negative look-behind
(<code>^\s+|(?&lt;!\s)\s+$</code>), or just by using the built-in strip
method (<code>text.strip()</code>).
</p>
<p>
Note that the sub-expression <code>"^\s+"</code> is
<strong>not</strong> problematic as the <code>^</code> anchor restricts
when that sub-expression can start matching, and as the regular
expression engine matches from left to right.
</p>
</example>
<example>
<p>
As a similar, but slightly subtler problem, consider the
regular expression that matches lines with numbers, possibly written
using scientific notation:
</p>
<sample language="python">
^0\.\d+E?\d+$ # BAD
</sample>
<p>
The problem with this regular expression is in the
sub-expression <code>\d+E?\d+</code> because the second
<code>\d+</code> can start matching digits anywhere after the first
match of the first <code>\d+</code> if there is no <code>E</code> in
the input string.
</p>
<p>
This is problematic for strings that do <strong>not</strong>
end with a digit. Such a string will force the regular expression
engine to process each digit sequence once per digit in the sequence,
again leading to a quadratic time complexity.
</p>
<p>
To make the processing faster, the regular expression
should be rewritten such that the two <code>\d+</code> sub-expressions
do not have overlapping matches: <code>^0\.\d+(E\d+)?$</code>.
</p>
</example>
<include src="ReDoSReferences.inc.qhelp"/>
</qhelp>

View File

@@ -0,0 +1,33 @@
/**
* @name Polynomial regular expression used on uncontrolled data
* @description A regular expression that can require polynomial time
* to match may be vulnerable to denial-of-service attacks.
* @kind path-problem
* @problem.severity warning
* @precision high
* @id py/polynomial-redos
* @tags security
* external/cwe/cwe-730
* external/cwe/cwe-400
*/
import python
import semmle.python.security.performance.SuperlinearBackTracking
import semmle.python.security.dataflow.PolynomialReDoS
import DataFlow::PathGraph
from
PolynomialReDoS::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink,
PolynomialReDoS::Sink sinkNode, PolynomialBackTrackingTerm regexp
where
config.hasFlowPath(source, sink) and
sinkNode = sink.getNode() and
regexp.getRootTerm() = sinkNode.getRegExp()
// not (
// source.getNode().(Source).getKind() = "url" and
// regexp.isAtEndLine()
// )
select sinkNode.getHighlight(), source, sink,
"This $@ that depends on $@ may run slow on strings " + regexp.getPrefixMessage() +
"with many repetitions of '" + regexp.getPumpString() + "'.", regexp, "regular expression",
source.getNode(), "a user-provided value"

View File

@@ -0,0 +1,34 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<include src="ReDoSIntroduction.inc.qhelp" />
<example>
<p>
Consider this regular expression:
</p>
<sample language="python">
^_(__|.)+_$
</sample>
<p>
Its sub-expression <code>"(__|.)+"</code> can match the string <code>"__"</code> either by the
first alternative <code>"__"</code> to the left of the <code>"|"</code> operator, or by two
repetitions of the second alternative <code>"."</code> to the right. Thus, a string consisting
of an odd number of underscores followed by some other character will cause the regular
expression engine to run for an exponential amount of time before rejecting the input.
</p>
<p>
This problem can be avoided by rewriting the regular expression to remove the ambiguity between
the two branches of the alternative inside the repetition:
</p>
<sample language="python">
^_(__|[^_])+_$
</sample>
</example>
<include src="ReDoSReferences.inc.qhelp"/>
</qhelp>

View File

@@ -0,0 +1,25 @@
/**
* @name Inefficient regular expression
* @description A regular expression that requires exponential time to match certain inputs
* can be a performance bottleneck, and may be vulnerable to denial-of-service
* attacks.
* @kind problem
* @problem.severity error
* @precision high
* @id py/redos
* @tags security
* external/cwe/cwe-730
* external/cwe/cwe-400
*/
import python
import semmle.python.security.performance.ExponentialBackTracking
from RegExpTerm t, string pump, State s, string prefixMsg
where
hasReDoSResult(t, pump, s, prefixMsg) and
// exclude verbose mode regexes for now
not t.getRegex().getAMode() = "VERBOSE"
select t,
"This part of the regular expression may cause exponential backtracking on strings " + prefixMsg +
"containing many repetitions of '" + pump + "'."

View File

@@ -0,0 +1,54 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
Some regular expressions take a long time to match certain
input strings to the point where the time it takes to match a string
of length <i>n</i> is proportional to <i>n<sup>k</sup></i> or even
<i>2<sup>n</sup></i>. Such regular expressions can negatively affect
performance, or even allow a malicious user to perform a Denial of
Service ("DoS") attack by crafting an expensive input string for the
regular expression to match.
</p>
<p>
The regular expression engine provided by Python uses a backtracking non-deterministic finite
automata to implement regular expression matching. While this approach
is space-efficient and allows supporting advanced features like
capture groups, it is not time-efficient in general. The worst-case
time complexity of such an automaton can be polynomial or even
exponential, meaning that for strings of a certain shape, increasing
the input length by ten characters may make the automaton about 1000
times slower.
</p>
<p>
Typically, a regular expression is affected by this
problem if it contains a repetition of the form <code>r*</code> or
<code>r+</code> where the sub-expression <code>r</code> is ambiguous
in the sense that it can match some string in multiple ways. More
information about the precise circumstances can be found in the
references.
</p>
</overview>
<recommendation>
<p>
Modify the regular expression to remove the ambiguity, or
ensure that the strings matched with the regular expression are short
enough that the time-complexity does not matter.
</p>
</recommendation>
</qhelp>

View File

@@ -0,0 +1,16 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<references>
<li>
OWASP:
<a href="https://www.owasp.org/index.php/Regular_expression_Denial_of_Service_-_ReDoS">Regular expression Denial of Service - ReDoS</a>.
</li>
<li>Wikipedia: <a href="https://en.wikipedia.org/wiki/ReDoS">ReDoS</a>.</li>
<li>Wikipedia: <a href="https://en.wikipedia.org/wiki/Time_complexity">Time complexity</a>.</li>
<li>James Kirrage, Asiri Rathnayake, Hayo Thielecke:
<a href="http://www.cs.bham.ac.uk/~hxt/research/reg-exp-sec.pdf">Static Analysis for Regular Expression Denial-of-Service Attack</a>.
</li>
</references>
</qhelp>

View File

@@ -146,3 +146,66 @@ class LDAPEscape extends DataFlow::Node {
*/
DataFlow::Node getAnInput() { result = range.getAnInput() }
}
/** Provides classes for modeling LDAP bind-related APIs. */
module LDAPBind {
/**
* A data-flow node that collects methods binding a LDAP connection.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `LDAPBind` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets the argument containing the binding expression.
*/
abstract DataFlow::Node getPassword();
}
}
/**
* A data-flow node that collects methods binding a LDAP connection.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `LDAPBind::Range` instead.
*/
class LDAPBind extends DataFlow::Node {
LDAPBind::Range range;
LDAPBind() { this = range }
DataFlow::Node getPassword() { result = range.getPassword() }
}
/** Provides classes for modeling SQL sanitization libraries. */
module SQLEscape {
/**
* A data-flow node that collects functions that escape SQL statements.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `SQLEscape` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets the argument containing the raw SQL statement.
*/
abstract DataFlow::Node getAnInput();
}
}
/**
* A data-flow node that collects functions escaping SQL statements.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `SQLEscape::Range` instead.
*/
class SQLEscape extends DataFlow::Node {
SQLEscape::Range range;
SQLEscape() { this = range }
/**
* Gets the argument containing the raw SQL statement.
*/
DataFlow::Node getAnInput() { result = range.getAnInput() }
}

View File

@@ -19,6 +19,20 @@ private module LDAP {
* See https://www.python-ldap.org/en/python-ldap-3.3.0/index.html
*/
private module LDAP2 {
/** Gets a reference to the `ldap` module. */
API::Node ldap() { result = API::moduleImport("ldap") }
/** Returns a `ldap` module instance */
API::Node ldapInitialize() { result = ldap().getMember("initialize") }
/** Gets a reference to a `ldap` operation. */
private DataFlow::TypeTrackingNode ldapOperation(DataFlow::TypeTracker t) {
t.start() and
result.(DataFlow::AttrRead).getObject().getALocalSource() = ldapInitialize().getACall()
or
exists(DataFlow::TypeTracker t2 | result = ldapOperation(t2).track(t2, t))
}
/**
* List of `ldap` methods used to execute a query.
*
@@ -30,32 +44,61 @@ private module LDAP {
}
}
/** Gets a reference to a `ldap` operation. */
private DataFlow::Node ldapOperation() {
ldapOperation(DataFlow::TypeTracker::end()).flowsTo(result)
}
/** Gets a reference to a `ldap` query. */
private DataFlow::Node ldapQuery() {
result = ldapOperation() and
result.(DataFlow::AttrRead).getAttributeName() instanceof LDAP2QueryMethods
}
/**
* A class to find `ldap` methods executing a query.
*
* See `LDAP2QueryMethods`
*/
private class LDAP2Query extends DataFlow::CallCfgNode, LDAPQuery::Range {
DataFlow::Node ldapQuery;
LDAP2Query() { this.getFunction() = ldapQuery() }
LDAP2Query() {
exists(DataFlow::AttrRead searchMethod |
this.getFunction() = searchMethod and
API::moduleImport("ldap").getMember("initialize").getACall() =
searchMethod.getObject().getALocalSource() and
searchMethod.getAttributeName() instanceof LDAP2QueryMethods and
(
ldapQuery = this.getArg(0)
or
(
ldapQuery = this.getArg(2) or
ldapQuery = this.getArgByName("filterstr")
)
)
)
override DataFlow::Node getQuery() {
result in [this.getArg(0), this.getArg(2), this.getArgByName("filterstr")]
}
}
override DataFlow::Node getQuery() { result = ldapQuery }
/**
* List of `ldap` methods used for binding.
*
* See https://www.python-ldap.org/en/python-ldap-3.3.0/reference/ldap.html#functions
*/
private class LDAP2BindMethods extends string {
LDAP2BindMethods() {
this in [
"bind", "bind_s", "simple_bind", "simple_bind_s", "sasl_interactive_bind_s",
"sasl_non_interactive_bind_s", "sasl_external_bind_s", "sasl_gssapi_bind_s"
]
}
}
/** Gets a reference to a `ldap` bind. */
private DataFlow::Node ldapBind() {
result = ldapOperation() and
result.(DataFlow::AttrRead).getAttributeName() instanceof LDAP2BindMethods
}
/**
* A class to find `ldap` methods binding a connection.
*
* See `LDAP2BindMethods`
*/
private class LDAP2Bind extends DataFlow::CallCfgNode, LDAPBind::Range {
LDAP2Bind() { this.getFunction() = ldapBind() }
override DataFlow::Node getPassword() {
result in [this.getArg(1), this.getArgByName("cred")]
}
}
/**
@@ -64,9 +107,7 @@ private module LDAP {
* See https://github.com/python-ldap/python-ldap/blob/7ce471e238cdd9a4dd8d17baccd1c9e05e6f894a/Lib/ldap/dn.py#L17
*/
private class LDAP2EscapeDNCall extends DataFlow::CallCfgNode, LDAPEscape::Range {
LDAP2EscapeDNCall() {
this = API::moduleImport("ldap").getMember("dn").getMember("escape_dn_chars").getACall()
}
LDAP2EscapeDNCall() { this = ldap().getMember("dn").getMember("escape_dn_chars").getACall() }
override DataFlow::Node getAnInput() { result = this.getArg(0) }
}
@@ -78,8 +119,7 @@ private module LDAP {
*/
private class LDAP2EscapeFilterCall extends DataFlow::CallCfgNode, LDAPEscape::Range {
LDAP2EscapeFilterCall() {
this =
API::moduleImport("ldap").getMember("filter").getMember("escape_filter_chars").getACall()
this = ldap().getMember("filter").getMember("escape_filter_chars").getACall()
}
override DataFlow::Node getAnInput() { result = this.getArg(0) }
@@ -92,26 +132,40 @@ private module LDAP {
* See https://pypi.org/project/ldap3/
*/
private module LDAP3 {
/** Gets a reference to the `ldap3` module. */
API::Node ldap3() { result = API::moduleImport("ldap3") }
/** Gets a reference to the `ldap3` `utils` module. */
API::Node ldap3Utils() { result = ldap3().getMember("utils") }
/** Returns a `ldap3` module `Server` instance */
API::Node ldap3Server() { result = ldap3().getMember("Server") }
/** Returns a `ldap3` module `Connection` instance */
API::Node ldap3Connection() { result = ldap3().getMember("Connection") }
/**
* A class to find `ldap3` methods executing a query.
*/
private class LDAP3Query extends DataFlow::CallCfgNode, LDAPQuery::Range {
DataFlow::Node ldapQuery;
LDAP3Query() {
exists(DataFlow::AttrRead searchMethod |
this.getFunction() = searchMethod and
API::moduleImport("ldap3").getMember("Connection").getACall() =
searchMethod.getObject().getALocalSource() and
searchMethod.getAttributeName() = "search" and
(
ldapQuery = this.getArg(0) or
ldapQuery = this.getArg(1)
)
)
this.getFunction().(DataFlow::AttrRead).getObject().getALocalSource() =
ldap3Connection().getACall() and
this.getFunction().(DataFlow::AttrRead).getAttributeName() = "search"
}
override DataFlow::Node getQuery() { result = ldapQuery }
override DataFlow::Node getQuery() { result in [this.getArg(0), this.getArg(1)] }
}
/**
* A class to find `ldap3` methods binding a connection.
*/
class LDAP3Bind extends DataFlow::CallCfgNode, LDAPBind::Range {
LDAP3Bind() { this = ldap3Connection().getACall() }
override DataFlow::Node getPassword() {
result in [this.getArg(2), this.getArgByName("password")]
}
}
/**
@@ -120,14 +174,7 @@ private module LDAP {
* See https://github.com/cannatag/ldap3/blob/4d33166f0869b929f59c6e6825a1b9505eb99967/ldap3/utils/dn.py#L390
*/
private class LDAP3EscapeDNCall extends DataFlow::CallCfgNode, LDAPEscape::Range {
LDAP3EscapeDNCall() {
this =
API::moduleImport("ldap3")
.getMember("utils")
.getMember("dn")
.getMember("escape_rdn")
.getACall()
}
LDAP3EscapeDNCall() { this = ldap3Utils().getMember("dn").getMember("escape_rdn").getACall() }
override DataFlow::Node getAnInput() { result = this.getArg(0) }
}
@@ -139,12 +186,7 @@ private module LDAP {
*/
private class LDAP3EscapeFilterCall extends DataFlow::CallCfgNode, LDAPEscape::Range {
LDAP3EscapeFilterCall() {
this =
API::moduleImport("ldap3")
.getMember("utils")
.getMember("conv")
.getMember("escape_filter_chars")
.getACall()
this = ldap3Utils().getMember("conv").getMember("escape_filter_chars").getACall()
}
override DataFlow::Node getAnInput() { result = this.getArg(0) }

View File

@@ -0,0 +1,148 @@
/**
* Provides classes modeling security-relevant aspects of the 'SqlAlchemy' package.
* See https://pypi.org/project/SQLAlchemy/.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.ApiGraphs
private import semmle.python.Concepts
private import experimental.semmle.python.Concepts
private module SqlAlchemy {
/**
* Returns an instantiation of a SqlAlchemy Session object.
* See https://docs.sqlalchemy.org/en/14/orm/session_api.html#sqlalchemy.orm.Session and
* https://docs.sqlalchemy.org/en/14/orm/session_api.html#sqlalchemy.orm.sessionmaker
*/
private API::Node getSqlAlchemySessionInstance() {
result = API::moduleImport("sqlalchemy.orm").getMember("Session").getReturn() or
result = API::moduleImport("sqlalchemy.orm").getMember("sessionmaker").getReturn().getReturn()
}
/**
* Returns an instantiation of a SqlAlchemy Engine object.
* See https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine
*/
private API::Node getSqlAlchemyEngineInstance() {
result = API::moduleImport("sqlalchemy").getMember("create_engine").getReturn()
}
/**
* Returns an instantiation of a SqlAlchemy Query object.
* See https://docs.sqlalchemy.org/en/14/orm/query.html?highlight=query#sqlalchemy.orm.Query
*/
private API::Node getSqlAlchemyQueryInstance() {
result = getSqlAlchemySessionInstance().getMember("query").getReturn()
}
/**
* A call to `execute` meant to execute an SQL expression
* See the following links:
* - https://docs.sqlalchemy.org/en/14/core/connections.html?highlight=execute#sqlalchemy.engine.Connection.execute
* - https://docs.sqlalchemy.org/en/14/core/connections.html?highlight=execute#sqlalchemy.engine.Engine.execute
* - https://docs.sqlalchemy.org/en/14/orm/session_api.html?highlight=execute#sqlalchemy.orm.Session.execute
*/
private class SqlAlchemyExecuteCall extends DataFlow::CallCfgNode, SqlExecution::Range {
SqlAlchemyExecuteCall() {
// new way
this = getSqlAlchemySessionInstance().getMember("execute").getACall() or
this =
getSqlAlchemyEngineInstance()
.getMember(["connect", "begin"])
.getReturn()
.getMember("execute")
.getACall()
}
override DataFlow::Node getSql() { result = this.getArg(0) }
}
/**
* A call to `scalar` meant to execute an SQL expression
* See https://docs.sqlalchemy.org/en/14/orm/session_api.html#sqlalchemy.orm.Session.scalar and
* https://docs.sqlalchemy.org/en/14/core/connections.html?highlight=scalar#sqlalchemy.engine.Engine.scalar
*/
private class SqlAlchemyScalarCall extends DataFlow::CallCfgNode, SqlExecution::Range {
SqlAlchemyScalarCall() {
this =
[getSqlAlchemySessionInstance(), getSqlAlchemyEngineInstance()]
.getMember("scalar")
.getACall()
}
override DataFlow::Node getSql() { result = this.getArg(0) }
}
/**
* A call on a Query object
* See https://docs.sqlalchemy.org/en/14/orm/query.html?highlight=query#sqlalchemy.orm.Query
*/
private class SqlAlchemyQueryCall extends DataFlow::CallCfgNode, SqlExecution::Range {
SqlAlchemyQueryCall() {
this =
getSqlAlchemyQueryInstance()
.getMember(any(SqlAlchemyVulnerableMethodNames methodName))
.getACall()
}
override DataFlow::Node getSql() { result = this.getArg(0) }
}
/**
* This class represents a list of methods vulnerable to sql injection.
*
* See https://github.com/jty-team/codeql/pull/2#issue-611592361
*/
private class SqlAlchemyVulnerableMethodNames extends string {
SqlAlchemyVulnerableMethodNames() { this in ["filter", "filter_by", "group_by", "order_by"] }
}
/**
* Additional taint-steps for `sqlalchemy.text()`
*
* See https://docs.sqlalchemy.org/en/14/core/sqlelement.html#sqlalchemy.sql.expression.text
* See https://docs.sqlalchemy.org/en/14/core/sqlelement.html#sqlalchemy.sql.expression.TextClause
*/
class SqlAlchemyTextAdditionalTaintSteps extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
exists(DataFlow::CallCfgNode call |
(
call = API::moduleImport("sqlalchemy").getMember("text").getACall()
or
call = API::moduleImport("sqlalchemy").getMember("sql").getMember("text").getACall()
or
call =
API::moduleImport("sqlalchemy")
.getMember("sql")
.getMember("expression")
.getMember("text")
.getACall()
or
call =
API::moduleImport("sqlalchemy")
.getMember("sql")
.getMember("expression")
.getMember("TextClause")
.getACall()
) and
nodeFrom in [call.getArg(0), call.getArgByName("text")] and
nodeTo = call
)
}
}
/**
* Gets a reference to `sqlescapy.sqlescape`.
*
* See https://pypi.org/project/sqlescapy/
*/
class SQLEscapySanitizerCall extends DataFlow::CallCfgNode, SQLEscape::Range {
SQLEscapySanitizerCall() {
this = API::moduleImport("sqlescapy").getMember("sqlescape").getACall()
}
override DataFlow::Node getAnInput() { result = this.getArg(0) }
}
}

View File

@@ -512,7 +512,7 @@ module API {
*
* The flow from `src` to that node may be inter-procedural.
*/
private DataFlow::LocalSourceNode trackUseNode(
private DataFlow::TypeTrackingNode trackUseNode(
DataFlow::LocalSourceNode src, DataFlow::TypeTracker t
) {
t.start() and
@@ -530,7 +530,6 @@ module API {
cached
DataFlow::LocalSourceNode trackUseNode(DataFlow::LocalSourceNode src) {
result = trackUseNode(src, DataFlow::TypeTracker::end()) and
// We exclude module variable nodes, as these do not correspond to real uses.
not result instanceof DataFlow::ModuleVariableNode
}

View File

@@ -49,12 +49,15 @@ abstract class AstNode extends AstNode_ {
/** Whether this contains `inner` syntactically */
predicate contains(AstNode inner) { this.getAChildNode+() = inner }
/** Whether this contains `inner` syntactically and `inner` has the same scope as `this` */
predicate containsInScope(AstNode inner) {
pragma[noinline]
private predicate containsInScope(AstNode inner, Scope scope) {
this.contains(inner) and
this.getScope() = inner.getScope() and
not inner instanceof Scope
not inner instanceof Scope and
scope = this.getScope()
}
/** Whether this contains `inner` syntactically and `inner` has the same scope as `this` */
predicate containsInScope(AstNode inner) { this.containsInScope(inner, inner.getScope()) }
}
/* Parents */

View File

@@ -4,7 +4,7 @@
* provide concrete subclasses.
*/
import python
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.TaintTracking
@@ -72,6 +72,39 @@ module FileSystemAccess {
}
}
/**
* A data flow node that writes data to the file system.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `FileSystemWriteAccess::Range` instead.
*/
class FileSystemWriteAccess extends FileSystemAccess {
override FileSystemWriteAccess::Range range;
/**
* Gets a node that represents data to be written to the file system (possibly with
* some transformation happening before it is written, like JSON encoding).
*/
DataFlow::Node getADataNode() { result = range.getADataNode() }
}
/** Provides a class for modeling new file system writes. */
module FileSystemWriteAccess {
/**
* A data flow node that writes data to the file system.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `FileSystemWriteAccess` instead.
*/
abstract class Range extends FileSystemAccess::Range {
/**
* Gets a node that represents data to be written to the file system (possibly with
* some transformation happening before it is written, like JSON encoding).
*/
abstract DataFlow::Node getADataNode();
}
}
/** Provides classes for modeling path-related APIs. */
module Path {
/**
@@ -235,6 +268,35 @@ private class EncodingAdditionalTaintStep extends TaintTracking::AdditionalTaint
}
}
/**
* A data-flow node that logs data.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `Logging::Range` instead.
*/
class Logging extends DataFlow::Node {
Logging::Range range;
Logging() { this = range }
/** Gets an input that is logged. */
DataFlow::Node getAnInput() { result = range.getAnInput() }
}
/** Provides a class for modeling new logging mechanisms. */
module Logging {
/**
* A data-flow node that logs data.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `Logging` instead.
*/
abstract class Range extends DataFlow::Node {
/** Gets an input that is logged. */
abstract DataFlow::Node getAnInput();
}
}
/**
* A data-flow node that dynamically executes Python code.
*
@@ -293,6 +355,78 @@ module SqlExecution {
}
}
/**
* A data-flow node that escapes meta-characters, which could be used to prevent
* injection attacks.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `Escaping::Range` instead.
*/
class Escaping extends DataFlow::Node {
Escaping::Range range;
Escaping() {
this = range and
// escapes that don't have _both_ input/output defined are not valid
exists(range.getAnInput()) and
exists(range.getOutput())
}
/** Gets an input that will be escaped. */
DataFlow::Node getAnInput() { result = range.getAnInput() }
/** Gets the output that contains the escaped data. */
DataFlow::Node getOutput() { result = range.getOutput() }
/**
* Gets the context that this function escapes for, such as `html`, or `url`.
*/
string getKind() { result = range.getKind() }
}
/** Provides a class for modeling new escaping APIs. */
module Escaping {
/**
* A data-flow node that escapes meta-characters, which could be used to prevent
* injection attacks.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `Escaping` instead.
*/
abstract class Range extends DataFlow::Node {
/** Gets an input that will be escaped. */
abstract DataFlow::Node getAnInput();
/** Gets the output that contains the escaped data. */
abstract DataFlow::Node getOutput();
/**
* Gets the context that this function escapes for.
*
* While kinds are represented as strings, this should not be relied upon. Use the
* predicates in the `Escaping` module, such as `getHtmlKind`.
*/
abstract string getKind();
}
/** Gets the escape-kind for escaping a string so it can safely be included in HTML. */
string getHtmlKind() { result = "html" }
// TODO: If adding an XML kind, update the modeling of the `MarkupSafe` PyPI package.
//
// Technically it claims to escape for both HTML and XML, but for now we don't have
// anything that relies on XML escaping, so I'm going to defer deciding whether they
// should be the same kind, or whether they deserve to be treated differently.
}
/**
* An escape of a string so it can be safely included in
* the body of an HTML element, for example, replacing `{}` in
* `<p>{}</p>`.
*/
class HtmlEscaping extends Escaping {
HtmlEscaping() { range.getKind() = Escaping::getHtmlKind() }
}
/** Provides classes for modeling HTTP-related APIs. */
module HTTP {
import semmle.python.web.HttpConstants
@@ -522,6 +656,62 @@ module HTTP {
abstract DataFlow::Node getRedirectLocation();
}
}
/**
* A data-flow node that sets a cookie in an HTTP response.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `HTTP::CookieWrite::Range` instead.
*/
class CookieWrite extends DataFlow::Node {
CookieWrite::Range range;
CookieWrite() { this = range }
/**
* Gets the argument, if any, specifying the raw cookie header.
*/
DataFlow::Node getHeaderArg() { result = range.getHeaderArg() }
/**
* Gets the argument, if any, specifying the cookie name.
*/
DataFlow::Node getNameArg() { result = range.getNameArg() }
/**
* Gets the argument, if any, specifying the cookie value.
*/
DataFlow::Node getValueArg() { result = range.getValueArg() }
}
/** Provides a class for modeling new cookie writes on HTTP responses. */
module CookieWrite {
/**
* A data-flow node that sets a cookie in an HTTP response.
*
* Note: we don't require that this cookie must be sent to a client (a kind of
* "if a tree falls in a forest and nobody hears it" situation).
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `CookieWrite` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets the argument, if any, specifying the raw cookie header.
*/
abstract DataFlow::Node getHeaderArg();
/**
* Gets the argument, if any, specifying the cookie name.
*/
abstract DataFlow::Node getNameArg();
/**
* Gets the argument, if any, specifying the cookie value.
*/
abstract DataFlow::Node getValueArg();
}
}
}
}
@@ -568,7 +758,7 @@ module Cryptography {
/** Provides classes for modeling new key-pair generation APIs. */
module KeyGeneration {
/** Gets a back-reference to the keysize argument `arg` that was used to generate a new key-pair. */
private DataFlow::LocalSourceNode keysizeBacktracker(
private DataFlow::TypeTrackingNode keysizeBacktracker(
DataFlow::TypeBackTracker t, DataFlow::Node arg
) {
t.start() and

View File

@@ -16,6 +16,7 @@ private import semmle.python.frameworks.Flask
private import semmle.python.frameworks.Idna
private import semmle.python.frameworks.Invoke
private import semmle.python.frameworks.Jmespath
private import semmle.python.frameworks.MarkupSafe
private import semmle.python.frameworks.Multidict
private import semmle.python.frameworks.Mysql
private import semmle.python.frameworks.MySQLdb

View File

@@ -7,6 +7,7 @@
*/
import python
import semmle.python.RegexTreeView
private newtype TPrintAstConfiguration = MkPrintAstConfiguration()
@@ -53,6 +54,9 @@ private newtype TPrintAstNode =
not list = any(Module mod).getBody() and
not forall(AstNode child | child = list.getAnItem() | isNotNeeded(child)) and
exists(list.getAnItem())
} or
TRegExpTermNode(RegExpTerm term) {
exists(StrConst str | term.getRootTerm() = getParsedRegExp(str) and shouldPrint(str, _))
}
/**
@@ -419,6 +423,42 @@ class ParameterNode extends AstElementNode {
}
}
/**
* A print node for a `StrConst`.
*
* The string has a child, if the child is used as a regular expression,
* which is the root of the regular expression.
*/
class StrConstNode extends AstElementNode {
override StrConst element;
override PrintAstNode getChild(int childIndex) {
childIndex = 0 and result.(RegExpTermNode).getTerm() = getParsedRegExp(element)
}
}
/**
* A print node for a regular expression term.
*/
class RegExpTermNode extends TRegExpTermNode, PrintAstNode {
RegExpTerm term;
RegExpTermNode() { this = TRegExpTermNode(term) }
/** Gets the `RegExpTerm` for this node. */
RegExpTerm getTerm() { result = term }
override PrintAstNode getChild(int childIndex) {
result.(RegExpTermNode).getTerm() = term.getChild(childIndex)
}
override string toString() {
result = "[" + strictconcat(term.getPrimaryQLClass(), " | ") + "] " + term.toString()
}
override Location getLocation() { result = term.getLocation() }
}
/**
* Gets the `i`th child from `node` ordered by location.
*/
@@ -447,7 +487,7 @@ private module PrettyPrinting {
string getQlClass(AstNode a) {
shouldPrint(a, _) and
(
not exists(getQlCustomClass(a)) and result = a.toString()
not exists(getQlCustomClass(a)) and result = strictconcat(a.toString(), " | ")
or
result = strictconcat(getQlCustomClass(a), " | ")
)

View File

@@ -0,0 +1,973 @@
/** Provides a class hierarchy corresponding to a parse tree of regular expressions. */
import python
private import semmle.python.regex
/**
 * An element containing a regular expression term, that is, either
 * a string literal (parsed as a regular expression)
 * or another regular expression term.
 *
 * Each branch (except `TRegExpLiteral`) carries the source `Regex` and the
 * `start`/`end` offsets of the term within the regex text.
 */
newtype TRegExpParent =
  /** A string literal used as a regular expression */
  TRegExpLiteral(Regex re) or
  /** A quantified term */
  TRegExpQuantifier(Regex re, int start, int end) { re.qualifiedItem(start, end, _, _) } or
  /** A sequence term */
  TRegExpSequence(Regex re, int start, int end) { re.sequence(start, end) } or
  /** An alternation term */
  TRegExpAlt(Regex re, int start, int end) { re.alternation(start, end) } or
  /** A character class term */
  TRegExpCharacterClass(Regex re, int start, int end) { re.charSet(start, end) } or
  /** A character range term */
  TRegExpCharacterRange(Regex re, int start, int end) { re.charRange(_, start, _, _, end) } or
  /** A group term */
  TRegExpGroup(Regex re, int start, int end) { re.group(start, end) } or
  /** A special character */
  TRegExpSpecialChar(Regex re, int start, int end) { re.specialCharacter(start, end, _) } or
  /** A normal character */
  TRegExpNormalChar(Regex re, int start, int end) { re.normalCharacter(start, end) } or
  /** A back reference */
  TRegExpBackRef(Regex re, int start, int end) { re.backreference(start, end) }
/**
 * An element containing a regular expression term, that is, either
 * a string literal (parsed as a regular expression)
 * or another regular expression term.
 */
class RegExpParent extends TRegExpParent {
  /** Gets a textual representation of this element. Subclasses refine this. */
  string toString() { result = "RegExpParent" }

  /** Gets the `i`th child term. */
  abstract RegExpTerm getChild(int i);

  /** Gets a child term. */
  RegExpTerm getAChild() { result = getChild(_) }

  /** Gets the number of child terms. */
  int getNumChild() { result = count(getAChild()) }

  /** Gets the associated regex. */
  abstract Regex getRegex();
}
/** A string literal used as a regular expression */
class RegExpLiteral extends TRegExpLiteral, RegExpParent {
  Regex re;

  RegExpLiteral() { this = TRegExpLiteral(re) }

  /** The only child (at index 0) is the root term of the parsed regex. */
  override RegExpTerm getChild(int i) { i = 0 and result.getRegex() = re and result.isRootTerm() }

  /** Holds if the regex was parsed with the DOTALL mode flag set. */
  predicate isDotAll() { re.getAMode() = "DOTALL" }

  override Regex getRegex() { result = re }

  /** Gets the primary QL class for this element. */
  string getPrimaryQLClass() { result = "RegExpLiteral" }
}
/**
 * A regular expression term, that is, a syntactic part of a regular expression.
 */
class RegExpTerm extends RegExpParent {
  Regex re;
  int start;
  int end;

  RegExpTerm() {
    this = TRegExpAlt(re, start, end)
    or
    this = TRegExpBackRef(re, start, end)
    or
    this = TRegExpCharacterClass(re, start, end)
    or
    this = TRegExpCharacterRange(re, start, end)
    or
    this = TRegExpNormalChar(re, start, end)
    or
    this = TRegExpGroup(re, start, end)
    or
    this = TRegExpQuantifier(re, start, end)
    or
    this = TRegExpSequence(re, start, end) and
    exists(seqChild(re, start, end, 1)) // if a sequence does not have more than one element, it should be treated as that element instead.
    or
    this = TRegExpSpecialChar(re, start, end)
  }

  /**
   * Gets the outermost term of this regular expression.
   */
  RegExpTerm getRootTerm() {
    this.isRootTerm() and result = this
    or
    result = getParent().(RegExpTerm).getRootTerm()
  }

  /**
   * Holds if this term is part of a string literal
   * that is interpreted as a regular expression.
   */
  predicate isUsedAsRegExp() { any() }

  /**
   * Holds if this is the root term of a regular expression,
   * i.e. it spans the whole regex text.
   */
  predicate isRootTerm() { start = 0 and end = re.getText().length() }

  // Dispatches to the subclass-specific `getChild` implementations.
  override RegExpTerm getChild(int i) {
    result = this.(RegExpAlt).getChild(i)
    or
    result = this.(RegExpBackRef).getChild(i)
    or
    result = this.(RegExpCharacterClass).getChild(i)
    or
    result = this.(RegExpCharacterRange).getChild(i)
    or
    result = this.(RegExpNormalChar).getChild(i)
    or
    result = this.(RegExpGroup).getChild(i)
    or
    result = this.(RegExpQuantifier).getChild(i)
    or
    result = this.(RegExpSequence).getChild(i)
    or
    result = this.(RegExpSpecialChar).getChild(i)
  }

  /**
   * Gets the parent term of this regular expression term, or the
   * regular expression literal if this is the root term.
   */
  RegExpParent getParent() { result.getAChild() = this }

  override Regex getRegex() { result = re }

  /** Gets the offset at which this term starts. */
  int getStart() { result = start }

  /** Gets the offset at which this term ends (exclusive). */
  int getEnd() { result = end }

  override string toString() { result = re.getText().substring(start, end) }

  /**
   * Gets the location of the surrounding regex, as locations inside the regex do not exist.
   * To get location information corresponding to the term inside the regex,
   * use `hasLocationInfo`.
   */
  Location getLocation() { result = re.getLocation() }

  /** Holds if this term is found at the specified location offsets. */
  predicate hasLocationInfo(
    string filepath, int startline, int startcolumn, int endline, int endcolumn
  ) {
    exists(int re_start, int re_end |
      re.getLocation().hasLocationInfo(filepath, startline, re_start, endline, re_end) and
      // NOTE(review): the +4/+3 constants presumably compensate for the string
      // prefix/quote characters of the literal — confirm against extractor columns.
      startcolumn = re_start + start + 4 and
      endcolumn = re_start + end + 3
    )
  }

  /** Gets the file in which this term is found. */
  File getFile() { result = this.getLocation().getFile() }

  /** Gets the raw source text of this term. */
  string getRawValue() { result = this.toString() }

  /** Gets the string literal in which this term is found. */
  RegExpLiteral getLiteral() { result = TRegExpLiteral(re) }

  /** Gets the regular expression term that is matched (textually) before this one, if any. */
  RegExpTerm getPredecessor() {
    exists(RegExpTerm parent | parent = getParent() |
      result = parent.(RegExpSequence).previousElement(this)
      or
      // Not in a sequence: recurse upwards, but never escape a lookaround.
      not exists(parent.(RegExpSequence).previousElement(this)) and
      not parent instanceof RegExpSubPattern and
      result = parent.getPredecessor()
    )
  }

  /** Gets the regular expression term that is matched (textually) after this one, if any. */
  RegExpTerm getSuccessor() {
    exists(RegExpTerm parent | parent = getParent() |
      result = parent.(RegExpSequence).nextElement(this)
      or
      // Not in a sequence: recurse upwards, but never escape a lookaround.
      not exists(parent.(RegExpSequence).nextElement(this)) and
      not parent instanceof RegExpSubPattern and
      result = parent.getSuccessor()
    )
  }

  /** Gets the primary QL class for this term. */
  string getPrimaryQLClass() { result = "RegExpTerm" }
}
/**
 * A quantified regular expression term.
 *
 * Example:
 *
 * ```
 * ((ECMA|Java)[sS]cript)*
 * ```
 */
class RegExpQuantifier extends RegExpTerm, TRegExpQuantifier {
  // Offset where the quantified part ends and the qualifier (e.g. `*`) begins.
  int part_end;
  boolean maybe_empty;
  boolean may_repeat_forever;

  RegExpQuantifier() {
    this = TRegExpQuantifier(re, start, end) and
    re.qualifiedPart(start, part_end, end, maybe_empty, may_repeat_forever)
  }

  /** The only child (at index 0) is the term being quantified. */
  override RegExpTerm getChild(int i) {
    i = 0 and
    result.getRegex() = re and
    result.getStart() = start and
    result.getEnd() = part_end
  }

  /** Holds if this quantifier has no upper repetition bound. */
  predicate mayRepeatForever() { may_repeat_forever = true }

  /** Gets the qualifier text, e.g. `*`, `+`, `?` or `{2,4}`. */
  string getQualifier() { result = re.getText().substring(part_end, end) }

  override string getPrimaryQLClass() { result = "RegExpQuantifier" }
}
/**
 * A regular expression term that permits unlimited repetitions,
 * such as the terms matched by `RegExpStar` and `RegExpPlus`.
 */
class InfiniteRepetitionQuantifier extends RegExpQuantifier {
  InfiniteRepetitionQuantifier() { this.mayRepeatForever() }
}
/**
 * A star-quantified term.
 *
 * Example:
 *
 * ```
 * \w*
 * ```
 */
class RegExpStar extends InfiniteRepetitionQuantifier {
  // Identified by the first character of the qualifier text.
  RegExpStar() { this.getQualifier().charAt(0) = "*" }

  override string getPrimaryQLClass() { result = "RegExpStar" }
}
/**
 * A plus-quantified term.
 *
 * Example:
 *
 * ```
 * \w+
 * ```
 */
class RegExpPlus extends InfiniteRepetitionQuantifier {
  // Identified by the first character of the qualifier text.
  RegExpPlus() { this.getQualifier().charAt(0) = "+" }

  override string getPrimaryQLClass() { result = "RegExpPlus" }
}
/**
 * An optional term.
 *
 * Example:
 *
 * ```
 * ;?
 * ```
 */
class RegExpOpt extends RegExpQuantifier {
  // Identified by the first character of the qualifier text.
  RegExpOpt() { this.getQualifier().charAt(0) = "?" }

  override string getPrimaryQLClass() { result = "RegExpOpt" }
}
/**
 * A range-quantified term
 *
 * Examples:
 *
 * ```
 * \w{2,4}
 * \w{2,}
 * \w{2}
 * ```
 */
class RegExpRange extends RegExpQuantifier {
  string upper;
  string lower;

  RegExpRange() { re.multiples(part_end, end, lower, upper) }

  /** Gets the textual upper bound of the range, if any. */
  string getUpper() { result = upper }

  /** Gets the textual lower bound of the range. */
  string getLower() { result = lower }

  /**
   * Gets the upper bound of the range, if any.
   *
   * If there is no upper bound, any number of repetitions is allowed.
   * For a term of the form `r{lo}`, both the lower and the upper bound
   * are `lo`.
   */
  int getUpperBound() { result = this.getUpper().toInt() }

  /** Gets the lower bound of the range. */
  int getLowerBound() { result = this.getLower().toInt() }

  override string getPrimaryQLClass() { result = "RegExpRange" }
}
/**
 * A sequence term.
 *
 * Example:
 *
 * ```
 * (ECMA|Java)Script
 * ```
 *
 * This is a sequence with the elements `(ECMA|Java)` and `Script`.
 */
class RegExpSequence extends RegExpTerm, TRegExpSequence {
  RegExpSequence() {
    this = TRegExpSequence(re, start, end) and
    exists(seqChild(re, start, end, 1)) // if a sequence does not have more than one element, it should be treated as that element instead.
  }

  /** The children are the elements of the sequence, in textual order. */
  override RegExpTerm getChild(int i) { result = seqChild(re, start, end, i) }

  /** Gets the element preceding `element` in this sequence. */
  RegExpTerm previousElement(RegExpTerm element) { element = nextElement(result) }

  /** Gets the element following `element` in this sequence. */
  RegExpTerm nextElement(RegExpTerm element) {
    exists(int i |
      element = this.getChild(i) and
      result = this.getChild(i + 1)
    )
  }

  override string getPrimaryQLClass() { result = "RegExpSequence" }
}
/** Gets the end offset of the `i`th child of the sequence spanning `start..end` in `re`. */
pragma[nomagic]
private int seqChildEnd(Regex re, int start, int end, int i) {
  result = seqChild(re, start, end, i).getEnd()
}

// moved out so we can use it in the charpred
/** Gets the `i`th element of the sequence spanning `start..end` in `re`. */
private RegExpTerm seqChild(Regex re, int start, int end, int i) {
  re.sequence(start, end) and
  (
    // Base case: the first item starts where the sequence starts.
    i = 0 and
    result.getRegex() = re and
    result.getStart() = start and
    exists(int itemEnd |
      re.item(start, itemEnd) and
      result.getEnd() = itemEnd
    )
    or
    // Inductive case: each subsequent item starts where the previous one ended.
    i > 0 and
    result.getRegex() = re and
    exists(int itemStart | itemStart = seqChildEnd(re, start, end, i - 1) |
      result.getStart() = itemStart and
      re.item(itemStart, result.getEnd())
    )
  )
}
/**
 * An alternative term, that is, a term of the form `a|b`.
 *
 * Example:
 *
 * ```
 * ECMA|Java
 * ```
 */
class RegExpAlt extends RegExpTerm, TRegExpAlt {
  RegExpAlt() { this = TRegExpAlt(re, start, end) }

  /** The children are the alternatives, in textual order. */
  override RegExpTerm getChild(int i) {
    // Base case: the first option starts where the alternation starts.
    i = 0 and
    result.getRegex() = re and
    result.getStart() = start and
    exists(int part_end |
      re.alternationOption(start, end, start, part_end) and
      result.getEnd() = part_end
    )
    or
    // Inductive case: each option starts right after the previous one.
    i > 0 and
    result.getRegex() = re and
    exists(int part_start |
      part_start = this.getChild(i - 1).getEnd() + 1 // allow for the |
    |
      result.getStart() = part_start and
      re.alternationOption(start, end, part_start, result.getEnd())
    )
  }

  override string getPrimaryQLClass() { result = "RegExpAlt" }
}
/**
 * An escaped regular expression term, that is, a regular expression
 * term starting with a backslash, which is not a backreference.
 *
 * Example:
 *
 * ```
 * \.
 * \w
 * ```
 */
class RegExpEscape extends RegExpNormalChar {
  RegExpEscape() { re.escapedCharacter(start, end) }

  /**
   * Gets the string matched by this escape; for example, `w` for `\w`
   * and a newline character for `\n`.
   * TODO: Handle named escapes.
   */
  override string getValue() {
    this.isIdentityEscape() and result = this.getUnescaped()
    or
    this.getUnescaped() = "n" and result = "\n"
    or
    this.getUnescaped() = "r" and result = "\r"
    or
    this.getUnescaped() = "t" and result = "\t"
    or
    // QL has no `\f` string literal, so construct the formfeed character
    // from its code point (12), like `getUnicode` does below.
    this.getUnescaped() = "f" and result = 12.toUnicode()
    or
    isUnicode() and
    result = getUnicode()
  }

  /**
   * Holds if this escape denotes the escaped character itself, e.g. `\.`.
   * Whitespace escapes (`\n`, `\r`, `\t`, `\f`) and unicode escapes
   * (`\uXXXX`, `\UXXXXXXXX`) are translated instead. Excluding unicode
   * escapes here keeps `getValue()` single-valued for them.
   */
  predicate isIdentityEscape() {
    not this.getUnescaped() in ["n", "r", "t", "f"] and
    not this.isUnicode()
  }

  override string getPrimaryQLClass() { result = "RegExpEscape" }

  /** Gets the escape with the leading backslash removed, e.g. `w` for `\w`. */
  string getUnescaped() { result = this.getText().suffix(1) }

  /**
   * Gets the text for this escape. That is e.g. "\w".
   */
  private string getText() { result = re.getText().substring(start, end) }

  /**
   * Holds if this is a unicode escape.
   */
  private predicate isUnicode() { getText().prefix(2) = ["\\u", "\\U"] }

  /**
   * Gets the unicode char for this escape.
   * E.g. for `\u0061` this returns "a".
   */
  private string getUnicode() {
    exists(int codepoint | codepoint = sum(this.getHexValueFromUnicode(_)) |
      result = codepoint.toUnicode()
    )
  }

  /**
   * Gets int value for the `index`th char in the hex number of the unicode escape.
   * E.g. for `\u0061` and `index = 2` this returns 96 (the number `6` interpreted as hex).
   */
  private int getHexValueFromUnicode(int index) {
    this.isUnicode() and
    exists(string hex, string char | hex = this.getText().suffix(2) |
      char = hex.charAt(index) and
      result = 16.pow(hex.length() - index - 1) * toHex(char)
    )
  }
}
/**
 * Gets the numeric value of the single hexadecimal digit `hexChar`,
 * accepting both lowercase and uppercase letters.
 */
private int toHex(string hexChar) {
  // Table-driven: the value of a hex digit is its index in the digit alphabet.
  exists(int i | i = [0 .. 15] |
    result = i and
    hexChar = ["0123456789abcdef".charAt(i), "0123456789ABCDEF".charAt(i)]
  )
}
/**
 * A character class escape in a regular expression.
 * That is, an escaped character that denotes multiple characters.
 *
 * Examples:
 *
 * ```
 * \w
 * \S
 * ```
 */
class RegExpCharacterClassEscape extends RegExpEscape {
  // string value;
  RegExpCharacterClassEscape() {
    // value = re.getText().substring(start + 1, end) and
    // value in ["d", "D", "s", "S", "w", "W"]
    // `getValue()` (from RegExpEscape) is the escape letter, e.g. `w` for `\w`.
    this.getValue() in ["d", "D", "s", "S", "w", "W"]
  }

  /** Gets the name of the character class; for example, `w` for `\w`. */
  // override string getValue() { result = value }
  /** Character class escapes have no child terms. */
  override RegExpTerm getChild(int i) { none() }

  override string getPrimaryQLClass() { result = "RegExpCharacterClassEscape" }
}
/**
 * A character class in a regular expression.
 *
 * Examples:
 *
 * ```
 * [a-z_]
 * [^<>&]
 * ```
 */
class RegExpCharacterClass extends RegExpTerm, TRegExpCharacterClass {
  RegExpCharacterClass() { this = TRegExpCharacterClass(re, start, end) }

  /** Holds if this character class is inverted, i.e. starts with `[^`. */
  predicate isInverted() { re.getChar(start + 1) = "^" }

  /** Gets the character at (class-relative) offset `i`. */
  string getCharThing(int i) { result = re.getChar(i + start) }

  /** Holds if this character class matches every character. */
  predicate isUniversalClass() {
    // [^]
    isInverted() and not exists(getAChild())
    or
    // [\w\W] and similar
    not isInverted() and
    exists(string cce1, string cce2 |
      cce1 = getAChild().(RegExpCharacterClassEscape).getValue() and
      cce2 = getAChild().(RegExpCharacterClassEscape).getValue()
    |
      cce1 != cce2 and cce1.toLowerCase() = cce2.toLowerCase()
    )
  }

  /** The children are the members of the class, in textual order. */
  override RegExpTerm getChild(int i) {
    // Base case: the first member starts right after the `[` (and any `^`).
    i = 0 and
    result.getRegex() = re and
    exists(int itemStart, int itemEnd |
      result.getStart() = itemStart and
      re.char_set_start(start, itemStart) and
      re.char_set_child(start, itemStart, itemEnd) and
      result.getEnd() = itemEnd
    )
    or
    // Inductive case: each member starts where the previous one ended.
    i > 0 and
    result.getRegex() = re and
    exists(int itemStart | itemStart = this.getChild(i - 1).getEnd() |
      result.getStart() = itemStart and
      re.char_set_child(start, itemStart, result.getEnd())
    )
  }

  override string getPrimaryQLClass() { result = "RegExpCharacterClass" }
}
/**
 * A character range in a character class in a regular expression.
 *
 * Example:
 *
 * ```
 * a-z
 * ```
 */
class RegExpCharacterRange extends RegExpTerm, TRegExpCharacterRange {
  // Offsets delimiting the lower and upper endpoint of the range.
  int lower_end;
  int upper_start;

  RegExpCharacterRange() {
    this = TRegExpCharacterRange(re, start, end) and
    re.charRange(_, start, lower_end, upper_start, end)
  }

  /** Holds if `lo` and `hi` are the textual endpoints of this range. */
  predicate isRange(string lo, string hi) {
    lo = re.getText().substring(start, lower_end) and
    hi = re.getText().substring(upper_start, end)
  }

  /** The two children are the lower (index 0) and upper (index 1) endpoint. */
  override RegExpTerm getChild(int i) {
    i = 0 and
    result.getRegex() = re and
    result.getStart() = start and
    result.getEnd() = lower_end
    or
    i = 1 and
    result.getRegex() = re and
    result.getStart() = upper_start and
    result.getEnd() = end
  }

  override string getPrimaryQLClass() { result = "RegExpCharacterRange" }
}
/**
 * A normal character in a regular expression, that is, a character
 * without special meaning. This includes escaped characters.
 *
 * Examples:
 * ```
 * t
 * \t
 * ```
 */
class RegExpNormalChar extends RegExpTerm, TRegExpNormalChar {
  RegExpNormalChar() { this = TRegExpNormalChar(re, start, end) }

  /** Holds if this term matches a (single) character. */
  predicate isCharacter() { any() }

  /** Gets the source text of this character (overridden for escapes). */
  string getValue() { result = re.getText().substring(start, end) }

  /** Single characters have no child terms. */
  override RegExpTerm getChild(int i) { none() }

  override string getPrimaryQLClass() { result = "RegExpNormalChar" }
}
/**
 * A constant regular expression term, that is, a regular expression
 * term matching a single string. Currently, this will always be a single character.
 *
 * Example:
 *
 * ```
 * a
 * ```
 */
class RegExpConstant extends RegExpTerm {
  string value;

  RegExpConstant() {
    this = TRegExpNormalChar(re, start, end) and
    not this instanceof RegExpCharacterClassEscape and
    // exclude chars in qualifiers
    // TODO: push this into regex library
    not exists(int qstart, int qend | re.qualifiedPart(_, qstart, qend, _, _) |
      qstart <= start and end <= qend
    ) and
    value = this.(RegExpNormalChar).getValue()
    // This will never hold
    // or
    // this = TRegExpSpecialChar(re, start, end) and
    // re.inCharSet(start) and
    // value = this.(RegExpSpecialChar).getChar()
  }

  /** Holds if this term matches a (single) character. */
  predicate isCharacter() { any() }

  /** Gets the string matched by this constant term. */
  string getValue() { result = value }

  /** Constant terms have no child terms. */
  override RegExpTerm getChild(int i) { none() }

  override string getPrimaryQLClass() { result = "RegExpConstant" }
}
/**
 * A grouped regular expression.
 *
 * Examples:
 *
 * ```
 * (ECMA|Java)
 * (?:ECMA|Java)
 * (?<quote>['"])
 * ```
 */
class RegExpGroup extends RegExpTerm, TRegExpGroup {
  RegExpGroup() { this = TRegExpGroup(re, start, end) }

  /**
   * Gets the index of this capture group within the enclosing regular
   * expression literal.
   *
   * For example, in the regular expression `/((a?).)(?:b)/`, the
   * group `((a?).)` has index 1, the group `(a?)` nested inside it
   * has index 2, and the group `(?:b)` has no index, since it is
   * not a capture group.
   */
  int getNumber() { result = re.getGroupNumber(start, end) }

  /** Holds if this is a named capture group. */
  predicate isNamed() { exists(this.getName()) }

  /** Gets the name of this capture group, if any. */
  string getName() { result = re.getGroupName(start, end) }

  /** Holds if this term matches a (single) character. */
  predicate isCharacter() { any() }

  /** Gets the source text of this group, including the parentheses. */
  string getValue() { result = re.getText().substring(start, end) }

  /** The only child (at index 0) is the contents of the group. */
  override RegExpTerm getChild(int i) {
    result.getRegex() = re and
    i = 0 and
    re.groupContents(start, end, result.getStart(), result.getEnd())
  }

  override string getPrimaryQLClass() { result = "RegExpGroup" }
}
/**
* A special character in a regular expression.
*
* Examples:
* ```
* ^
* $
* .
* ```
*/
class RegExpSpecialChar extends RegExpTerm, TRegExpSpecialChar {
  // The source text of the special character, e.g. "^", "$", or ".".
  string char;

  RegExpSpecialChar() {
    this = TRegExpSpecialChar(re, start, end) and
    re.specialCharacter(start, end, char)
  }

  /** Holds if this term matches a single character. */
  predicate isCharacter() { any() }

  /** Gets the source text of this special character. */
  string getChar() { result = char }

  /** Gets the `i`th child term. A special character has no children. */
  override RegExpTerm getChild(int i) { none() }

  override string getPrimaryQLClass() { result = "RegExpSpecialChar" }
}
/**
* A dot regular expression.
*
* Example:
*
* ```
* .
* ```
*/
class RegExpDot extends RegExpSpecialChar {
  // The special character "." is the wildcard term.
  RegExpDot() { this.getChar() = "." }

  override string getPrimaryQLClass() { result = "RegExpDot" }
}
/**
* A dollar assertion `$` matching the end of a line.
*
* Example:
*
* ```
* $
* ```
*/
class RegExpDollar extends RegExpSpecialChar {
  // The special character "$" is the end-of-line assertion.
  RegExpDollar() { this.getChar() = "$" }

  override string getPrimaryQLClass() { result = "RegExpDollar" }
}
/**
* A caret assertion `^` matching the beginning of a line.
*
* Example:
*
* ```
* ^
* ```
*/
class RegExpCaret extends RegExpSpecialChar {
  // The special character "^" is the beginning-of-line assertion.
  RegExpCaret() { this.getChar() = "^" }

  override string getPrimaryQLClass() { result = "RegExpCaret" }
}
/**
* A zero-width match, that is, either an empty group or an assertion.
*
* Examples:
* ```
* ()
* (?=\w)
* ```
*/
class RegExpZeroWidthMatch extends RegExpGroup {
  RegExpZeroWidthMatch() { re.zeroWidthMatch(start, end) }

  /** Holds if this term matches a single character. */
  override predicate isCharacter() { any() }

  /** Gets the `i`th child term. A zero-width match has no children. */
  override RegExpTerm getChild(int i) { none() }

  override string getPrimaryQLClass() { result = "RegExpZeroWidthMatch" }
}
/**
* A zero-width lookahead or lookbehind assertion.
*
* Examples:
*
* ```
* (?=\w)
* (?!\n)
* (?<=\.)
* (?<!\\)
* ```
*/
class RegExpSubPattern extends RegExpZeroWidthMatch {
  // Lookarounds are the zero-width matches that are not empty groups.
  RegExpSubPattern() { not re.emptyGroup(start, end) }

  /** Gets the term that this assertion applies to. */
  RegExpTerm getOperand() {
    result.getRegex() = re and
    re.groupContents(start, end, result.getStart(), result.getEnd())
  }
}
/**
 * A zero-width lookahead assertion.
 *
 * Use `getOperand()` to get the term the assertion applies to.
 *
 * Examples:
 *
 * ```
 * (?=\w)
 * (?!\n)
 * ```
 */
abstract class RegExpLookahead extends RegExpSubPattern { }
/**
* A positive-lookahead assertion.
*
* Examples:
*
* ```
* (?=\w)
* ```
*/
class RegExpPositiveLookahead extends RegExpLookahead {
  RegExpPositiveLookahead() { re.positiveLookaheadAssertionGroup(start, end) }

  override string getPrimaryQLClass() { result = "RegExpPositiveLookahead" }
}
/**
* A negative-lookahead assertion.
*
* Examples:
*
* ```
* (?!\n)
* ```
*/
class RegExpNegativeLookahead extends RegExpLookahead {
  RegExpNegativeLookahead() { re.negativeLookaheadAssertionGroup(start, end) }

  override string getPrimaryQLClass() { result = "RegExpNegativeLookahead" }
}
/**
 * A zero-width lookbehind assertion.
 *
 * Use `getOperand()` to get the term the assertion applies to.
 *
 * Examples:
 *
 * ```
 * (?<=\.)
 * (?<!\\)
 * ```
 */
abstract class RegExpLookbehind extends RegExpSubPattern { }
/**
* A positive-lookbehind assertion.
*
* Examples:
*
* ```
* (?<=\.)
* ```
*/
class RegExpPositiveLookbehind extends RegExpLookbehind {
  RegExpPositiveLookbehind() { re.positiveLookbehindAssertionGroup(start, end) }

  override string getPrimaryQLClass() { result = "RegExpPositiveLookbehind" }
}
/**
* A negative-lookbehind assertion.
*
* Examples:
*
* ```
* (?<!\\)
* ```
*/
class RegExpNegativeLookbehind extends RegExpLookbehind {
  RegExpNegativeLookbehind() { re.negativeLookbehindAssertionGroup(start, end) }

  override string getPrimaryQLClass() { result = "RegExpNegativeLookbehind" }
}
/**
* A back reference, that is, a term of the form `\i` or `\k<name>`
* in a regular expression.
*
* Examples:
*
* ```
* \1
* (?P=quote)
* ```
*/
class RegExpBackRef extends RegExpTerm, TRegExpBackRef {
  RegExpBackRef() { this = TRegExpBackRef(re, start, end) }

  /**
   * Gets the number of the capture group this back reference refers to, if any.
   */
  int getNumber() { result = re.getBackrefNumber(start, end) }

  /**
   * Gets the name of the capture group this back reference refers to, if any.
   */
  string getName() { result = re.getBackrefName(start, end) }

  /** Gets the capture group this back reference refers to. */
  RegExpGroup getGroup() {
    // Match by group number (numeric back references) or by group name
    // (named back references) within the same literal.
    result.getLiteral() = this.getLiteral() and
    (
      result.getNumber() = this.getNumber() or
      result.getName() = this.getName()
    )
  }

  /** Gets the `i`th child term. A back reference has no children. */
  override RegExpTerm getChild(int i) { none() }

  override string getPrimaryQLClass() { result = "RegExpBackRef" }
}
/**
 * Gets the parse tree resulting from parsing `re`, if such has been constructed.
 * The result is the root term of the parse tree for `re`.
 */
RegExpTerm getParsedRegExp(StrConst re) { result.getRegex() = re and result.isRootTerm() }

View File

@@ -55,12 +55,12 @@ private module SensitiveDataModeling {
* Gets a reference to a function that is considered to be a sensitive source of
* `classification`.
*/
private DataFlow::LocalSourceNode sensitiveFunction(
private DataFlow::TypeTrackingNode sensitiveFunction(
DataFlow::TypeTracker t, SensitiveDataClassification classification
) {
t.start() and
exists(Function f |
nameIndicatesSensitiveData(f.getName(), classification) and
f.getName() = sensitiveString(classification) and
result.asExpr() = f.getDefinition()
)
or
@@ -76,28 +76,16 @@ private module SensitiveDataModeling {
}
/**
* Gets a reference to a string constant that, if used as the key in a lookup,
* indicates the presence of sensitive data with `classification`.
*/
private DataFlow::LocalSourceNode sensitiveLookupStringConst(
DataFlow::TypeTracker t, SensitiveDataClassification classification
) {
t.start() and
nameIndicatesSensitiveData(result.asExpr().(StrConst).getText(), classification)
or
exists(DataFlow::TypeTracker t2 |
result = sensitiveLookupStringConst(t2, classification).track(t2, t)
)
}
/**
* Gets a reference to a string constant that, if used as the key in a lookup,
* indicates the presence of sensitive data with `classification`.
*
* Also see `extraStepForCalls`.
* Gets a reference (in local scope) to a string constant that, if used as the key in
* a lookup, indicates the presence of sensitive data with `classification`.
*/
DataFlow::Node sensitiveLookupStringConst(SensitiveDataClassification classification) {
sensitiveLookupStringConst(DataFlow::TypeTracker::end(), classification).flowsTo(result)
// Note: If this is implemented with type-tracking, we will get cross-talk as
// illustrated in python/ql/test/experimental/dataflow/sensitive-data/test.py
exists(DataFlow::LocalSourceNode source |
source.asExpr().(StrConst).getText() = sensitiveString(classification) and
source.flowsTo(result)
)
}
/** A function call that is considered a source of sensitive data. */
@@ -109,7 +97,7 @@ private module SensitiveDataModeling {
or
// to cover functions that we don't have the definition for, and where the
// reference to the function has not already been marked as being sensitive
nameIndicatesSensitiveData(this.getFunction().asCfgNode().(NameNode).getId(), classification)
this.getFunction().asCfgNode().(NameNode).getId() = sensitiveString(classification)
}
override SensitiveDataClassification getClassification() { result = classification }
@@ -118,8 +106,10 @@ private module SensitiveDataModeling {
/**
* Tracks any modeled source of sensitive data (with any classification),
* to limit the scope of `extraStepForCalls`. See it's QLDoc for more context.
*
* Also see `extraStepForCalls`.
*/
private DataFlow::LocalSourceNode possibleSensitiveCallable(DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode possibleSensitiveCallable(DataFlow::TypeTracker t) {
t.start() and
result instanceof SensitiveDataSource
or
@@ -129,6 +119,8 @@ private module SensitiveDataModeling {
/**
* Tracks any modeled source of sensitive data (with any classification),
* to limit the scope of `extraStepForCalls`. See it's QLDoc for more context.
*
* Also see `extraStepForCalls`.
*/
private DataFlow::Node possibleSensitiveCallable() {
possibleSensitiveCallable(DataFlow::TypeTracker::end()).flowsTo(result)
@@ -172,6 +164,68 @@ private module SensitiveDataModeling {
nodeFrom = possibleSensitiveCallable()
}
pragma[nomagic]
private string sensitiveStrConstCandidate() {
result = any(StrConst s | not s.isDocString()).getText() and
not result.regexpMatch(notSensitiveRegexp())
}
pragma[nomagic]
private string sensitiveAttributeNameCandidate() {
result = any(DataFlow::AttrRead a).getAttributeName() and
not result.regexpMatch(notSensitiveRegexp())
}
pragma[nomagic]
private string sensitiveParameterNameCandidate() {
result = any(Parameter p).getName() and
not result.regexpMatch(notSensitiveRegexp())
}
pragma[nomagic]
private string sensitiveFunctionNameCandidate() {
result = any(Function f).getName() and
not result.regexpMatch(notSensitiveRegexp())
}
pragma[nomagic]
private string sensitiveNameCandidate() {
result = any(Name n).getId() and
not result.regexpMatch(notSensitiveRegexp())
}
/**
* This helper predicate serves to deduplicate the results of the preceding predicates. This
* means that if, say, an attribute and a function parameter have the same name, then that name will
* only be matched once, which greatly cuts down on the number of regexp matches that have to be
* performed.
*
* Under normal circumstances, deduplication is only performed when a predicate is materialized, and
* so to see the effect of this we must create a separate predicate that calculates the union of the
* preceding predicates.
*/
pragma[nomagic]
private string sensitiveStringCandidate() {
result in [
sensitiveNameCandidate(), sensitiveAttributeNameCandidate(),
sensitiveParameterNameCandidate(), sensitiveFunctionNameCandidate(),
sensitiveStrConstCandidate()
]
}
/**
* Returns strings (primarily the names of various program entities) that may contain sensitive data
* with the classification `classification`.
*
* This helper predicate ends up being very similar to `nameIndicatesSensitiveData`,
* but is performance optimized to limit the number of regexp matches that have to be performed.
*/
pragma[nomagic]
private string sensitiveString(SensitiveDataClassification classification) {
result = sensitiveStringCandidate() and
result.regexpMatch(maybeSensitiveRegexp(classification))
}
/**
* Any kind of variable assignment (also including with/for) where the name indicates
* it contains sensitive data.
@@ -190,7 +244,7 @@ private module SensitiveDataModeling {
SensitiveVariableAssignment() {
exists(DefinitionNode def |
nameIndicatesSensitiveData(def.(NameNode).getId(), classification) and
def.(NameNode).getId() = sensitiveString(classification) and
(
this.asCfgNode() = def.getValue()
or
@@ -201,7 +255,7 @@ private module SensitiveDataModeling {
)
or
exists(With with |
nameIndicatesSensitiveData(with.getOptionalVars().(Name).getId(), classification) and
with.getOptionalVars().(Name).getId() = sensitiveString(classification) and
this.asExpr() = with.getContextExpr()
)
}
@@ -217,7 +271,7 @@ private module SensitiveDataModeling {
// Things like `foo.<sensitive-name>` or `from <module> import <sensitive-name>`
// I considered excluding any `from ... import something_sensitive`, but then realized that
// we should flag up `form ... import password as ...` as a password
nameIndicatesSensitiveData(this.(DataFlow::AttrRead).getAttributeName(), classification)
this.(DataFlow::AttrRead).getAttributeName() = sensitiveString(classification)
or
// Things like `getattr(foo, <reference-to-string>)`
this.(DataFlow::AttrRead).getAttributeNameExpr() = sensitiveLookupStringConst(classification)
@@ -254,9 +308,7 @@ private module SensitiveDataModeling {
class SensitiveParameter extends SensitiveDataSource::Range, DataFlow::ParameterNode {
SensitiveDataClassification classification;
SensitiveParameter() {
nameIndicatesSensitiveData(this.getParameter().getName(), classification)
}
SensitiveParameter() { this.getParameter().getName() = sensitiveString(classification) }
override SensitiveDataClassification getClassification() { result = classification }
}

View File

@@ -23,7 +23,7 @@ class OptionalAttributeName = Internal::OptionalContentName;
* It is recommended that all uses of this type are written in the following form,
* for tracking some type `myType`:
* ```ql
* DataFlow::LocalSourceNode myType(DataFlow::TypeTracker t) {
* DataFlow::TypeTrackingNode myType(DataFlow::TypeTracker t) {
* t.start() and
* result = < source of myType >
* or

View File

@@ -17,6 +17,13 @@ abstract class AttrRef extends Node {
*/
abstract Node getObject();
/**
* Holds if this data flow node accesses attribute named `attrName` on object `object`.
*/
predicate accesses(Node object, string attrName) {
this.getObject() = object and this.getAttributeName() = attrName
}
/**
* Gets the expression node that defines the attribute being accessed, if any. This is
* usually an identifier or literal.

View File

@@ -943,13 +943,8 @@ private module Stage2 {
bindingset[call, c, outercc]
private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { any() }
bindingset[call, c]
private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call) { any() }
bindingset[innercc, inner, call]
private predicate checkCallContextReturn(Cc innercc, DataFlowCallable inner, DataFlowCall call) {
any()
}
bindingset[call, c, innercc]
private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) { any() }
bindingset[node, cc, config]
private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) { any() }
@@ -1122,8 +1117,7 @@ private module Stage2 {
fwdFlow(ret, innercc, argAp, ap, config) and
flowOutOfCall(call, ret, out, allowsFieldFlow, config) and
inner = ret.getEnclosingCallable() and
checkCallContextReturn(innercc, inner, call) and
ccOut = getCallContextReturn(inner, call)
ccOut = getCallContextReturn(inner, call, innercc)
|
ap instanceof ApNil or allowsFieldFlow = true
)
@@ -1393,9 +1387,9 @@ private module Stage2 {
exists(RetNodeEx ret, Ap ap0, ReturnKindExt kind, int pos |
parameterFlow(p, ap, ap0, c, config) and
c = ret.getEnclosingCallable() and
revFlow(ret, true, apSome(_), ap0, config) and
fwdFlow(ret, any(CcCall ccc), apSome(ap), pragma[only_bind_into](ap0),
revFlow(pragma[only_bind_into](ret), true, apSome(_), pragma[only_bind_into](ap0),
pragma[only_bind_into](config)) and
fwdFlow(ret, any(CcCall ccc), apSome(ap), ap0, config) and
kind = ret.getKind() and
p.getPosition() = pos and
// we don't expect a parameter to return stored in itself
@@ -1615,13 +1609,8 @@ private module Stage3 {
bindingset[call, c, outercc]
private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { any() }
bindingset[call, c]
private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call) { any() }
bindingset[innercc, inner, call]
private predicate checkCallContextReturn(Cc innercc, DataFlowCallable inner, DataFlowCall call) {
any()
}
bindingset[call, c, innercc]
private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) { any() }
bindingset[node, cc, config]
private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) { any() }
@@ -1816,8 +1805,7 @@ private module Stage3 {
fwdFlow(ret, innercc, argAp, ap, config) and
flowOutOfCall(call, ret, out, allowsFieldFlow, config) and
inner = ret.getEnclosingCallable() and
checkCallContextReturn(innercc, inner, call) and
ccOut = getCallContextReturn(inner, call)
ccOut = getCallContextReturn(inner, call, innercc)
|
ap instanceof ApNil or allowsFieldFlow = true
)
@@ -2087,9 +2075,9 @@ private module Stage3 {
exists(RetNodeEx ret, Ap ap0, ReturnKindExt kind, int pos |
parameterFlow(p, ap, ap0, c, config) and
c = ret.getEnclosingCallable() and
revFlow(ret, true, apSome(_), ap0, config) and
fwdFlow(ret, any(CcCall ccc), apSome(ap), pragma[only_bind_into](ap0),
revFlow(pragma[only_bind_into](ret), true, apSome(_), pragma[only_bind_into](ap0),
pragma[only_bind_into](config)) and
fwdFlow(ret, any(CcCall ccc), apSome(ap), ap0, config) and
kind = ret.getKind() and
p.getPosition() = pos and
// we don't expect a parameter to return stored in itself
@@ -2364,20 +2352,16 @@ private module Stage4 {
bindingset[call, c, outercc]
private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) {
c = resolveCall(call, outercc) and
checkCallContextCall(outercc, call, c) and
if recordDataFlowCallSite(call, c) then result = TSpecificCall(call) else result = TSomeCall()
}
bindingset[call, c]
private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call) {
bindingset[call, c, innercc]
private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) {
checkCallContextReturn(innercc, c, call) and
if reducedViableImplInReturn(c, call) then result = TReturn(c, call) else result = ccNone()
}
bindingset[innercc, inner, call]
private predicate checkCallContextReturn(Cc innercc, DataFlowCallable inner, DataFlowCall call) {
resolveReturn(innercc, inner, call)
}
bindingset[node, cc, config]
private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) {
localFlowEntry(node, config) and
@@ -2579,8 +2563,7 @@ private module Stage4 {
fwdFlow(ret, innercc, argAp, ap, config) and
flowOutOfCall(call, ret, out, allowsFieldFlow, config) and
inner = ret.getEnclosingCallable() and
checkCallContextReturn(innercc, inner, call) and
ccOut = getCallContextReturn(inner, call)
ccOut = getCallContextReturn(inner, call, innercc)
|
ap instanceof ApNil or allowsFieldFlow = true
)
@@ -2850,9 +2833,9 @@ private module Stage4 {
exists(RetNodeEx ret, Ap ap0, ReturnKindExt kind, int pos |
parameterFlow(p, ap, ap0, c, config) and
c = ret.getEnclosingCallable() and
revFlow(ret, true, apSome(_), ap0, config) and
fwdFlow(ret, any(CcCall ccc), apSome(ap), pragma[only_bind_into](ap0),
revFlow(pragma[only_bind_into](ret), true, apSome(_), pragma[only_bind_into](ap0),
pragma[only_bind_into](config)) and
fwdFlow(ret, any(CcCall ccc), apSome(ap), ap0, config) and
kind = ret.getKind() and
p.getPosition() = pos and
// we don't expect a parameter to return stored in itself

View File

@@ -943,13 +943,8 @@ private module Stage2 {
bindingset[call, c, outercc]
private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { any() }
bindingset[call, c]
private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call) { any() }
bindingset[innercc, inner, call]
private predicate checkCallContextReturn(Cc innercc, DataFlowCallable inner, DataFlowCall call) {
any()
}
bindingset[call, c, innercc]
private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) { any() }
bindingset[node, cc, config]
private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) { any() }
@@ -1122,8 +1117,7 @@ private module Stage2 {
fwdFlow(ret, innercc, argAp, ap, config) and
flowOutOfCall(call, ret, out, allowsFieldFlow, config) and
inner = ret.getEnclosingCallable() and
checkCallContextReturn(innercc, inner, call) and
ccOut = getCallContextReturn(inner, call)
ccOut = getCallContextReturn(inner, call, innercc)
|
ap instanceof ApNil or allowsFieldFlow = true
)
@@ -1393,9 +1387,9 @@ private module Stage2 {
exists(RetNodeEx ret, Ap ap0, ReturnKindExt kind, int pos |
parameterFlow(p, ap, ap0, c, config) and
c = ret.getEnclosingCallable() and
revFlow(ret, true, apSome(_), ap0, config) and
fwdFlow(ret, any(CcCall ccc), apSome(ap), pragma[only_bind_into](ap0),
revFlow(pragma[only_bind_into](ret), true, apSome(_), pragma[only_bind_into](ap0),
pragma[only_bind_into](config)) and
fwdFlow(ret, any(CcCall ccc), apSome(ap), ap0, config) and
kind = ret.getKind() and
p.getPosition() = pos and
// we don't expect a parameter to return stored in itself
@@ -1615,13 +1609,8 @@ private module Stage3 {
bindingset[call, c, outercc]
private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { any() }
bindingset[call, c]
private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call) { any() }
bindingset[innercc, inner, call]
private predicate checkCallContextReturn(Cc innercc, DataFlowCallable inner, DataFlowCall call) {
any()
}
bindingset[call, c, innercc]
private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) { any() }
bindingset[node, cc, config]
private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) { any() }
@@ -1816,8 +1805,7 @@ private module Stage3 {
fwdFlow(ret, innercc, argAp, ap, config) and
flowOutOfCall(call, ret, out, allowsFieldFlow, config) and
inner = ret.getEnclosingCallable() and
checkCallContextReturn(innercc, inner, call) and
ccOut = getCallContextReturn(inner, call)
ccOut = getCallContextReturn(inner, call, innercc)
|
ap instanceof ApNil or allowsFieldFlow = true
)
@@ -2087,9 +2075,9 @@ private module Stage3 {
exists(RetNodeEx ret, Ap ap0, ReturnKindExt kind, int pos |
parameterFlow(p, ap, ap0, c, config) and
c = ret.getEnclosingCallable() and
revFlow(ret, true, apSome(_), ap0, config) and
fwdFlow(ret, any(CcCall ccc), apSome(ap), pragma[only_bind_into](ap0),
revFlow(pragma[only_bind_into](ret), true, apSome(_), pragma[only_bind_into](ap0),
pragma[only_bind_into](config)) and
fwdFlow(ret, any(CcCall ccc), apSome(ap), ap0, config) and
kind = ret.getKind() and
p.getPosition() = pos and
// we don't expect a parameter to return stored in itself
@@ -2364,20 +2352,16 @@ private module Stage4 {
bindingset[call, c, outercc]
private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) {
c = resolveCall(call, outercc) and
checkCallContextCall(outercc, call, c) and
if recordDataFlowCallSite(call, c) then result = TSpecificCall(call) else result = TSomeCall()
}
bindingset[call, c]
private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call) {
bindingset[call, c, innercc]
private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) {
checkCallContextReturn(innercc, c, call) and
if reducedViableImplInReturn(c, call) then result = TReturn(c, call) else result = ccNone()
}
bindingset[innercc, inner, call]
private predicate checkCallContextReturn(Cc innercc, DataFlowCallable inner, DataFlowCall call) {
resolveReturn(innercc, inner, call)
}
bindingset[node, cc, config]
private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) {
localFlowEntry(node, config) and
@@ -2579,8 +2563,7 @@ private module Stage4 {
fwdFlow(ret, innercc, argAp, ap, config) and
flowOutOfCall(call, ret, out, allowsFieldFlow, config) and
inner = ret.getEnclosingCallable() and
checkCallContextReturn(innercc, inner, call) and
ccOut = getCallContextReturn(inner, call)
ccOut = getCallContextReturn(inner, call, innercc)
|
ap instanceof ApNil or allowsFieldFlow = true
)
@@ -2850,9 +2833,9 @@ private module Stage4 {
exists(RetNodeEx ret, Ap ap0, ReturnKindExt kind, int pos |
parameterFlow(p, ap, ap0, c, config) and
c = ret.getEnclosingCallable() and
revFlow(ret, true, apSome(_), ap0, config) and
fwdFlow(ret, any(CcCall ccc), apSome(ap), pragma[only_bind_into](ap0),
revFlow(pragma[only_bind_into](ret), true, apSome(_), pragma[only_bind_into](ap0),
pragma[only_bind_into](config)) and
fwdFlow(ret, any(CcCall ccc), apSome(ap), ap0, config) and
kind = ret.getKind() and
p.getPosition() = pos and
// we don't expect a parameter to return stored in itself

View File

@@ -943,13 +943,8 @@ private module Stage2 {
bindingset[call, c, outercc]
private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { any() }
bindingset[call, c]
private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call) { any() }
bindingset[innercc, inner, call]
private predicate checkCallContextReturn(Cc innercc, DataFlowCallable inner, DataFlowCall call) {
any()
}
bindingset[call, c, innercc]
private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) { any() }
bindingset[node, cc, config]
private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) { any() }
@@ -1122,8 +1117,7 @@ private module Stage2 {
fwdFlow(ret, innercc, argAp, ap, config) and
flowOutOfCall(call, ret, out, allowsFieldFlow, config) and
inner = ret.getEnclosingCallable() and
checkCallContextReturn(innercc, inner, call) and
ccOut = getCallContextReturn(inner, call)
ccOut = getCallContextReturn(inner, call, innercc)
|
ap instanceof ApNil or allowsFieldFlow = true
)
@@ -1393,9 +1387,9 @@ private module Stage2 {
exists(RetNodeEx ret, Ap ap0, ReturnKindExt kind, int pos |
parameterFlow(p, ap, ap0, c, config) and
c = ret.getEnclosingCallable() and
revFlow(ret, true, apSome(_), ap0, config) and
fwdFlow(ret, any(CcCall ccc), apSome(ap), pragma[only_bind_into](ap0),
revFlow(pragma[only_bind_into](ret), true, apSome(_), pragma[only_bind_into](ap0),
pragma[only_bind_into](config)) and
fwdFlow(ret, any(CcCall ccc), apSome(ap), ap0, config) and
kind = ret.getKind() and
p.getPosition() = pos and
// we don't expect a parameter to return stored in itself
@@ -1615,13 +1609,8 @@ private module Stage3 {
bindingset[call, c, outercc]
private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { any() }
bindingset[call, c]
private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call) { any() }
bindingset[innercc, inner, call]
private predicate checkCallContextReturn(Cc innercc, DataFlowCallable inner, DataFlowCall call) {
any()
}
bindingset[call, c, innercc]
private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) { any() }
bindingset[node, cc, config]
private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) { any() }
@@ -1816,8 +1805,7 @@ private module Stage3 {
fwdFlow(ret, innercc, argAp, ap, config) and
flowOutOfCall(call, ret, out, allowsFieldFlow, config) and
inner = ret.getEnclosingCallable() and
checkCallContextReturn(innercc, inner, call) and
ccOut = getCallContextReturn(inner, call)
ccOut = getCallContextReturn(inner, call, innercc)
|
ap instanceof ApNil or allowsFieldFlow = true
)
@@ -2087,9 +2075,9 @@ private module Stage3 {
exists(RetNodeEx ret, Ap ap0, ReturnKindExt kind, int pos |
parameterFlow(p, ap, ap0, c, config) and
c = ret.getEnclosingCallable() and
revFlow(ret, true, apSome(_), ap0, config) and
fwdFlow(ret, any(CcCall ccc), apSome(ap), pragma[only_bind_into](ap0),
revFlow(pragma[only_bind_into](ret), true, apSome(_), pragma[only_bind_into](ap0),
pragma[only_bind_into](config)) and
fwdFlow(ret, any(CcCall ccc), apSome(ap), ap0, config) and
kind = ret.getKind() and
p.getPosition() = pos and
// we don't expect a parameter to return stored in itself
@@ -2364,20 +2352,16 @@ private module Stage4 {
bindingset[call, c, outercc]
private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) {
c = resolveCall(call, outercc) and
checkCallContextCall(outercc, call, c) and
if recordDataFlowCallSite(call, c) then result = TSpecificCall(call) else result = TSomeCall()
}
bindingset[call, c]
private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call) {
bindingset[call, c, innercc]
private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) {
checkCallContextReturn(innercc, c, call) and
if reducedViableImplInReturn(c, call) then result = TReturn(c, call) else result = ccNone()
}
bindingset[innercc, inner, call]
private predicate checkCallContextReturn(Cc innercc, DataFlowCallable inner, DataFlowCall call) {
resolveReturn(innercc, inner, call)
}
bindingset[node, cc, config]
private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) {
localFlowEntry(node, config) and
@@ -2579,8 +2563,7 @@ private module Stage4 {
fwdFlow(ret, innercc, argAp, ap, config) and
flowOutOfCall(call, ret, out, allowsFieldFlow, config) and
inner = ret.getEnclosingCallable() and
checkCallContextReturn(innercc, inner, call) and
ccOut = getCallContextReturn(inner, call)
ccOut = getCallContextReturn(inner, call, innercc)
|
ap instanceof ApNil or allowsFieldFlow = true
)
@@ -2850,9 +2833,9 @@ private module Stage4 {
exists(RetNodeEx ret, Ap ap0, ReturnKindExt kind, int pos |
parameterFlow(p, ap, ap0, c, config) and
c = ret.getEnclosingCallable() and
revFlow(ret, true, apSome(_), ap0, config) and
fwdFlow(ret, any(CcCall ccc), apSome(ap), pragma[only_bind_into](ap0),
revFlow(pragma[only_bind_into](ret), true, apSome(_), pragma[only_bind_into](ap0),
pragma[only_bind_into](config)) and
fwdFlow(ret, any(CcCall ccc), apSome(ap), ap0, config) and
kind = ret.getKind() and
p.getPosition() = pos and
// we don't expect a parameter to return stored in itself

View File

@@ -943,13 +943,8 @@ private module Stage2 {
bindingset[call, c, outercc]
private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { any() }
bindingset[call, c]
private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call) { any() }
bindingset[innercc, inner, call]
private predicate checkCallContextReturn(Cc innercc, DataFlowCallable inner, DataFlowCall call) {
any()
}
bindingset[call, c, innercc]
private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) { any() }
bindingset[node, cc, config]
private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) { any() }
@@ -1122,8 +1117,7 @@ private module Stage2 {
fwdFlow(ret, innercc, argAp, ap, config) and
flowOutOfCall(call, ret, out, allowsFieldFlow, config) and
inner = ret.getEnclosingCallable() and
checkCallContextReturn(innercc, inner, call) and
ccOut = getCallContextReturn(inner, call)
ccOut = getCallContextReturn(inner, call, innercc)
|
ap instanceof ApNil or allowsFieldFlow = true
)
@@ -1393,9 +1387,9 @@ private module Stage2 {
exists(RetNodeEx ret, Ap ap0, ReturnKindExt kind, int pos |
parameterFlow(p, ap, ap0, c, config) and
c = ret.getEnclosingCallable() and
revFlow(ret, true, apSome(_), ap0, config) and
fwdFlow(ret, any(CcCall ccc), apSome(ap), pragma[only_bind_into](ap0),
revFlow(pragma[only_bind_into](ret), true, apSome(_), pragma[only_bind_into](ap0),
pragma[only_bind_into](config)) and
fwdFlow(ret, any(CcCall ccc), apSome(ap), ap0, config) and
kind = ret.getKind() and
p.getPosition() = pos and
// we don't expect a parameter to return stored in itself
@@ -1615,13 +1609,8 @@ private module Stage3 {
bindingset[call, c, outercc]
private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { any() }
bindingset[call, c]
private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call) { any() }
bindingset[innercc, inner, call]
private predicate checkCallContextReturn(Cc innercc, DataFlowCallable inner, DataFlowCall call) {
any()
}
bindingset[call, c, innercc]
private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) { any() }
bindingset[node, cc, config]
private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) { any() }
@@ -1816,8 +1805,7 @@ private module Stage3 {
fwdFlow(ret, innercc, argAp, ap, config) and
flowOutOfCall(call, ret, out, allowsFieldFlow, config) and
inner = ret.getEnclosingCallable() and
checkCallContextReturn(innercc, inner, call) and
ccOut = getCallContextReturn(inner, call)
ccOut = getCallContextReturn(inner, call, innercc)
|
ap instanceof ApNil or allowsFieldFlow = true
)
@@ -2087,9 +2075,9 @@ private module Stage3 {
exists(RetNodeEx ret, Ap ap0, ReturnKindExt kind, int pos |
parameterFlow(p, ap, ap0, c, config) and
c = ret.getEnclosingCallable() and
revFlow(ret, true, apSome(_), ap0, config) and
fwdFlow(ret, any(CcCall ccc), apSome(ap), pragma[only_bind_into](ap0),
revFlow(pragma[only_bind_into](ret), true, apSome(_), pragma[only_bind_into](ap0),
pragma[only_bind_into](config)) and
fwdFlow(ret, any(CcCall ccc), apSome(ap), ap0, config) and
kind = ret.getKind() and
p.getPosition() = pos and
// we don't expect a parameter to return stored in itself
@@ -2364,20 +2352,16 @@ private module Stage4 {
bindingset[call, c, outercc]
private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) {
c = resolveCall(call, outercc) and
checkCallContextCall(outercc, call, c) and
if recordDataFlowCallSite(call, c) then result = TSpecificCall(call) else result = TSomeCall()
}
bindingset[call, c]
private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call) {
bindingset[call, c, innercc]
private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) {
checkCallContextReturn(innercc, c, call) and
if reducedViableImplInReturn(c, call) then result = TReturn(c, call) else result = ccNone()
}
bindingset[innercc, inner, call]
private predicate checkCallContextReturn(Cc innercc, DataFlowCallable inner, DataFlowCall call) {
resolveReturn(innercc, inner, call)
}
bindingset[node, cc, config]
private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) {
localFlowEntry(node, config) and
@@ -2579,8 +2563,7 @@ private module Stage4 {
fwdFlow(ret, innercc, argAp, ap, config) and
flowOutOfCall(call, ret, out, allowsFieldFlow, config) and
inner = ret.getEnclosingCallable() and
checkCallContextReturn(innercc, inner, call) and
ccOut = getCallContextReturn(inner, call)
ccOut = getCallContextReturn(inner, call, innercc)
|
ap instanceof ApNil or allowsFieldFlow = true
)
@@ -2850,9 +2833,9 @@ private module Stage4 {
exists(RetNodeEx ret, Ap ap0, ReturnKindExt kind, int pos |
parameterFlow(p, ap, ap0, c, config) and
c = ret.getEnclosingCallable() and
revFlow(ret, true, apSome(_), ap0, config) and
fwdFlow(ret, any(CcCall ccc), apSome(ap), pragma[only_bind_into](ap0),
revFlow(pragma[only_bind_into](ret), true, apSome(_), pragma[only_bind_into](ap0),
pragma[only_bind_into](config)) and
fwdFlow(ret, any(CcCall ccc), apSome(ap), ap0, config) and
kind = ret.getKind() and
p.getPosition() = pos and
// we don't expect a parameter to return stored in itself

View File

@@ -1117,6 +1117,44 @@ ReturnPosition getReturnPosition(ReturnNodeExt ret) {
result = getReturnPosition0(ret, ret.getKind())
}
/**
* Checks whether `inner` can return to `call` in the call context `innercc`.
* Assumes a context of `inner = viableCallableExt(call)`.
*/
bindingset[innercc, inner, call]
predicate checkCallContextReturn(CallContext innercc, DataFlowCallable inner, DataFlowCall call) {
innercc instanceof CallContextAny
or
exists(DataFlowCallable c0, DataFlowCall call0 |
callEnclosingCallable(call0, inner) and
innercc = TReturn(c0, call0) and
c0 = prunedViableImplInCallContextReverse(call0, call)
)
}
/**
* Checks whether `call` can resolve to `calltarget` in the call context `cc`.
* Assumes a context of `calltarget = viableCallableExt(call)`.
*/
bindingset[cc, call, calltarget]
predicate checkCallContextCall(CallContext cc, DataFlowCall call, DataFlowCallable calltarget) {
exists(DataFlowCall ctx | cc = TSpecificCall(ctx) |
if reducedViableImplInCallContext(call, _, ctx)
then calltarget = prunedViableImplInCallContext(call, ctx)
else any()
)
or
cc instanceof CallContextSomeCall
or
cc instanceof CallContextAny
or
cc instanceof CallContextReturn
}
/**
* Resolves a return from `callable` in `cc` to `call`. This is equivalent to
* `callable = viableCallableExt(call) and checkCallContextReturn(cc, callable, call)`.
*/
bindingset[cc, callable]
predicate resolveReturn(CallContext cc, DataFlowCallable callable, DataFlowCall call) {
cc instanceof CallContextAny and callable = viableCallableExt(call)
@@ -1128,6 +1166,10 @@ predicate resolveReturn(CallContext cc, DataFlowCallable callable, DataFlowCall
)
}
/**
* Resolves a call from `call` in `cc` to `result`. This is equivalent to
* `result = viableCallableExt(call) and checkCallContextCall(cc, call, result)`.
*/
bindingset[call, cc]
DataFlowCallable resolveCall(DataFlowCall call, CallContext cc) {
exists(DataFlowCall ctx | cc = TSpecificCall(ctx) |

View File

@@ -33,15 +33,20 @@ private import DataFlowPrivate
class LocalSourceNode extends Node {
cached
LocalSourceNode() {
not simpleLocalFlowStep(_, this) and
// Currently, we create synthetic post-update nodes for
// - arguments to calls that may modify said argument
// - direct reads a writes of object attributes
// Both of these preserve the identity of the underlying pointer, and hence we exclude these as
// local source nodes.
// We do, however, allow the post-update nodes that arise from object creation (which are non-synthetic).
not this instanceof SyntheticPostUpdateNode
this instanceof ExprNode and
not simpleLocalFlowStep(_, this)
or
// We include all module variable nodes, as these act as stepping stones between writes and
// reads of global variables. Without them, type tracking based on `LocalSourceNode`s would be
// unable to track across global variables.
//
// Once the `track` and `backtrack` methods have been fully deprecated, this disjunct can be
// removed, and the entire class can extend `ExprNode`. At that point, `TypeTrackingNode` should
// be used for type tracking instead of `LocalSourceNode`.
this instanceof ModuleVariableNode
or
// We explicitly include any read of a global variable, as some of these may have local flow going
// into them.
this = any(ModuleVariableNode mvn).getARead()
}
@@ -115,6 +120,53 @@ class LocalSourceNode extends Node {
LocalSourceNode backtrack(TypeBackTracker t2, TypeBackTracker t) { t2 = t.step(result, this) }
}
/**
* A node that can be used for type tracking or type back-tracking.
*
* All steps made during type tracking should be between instances of this class.
*/
class TypeTrackingNode = LocalSourceNode;
/** Temporary holding ground for the `TypeTrackingNode` class. */
private module FutureWork {
class FutureTypeTrackingNode extends Node {
FutureTypeTrackingNode() {
this instanceof LocalSourceNode
or
this instanceof ModuleVariableNode
}
/**
* Holds if this node can flow to `nodeTo` in one or more local flow steps.
*
* For `ModuleVariableNode`s, the only "local" step is to the node itself.
* For `LocalSourceNode`s, this is the usual notion of local flow.
*/
pragma[inline]
predicate flowsTo(Node node) {
this instanceof ModuleVariableNode and this = node
or
this.(LocalSourceNode).flowsTo(node)
}
/**
* Gets a node that this node may flow to using one heap and/or interprocedural step.
*
* See `TypeTracker` for more details about how to use this.
*/
pragma[inline]
TypeTrackingNode track(TypeTracker t2, TypeTracker t) { t = t2.step(this, result) }
/**
* Gets a node that may flow into this one using one heap and/or interprocedural step.
*
* See `TypeBackTracker` for more details about how to use this.
*/
pragma[inline]
TypeTrackingNode backtrack(TypeBackTracker t2, TypeBackTracker t) { t2 = t.step(result, this) }
}
}
cached
private module Cached {
/**
@@ -127,11 +179,21 @@ private module Cached {
source = sink
or
exists(Node second |
simpleLocalFlowStep(source, second) and
simpleLocalFlowStep*(second, sink)
localSourceFlowStep(source, second) and
localSourceFlowStep*(second, sink)
)
}
/**
* Helper predicate for `hasLocalSource`. Removes any steps go to module variable reads, as these
* are already local source nodes in their own right.
*/
cached
private predicate localSourceFlowStep(Node nodeFrom, Node nodeTo) {
simpleLocalFlowStep(nodeFrom, nodeTo) and
not nodeTo = any(ModuleVariableNode v).getARead()
}
/**
* Holds if `base` flows to the base of `ref` and `ref` has attribute name `attr`.
*/

View File

@@ -1,7 +1,8 @@
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.internal.DataFlowPrivate
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
private import semmle.python.dataflow.new.internal.TaintTrackingPublic
private import semmle.python.ApiGraphs
/**
* Holds if `node` should be a sanitizer in all global taint flow configurations
@@ -82,13 +83,13 @@ predicate subscriptStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
*/
predicate stringManipulation(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
// transforming something tainted into a string will make the string tainted
exists(CallNode call | call = nodeTo.getNode() |
call.getFunction().(NameNode).getId() in ["str", "bytes", "unicode"] and
exists(DataFlow::CallCfgNode call | call = nodeTo |
(
nodeFrom.getNode() = call.getArg(0)
call = API::builtin(["str", "bytes", "unicode"]).getACall()
or
nodeFrom.getNode() = call.getArgByName("object")
)
call.getFunction().asCfgNode().(NameNode).getId() in ["str", "bytes", "unicode"]
) and
nodeFrom in [call.getArg(0), call.getArgByName("object")]
)
or
// String methods. Note that this doesn't recognize `meth = "foo".upper; meth()`
@@ -155,39 +156,37 @@ predicate stringManipulation(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeT
predicate containerStep(DataFlow::CfgNode nodeFrom, DataFlow::Node nodeTo) {
// construction by literal
// TODO: Not limiting the content argument here feels like a BIG hack, but we currently get nothing for free :|
storeStep(nodeFrom, _, nodeTo)
DataFlowPrivate::storeStep(nodeFrom, _, nodeTo)
or
// constructor call
exists(CallNode call | call = nodeTo.asCfgNode() |
call.getFunction().(NameNode).getId() in [
"list", "set", "frozenset", "dict", "defaultdict", "tuple"
] and
call.getArg(0) = nodeFrom.getNode()
exists(DataFlow::CallCfgNode call | call = nodeTo |
call = API::builtin(["list", "set", "frozenset", "dict", "tuple"]).getACall() and
call.getArg(0) = nodeFrom
// TODO: Properly handle defaultdict/namedtuple
)
or
// functions operating on collections
exists(CallNode call | call = nodeTo.asCfgNode() |
call.getFunction().(NameNode).getId() in ["sorted", "reversed", "iter", "next"] and
call.getArg(0) = nodeFrom.getNode()
exists(DataFlow::CallCfgNode call | call = nodeTo |
call = API::builtin(["sorted", "reversed", "iter", "next"]).getACall() and
call.getArg(0) = nodeFrom
)
or
// methods
exists(CallNode call, string name | call = nodeTo.asCfgNode() |
name in [
exists(DataFlow::MethodCallNode call, string methodName | call = nodeTo |
methodName in [
// general
"copy", "pop",
// dict
"values", "items", "get", "popitem"
] and
call.getFunction().(AttrNode).getObject(name) = nodeFrom.asCfgNode()
call.calls(nodeFrom, methodName)
)
or
// list.append, set.add
exists(CallNode call, string name |
name in ["append", "add"] and
call.getFunction().(AttrNode).getObject(name) =
nodeTo.(DataFlow::PostUpdateNode).getPreUpdateNode().asCfgNode() and
call.getArg(0) = nodeFrom.getNode()
exists(DataFlow::MethodCallNode call, DataFlow::Node obj |
call.calls(obj, ["append", "add"]) and
obj = nodeTo.(DataFlow::PostUpdateNode).getPreUpdateNode() and
call.getArg(0) = nodeFrom
)
}
@@ -195,14 +194,9 @@ predicate containerStep(DataFlow::CfgNode nodeFrom, DataFlow::Node nodeTo) {
* Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to copying.
*/
predicate copyStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
exists(CallNode call | call = nodeTo.getNode() |
// Fully qualified: copy.copy, copy.deepcopy
(
call.getFunction().(NameNode).getId() in ["copy", "deepcopy"]
or
call.getFunction().(AttrNode).getObject(["copy", "deepcopy"]).(NameNode).getId() = "copy"
) and
call.getArg(0) = nodeFrom.getNode()
exists(DataFlow::CallCfgNode call | call = nodeTo |
call = API::moduleImport("copy").getMember(["copy", "deepcopy"]).getACall() and
call.getArg(0) = nodeFrom
)
}

View File

@@ -59,7 +59,7 @@ private module Cached {
* Steps contained in this predicate should _not_ depend on the call graph.
*/
cached
predicate stepNoCall(LocalSourceNode nodeFrom, LocalSourceNode nodeTo, StepSummary summary) {
predicate stepNoCall(TypeTrackingNode nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) {
exists(Node mid | nodeFrom.flowsTo(mid) and smallstepNoCall(mid, nodeTo, summary))
}
@@ -68,7 +68,7 @@ private module Cached {
* inter-procedural step from `nodeFrom` to `nodeTo`.
*/
cached
predicate stepCall(LocalSourceNode nodeFrom, LocalSourceNode nodeTo, StepSummary summary) {
predicate stepCall(TypeTrackingNode nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) {
exists(Node mid | nodeFrom.flowsTo(mid) and smallstepCall(mid, nodeTo, summary))
}
}
@@ -96,7 +96,7 @@ class StepSummary extends TStepSummary {
}
pragma[noinline]
private predicate smallstepNoCall(Node nodeFrom, LocalSourceNode nodeTo, StepSummary summary) {
private predicate smallstepNoCall(Node nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) {
jumpStep(nodeFrom, nodeTo) and
summary = LevelStep()
or
@@ -109,7 +109,7 @@ private predicate smallstepNoCall(Node nodeFrom, LocalSourceNode nodeTo, StepSum
}
pragma[noinline]
private predicate smallstepCall(Node nodeFrom, LocalSourceNode nodeTo, StepSummary summary) {
private predicate smallstepCall(Node nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) {
callStep(nodeFrom, nodeTo) and summary = CallStep()
or
returnStep(nodeFrom, nodeTo) and
@@ -129,7 +129,7 @@ module StepSummary {
* call graph.
*/
pragma[inline]
predicate step(LocalSourceNode nodeFrom, LocalSourceNode nodeTo, StepSummary summary) {
predicate step(TypeTrackingNode nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) {
stepNoCall(nodeFrom, nodeTo, summary)
or
stepCall(nodeFrom, nodeTo, summary)
@@ -143,7 +143,7 @@ module StepSummary {
* type-preserving steps.
*/
pragma[inline]
predicate smallstep(Node nodeFrom, LocalSourceNode nodeTo, StepSummary summary) {
predicate smallstep(Node nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) {
smallstepNoCall(nodeFrom, nodeTo, summary)
or
smallstepCall(nodeFrom, nodeTo, summary)
@@ -174,7 +174,7 @@ module StepSummary {
* function. This means we will track the fact that `x.attr` can have the type of `y` into the
* assignment to `z` inside `bar`, even though this attribute write happens _after_ `bar` is called.
*/
predicate localSourceStoreStep(Node nodeFrom, LocalSourceNode nodeTo, string content) {
predicate localSourceStoreStep(Node nodeFrom, TypeTrackingNode nodeTo, string content) {
exists(Node obj | nodeTo.flowsTo(obj) and basicStoreStep(nodeFrom, obj, content))
}
}
@@ -192,7 +192,7 @@ private newtype TTypeTracker = MkTypeTracker(Boolean hasCall, OptionalContentNam
* It is recommended that all uses of this type are written in the following form,
* for tracking some type `myType`:
* ```ql
* DataFlow::LocalSourceNode myType(DataFlow::TypeTracker t) {
* DataFlow::TypeTrackingNode myType(DataFlow::TypeTracker t) {
* t.start() and
* result = < source of myType >
* or
@@ -275,7 +275,7 @@ class TypeTracker extends TTypeTracker {
* heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
*/
pragma[inline]
TypeTracker step(LocalSourceNode nodeFrom, LocalSourceNode nodeTo) {
TypeTracker step(TypeTrackingNode nodeFrom, TypeTrackingNode nodeTo) {
exists(StepSummary summary |
StepSummary::step(nodeFrom, pragma[only_bind_out](nodeTo), pragma[only_bind_into](summary)) and
result = this.append(pragma[only_bind_into](summary))
@@ -342,7 +342,7 @@ private newtype TTypeBackTracker = MkTypeBackTracker(Boolean hasReturn, Optional
* for back-tracking some callback type `myCallback`:
*
* ```ql
* DataFlow::LocalSourceNode myCallback(DataFlow::TypeBackTracker t) {
* DataFlow::TypeTrackingNode myCallback(DataFlow::TypeBackTracker t) {
* t.start() and
* result = (< some API call >).getArgument(< n >).getALocalSource()
* or
@@ -351,7 +351,7 @@ private newtype TTypeBackTracker = MkTypeBackTracker(Boolean hasReturn, Optional
* )
* }
*
* DataFlow::LocalSourceNode myCallback() { result = myCallback(DataFlow::TypeBackTracker::end()) }
* DataFlow::TypeTrackingNode myCallback() { result = myCallback(DataFlow::TypeBackTracker::end()) }
* ```
*
* Instead of `result = myCallback(t2).backtrack(t2, t)`, you can also use the equivalent
@@ -418,7 +418,7 @@ class TypeBackTracker extends TTypeBackTracker {
* heap and/or inter-procedural step from `nodeTo` to `nodeFrom`.
*/
pragma[inline]
TypeBackTracker step(LocalSourceNode nodeFrom, LocalSourceNode nodeTo) {
TypeBackTracker step(TypeTrackingNode nodeFrom, TypeTrackingNode nodeTo) {
exists(StepSummary summary |
StepSummary::step(pragma[only_bind_out](nodeFrom), nodeTo, pragma[only_bind_into](summary)) and
this = result.prepend(pragma[only_bind_into](summary))
@@ -431,7 +431,7 @@ class TypeBackTracker extends TTypeBackTracker {
*
* Unlike `TypeBackTracker::step`, this predicate exposes all edges
* in the flowgraph, and not just the edges between
* `LocalSourceNode`s. It may therefore be less performant.
* `TypeTrackingNode`s. It may therefore be less performant.
*
* Type tracking predicates using small steps typically take the following form:
* ```ql

View File

@@ -8,7 +8,7 @@ private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPr
class Node = DataFlowPublic::Node;
class LocalSourceNode = DataFlowPublic::LocalSourceNode;
class TypeTrackingNode = DataFlowPublic::TypeTrackingNode;
predicate simpleLocalFlowStep = DataFlowPrivate::simpleLocalFlowStep/2;

View File

@@ -114,7 +114,7 @@ module AiohttpWebModel {
* Gets a reference to a class, that has been backtracked from the view-class handler
* argument `origin` (to a route-setup for view-classes).
*/
private DataFlow::LocalSourceNode viewClassBackTracker(
private DataFlow::TypeTrackingNode viewClassBackTracker(
DataFlow::TypeBackTracker t, DataFlow::Node origin
) {
t.start() and
@@ -284,7 +284,7 @@ module AiohttpWebModel {
abstract class InstanceSource extends DataFlow::LocalSourceNode { }
/** Gets a reference to an instance of `aiohttp.web.Request`. */
private DataFlow::LocalSourceNode instance(DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
@@ -295,6 +295,36 @@ module AiohttpWebModel {
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
}
/**
* Provides models for the `aiohttp.web.Response` class
*
* See https://docs.aiohttp.org/en/stable/web_reference.html#response-classes
*/
module Response {
/**
* A source of instances of `aiohttp.web.Response`, extend this class to model new instances.
*
* This can include instantiations of the class, return values from function
* calls, or a special parameter that will be set when functions are called by an external
* library.
*
* Use `Response::instance()` predicate to get
* references to instances of `aiohttp.web.Response`.
*/
abstract class InstanceSource extends DataFlow::LocalSourceNode { }
/** Gets a reference to an instance of `aiohttp.web.Response`. */
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
}
/** Gets a reference to an instance of `aiohttp.web.Response`. */
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
}
/**
* Provides models for the `aiohttp.StreamReader` class
*
@@ -314,7 +344,7 @@ module AiohttpWebModel {
abstract class InstanceSource extends DataFlow::LocalSourceNode { }
/** Gets a reference to an instance of `aiohttp.StreamReader`. */
private DataFlow::LocalSourceNode instance(DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
@@ -488,35 +518,46 @@ module AiohttpWebModel {
* - https://docs.aiohttp.org/en/stable/web_quickstart.html#aiohttp-web-exceptions
*/
class AiohttpWebResponseInstantiation extends HTTP::Server::HttpResponse::Range,
DataFlow::CallCfgNode {
Response::InstanceSource, DataFlow::CallCfgNode {
API::Node apiNode;
AiohttpWebResponseInstantiation() {
this = API::moduleImport("aiohttp").getMember("web").getMember("Response").getACall()
or
exists(string httpExceptionClassName |
httpExceptionClassName in [
"HTTPException", "HTTPSuccessful", "HTTPOk", "HTTPCreated", "HTTPAccepted",
"HTTPNonAuthoritativeInformation", "HTTPNoContent", "HTTPResetContent",
"HTTPPartialContent", "HTTPRedirection", "HTTPMultipleChoices", "HTTPMovedPermanently",
"HTTPFound", "HTTPSeeOther", "HTTPNotModified", "HTTPUseProxy", "HTTPTemporaryRedirect",
"HTTPPermanentRedirect", "HTTPError", "HTTPClientError", "HTTPBadRequest",
"HTTPUnauthorized", "HTTPPaymentRequired", "HTTPForbidden", "HTTPNotFound",
"HTTPMethodNotAllowed", "HTTPNotAcceptable", "HTTPProxyAuthenticationRequired",
"HTTPRequestTimeout", "HTTPConflict", "HTTPGone", "HTTPLengthRequired",
"HTTPPreconditionFailed", "HTTPRequestEntityTooLarge", "HTTPRequestURITooLong",
"HTTPUnsupportedMediaType", "HTTPRequestRangeNotSatisfiable", "HTTPExpectationFailed",
"HTTPMisdirectedRequest", "HTTPUnprocessableEntity", "HTTPFailedDependency",
"HTTPUpgradeRequired", "HTTPPreconditionRequired", "HTTPTooManyRequests",
"HTTPRequestHeaderFieldsTooLarge", "HTTPUnavailableForLegalReasons", "HTTPServerError",
"HTTPInternalServerError", "HTTPNotImplemented", "HTTPBadGateway",
"HTTPServiceUnavailable", "HTTPGatewayTimeout", "HTTPVersionNotSupported",
"HTTPVariantAlsoNegotiates", "HTTPInsufficientStorage", "HTTPNotExtended",
"HTTPNetworkAuthenticationRequired"
] and
this =
API::moduleImport("aiohttp").getMember("web").getMember(httpExceptionClassName).getACall()
this = apiNode.getACall() and
(
apiNode = API::moduleImport("aiohttp").getMember("web").getMember("Response")
or
exists(string httpExceptionClassName |
httpExceptionClassName in [
"HTTPException", "HTTPSuccessful", "HTTPOk", "HTTPCreated", "HTTPAccepted",
"HTTPNonAuthoritativeInformation", "HTTPNoContent", "HTTPResetContent",
"HTTPPartialContent", "HTTPRedirection", "HTTPMultipleChoices",
"HTTPMovedPermanently", "HTTPFound", "HTTPSeeOther", "HTTPNotModified",
"HTTPUseProxy", "HTTPTemporaryRedirect", "HTTPPermanentRedirect", "HTTPError",
"HTTPClientError", "HTTPBadRequest", "HTTPUnauthorized", "HTTPPaymentRequired",
"HTTPForbidden", "HTTPNotFound", "HTTPMethodNotAllowed", "HTTPNotAcceptable",
"HTTPProxyAuthenticationRequired", "HTTPRequestTimeout", "HTTPConflict", "HTTPGone",
"HTTPLengthRequired", "HTTPPreconditionFailed", "HTTPRequestEntityTooLarge",
"HTTPRequestURITooLong", "HTTPUnsupportedMediaType", "HTTPRequestRangeNotSatisfiable",
"HTTPExpectationFailed", "HTTPMisdirectedRequest", "HTTPUnprocessableEntity",
"HTTPFailedDependency", "HTTPUpgradeRequired", "HTTPPreconditionRequired",
"HTTPTooManyRequests", "HTTPRequestHeaderFieldsTooLarge",
"HTTPUnavailableForLegalReasons", "HTTPServerError", "HTTPInternalServerError",
"HTTPNotImplemented", "HTTPBadGateway", "HTTPServiceUnavailable",
"HTTPGatewayTimeout", "HTTPVersionNotSupported", "HTTPVariantAlsoNegotiates",
"HTTPInsufficientStorage", "HTTPNotExtended", "HTTPNetworkAuthenticationRequired"
] and
apiNode = API::moduleImport("aiohttp").getMember("web").getMember(httpExceptionClassName)
)
)
}
/**
* INTERNAL: Do not use.
*
* Get the internal `API::Node` that this is call of.
*/
API::Node getApiNode() { result = apiNode }
override DataFlow::Node getBody() {
result in [this.getArgByName("text"), this.getArgByName("body")]
}
@@ -534,6 +575,11 @@ module AiohttpWebModel {
}
}
/** Gets an HTTP response instance. */
private API::Node aiohttpResponseInstance() {
result = any(AiohttpWebResponseInstantiation call).getApiNode().getReturn()
}
/**
* An instantiation of aiohttp.web HTTP redirect exception.
*
@@ -559,4 +605,61 @@ module AiohttpWebModel {
result in [this.getArg(0), this.getArgByName("location")]
}
}
/**
* A call to `set_cookie` on a HTTP Response.
*/
class AiohttpResponseSetCookieCall extends HTTP::Server::CookieWrite::Range, DataFlow::CallCfgNode {
AiohttpResponseSetCookieCall() {
this = aiohttpResponseInstance().getMember("set_cookie").getACall()
}
override DataFlow::Node getHeaderArg() { none() }
override DataFlow::Node getNameArg() { result in [this.getArg(0), this.getArgByName("name")] }
override DataFlow::Node getValueArg() { result in [this.getArg(1), this.getArgByName("value")] }
}
/**
* A call to `del_cookie` on a HTTP Response.
*/
class AiohttpResponseDelCookieCall extends HTTP::Server::CookieWrite::Range, DataFlow::CallCfgNode {
AiohttpResponseDelCookieCall() {
this = aiohttpResponseInstance().getMember("del_cookie").getACall()
}
override DataFlow::Node getHeaderArg() { none() }
override DataFlow::Node getNameArg() { result in [this.getArg(0), this.getArgByName("name")] }
override DataFlow::Node getValueArg() { none() }
}
/**
* A dict-like write to an item of the `cookies` attribute on a HTTP response, such as
* `response.cookies[name] = value`.
*/
class AiohttpResponseCookieSubscriptWrite extends HTTP::Server::CookieWrite::Range {
DataFlow::Node index;
DataFlow::Node value;
AiohttpResponseCookieSubscriptWrite() {
exists(SubscriptNode subscript |
// To give `this` a value, we need to choose between either LHS or RHS,
// and just go with the LHS
this.asCfgNode() = subscript
|
subscript.getObject() = aiohttpResponseInstance().getMember("cookies").getAUse().asCfgNode() and
value.asCfgNode() = subscript.(DefinitionNode).getValue() and
index.asCfgNode() = subscript.getIndex()
)
}
override DataFlow::Node getHeaderArg() { none() }
override DataFlow::Node getNameArg() { result = index }
override DataFlow::Node getValueArg() { result = value }
}
}

View File

@@ -75,7 +75,7 @@ private module CryptographyModel {
}
/** Gets a reference to a predefined curve class with a specific key size (in bits), as well as the origin of the class. */
private DataFlow::LocalSourceNode curveClassWithKeySize(
private DataFlow::TypeTrackingNode curveClassWithKeySize(
DataFlow::TypeTracker t, int keySize, DataFlow::Node origin
) {
t.start() and
@@ -93,7 +93,7 @@ private module CryptographyModel {
}
/** Gets a reference to a predefined curve class instance with a specific key size (in bits), as well as the origin of the class. */
private DataFlow::LocalSourceNode curveClassInstanceWithKeySize(
private DataFlow::TypeTrackingNode curveClassInstanceWithKeySize(
DataFlow::TypeTracker t, int keySize, DataFlow::Node origin
) {
t.start() and
@@ -202,7 +202,7 @@ private module CryptographyModel {
}
/** Gets a reference to a Cipher instance using algorithm with `algorithmName`. */
DataFlow::LocalSourceNode cipherInstance(DataFlow::TypeTracker t, string algorithmName) {
DataFlow::TypeTrackingNode cipherInstance(DataFlow::TypeTracker t, string algorithmName) {
t.start() and
exists(DataFlow::CallCfgNode call | result = call |
call =
@@ -226,7 +226,7 @@ private module CryptographyModel {
}
/** Gets a reference to the encryptor of a Cipher instance using algorithm with `algorithmName`. */
DataFlow::LocalSourceNode cipherEncryptor(DataFlow::TypeTracker t, string algorithmName) {
DataFlow::TypeTrackingNode cipherEncryptor(DataFlow::TypeTracker t, string algorithmName) {
t.start() and
result.(DataFlow::MethodCallNode).calls(cipherInstance(algorithmName), "encryptor")
or
@@ -243,7 +243,7 @@ private module CryptographyModel {
}
/** Gets a reference to the dncryptor of a Cipher instance using algorithm with `algorithmName`. */
DataFlow::LocalSourceNode cipherDecryptor(DataFlow::TypeTracker t, string algorithmName) {
DataFlow::TypeTrackingNode cipherDecryptor(DataFlow::TypeTracker t, string algorithmName) {
t.start() and
result.(DataFlow::MethodCallNode).calls(cipherInstance(algorithmName), "decryptor")
or
@@ -298,7 +298,7 @@ private module CryptographyModel {
}
/** Gets a reference to a Hash instance using algorithm with `algorithmName`. */
private DataFlow::LocalSourceNode hashInstance(DataFlow::TypeTracker t, string algorithmName) {
private DataFlow::TypeTrackingNode hashInstance(DataFlow::TypeTracker t, string algorithmName) {
t.start() and
exists(DataFlow::CallCfgNode call | result = call |
call =

View File

@@ -401,7 +401,7 @@ private module PrivateDjango {
* Gets an instance of the `django.db.models.expressions.RawSQL` class,
* that was initiated with the SQL represented by `sql`.
*/
private DataFlow::LocalSourceNode instance(DataFlow::TypeTracker t, DataFlow::Node sql) {
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t, DataFlow::Node sql) {
t.start() and
exists(DataFlow::CallCfgNode c | result = c |
c = classRef().getACall() and
@@ -578,7 +578,7 @@ private module PrivateDjango {
abstract class InstanceSource extends DataFlow::Node { }
/** Gets a reference to an instance of `django.http.request.HttpRequest`. */
private DataFlow::LocalSourceNode instance(DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
@@ -643,7 +643,7 @@ private module PrivateDjango {
}
/** Gets a reference to an instance of `django.http.response.HttpResponse`. */
private DataFlow::LocalSourceNode instance(DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
@@ -709,7 +709,7 @@ private module PrivateDjango {
}
/** Gets a reference to an instance of `django.http.response.HttpResponseRedirect`. */
private DataFlow::LocalSourceNode instance(DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
@@ -771,7 +771,7 @@ private module PrivateDjango {
}
/** Gets a reference to an instance of `django.http.response.HttpResponsePermanentRedirect`. */
private DataFlow::LocalSourceNode instance(DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
@@ -824,7 +824,7 @@ private module PrivateDjango {
}
/** Gets a reference to an instance of `django.http.response.HttpResponseNotModified`. */
private DataFlow::LocalSourceNode instance(DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
@@ -878,7 +878,7 @@ private module PrivateDjango {
}
/** Gets a reference to an instance of `django.http.response.HttpResponseBadRequest`. */
private DataFlow::LocalSourceNode instance(DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
@@ -932,7 +932,7 @@ private module PrivateDjango {
}
/** Gets a reference to an instance of `django.http.response.HttpResponseNotFound`. */
private DataFlow::LocalSourceNode instance(DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
@@ -986,7 +986,7 @@ private module PrivateDjango {
}
/** Gets a reference to an instance of `django.http.response.HttpResponseForbidden`. */
private DataFlow::LocalSourceNode instance(DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
@@ -1041,7 +1041,7 @@ private module PrivateDjango {
}
/** Gets a reference to an instance of `django.http.response.HttpResponseNotAllowed`. */
private DataFlow::LocalSourceNode instance(DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
@@ -1095,7 +1095,7 @@ private module PrivateDjango {
}
/** Gets a reference to an instance of `django.http.response.HttpResponseGone`. */
private DataFlow::LocalSourceNode instance(DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
@@ -1149,7 +1149,7 @@ private module PrivateDjango {
}
/** Gets a reference to an instance of `django.http.response.HttpResponseServerError`. */
private DataFlow::LocalSourceNode instance(DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
@@ -1203,7 +1203,7 @@ private module PrivateDjango {
}
/** Gets a reference to an instance of `django.http.response.JsonResponse`. */
private DataFlow::LocalSourceNode instance(DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
@@ -1260,7 +1260,7 @@ private module PrivateDjango {
}
/** Gets a reference to an instance of `django.http.response.StreamingHttpResponse`. */
private DataFlow::LocalSourceNode instance(DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
@@ -1317,7 +1317,7 @@ private module PrivateDjango {
}
/** Gets a reference to an instance of `django.http.response.FileResponse`. */
private DataFlow::LocalSourceNode instance(DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
@@ -1329,7 +1329,7 @@ private module PrivateDjango {
}
/** Gets a reference to the `django.http.response.HttpResponse.write` function. */
private DataFlow::LocalSourceNode write(
private DataFlow::TypeTrackingNode write(
django::http::response::HttpResponse::InstanceSource instance, DataFlow::TypeTracker t
) {
t.startInAttr("write") and
@@ -1350,9 +1350,9 @@ private module PrivateDjango {
* See https://docs.djangoproject.com/en/3.1/ref/request-response/#django.http.HttpResponse.write
*/
class HttpResponseWriteCall extends HTTP::Server::HttpResponse::Range, DataFlow::CallCfgNode {
HTTP::Server::HttpResponse::Range instance;
django::http::response::HttpResponse::InstanceSource instance;
HttpResponseWriteCall() { node.getFunction() = write(instance).asCfgNode() }
HttpResponseWriteCall() { this.getFunction() = write(instance) }
override DataFlow::Node getBody() {
result in [this.getArg(0), this.getArgByName("content")]
@@ -1364,6 +1364,77 @@ private module PrivateDjango {
override string getMimetypeDefault() { result = instance.getMimetypeDefault() }
}
/**
 * A call to `set_cookie` on an instance of `django.http.response.HttpResponse`
 * (or a subclass), such as `response.set_cookie("key", "value")`.
 *
 * Modeled as a cookie write: the cookie name is the first argument (`key`) and
 * the cookie value is the second (`value`).
 */
class DjangoResponseSetCookieCall extends HTTP::Server::CookieWrite::Range,
DataFlow::MethodCallNode {
DjangoResponseSetCookieCall() {
this.calls(django::http::response::HttpResponse::instance(), "set_cookie")
}
// `set_cookie` does not write a raw `Set-Cookie` header, so no header argument.
override DataFlow::Node getHeaderArg() { none() }
override DataFlow::Node getNameArg() {
result in [this.getArg(0), this.getArgByName("key")]
}
override DataFlow::Node getValueArg() {
result in [this.getArg(1), this.getArgByName("value")]
}
}
/**
 * A call to `delete_cookie` on an instance of `django.http.response.HttpResponse`
 * (or a subclass), such as `response.delete_cookie("key")`.
 *
 * Modeled as a cookie write whose name is the first argument (`key`); a deletion
 * carries no value, so there is no value argument.
 */
class DjangoResponseDeleteCookieCall extends HTTP::Server::CookieWrite::Range,
DataFlow::MethodCallNode {
DjangoResponseDeleteCookieCall() {
this.calls(django::http::response::HttpResponse::instance(), "delete_cookie")
}
// `delete_cookie` does not write a raw `Set-Cookie` header, so no header argument.
override DataFlow::Node getHeaderArg() { none() }
override DataFlow::Node getNameArg() {
result in [this.getArg(0), this.getArgByName("key")]
}
// Deleting a cookie has no value to track.
override DataFlow::Node getValueArg() { none() }
}
/**
 * A dict-like write to an item of the `cookies` attribute on a HTTP response, such as
 * `response.cookies[name] = value`.
 */
class DjangoResponseCookieSubscriptWrite extends HTTP::Server::CookieWrite::Range {
/** The subscript index, i.e. the cookie name. */
DataFlow::Node index;
/** The assigned value, i.e. the cookie value. */
DataFlow::Node value;
DjangoResponseCookieSubscriptWrite() {
exists(SubscriptNode subscript, DataFlow::AttrRead cookieLookup |
// To give `this` a value, we need to choose between either LHS or RHS,
// and just go with the LHS
this.asCfgNode() = subscript
|
cookieLookup.getAttributeName() = "cookies" and
cookieLookup.getObject() = django::http::response::HttpResponse::instance() and
// require the subscripted object to be reached by local flow from the
// `cookies` attribute read on a response instance
exists(DataFlow::Node subscriptObj |
subscriptObj.asCfgNode() = subscript.getObject()
|
cookieLookup.flowsTo(subscriptObj)
) and
// the subscript must be a store (LHS of an assignment); `DefinitionNode`
// gives the assigned value
value.asCfgNode() = subscript.(DefinitionNode).getValue() and
index.asCfgNode() = subscript.getIndex()
)
}
// a dict-style write never sets a raw header
override DataFlow::Node getHeaderArg() { none() }
override DataFlow::Node getNameArg() { result = index }
override DataFlow::Node getValueArg() { result = value }
}
}
}
@@ -1464,7 +1535,7 @@ private module PrivateDjango {
*/
class DjangoViewClassHelper extends Class {
/** Gets a reference to this class. */
private DataFlow::LocalSourceNode getARef(DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode getARef(DataFlow::TypeTracker t) {
t.start() and
result.asExpr().(ClassExpr) = this.getParent()
or
@@ -1475,7 +1546,7 @@ private module PrivateDjango {
DataFlow::Node getARef() { this.getARef(DataFlow::TypeTracker::end()).flowsTo(result) }
/** Gets a reference to the `as_view` classmethod of this class. */
private DataFlow::LocalSourceNode asViewRef(DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode asViewRef(DataFlow::TypeTracker t) {
t.startInAttr("as_view") and
result = this.getARef()
or
@@ -1486,7 +1557,7 @@ private module PrivateDjango {
DataFlow::Node asViewRef() { this.asViewRef(DataFlow::TypeTracker::end()).flowsTo(result) }
/** Gets a reference to the result of calling the `as_view` classmethod of this class. */
private DataFlow::LocalSourceNode asViewResult(DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode asViewResult(DataFlow::TypeTracker t) {
t.start() and
result.asCfgNode().(CallNode).getFunction() = this.asViewRef().asCfgNode()
or

View File

@@ -104,7 +104,7 @@ private module FabricV2 {
}
/** Gets a reference to an instance of `fabric.connection.Connection`. */
private DataFlow::LocalSourceNode instance(DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
@@ -123,7 +123,7 @@ private module FabricV2 {
* - https://docs.fabfile.org/en/2.5/api/connection.html#fabric.connection.Connection.sudo
* - https://docs.fabfile.org/en/2.5/api/connection.html#fabric.connection.Connection.local
*/
private DataFlow::LocalSourceNode instanceRunMethods(DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode instanceRunMethods(DataFlow::TypeTracker t) {
t.startInAttr(["run", "sudo", "local"]) and
result = instance()
or

View File

@@ -163,7 +163,7 @@ module Flask {
}
/** Gets a reference to an instance of `flask.Response`. */
private DataFlow::LocalSourceNode instance(DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
@@ -465,4 +465,39 @@ module Flask {
result = "text/html"
}
}
// ---------------------------------------------------------------------------
// flask.Response related
// ---------------------------------------------------------------------------
/**
 * A call to `set_cookie` on a Flask HTTP Response.
 *
 * Modeled as a cookie write: the cookie name is the first argument (`key`) and
 * the cookie value is the second (`value`).
 *
 * See https://flask.palletsprojects.com/en/2.0.x/api/#flask.Response.set_cookie
 */
class FlaskResponseSetCookieCall extends HTTP::Server::CookieWrite::Range,
DataFlow::MethodCallNode {
FlaskResponseSetCookieCall() { this.calls(Flask::Response::instance(), "set_cookie") }
// `set_cookie` does not write a raw `Set-Cookie` header, so no header argument.
override DataFlow::Node getHeaderArg() { none() }
override DataFlow::Node getNameArg() { result in [this.getArg(0), this.getArgByName("key")] }
override DataFlow::Node getValueArg() { result in [this.getArg(1), this.getArgByName("value")] }
}
/**
 * A call to `delete_cookie` on a Flask HTTP Response.
 *
 * Modeled as a cookie write whose name is the first argument (`key`); a deletion
 * carries no value, so there is no value argument.
 *
 * See https://flask.palletsprojects.com/en/2.0.x/api/#flask.Response.delete_cookie
 */
class FlaskResponseDeleteCookieCall extends HTTP::Server::CookieWrite::Range,
DataFlow::MethodCallNode {
FlaskResponseDeleteCookieCall() { this.calls(Flask::Response::instance(), "delete_cookie") }
// `delete_cookie` does not write a raw `Set-Cookie` header, so no header argument.
override DataFlow::Node getHeaderArg() { none() }
override DataFlow::Node getNameArg() { result in [this.getArg(0), this.getArgByName("key")] }
// Deleting a cookie has no value to track.
override DataFlow::Node getValueArg() { none() }
}
}

View File

@@ -36,7 +36,7 @@ private module Invoke {
}
/** Gets a reference to an instance of `invoke.context.Context`. */
private DataFlow::LocalSourceNode instance(DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
(
result = invoke::context::Context::classRef().getACall()
@@ -54,7 +54,7 @@ private module Invoke {
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
/** Gets a reference to the `run` or `sudo` methods on a `invoke.context.Context` instance. */
private DataFlow::LocalSourceNode instanceRunMethods(DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode instanceRunMethods(DataFlow::TypeTracker t) {
t.startInAttr(["run", "sudo"]) and
result = invoke::context::Context::instance()
or

View File

@@ -0,0 +1,151 @@
/**
* Provides classes modeling security-relevant aspects of the `MarkupSafe` PyPI package.
* See https://markupsafe.palletsprojects.com/en/2.0.x/.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
/**
* Provides models for the `MarkupSafe` PyPI package.
* See https://markupsafe.palletsprojects.com/en/2.0.x/.
*/
private module MarkupSafeModel {
/**
 * Provides models for the `markupsafe.Markup` class.
 *
 * See https://markupsafe.palletsprojects.com/en/2.0.x/escaping/#markupsafe.Markup.
 */
module Markup {
/** Gets a reference to the `markupsafe.Markup` class. */
API::Node classRef() {
result = API::moduleImport("markupsafe").getMember("Markup")
or
// `flask` also exposes `Markup` (presumably re-exported from `markupsafe` --
// confirm against flask docs)
result = API::moduleImport("flask").getMember("Markup")
}
/**
 * A source of instances of `markupsafe.Markup`, extend this class to model new instances.
 *
 * This can include instantiations of the class, return values from function
 * calls, or a special parameter that will be set when functions are called by an external
 * library.
 *
 * Use the predicate `Markup::instance()` to get references to instances of `markupsafe.Markup`.
 */
abstract class InstanceSource extends DataFlow::LocalSourceNode { }
/** A direct instantiation of `markupsafe.Markup`. */
private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode {
override CallNode node;
ClassInstantiation() { this = classRef().getACall() }
}
/** Gets a reference to an instance of `markupsafe.Markup`, as tracked by `t`. */
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
// recursive case: step the type tracker to follow the instance through flow
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
}
/** Gets a reference to an instance of `markupsafe.Markup`. */
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
/** A string concatenation with a `markupsafe.Markup` involved. */
class StringConcat extends Markup::InstanceSource, DataFlow::CfgNode {
override BinaryExprNode node;
StringConcat() {
node.getOp() instanceof Add and
// a Markup operand on either side makes the result be modeled as Markup
instance().asCfgNode() in [node.getLeft(), node.getRight()]
}
}
/** A string format with `markupsafe.Markup` as the format string. */
class StringFormat extends Markup::InstanceSource, DataFlow::MethodCallNode {
StringFormat() { this.calls(instance(), "format") }
}
/** A %-style string format with `markupsafe.Markup` as the format string. */
class PercentStringFormat extends Markup::InstanceSource, DataFlow::CfgNode {
override BinaryExprNode node;
PercentStringFormat() {
node.getOp() instanceof Mod and
// only when the left operand (the format string) is a Markup
instance().asCfgNode() = node.getLeft()
}
}
/** Taint propagation for `markupsafe.Markup`. */
// NOTE(review): class name is misspelled ("Addtional"); renaming would change
// the names exported by this module, so it is left as-is here.
class AddtionalTaintSteps extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// taint flows from the constructor argument into the Markup instance
nodeTo.(ClassInstantiation).getArg(0) = nodeFrom
}
}
}
/**
 * Any escaping performed via the `markupsafe` package.
 *
 * Concrete subclasses specify where the input and output of the escaping are.
 */
abstract private class MarkupSafeEscape extends Escaping::Range {
override string getKind() {
// TODO: this package claims to escape for both HTML and XML, but for now we don't
// model XML.
result = Escaping::getHtmlKind()
}
}
/** A call to any of the escaping functions in `markupsafe`. */
private class MarkupSafeEscapeCall extends Markup::InstanceSource, MarkupSafeEscape,
DataFlow::CallCfgNode {
MarkupSafeEscapeCall() {
this = API::moduleImport("markupsafe").getMember(["escape", "escape_silent"]).getACall()
or
// the classmethod `Markup.escape(...)`
this = Markup::classRef().getMember("escape").getACall()
or
// `flask.escape` (presumably re-exported from `markupsafe` -- confirm against flask docs)
this = API::moduleImport("flask").getMember("escape").getACall()
}
// the value being escaped is the first argument; the call result is the escaped output
override DataFlow::Node getAnInput() { result = this.getArg(0) }
override DataFlow::Node getOutput() { result = this }
}
/**
 * An escape from string concatenation with a `markupsafe.Markup` involved.
 *
 * Only things that are not already `markupsafe.Markup` instances will be escaped.
 */
private class MarkupEscapeFromStringConcat extends MarkupSafeEscape, Markup::StringConcat {
override DataFlow::Node getAnInput() {
result.asCfgNode() in [node.getLeft(), node.getRight()] and
// operands that are already Markup are concatenated without further escaping
not result = Markup::instance()
}
override DataFlow::Node getOutput() { result = this }
}
/** An escape from string format with `markupsafe.Markup` as the format string. */
private class MarkupEscapeFromStringFormat extends MarkupSafeEscape, Markup::StringFormat {
override DataFlow::Node getAnInput() {
// every positional or keyword argument to `.format(...)` counts as input,
// unless it is already a Markup instance
result in [this.getArg(_), this.getArgByName(_)] and
not result = Markup::instance()
}
override DataFlow::Node getOutput() { result = this }
}
/** An escape from %-style string format with `markupsafe.Markup` as the format string. */
private class MarkupEscapeFromPercentStringFormat extends MarkupSafeEscape,
Markup::PercentStringFormat {
override DataFlow::Node getAnInput() {
// the right operand of `%` holds the value(s) being interpolated
result.asCfgNode() = node.getRight() and
not result = Markup::instance()
}
override DataFlow::Node getOutput() { result = this }
}
}

View File

@@ -48,7 +48,7 @@ module Multidict {
}
/** Gets a reference to an instance of a `MultiDictProxy` class. */
private DataFlow::LocalSourceNode instance(DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or

View File

@@ -1,6 +1,6 @@
/**
* Provides classes modeling security-relevant aspects of the `MySQL-python` PyPI package
* (imported as `MySQLdb`).
* Provides classes modeling security-relevant aspects of the `MySQL-python` and `mysqlclient` PyPI packages
* (both imported as `MySQLdb`) -- the `mysqlclient` package is a fork of `MySQL-python`.
*
* See
* - https://mysqlclient.readthedocs.io/index.html
@@ -15,10 +15,13 @@ private import semmle.python.ApiGraphs
private import semmle.python.frameworks.PEP249
/**
* Provides models for the `MySQL-python` PyPI package (imported as `MySQLdb`).
* Provides models for the `MySQL-python` and `mysqlclient` PyPI packages
* (both imported as `MySQLdb`) -- the `mysqlclient` package is a fork of `MySQL-python`.
*
* See
* - https://mysqlclient.readthedocs.io/index.html
* - https://pypi.org/project/MySQL-python/
* - https://pypi.org/project/mysqlclient/
*/
private module MySQLdb {
// ---------------------------------------------------------------------------

View File

@@ -383,23 +383,74 @@ private module Stdlib {
}
}
/** Gets a reference to the builtin `open` function. */
private API::Node getOpenFunctionRef() {
result = API::builtin("open")
or
// io.open is a special case, since it is an alias for the builtin `open`
result = API::moduleImport("io").getMember("open")
}
/**
* A call to the builtin `open` function.
* See https://docs.python.org/3/library/functions.html#open
*/
private class OpenCall extends FileSystemAccess::Range, DataFlow::CallCfgNode {
OpenCall() {
this = API::builtin("open").getACall()
or
// io.open is a special case, since it is an alias for the builtin `open`
this = API::moduleImport("io").getMember("open").getACall()
}
OpenCall() { this = getOpenFunctionRef().getACall() }
override DataFlow::Node getAPathArgument() {
result in [this.getArg(0), this.getArgByName("file")]
}
}
/** Gets a reference, tracked by `t`, to a file object opened by `openCall`. */
private DataFlow::TypeTrackingNode openFile(DataFlow::TypeTracker t, FileSystemAccess openCall) {
t.start() and
result = openCall and
(
// files opened via the builtin `open`/`io.open`, or via `pathlib.Path(...).open()`
openCall instanceof OpenCall
or
openCall instanceof PathLibOpenCall
)
or
// recursive case: step the type tracker to follow the file object through flow
exists(DataFlow::TypeTracker t2 | result = openFile(t2, openCall).track(t2, t))
}
/** Gets a reference to a file object opened by `openCall`. */
private DataFlow::Node openFile(FileSystemAccess openCall) {
openFile(DataFlow::TypeTracker::end(), openCall).flowsTo(result)
}
/** Gets a reference, tracked by `t`, to the `write` or `writelines` method on a file opened by `openCall`. */
private DataFlow::TypeTrackingNode writeMethodOnOpenFile(
DataFlow::TypeTracker t, FileSystemAccess openCall
) {
t.startInAttr(["write", "writelines"]) and
result = openFile(openCall)
or
// recursive case: step the type tracker to follow the bound method through flow
exists(DataFlow::TypeTracker t2 | result = writeMethodOnOpenFile(t2, openCall).track(t2, t))
}
/** Gets a reference to the `write` or `writelines` method on a file opened by `openCall`. */
private DataFlow::Node writeMethodOnOpenFile(FileSystemAccess openCall) {
writeMethodOnOpenFile(DataFlow::TypeTracker::end(), openCall).flowsTo(result)
}
/** A call to the `write` or `writelines` method on an opened file, such as `open("foo", "w").write(...)`. */
private class WriteCallOnOpenFile extends FileSystemWriteAccess::Range, DataFlow::CallCfgNode {
FileSystemAccess openCall;
WriteCallOnOpenFile() { this.getFunction() = writeMethodOnOpenFile(openCall) }
override DataFlow::Node getAPathArgument() {
// best effort attempt to give the path argument, that was initially given to the
// `open` call.
result = openCall.getAPathArgument()
}
// the written content is the first argument (or a `data` keyword argument --
// NOTE(review): builtin file `write` may take its argument positional-only; confirm)
override DataFlow::Node getADataNode() { result in [this.getArg(0), this.getArgByName("data")] }
}
/**
* An exec statement (only Python 2).
* See https://docs.python.org/2/reference/simple_stmts.html#the-exec-statement.
@@ -620,7 +671,7 @@ private module Stdlib {
API::Node getlistResult() { result = getlistRef().getReturn() }
/** Gets a reference to a list of fields. */
private DataFlow::LocalSourceNode fieldList(DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode fieldList(DataFlow::TypeTracker t) {
t.start() and
// TODO: Should have better handling of subscripting
result.asCfgNode().(SubscriptNode).getObject() = instance().getAUse().asCfgNode()
@@ -636,7 +687,7 @@ private module Stdlib {
}
/** Gets a reference to a field. */
private DataFlow::LocalSourceNode field(DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode field(DataFlow::TypeTracker t) {
t.start() and
// TODO: Should have better handling of subscripting
result.asCfgNode().(SubscriptNode).getObject() =
@@ -832,7 +883,7 @@ private module Stdlib {
}
/** Gets a reference to an instance of the `BaseHTTPRequestHandler` class or any subclass. */
private DataFlow::LocalSourceNode instance(DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
@@ -958,7 +1009,7 @@ private module Stdlib {
* Gets a reference to a `pathlib.Path` object.
* This type tracker makes the monomorphic API use assumption.
*/
private DataFlow::LocalSourceNode pathlibPath(DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode pathlibPath(DataFlow::TypeTracker t) {
// Type construction
t.start() and
result = pathlib().getMember(pathlibPathConstructor()).getACall()
@@ -1001,11 +1052,14 @@ private module Stdlib {
/** Gets a reference to a `pathlib.Path` object. */
DataFlow::LocalSourceNode pathlibPath() { result = pathlibPath(DataFlow::TypeTracker::end()) }
/** A file system access from a `pathlib.Path` method call. */
private class PathlibFileAccess extends FileSystemAccess::Range, DataFlow::CallCfgNode {
DataFlow::AttrRead fileAccess;
string attrbuteName;
PathlibFileAccess() {
fileAccess.getAttributeName() in [
attrbuteName = fileAccess.getAttributeName() and
attrbuteName in [
"stat", "chmod", "exists", "expanduser", "glob", "group", "is_dir", "is_file", "is_mount",
"is_symlink", "is_socket", "is_fifo", "is_block_device", "is_char_device", "iter_dir",
"lchmod", "lstat", "mkdir", "open", "owner", "read_bytes", "read_text", "readlink",
@@ -1019,6 +1073,18 @@ private module Stdlib {
override DataFlow::Node getAPathArgument() { result = fileAccess.getObject() }
}
/** A file system write from a `pathlib.Path` method call (`write_bytes`/`write_text`). */
private class PathlibFileWrites extends PathlibFileAccess, FileSystemWriteAccess::Range {
// NOTE(review): `attrbuteName` is misspelled in the base class; kept for consistency.
PathlibFileWrites() { attrbuteName in ["write_bytes", "write_text"] }
// the written content is the first argument (or the `data` keyword argument)
override DataFlow::Node getADataNode() { result in [this.getArg(0), this.getArgByName("data")] }
}
/** A call to the `open` method on a `pathlib.Path` instance. */
private class PathLibOpenCall extends PathlibFileAccess {
// NOTE(review): `attrbuteName` is misspelled in the base class; kept for consistency.
PathLibOpenCall() { attrbuteName = "open" }
}
/** An additional taint steps for objects of type `pathlib.Path` */
private class PathlibPathTaintStep extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
@@ -1076,118 +1142,179 @@ private module Stdlib {
)
}
}
}
// ---------------------------------------------------------------------------
// hashlib
// ---------------------------------------------------------------------------
/** Gets a call to `hashlib.new` with `algorithmName` as the first argument. */
private DataFlow::CallCfgNode hashlibNewCall(string algorithmName) {
exists(DataFlow::Node nameArg |
result = API::moduleImport("hashlib").getMember("new").getACall() and
nameArg in [result.getArg(0), result.getArgByName("name")] and
exists(StrConst str |
nameArg.getALocalSource() = DataFlow::exprNode(str) and
algorithmName = str.getText()
)
)
}
/** Gets a reference to the result of calling `hashlib.new` with `algorithmName` as the first argument. */
private DataFlow::LocalSourceNode hashlibNewResult(DataFlow::TypeTracker t, string algorithmName) {
t.start() and
result = hashlibNewCall(algorithmName)
or
exists(DataFlow::TypeTracker t2 | result = hashlibNewResult(t2, algorithmName).track(t2, t))
}
/** Gets a reference to the result of calling `hashlib.new` with `algorithmName` as the first argument. */
DataFlow::Node hashlibNewResult(string algorithmName) {
hashlibNewResult(DataFlow::TypeTracker::end(), algorithmName).flowsTo(result)
}
/**
* A hashing operation by supplying initial data when calling the `hashlib.new` function.
*/
class HashlibNewCall extends Cryptography::CryptographicOperation::Range, DataFlow::CallCfgNode {
string hashName;
HashlibNewCall() {
this = hashlibNewCall(hashName) and
exists([this.getArg(1), this.getArgByName("data")])
}
override Cryptography::CryptographicAlgorithm getAlgorithm() { result.matchesName(hashName) }
override DataFlow::Node getAnInput() { result in [this.getArg(1), this.getArgByName("data")] }
}
/**
* A hashing operation by using the `update` method on the result of calling the `hashlib.new` function.
*/
class HashlibNewUpdateCall extends Cryptography::CryptographicOperation::Range,
DataFlow::CallCfgNode {
string hashName;
HashlibNewUpdateCall() {
exists(DataFlow::AttrRead attr |
attr.getObject() = hashlibNewResult(hashName) and
this.getFunction() = attr and
attr.getAttributeName() = "update"
// ---------------------------------------------------------------------------
// hashlib
// ---------------------------------------------------------------------------
/** Gets a call to `hashlib.new` with `algorithmName` as the first argument. */
private DataFlow::CallCfgNode hashlibNewCall(string algorithmName) {
exists(DataFlow::Node nameArg |
result = API::moduleImport("hashlib").getMember("new").getACall() and
nameArg in [result.getArg(0), result.getArgByName("name")] and
exists(StrConst str |
nameArg.getALocalSource() = DataFlow::exprNode(str) and
algorithmName = str.getText()
)
)
}
override Cryptography::CryptographicAlgorithm getAlgorithm() { result.matchesName(hashName) }
override DataFlow::Node getAnInput() { result = this.getArg(0) }
}
/**
* A hashing operation from the `hashlib` package using one of the predefined classes
* (such as `hashlib.md5`). `hashlib.new` is not included, since it is handled by
* `HashlibNewCall` and `HashlibNewUpdateCall`.
*/
abstract class HashlibGenericHashOperation extends Cryptography::CryptographicOperation::Range,
DataFlow::CallCfgNode {
string hashName;
API::Node hashClass;
bindingset[this]
HashlibGenericHashOperation() {
not hashName = "new" and
hashClass = API::moduleImport("hashlib").getMember(hashName)
}
override Cryptography::CryptographicAlgorithm getAlgorithm() { result.matchesName(hashName) }
}
/**
* A hashing operation from the `hashlib` package using one of the predefined classes
* (such as `hashlib.md5`), by calling its `update` method.
*/
class HashlibHashClassUpdateCall extends HashlibGenericHashOperation {
HashlibHashClassUpdateCall() { this = hashClass.getReturn().getMember("update").getACall() }
override DataFlow::Node getAnInput() { result = this.getArg(0) }
}
/**
* A hashing operation from the `hashlib` package using one of the predefined classes
* (such as `hashlib.md5`), by passing data to when instantiating the class.
*/
class HashlibDataPassedToHashClass extends HashlibGenericHashOperation {
HashlibDataPassedToHashClass() {
// we only want to model calls to classes such as `hashlib.md5()` if initial data
// is passed as an argument
this = hashClass.getACall() and
exists([this.getArg(0), this.getArgByName("string")])
}
override DataFlow::Node getAnInput() {
result = this.getArg(0)
/** Gets a reference to the result of calling `hashlib.new` with `algorithmName` as the first argument. */
private DataFlow::TypeTrackingNode hashlibNewResult(DataFlow::TypeTracker t, string algorithmName) {
t.start() and
result = hashlibNewCall(algorithmName)
or
// in Python 3.9, you are allowed to use `hashlib.md5(string=<bytes-like>)`.
result = this.getArgByName("string")
exists(DataFlow::TypeTracker t2 | result = hashlibNewResult(t2, algorithmName).track(t2, t))
}
/** Gets a reference to the result of calling `hashlib.new` with `algorithmName` as the first argument. */
DataFlow::Node hashlibNewResult(string algorithmName) {
hashlibNewResult(DataFlow::TypeTracker::end(), algorithmName).flowsTo(result)
}
/**
* A hashing operation by supplying initial data when calling the `hashlib.new` function.
*/
class HashlibNewCall extends Cryptography::CryptographicOperation::Range, DataFlow::CallCfgNode {
string hashName;
HashlibNewCall() {
this = hashlibNewCall(hashName) and
exists([this.getArg(1), this.getArgByName("data")])
}
override Cryptography::CryptographicAlgorithm getAlgorithm() { result.matchesName(hashName) }
override DataFlow::Node getAnInput() { result in [this.getArg(1), this.getArgByName("data")] }
}
/**
* A hashing operation by using the `update` method on the result of calling the `hashlib.new` function.
*/
class HashlibNewUpdateCall extends Cryptography::CryptographicOperation::Range,
DataFlow::CallCfgNode {
string hashName;
HashlibNewUpdateCall() {
exists(DataFlow::AttrRead attr |
attr.getObject() = hashlibNewResult(hashName) and
this.getFunction() = attr and
attr.getAttributeName() = "update"
)
}
override Cryptography::CryptographicAlgorithm getAlgorithm() { result.matchesName(hashName) }
override DataFlow::Node getAnInput() { result = this.getArg(0) }
}
/** Helper predicate for the `HashLibGenericHashOperation` charpred, to prevent a bad join order. */
pragma[nomagic]
private API::Node hashlibMember(string hashName) {
result = API::moduleImport("hashlib").getMember(hashName) and
hashName != "new"
}
/**
* A hashing operation from the `hashlib` package using one of the predefined classes
* (such as `hashlib.md5`). `hashlib.new` is not included, since it is handled by
* `HashlibNewCall` and `HashlibNewUpdateCall`.
*/
abstract class HashlibGenericHashOperation extends Cryptography::CryptographicOperation::Range,
DataFlow::CallCfgNode {
string hashName;
API::Node hashClass;
bindingset[this]
HashlibGenericHashOperation() { hashClass = hashlibMember(hashName) }
override Cryptography::CryptographicAlgorithm getAlgorithm() { result.matchesName(hashName) }
}
/**
* A hashing operation from the `hashlib` package using one of the predefined classes
* (such as `hashlib.md5`), by calling its `update` method.
*/
class HashlibHashClassUpdateCall extends HashlibGenericHashOperation {
HashlibHashClassUpdateCall() { this = hashClass.getReturn().getMember("update").getACall() }
override DataFlow::Node getAnInput() { result = this.getArg(0) }
}
/**
* A hashing operation from the `hashlib` package using one of the predefined classes
* (such as `hashlib.md5`), by passing data to when instantiating the class.
*/
class HashlibDataPassedToHashClass extends HashlibGenericHashOperation {
HashlibDataPassedToHashClass() {
// we only want to model calls to classes such as `hashlib.md5()` if initial data
// is passed as an argument
this = hashClass.getACall() and
exists([this.getArg(0), this.getArgByName("string")])
}
override DataFlow::Node getAnInput() {
result = this.getArg(0)
or
// in Python 3.9, you are allowed to use `hashlib.md5(string=<bytes-like>)`.
result = this.getArgByName("string")
}
}
// ---------------------------------------------------------------------------
// logging
// ---------------------------------------------------------------------------
/**
 * Provides models for the `logging.Logger` class and subclasses.
 *
 * See https://docs.python.org/3.9/library/logging.html#logging.Logger.
 */
module Logger {
/** Gets a reference to the `logging.Logger` class or any subclass. */
API::Node subclassRef() {
result = API::moduleImport("logging").getMember("Logger").getASubclass*()
}
/** Gets a reference to an instance of `logging.Logger` or any subclass. */
API::Node instance() {
// instantiating `Logger` (or a subclass) directly
result = subclassRef().getReturn()
or
// the root logger, exposed as `logging.root`
result = API::moduleImport("logging").getMember("root")
or
// loggers obtained via `logging.getLogger(...)`
result = API::moduleImport("logging").getMember("getLogger").getReturn()
}
}
/**
 * A call to one of the logging methods from `logging` or on a `logging.Logger`
 * subclass.
 *
 * See:
 * - https://docs.python.org/3.9/library/logging.html#logging.debug
 * - https://docs.python.org/3.9/library/logging.html#logging.Logger.debug
 */
class LoggerLogCall extends Logging::Range, DataFlow::CallCfgNode {
/** The argument-index where the message is passed (0, except for `log`, whose arg 0 is the level). */
int msgIndex;
LoggerLogCall() {
exists(string method |
method in ["critical", "fatal", "error", "warning", "warn", "info", "debug", "exception"] and
msgIndex = 0
or
// `log(level, msg, ...)` takes the level first, so the message is arg 1
method = "log" and
msgIndex = 1
|
// method calls on a logger instance
this = Logger::instance().getMember(method).getACall()
or
// module-level convenience functions such as `logging.info(...)`
this = API::moduleImport("logging").getMember(method).getACall()
)
}
override DataFlow::Node getAnInput() {
result = this.getArgByName("msg")
or
// the message itself and every positional argument after it (format arguments)
result = this.getArg(any(int i | i >= msgIndex))
}
}
}

View File

@@ -54,7 +54,7 @@ private module Tornado {
}
/** Gets a reference to this class. */
private DataFlow::LocalSourceNode getARef(DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode getARef(DataFlow::TypeTracker t) {
t.start() and
result.asExpr().(ClassExpr) = this.getParent()
or
@@ -87,7 +87,7 @@ private module Tornado {
}
/** Gets a reference to an instance of the `tornado.web.RequestHandler` class or any subclass. */
private DataFlow::LocalSourceNode instance(DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
@@ -98,7 +98,7 @@ private module Tornado {
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
/** Gets a reference to one of the methods `get_argument`, `get_body_argument`, `get_query_argument`. */
private DataFlow::LocalSourceNode argumentMethod(DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode argumentMethod(DataFlow::TypeTracker t) {
t.startInAttr(["get_argument", "get_body_argument", "get_query_argument"]) and
result = instance()
or
@@ -111,7 +111,7 @@ private module Tornado {
}
/** Gets a reference to one of the methods `get_arguments`, `get_body_arguments`, `get_query_arguments`. */
private DataFlow::LocalSourceNode argumentsMethod(DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode argumentsMethod(DataFlow::TypeTracker t) {
t.startInAttr(["get_arguments", "get_body_arguments", "get_query_arguments"]) and
result = instance()
or
@@ -124,7 +124,7 @@ private module Tornado {
}
/** Gets a reference the `redirect` method. */
private DataFlow::LocalSourceNode redirectMethod(DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode redirectMethod(DataFlow::TypeTracker t) {
t.startInAttr("redirect") and
result = instance()
or
@@ -137,7 +137,7 @@ private module Tornado {
}
/** Gets a reference to the `write` method. */
private DataFlow::LocalSourceNode writeMethod(DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode writeMethod(DataFlow::TypeTracker t) {
t.startInAttr("write") and
result = instance()
or
@@ -207,7 +207,7 @@ private module Tornado {
}
/** Gets a reference to an instance of `tornado.web.Application`. */
private DataFlow::LocalSourceNode instance(DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
@@ -218,7 +218,7 @@ private module Tornado {
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
/** Gets a reference to the `add_handlers` method. */
private DataFlow::LocalSourceNode add_handlers(DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode add_handlers(DataFlow::TypeTracker t) {
t.startInAttr("add_handlers") and
result = instance()
or
@@ -264,7 +264,7 @@ private module Tornado {
}
/** Gets a reference to an instance of `tornado.httputil.HttpServerRequest`. */
private DataFlow::LocalSourceNode instance(DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
@@ -275,7 +275,7 @@ private module Tornado {
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
/** Gets a reference to the `full_url` method. */
private DataFlow::LocalSourceNode full_url(DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode full_url(DataFlow::TypeTracker t) {
t.startInAttr("full_url") and
result = instance()
or
@@ -422,7 +422,7 @@ private module Tornado {
/**
* A call to the `tornado.web.RequestHandler.redirect` method.
*
* See https://www.tornadoweb.org/en/stable/web.html?highlight=write#tornado.web.RequestHandler.redirect
* See https://www.tornadoweb.org/en/stable/web.html#tornado.web.RequestHandler.redirect
*/
private class TornadoRequestHandlerRedirectCall extends HTTP::Server::HttpRedirectResponse::Range,
DataFlow::CallCfgNode {
@@ -444,7 +444,7 @@ private module Tornado {
/**
* A call to the `tornado.web.RequestHandler.write` method.
*
* See https://www.tornadoweb.org/en/stable/web.html?highlight=write#tornado.web.RequestHandler.write
* See https://www.tornadoweb.org/en/stable/web.html#tornado.web.RequestHandler.write
*/
private class TornadoRequestHandlerWriteCall extends HTTP::Server::HttpResponse::Range,
DataFlow::CallCfgNode {
@@ -458,4 +458,22 @@ private module Tornado {
override DataFlow::Node getMimetypeOrContentTypeArg() { none() }
}
/**
* A call to the `tornado.web.RequestHandler.set_cookie` method.
*
* See https://www.tornadoweb.org/en/stable/web.html#tornado.web.RequestHandler.set_cookie
*/
class TornadoRequestHandlerSetCookieCall extends HTTP::Server::CookieWrite::Range,
DataFlow::MethodCallNode {
TornadoRequestHandlerSetCookieCall() {
this.calls(tornado::web::RequestHandler::instance(), "set_cookie")
}
override DataFlow::Node getHeaderArg() { none() }
override DataFlow::Node getNameArg() { result in [this.getArg(0), this.getArgByName("name")] }
override DataFlow::Node getValueArg() { result in [this.getArg(1), this.getArgByName("value")] }
}
}

View File

@@ -101,7 +101,7 @@ private module Twisted {
abstract class InstanceSource extends DataFlow::LocalSourceNode { }
/** Gets a reference to an instance of `twisted.web.server.Request`. */
private DataFlow::LocalSourceNode instance(DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
@@ -247,4 +247,42 @@ private module Twisted {
override string getMimetypeDefault() { result = "text/html" }
}
/**
* A call to the `addCookie` function on a twisted request.
*
* See https://twistedmatrix.com/documents/21.2.0/api/twisted.web.http.Request.html#addCookie
*/
class TwistedRequestAddCookieCall extends HTTP::Server::CookieWrite::Range,
DataFlow::MethodCallNode {
TwistedRequestAddCookieCall() { this.calls(Twisted::Request::instance(), "addCookie") }
override DataFlow::Node getHeaderArg() { none() }
override DataFlow::Node getNameArg() { result in [this.getArg(0), this.getArgByName("k")] }
override DataFlow::Node getValueArg() { result in [this.getArg(1), this.getArgByName("v")] }
}
/**
* A call to `append` on the `cookies` attribute of a twisted request.
*
* See https://twistedmatrix.com/documents/21.2.0/api/twisted.web.http.Request.html#cookies
*/
class TwistedRequestCookiesAppendCall extends HTTP::Server::CookieWrite::Range,
DataFlow::MethodCallNode {
TwistedRequestCookiesAppendCall() {
exists(DataFlow::AttrRead cookiesLookup |
cookiesLookup.getObject() = Twisted::Request::instance() and
cookiesLookup.getAttributeName() = "cookies" and
this.calls(cookiesLookup, "append")
)
}
override DataFlow::Node getHeaderArg() { result = this.getArg(0) }
override DataFlow::Node getNameArg() { none() }
override DataFlow::Node getValueArg() { none() }
}
}

View File

@@ -40,7 +40,7 @@ module Yarl {
}
/** Gets a reference to an instance of `yarl.URL`. */
private DataFlow::LocalSourceNode instance(DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or

View File

@@ -44,7 +44,7 @@ private Expr lastDecoratorCall(Function func) {
* print(inst.my_method)
* ```
*/
private DataFlow::LocalSourceNode poorMansFunctionTracker(DataFlow::TypeTracker t, Function func) {
private DataFlow::TypeTrackingNode poorMansFunctionTracker(DataFlow::TypeTracker t, Function func) {
t.start() and
(
not exists(func.getADecorator()) and

View File

@@ -20,7 +20,7 @@ abstract class SelfRefMixin extends Class {
* Note: TODO: This doesn't take MRO into account
* Note: TODO: This doesn't take staticmethod/classmethod into account
*/
private DataFlow::LocalSourceNode getASelfRef(DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode getASelfRef(DataFlow::TypeTracker t) {
t.start() and
result.(DataFlow::ParameterNode).getParameter() = this.getAMethod().getArg(0)
or

View File

@@ -71,7 +71,7 @@ private string canonical_name(API::Node flag) {
* A type tracker for regular expression flag names. Holds if the result is a node that may refer
* to the `re` flag with the canonical name `flag_name`
*/
private DataFlow::LocalSourceNode re_flag_tracker(string flag_name, DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode re_flag_tracker(string flag_name, DataFlow::TypeTracker t) {
t.start() and
exists(API::Node flag | flag_name = canonical_name(flag) and result = flag.getAUse())
or
@@ -121,8 +121,82 @@ deprecated string mode_from_mode_object(Value obj) {
abstract class RegexString extends Expr {
RegexString() { (this instanceof Bytes or this instanceof Unicode) }
/**
* Helper predicate for `char_set_start(int start, int end)`.
*
* In order to identify left brackets ('[') which actually start a character class,
* we perform a left to right scan of the string.
*
* To avoid negative recursion we return a boolean. See `escaping`,
* the helper for `escapingChar`, for a clean use of this pattern.
*
* result is true for those start chars that actually mark a start of a char set.
*/
boolean char_set_start(int pos) {
exists(int index |
// is opening bracket
this.char_set_delimiter(index, pos) = true and
(
// if this is the first bracket, `pos` starts a char set
index = 1 and result = true
or
// if the previous char set delimiter was not a closing bracket, `pos` does
// not start a char set. This is needed to handle cases such as `[[]` (a
// char set that matches the `[` char)
index > 1 and
not this.char_set_delimiter(index - 1, _) = false and
result = false
or
// special handling of cases such as `[][]` (the character-set of the characters `]` and `[`).
exists(int prev_closing_bracket_pos |
// previous bracket is a closing bracket
this.char_set_delimiter(index - 1, prev_closing_bracket_pos) = false and
if
// check if the character that comes before the previous closing bracket
// is an opening bracket (taking `^` into account)
exists(int pos_before_prev_closing_bracket |
if this.getChar(prev_closing_bracket_pos - 1) = "^"
then pos_before_prev_closing_bracket = prev_closing_bracket_pos - 2
else pos_before_prev_closing_bracket = prev_closing_bracket_pos - 1
|
this.char_set_delimiter(index - 2, pos_before_prev_closing_bracket) = true
)
then
// brackets without anything in between is not valid character ranges, so
// the first closing bracket in `[]]` and `[^]]` does not count,
//
// and we should _not_ mark the second opening bracket in `[][]` and `[^][]`
// as starting a new char set. ^ ^
exists(int pos_before_prev_closing_bracket |
this.char_set_delimiter(index - 2, pos_before_prev_closing_bracket) = true
|
result = this.char_set_start(pos_before_prev_closing_bracket).booleanNot()
)
else
// if not, `pos` does in fact mark a real start of a character range
result = true
)
)
)
}
/**
* Helper predicate for chars that could be character-set delimiters.
* Holds if the (non-escaped) char at `pos` in the string, is the (one-based) `index` occurrence of a bracket (`[` or `]`) in the string.
* Result if `true` is the char is `[`, and `false` if the char is `]`.
*/
boolean char_set_delimiter(int index, int pos) {
pos = rank[index](int p | this.nonEscapedCharAt(p) = "[" or this.nonEscapedCharAt(p) = "]") and
(
this.nonEscapedCharAt(pos) = "[" and result = true
or
this.nonEscapedCharAt(pos) = "]" and result = false
)
}
/** Hold is a character set starts between `start` and `end`. */
predicate char_set_start(int start, int end) {
this.nonEscapedCharAt(start) = "[" and
this.char_set_start(start) = true and
(
this.getChar(start + 1) = "^" and end = start + 2
or
@@ -143,8 +217,99 @@ abstract class RegexString extends Expr {
)
}
/** An indexed version of `char_set_token/3` */
private predicate char_set_token(int charset_start, int index, int token_start, int token_end) {
token_start =
rank[index](int start, int end | this.char_set_token(charset_start, start, end) | start) and
this.char_set_token(charset_start, token_start, token_end)
}
/** Either a char or a - */
private predicate char_set_token(int charset_start, int start, int end) {
this.char_set_start(charset_start, start) and
(
this.escapedCharacter(start, end)
or
exists(this.nonEscapedCharAt(start)) and end = start + 1
)
or
this.char_set_token(charset_start, _, start) and
(
this.escapedCharacter(start, end)
or
exists(this.nonEscapedCharAt(start)) and
end = start + 1 and
not this.getChar(start) = "]"
)
}
/**
* Holds if the character set starting at `charset_start` contains either
* a character or a range found between `start` and `end`.
*/
predicate char_set_child(int charset_start, int start, int end) {
this.char_set_token(charset_start, start, end) and
not exists(int range_start, int range_end |
this.charRange(charset_start, range_start, _, _, range_end) and
range_start <= start and
range_end >= end
)
or
this.charRange(charset_start, start, _, _, end)
}
/**
* Holds if the character set starting at `charset_start` contains a character range
* with lower bound found between `start` and `lower_end`
* and upper bound found between `upper_start` and `end`.
*/
predicate charRange(int charset_start, int start, int lower_end, int upper_start, int end) {
exists(int index |
this.charRangeEnd(charset_start, index) = true and
this.char_set_token(charset_start, index - 2, start, lower_end) and
this.char_set_token(charset_start, index, upper_start, end)
)
}
/**
* Helper predicate for `charRange`.
* We can determine where character ranges end by a left to right sweep.
*
* To avoid negative recursion we return a boolean. See `escaping`,
* the helper for `escapingChar`, for a clean use of this pattern.
*/
private boolean charRangeEnd(int charset_start, int index) {
this.char_set_token(charset_start, index, _, _) and
(
index in [1, 2] and result = false
or
index > 2 and
exists(int connector_start |
this.char_set_token(charset_start, index - 1, connector_start, _) and
this.nonEscapedCharAt(connector_start) = "-" and
result =
this.charRangeEnd(charset_start, index - 2)
.booleanNot()
.booleanAnd(this.charRangeEnd(charset_start, index - 1).booleanNot())
)
or
not exists(int connector_start |
this.char_set_token(charset_start, index - 1, connector_start, _) and
this.nonEscapedCharAt(connector_start) = "-"
) and
result = false
)
}
/** Holds if the character at `pos` is a "\" that is actually escaping what comes after. */
predicate escapingChar(int pos) { this.escaping(pos) = true }
/**
* Helper predicate for `escapingChar`.
* In order to avoid negative recusrion, we return a boolean.
* This way, we can refer to `escaping(pos - 1).booleanNot()`
* rather than to a negated version of `escaping(pos)`.
*/
private boolean escaping(int pos) {
pos = -1 and result = false
or
@@ -164,14 +329,14 @@ abstract class RegexString extends Expr {
string nonEscapedCharAt(int i) {
result = this.getText().charAt(i) and
not this.escapingChar(i - 1)
not exists(int x, int y | this.escapedCharacter(x, y) and i in [x .. y - 1])
}
private predicate isOptionDivider(int i) { this.nonEscapedCharAt(i) = "|" }
private predicate isGroupEnd(int i) { this.nonEscapedCharAt(i) = ")" }
private predicate isGroupEnd(int i) { this.nonEscapedCharAt(i) = ")" and not this.inCharSet(i) }
private predicate isGroupStart(int i) { this.nonEscapedCharAt(i) = "(" }
private predicate isGroupStart(int i) { this.nonEscapedCharAt(i) = "(" and not this.inCharSet(i) }
predicate failedToParse(int i) {
exists(this.getChar(i)) and
@@ -192,16 +357,25 @@ abstract class RegexString extends Expr {
not exists(int i | start + 2 < i and i < end - 1 | this.getChar(i) = "}")
}
private predicate escapedCharacter(int start, int end) {
/**
* Holds if an escaped character is found between `start` and `end`.
* Escaped characters include hex values, octal values and named escapes,
* but excludes backreferences.
*/
predicate escapedCharacter(int start, int end) {
this.escapingChar(start) and
not exists(this.getText().substring(start + 1, end + 1).toInt()) and
not this.numbered_backreference(start, _, _) and
(
// hex value \xhh
this.getChar(start + 1) = "x" and end = start + 4
or
// octal value \ooo
end in [start + 2 .. start + 4] and
exists(this.getText().substring(start + 1, end).toInt())
this.getText().substring(start + 1, end).toInt() >= 0 and
not (
end < start + 4 and
exists(this.getText().substring(start + 1, end + 1).toInt())
)
or
// 16-bit hex value \uhhhh
this.getChar(start + 1) = "u" and end = start + 6
@@ -213,18 +387,19 @@ abstract class RegexString extends Expr {
or
// escape not handled above, update when adding a new case
not this.getChar(start + 1) in ["x", "u", "U", "N"] and
not exists(this.getChar(start + 1).toInt()) and
end = start + 2
)
}
private predicate inCharSet(int index) {
/** Holds if `index` is inside a character set. */
predicate inCharSet(int index) {
exists(int x, int y | this.charSet(x, y) and index in [x + 1 .. y - 2])
}
/*
/**
* 'simple' characters are any that don't alter the parsing of the regex.
*/
private predicate simpleCharacter(int start, int end) {
end = start + 1 and
not this.charSet(start, _) and
@@ -238,7 +413,7 @@ abstract class RegexString extends Expr {
or
start = z - 2
or
start > y and start < z - 2 and not c = "-"
start > y and start < z - 2 and not this.charRange(_, _, start, end, _)
)
or
not this.inCharSet(start) and
@@ -246,7 +421,7 @@ abstract class RegexString extends Expr {
not c = "[" and
not c = ")" and
not c = "|" and
not this.qualifier(start, _, _)
not this.qualifier(start, _, _, _)
)
}
@@ -257,7 +432,8 @@ abstract class RegexString extends Expr {
or
this.escapedCharacter(start, end)
) and
not exists(int x, int y | this.group_start(x, y) and x <= start and y >= end)
not exists(int x, int y | this.group_start(x, y) and x <= start and y >= end) and
not exists(int x, int y | this.backreference(x, y) and x <= start and y >= end)
}
predicate normalCharacter(int start, int end) {
@@ -302,12 +478,13 @@ abstract class RegexString extends Expr {
or
this.negativeAssertionGroup(start, end)
or
positiveLookaheadAssertionGroup(start, end)
this.positiveLookaheadAssertionGroup(start, end)
or
this.positiveLookbehindAssertionGroup(start, end)
}
private predicate emptyGroup(int start, int end) {
/** Holds if an empty group is found between `start` and `end`. */
predicate emptyGroup(int start, int end) {
exists(int endm1 | end = endm1 + 1 |
this.group_start(start, endm1) and
this.isGroupEnd(endm1)
@@ -340,13 +517,29 @@ abstract class RegexString extends Expr {
)
}
private predicate positiveLookaheadAssertionGroup(int start, int end) {
/** Holds if a negative lookahead is found between `start` and `end` */
predicate negativeLookaheadAssertionGroup(int start, int end) {
exists(int in_start | this.negative_lookahead_assertion_start(start, in_start) |
this.groupContents(start, end, in_start, _)
)
}
/** Holds if a negative lookbehind is found between `start` and `end` */
predicate negativeLookbehindAssertionGroup(int start, int end) {
exists(int in_start | this.negative_lookbehind_assertion_start(start, in_start) |
this.groupContents(start, end, in_start, _)
)
}
/** Holds if a positive lookahead is found between `start` and `end` */
predicate positiveLookaheadAssertionGroup(int start, int end) {
exists(int in_start | this.lookahead_assertion_start(start, in_start) |
this.groupContents(start, end, in_start, _)
)
}
private predicate positiveLookbehindAssertionGroup(int start, int end) {
/** Holds if a positive lookbehind is found between `start` and `end` */
predicate positiveLookbehindAssertionGroup(int start, int end) {
exists(int in_start | this.lookbehind_assertion_start(start, in_start) |
this.groupContents(start, end, in_start, _)
)
@@ -405,6 +598,8 @@ abstract class RegexString extends Expr {
this.getChar(start + 1) = "?" and
this.getChar(start + 2) = "P" and
this.getChar(start + 3) = "=" and
// Should this be looking for unescaped ")"?
// TODO: test this
end = min(int i | i > start + 4 and this.getChar(i) = "?")
}
@@ -495,6 +690,7 @@ abstract class RegexString extends Expr {
private predicate numbered_backreference(int start, int end, int value) {
this.escapingChar(start) and
not this.getChar(start + 1) = "0" and
exists(string text, string svalue, int len |
end = start + len and
text = this.getText() and
@@ -503,7 +699,7 @@ abstract class RegexString extends Expr {
svalue = text.substring(start + 1, start + len) and
value = svalue.toInt() and
not exists(text.substring(start + 1, start + len + 1).toInt()) and
value != 0
value > 0
)
}
@@ -527,43 +723,55 @@ abstract class RegexString extends Expr {
this.group(start, end)
or
this.charSet(start, end)
or
this.backreference(start, end)
}
private predicate qualifier(int start, int end, boolean maybe_empty) {
this.short_qualifier(start, end, maybe_empty) and not this.getChar(end) = "?"
private predicate qualifier(int start, int end, boolean maybe_empty, boolean may_repeat_forever) {
this.short_qualifier(start, end, maybe_empty, may_repeat_forever) and
not this.getChar(end) = "?"
or
exists(int short_end | this.short_qualifier(start, short_end, maybe_empty) |
exists(int short_end | this.short_qualifier(start, short_end, maybe_empty, may_repeat_forever) |
if this.getChar(short_end) = "?" then end = short_end + 1 else end = short_end
)
}
private predicate short_qualifier(int start, int end, boolean maybe_empty) {
private predicate short_qualifier(
int start, int end, boolean maybe_empty, boolean may_repeat_forever
) {
(
this.getChar(start) = "+" and maybe_empty = false
this.getChar(start) = "+" and maybe_empty = false and may_repeat_forever = true
or
this.getChar(start) = "*" and maybe_empty = true
this.getChar(start) = "*" and maybe_empty = true and may_repeat_forever = true
or
this.getChar(start) = "?" and maybe_empty = true
this.getChar(start) = "?" and maybe_empty = true and may_repeat_forever = false
) and
end = start + 1
or
exists(int endin | end = endin + 1 |
this.getChar(start) = "{" and
this.getChar(endin) = "}" and
end > start and
exists(string multiples | multiples = this.getText().substring(start + 1, endin) |
multiples.regexpMatch("0+") and maybe_empty = true
or
multiples.regexpMatch("0*,[0-9]*") and maybe_empty = true
or
multiples.regexpMatch("0*[1-9][0-9]*") and maybe_empty = false
or
multiples.regexpMatch("0*[1-9][0-9]*,[0-9]*") and maybe_empty = false
) and
not exists(int mid |
this.getChar(mid) = "}" and
mid > start and
mid < endin
exists(string lower, string upper |
this.multiples(start, end, lower, upper) and
(if lower = "" or lower.toInt() = 0 then maybe_empty = true else maybe_empty = false) and
if upper = "" then may_repeat_forever = true else may_repeat_forever = false
)
}
/**
* Holds if a repetition quantifier is found between `start` and `end`,
* with the given lower and upper bounds. If a bound is omitted, the corresponding
* string is empty.
*/
predicate multiples(int start, int end, string lower, string upper) {
this.getChar(start) = "{" and
this.getChar(end - 1) = "}" and
exists(string inner | inner = this.getText().substring(start + 1, end - 1) |
inner.regexpMatch("[0-9]+") and
lower = inner and
upper = lower
or
inner.regexpMatch("[0-9]*,[0-9]*") and
exists(int commaIndex | commaIndex = inner.indexOf(",") |
lower = inner.prefix(commaIndex) and
upper = inner.suffix(commaIndex + 1)
)
)
}
@@ -572,19 +780,29 @@ abstract class RegexString extends Expr {
* Whether the text in the range start,end is a qualified item, where item is a character,
* a character set or a group.
*/
predicate qualifiedItem(int start, int end, boolean maybe_empty) {
this.qualifiedPart(start, _, end, maybe_empty)
predicate qualifiedItem(int start, int end, boolean maybe_empty, boolean may_repeat_forever) {
this.qualifiedPart(start, _, end, maybe_empty, may_repeat_forever)
}
private predicate qualifiedPart(int start, int part_end, int end, boolean maybe_empty) {
/**
* Holds if a qualified part is found between `start` and `part_end` and the qualifier is
* found between `part_end` and `end`.
*
* `maybe_empty` is true if the part is optional.
* `may_repeat_forever` is true if the part may be repeated unboundedly.
*/
predicate qualifiedPart(
int start, int part_end, int end, boolean maybe_empty, boolean may_repeat_forever
) {
this.baseItem(start, part_end) and
this.qualifier(part_end, end, maybe_empty)
this.qualifier(part_end, end, maybe_empty, may_repeat_forever)
}
private predicate item(int start, int end) {
this.qualifiedItem(start, end, _)
/** Holds if the range `start`, `end` contains a character, a quantifier, a character set or a group. */
predicate item(int start, int end) {
this.qualifiedItem(start, end, _, _)
or
this.baseItem(start, end) and not this.qualifier(end, _, _)
this.baseItem(start, end) and not this.qualifier(end, _, _, _)
}
private predicate subsequence(int start, int end) {
@@ -607,7 +825,7 @@ abstract class RegexString extends Expr {
*/
predicate sequence(int start, int end) {
this.sequenceOrQualified(start, end) and
not this.qualifiedItem(start, end, _)
not this.qualifiedItem(start, end, _, _)
}
private predicate sequenceOrQualified(int start, int end) {
@@ -618,7 +836,8 @@ abstract class RegexString extends Expr {
private predicate item_start(int start) {
this.character(start, _) or
this.isGroupStart(start) or
this.charSet(start, _)
this.charSet(start, _) or
this.backreference(start, _)
}
private predicate item_end(int end) {
@@ -628,7 +847,7 @@ abstract class RegexString extends Expr {
or
this.charSet(_, end)
or
this.qualifier(_, end, _)
this.qualifier(_, end, _, _)
}
private predicate top_level(int start, int end) {
@@ -680,14 +899,14 @@ abstract class RegexString extends Expr {
or
exists(int x | this.firstPart(x, end) |
this.emptyMatchAtStartGroup(x, start) or
this.qualifiedItem(x, start, true) or
this.qualifiedItem(x, start, true, _) or
this.specialCharacter(x, start, "^")
)
or
exists(int y | this.firstPart(start, y) |
this.item(start, end)
or
this.qualifiedPart(start, end, y, _)
this.qualifiedPart(start, end, y, _, _)
)
or
exists(int x, int y | this.firstPart(x, y) |
@@ -704,7 +923,7 @@ abstract class RegexString extends Expr {
exists(int y | this.lastPart(start, y) |
this.emptyMatchAtEndGroup(end, y)
or
this.qualifiedItem(end, y, true)
this.qualifiedItem(end, y, true, _)
or
this.specialCharacter(end, y, "$")
or
@@ -716,7 +935,7 @@ abstract class RegexString extends Expr {
this.item(start, end)
)
or
exists(int y | this.lastPart(start, y) | this.qualifiedPart(start, end, y, _))
exists(int y | this.lastPart(start, y) | this.qualifiedPart(start, end, y, _, _))
or
exists(int x, int y | this.lastPart(x, y) |
this.groupContents(x, y, start, end)
@@ -733,7 +952,7 @@ abstract class RegexString extends Expr {
(
this.character(start, end)
or
this.qualifiedItem(start, end, _)
this.qualifiedItem(start, end, _, _)
or
this.charSet(start, end)
) and
@@ -748,7 +967,7 @@ abstract class RegexString extends Expr {
(
this.character(start, end)
or
this.qualifiedItem(start, end, _)
this.qualifiedItem(start, end, _, _)
or
this.charSet(start, end)
) and

View File

@@ -0,0 +1,39 @@
/**
* Provides a taint-tracking configuration for "Clear-text logging of sensitive information".
*
* Note, for performance reasons: only import this file if
* `CleartextLogging::Configuration` is needed, otherwise
* `CleartextLoggingCustomizations` should be imported instead.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
private import semmle.python.dataflow.new.SensitiveDataSources
/**
* Provides a taint-tracking configuration for detecting "Clear-text logging of sensitive information".
*/
module CleartextLogging {
import CleartextLoggingCustomizations::CleartextLogging
/**
* A taint-tracking configuration for detecting "Clear-text logging of sensitive information".
*/
class Configuration extends TaintTracking::Configuration {
Configuration() { this = "CleartextLogging" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
override predicate isSanitizer(DataFlow::Node node) {
super.isSanitizer(node)
or
node instanceof Sanitizer
}
}
}

View File

@@ -0,0 +1,68 @@
/**
* Provides default sources, sinks and sanitizers for detecting
* "Clear-text logging of sensitive information"
* vulnerabilities, as well as extension points for adding your own.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.dataflow.new.SensitiveDataSources
private import semmle.python.dataflow.new.BarrierGuards
/**
* Provides default sources, sinks and sanitizers for detecting
* "Clear-text logging of sensitive information"
* vulnerabilities, as well as extension points for adding your own.
*/
module CleartextLogging {
/**
* A data flow source for "Clear-text logging of sensitive information" vulnerabilities.
*/
abstract class Source extends DataFlow::Node {
/** Gets the classification of the sensitive data. */
abstract string getClassification();
}
/**
* A data flow sink for "Clear-text logging of sensitive information" vulnerabilities.
*/
abstract class Sink extends DataFlow::Node { }
/**
* A sanitizer for "Clear-text logging of sensitive information" vulnerabilities.
*/
abstract class Sanitizer extends DataFlow::Node { }
/**
* A source of sensitive data, considered as a flow source.
*/
class SensitiveDataSourceAsSource extends Source, SensitiveDataSource {
override SensitiveDataClassification getClassification() {
result = SensitiveDataSource.super.getClassification()
}
}
/** A piece of data logged, considered as a flow sink. */
class LoggingAsSink extends Sink {
LoggingAsSink() { this = any(Logging write).getAnInput() }
}
/** A piece of data printed, considered as a flow sink. */
class PrintedDataAsSink extends Sink {
PrintedDataAsSink() {
this = API::builtin("print").getACall().getArg(_)
or
// special handling of writing to `sys.stdout` and `sys.stderr`, which is
// essentially the same as printing
this =
API::moduleImport("sys")
.getMember(["stdout", "stderr"])
.getMember("write")
.getACall()
.getArg(0)
}
}
}

View File

@@ -0,0 +1,39 @@
/**
* Provides a taint-tracking configuration for "Clear-text storage of sensitive information".
*
* Note, for performance reasons: only import this file if
* `CleartextStorage::Configuration` is needed, otherwise
* `CleartextStorageCustomizations` should be imported instead.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
private import semmle.python.dataflow.new.SensitiveDataSources
/**
* Provides a taint-tracking configuration for detecting "Clear-text storage of sensitive information".
*/
module CleartextStorage {
import CleartextStorageCustomizations::CleartextStorage
/**
* A taint-tracking configuration for detecting "Clear-text storage of sensitive information".
*/
class Configuration extends TaintTracking::Configuration {
Configuration() { this = "CleartextStorage" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
override predicate isSanitizer(DataFlow::Node node) {
super.isSanitizer(node)
or
node instanceof Sanitizer
}
}
}

View File

@@ -0,0 +1,62 @@
/**
* Provides default sources, sinks and sanitizers for detecting
* "Clear-text storage of sensitive information"
* vulnerabilities, as well as extension points for adding your own.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.dataflow.new.SensitiveDataSources
private import semmle.python.dataflow.new.BarrierGuards
/**
* Provides default sources, sinks and sanitizers for detecting
* "Clear-text storage of sensitive information"
* vulnerabilities, as well as extension points for adding your own.
*/
module CleartextStorage {
/**
* A data flow source for "Clear-text storage of sensitive information" vulnerabilities.
*/
abstract class Source extends DataFlow::Node {
/** Gets the classification of the sensitive data. */
abstract string getClassification();
}
/**
* A data flow sink for "Clear-text storage of sensitive information" vulnerabilities.
*/
abstract class Sink extends DataFlow::Node { }
/**
* A sanitizer for "Clear-text storage of sensitive information" vulnerabilities.
*/
abstract class Sanitizer extends DataFlow::Node { }
/**
* A source of sensitive data, considered as a flow source.
*/
class SensitiveDataSourceAsSource extends Source, SensitiveDataSource {
override SensitiveDataClassification getClassification() {
result = SensitiveDataSource.super.getClassification()
}
}
/** The data written to a file, considered as a flow sink. */
class FileWriteDataAsSink extends Sink {
FileWriteDataAsSink() { this = any(FileSystemWriteAccess write).getADataNode() }
}
/** The data written to a cookie on a HTTP response, considered as a flow sink. */
class CookieWriteAsSink extends Sink {
CookieWriteAsSink() {
exists(HTTP::Server::CookieWrite write |
this = write.getValueArg()
or
this = write.getHeaderArg()
)
}
}
}

View File

@@ -1,26 +1,42 @@
/**
* Provides a taint-tracking configuration for detecting code injection
* vulnerabilities.
* Provides a taint-tracking configuration for detecting "code injection" vulnerabilities.
*
* Note, for performance reasons: only import this file if
* `CodeInjection::Configuration` is needed, otherwise
* `CodeInjectionCustomizations` should be imported instead.
*/
import python
private import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.Concepts
import semmle.python.dataflow.new.RemoteFlowSources
import semmle.python.dataflow.new.BarrierGuards
/**
* A taint-tracking configuration for detecting code injection vulnerabilities.
* Provides a taint-tracking configuration for detecting "code injection" vulnerabilities.
*/
class CodeInjectionConfiguration extends TaintTracking::Configuration {
CodeInjectionConfiguration() { this = "CodeInjectionConfiguration" }
module CodeInjection {
import CodeInjectionCustomizations::CodeInjection
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
/**
* A taint-tracking configuration for detecting "code injection" vulnerabilities.
*/
class Configuration extends TaintTracking::Configuration {
Configuration() { this = "CodeInjection" }
override predicate isSink(DataFlow::Node sink) { sink = any(CodeExecution e).getCode() }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof StringConstCompare
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer }
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof SanitizerGuard
}
}
}
/**
 * DEPRECATED: Don't extend this class for customization, since this will lead to bad
 * performance. Instead, use the new `CodeInjectionCustomizations.qll` file and extend
 * its classes.
*/
deprecated class CodeInjectionConfiguration = CodeInjection::Configuration;

View File

@@ -0,0 +1,55 @@
/**
* Provides default sources, sinks and sanitizers for detecting
* "code injection"
* vulnerabilities, as well as extension points for adding your own.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
/**
* Provides default sources, sinks and sanitizers for detecting
* "code injection"
* vulnerabilities, as well as extension points for adding your own.
*/
module CodeInjection {
  /**
   * A data flow source for "code injection" vulnerabilities.
   *
   * Extend this class to add your own sources.
   */
  abstract class Source extends DataFlow::Node { }

  /**
   * A data flow sink for "code injection" vulnerabilities.
   *
   * Extend this class to add your own sinks.
   */
  abstract class Sink extends DataFlow::Node { }

  /**
   * A sanitizer for "code injection" vulnerabilities.
   *
   * Extend this class to add your own sanitizers.
   */
  abstract class Sanitizer extends DataFlow::Node { }

  /**
   * A sanitizer guard for "code injection" vulnerabilities.
   *
   * Extend this class to add your own sanitizer guards.
   */
  abstract class SanitizerGuard extends DataFlow::BarrierGuard { }

  /**
   * A source of remote user input, considered as a flow source.
   */
  class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }

  /**
   * The code argument of a dynamic code execution (the `CodeExecution` concept),
   * considered as a flow sink.
   */
  class CodeExecutionAsSink extends Sink {
    CodeExecutionAsSink() { this = any(CodeExecution e).getCode() }
  }

  /**
   * A comparison with a constant string, considered as a sanitizer-guard.
   */
  class StringConstCompareAsSanitizerGuard extends SanitizerGuard, StringConstCompare { }
}

View File

@@ -1,56 +1,42 @@
/**
* Provides a taint-tracking configuration for detecting command injection
* vulnerabilities.
* Provides a taint-tracking configuration for detecting "command injection" vulnerabilities.
*
* Note, for performance reasons: only import this file if
* `CommandInjection::Configuration` is needed, otherwise
* `CommandInjectionCustomizations` should be imported instead.
*/
import python
private import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.Concepts
import semmle.python.dataflow.new.RemoteFlowSources
import semmle.python.dataflow.new.BarrierGuards
/**
* A taint-tracking configuration for detecting command injection vulnerabilities.
* Provides a taint-tracking configuration for detecting "command injection" vulnerabilities.
*/
class CommandInjectionConfiguration extends TaintTracking::Configuration {
CommandInjectionConfiguration() { this = "CommandInjectionConfiguration" }
module CommandInjection {
import CommandInjectionCustomizations::CommandInjection
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
/**
* A taint-tracking configuration for detecting "command injection" vulnerabilities.
*/
class Configuration extends TaintTracking::Configuration {
Configuration() { this = "CommandInjection" }
override predicate isSink(DataFlow::Node sink) {
sink = any(SystemCommandExecution e).getCommand() and
// Since the implementation of standard library functions such `os.popen` looks like
// ```py
// def popen(cmd, mode="r", buffering=-1):
// ...
// proc = subprocess.Popen(cmd, ...)
// ```
// any time we would report flow to the `os.popen` sink, we can ALSO report the flow
// from the `cmd` parameter to the `subprocess.Popen` sink -- obviously we don't
// want that.
//
// However, simply removing taint edges out of a sink is not a good enough solution,
// since we would only flag one of the `os.system` calls in the following example
// due to use-use flow
// ```py
// os.system(cmd)
// os.system(cmd)
// ```
//
// Best solution I could come up with is to exclude all sinks inside the modules of
// known sinks. This does have a downside: If we have overlooked a function in any
// of these, that internally runs a command, we no longer give an alert :| -- and we
// need to keep them updated (which is hard to remember)
//
// This does not only affect `os.popen`, but also the helper functions in
// `subprocess`. See:
// https://github.com/python/cpython/blob/fa7ce080175f65d678a7d5756c94f82887fc9803/Lib/os.py#L974
// https://github.com/python/cpython/blob/fa7ce080175f65d678a7d5756c94f82887fc9803/Lib/subprocess.py#L341
not sink.getScope().getEnclosingModule().getName() in ["os", "subprocess", "platform", "popen2"]
}
override predicate isSource(DataFlow::Node source) { source instanceof Source }
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof StringConstCompare
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer }
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof SanitizerGuard
}
}
}
/**
 * DEPRECATED: Don't extend this class for customization, since this will lead to bad
 * performance. Instead, use the new `CommandInjectionCustomizations.qll` file and extend
 * its classes.
*/
deprecated class CommandInjectionConfiguration = CommandInjection::Configuration;

View File

@@ -0,0 +1,87 @@
/**
* Provides default sources, sinks and sanitizers for detecting
* "command injection"
* vulnerabilities, as well as extension points for adding your own.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
/**
* Provides default sources, sinks and sanitizers for detecting
* "command injection"
* vulnerabilities, as well as extension points for adding your own.
*/
module CommandInjection {
  /**
   * A data flow source for "command injection" vulnerabilities.
   *
   * Extend this class to add your own sources.
   */
  abstract class Source extends DataFlow::Node { }

  /**
   * A data flow sink for "command injection" vulnerabilities.
   *
   * Extend this class to add your own sinks.
   */
  abstract class Sink extends DataFlow::Node { }

  /**
   * A sanitizer for "command injection" vulnerabilities.
   *
   * Extend this class to add your own sanitizers.
   */
  abstract class Sanitizer extends DataFlow::Node { }

  /**
   * A sanitizer guard for "command injection" vulnerabilities.
   *
   * Extend this class to add your own sanitizer guards.
   */
  abstract class SanitizerGuard extends DataFlow::BarrierGuard { }

  /**
   * A source of remote user input, considered as a flow source.
   */
  class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }

  /**
   * The command argument of a system command execution, considered as a flow sink.
   *
   * Sinks inside the standard-library modules that themselves run commands
   * (`os`, `subprocess`, `platform`, `popen2`) are deliberately excluded; see
   * the comment in the characteristic predicate below.
   */
  class CommandExecutionAsSink extends Sink {
    CommandExecutionAsSink() {
      this = any(SystemCommandExecution e).getCommand() and
      // Since the implementation of standard library functions such as `os.popen` looks like
      // ```py
      // def popen(cmd, mode="r", buffering=-1):
      //     ...
      //     proc = subprocess.Popen(cmd, ...)
      // ```
      // any time we would report flow to the `os.popen` sink, we can ALSO report the flow
      // from the `cmd` parameter to the `subprocess.Popen` sink -- obviously we don't
      // want that.
      //
      // However, simply removing taint edges out of a sink is not a good enough solution,
      // since we would only flag one of the `os.system` calls in the following example
      // due to use-use flow
      // ```py
      // os.system(cmd)
      // os.system(cmd)
      // ```
      //
      // The best solution we could come up with is to exclude all sinks inside the modules
      // of known sinks. This does have a downside: if we have overlooked a function in any
      // of these that internally runs a command, we no longer give an alert :| -- and we
      // need to keep the module list updated (which is hard to remember).
      //
      // This does not only affect `os.popen`, but also the helper functions in
      // `subprocess`. See:
      // https://github.com/python/cpython/blob/fa7ce080175f65d678a7d5756c94f82887fc9803/Lib/os.py#L974
      // https://github.com/python/cpython/blob/fa7ce080175f65d678a7d5756c94f82887fc9803/Lib/subprocess.py#L341
      not this.getScope().getEnclosingModule().getName() in [
          "os", "subprocess", "platform", "popen2"
        ]
    }
  }

  /**
   * A comparison with a constant string, considered as a sanitizer-guard.
   */
  class StringConstCompareAsSanitizerGuard extends SanitizerGuard, StringConstCompare { }
}

View File

@@ -1,37 +1,19 @@
/**
* Provides a taint-tracking configuration for detecting path injection
* vulnerabilities.
* Provides taint-tracking configurations for detecting "path injection" vulnerabilities.
*
* We detect cases where a user-controlled path is used in an unsafe manner,
* meaning it is not both normalized and _afterwards_ checked.
*
* It does so by dividing the problematic situation into two cases:
* 1. The file path is never normalized.
* This is easily detected by using normalization as a sanitizer.
*
* 2. The file path is normalized at least once, but never checked afterwards.
* This is detected by finding the earliest normalization and then ensuring that
* no checks happen later. Since we start from the earliest normalization,
* we know that the absence of checks means that no normalization has a
* check after it. (No checks after a second normalization would be ok if
* there was a check between the first and the second.)
*
* Note that one could make the dual split on whether the file path is ever checked. This does
* not work as nicely, however, since checking is modelled as a `BarrierGuard` rather than
* as a `Sanitizer`. That means that only some dataflow paths out of a check will be removed,
* and so identifying the last check is not possible simply by finding a dataflow path from it
* to a sink.
* Note, for performance reasons: only import this file if
* the Configurations or the `pathInjection` predicate are needed, otherwise
* `PathInjectionCustomizations` should be imported instead.
*/
import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.DataFlow2
import semmle.python.dataflow.new.TaintTracking
import semmle.python.dataflow.new.TaintTracking2
import semmle.python.Concepts
import semmle.python.dataflow.new.RemoteFlowSources
private import python
private import semmle.python.Concepts
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.DataFlow2
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.dataflow.new.TaintTracking2
import ChainedConfigs12
import semmle.python.dataflow.new.BarrierGuards
import PathInjectionCustomizations::PathInjection
// ---------------------------------------------------------------------------
// Case 1. The path is never normalized.
@@ -40,16 +22,14 @@ import semmle.python.dataflow.new.BarrierGuards
class PathNotNormalizedConfiguration extends TaintTracking::Configuration {
PathNotNormalizedConfiguration() { this = "PathNotNormalizedConfiguration" }
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
override predicate isSink(DataFlow::Node sink) {
sink = any(FileSystemAccess e).getAPathArgument()
}
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
override predicate isSanitizer(DataFlow::Node node) { node instanceof Path::PathNormalization }
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof StringConstCompare
guard instanceof SanitizerGuard
}
}
@@ -68,14 +48,14 @@ predicate pathNotNormalized(CustomPathNode source, CustomPathNode sink) {
class FirstNormalizationConfiguration extends TaintTracking::Configuration {
FirstNormalizationConfiguration() { this = "FirstNormalizationConfiguration" }
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
override predicate isSink(DataFlow::Node sink) { sink instanceof Path::PathNormalization }
override predicate isSanitizerOut(DataFlow::Node node) { node instanceof Path::PathNormalization }
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof StringConstCompare
guard instanceof SanitizerGuard
}
}
@@ -85,14 +65,12 @@ class NormalizedPathNotCheckedConfiguration extends TaintTracking2::Configuratio
override predicate isSource(DataFlow::Node source) { source instanceof Path::PathNormalization }
override predicate isSink(DataFlow::Node sink) {
sink = any(FileSystemAccess e).getAPathArgument()
}
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof Path::SafeAccessCheck
or
guard instanceof StringConstCompare
guard instanceof SanitizerGuard
}
}

View File

@@ -0,0 +1,56 @@
/**
* Provides default sources, sinks and sanitizers for detecting
* "path injection"
* vulnerabilities, as well as extension points for adding your own.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
/**
* Provides default sources, and sinks for detecting
* "path injection"
* vulnerabilities, as well as extension points for adding your own.
*
* Since the path-injection configuration setup is rather complicated, we do not
* expose a `Sanitizer` class, and instead you should extend
* `Path::PathNormalization::Range` and `Path::SafeAccessCheck::Range` from
* `semmle.python.Concepts` instead.
*/
module PathInjection {
  /**
   * A data flow source for "path injection" vulnerabilities.
   *
   * Extend this class to add your own sources.
   */
  abstract class Source extends DataFlow::Node { }

  /**
   * A data flow sink for "path injection" vulnerabilities.
   * Such as a file system access.
   *
   * Extend this class to add your own sinks.
   */
  abstract class Sink extends DataFlow::Node { }

  /**
   * A sanitizer guard for "path injection" vulnerabilities.
   *
   * Extend this class to add your own sanitizer guards. Note that this module
   * intentionally has no `Sanitizer` class; normalization and checking are
   * modeled via the `Path` concepts instead.
   */
  abstract class SanitizerGuard extends DataFlow::BarrierGuard { }

  /**
   * A source of remote user input, considered as a flow source.
   */
  class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }

  /**
   * The path argument of a file system access, considered as a flow sink.
   */
  class FileSystemAccessAsSink extends Sink {
    FileSystemAccessAsSink() { this = any(FileSystemAccess e).getAPathArgument() }
  }

  /**
   * A comparison with a constant string, considered as a sanitizer-guard.
   */
  class StringConstCompareAsSanitizerGuard extends SanitizerGuard, StringConstCompare { }
}

View File

@@ -0,0 +1,35 @@
/**
* Provides a taint-tracking configuration for detecting "polynomial regular expression denial of service (ReDoS)" vulnerabilities.
*
* Note, for performance reasons: only import this file if
* `PolynomialReDoS::Configuration` is needed, otherwise
* `PolynomialReDoSCustomizations` should be imported instead.
*/
private import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
/**
* Provides a taint-tracking configuration for detecting "polynomial regular expression denial of service (ReDoS)" vulnerabilities.
*/
module PolynomialReDoS {
  import PolynomialReDoSCustomizations::PolynomialReDoS

  /**
   * A taint-tracking configuration for detecting "polynomial regular expression denial of service (ReDoS)" vulnerabilities.
   *
   * Sources, sinks, sanitizers, and sanitizer guards are supplied by the
   * extension points in `PolynomialReDoSCustomizations`.
   */
  class Configuration extends TaintTracking::Configuration {
    // The string is a unique identifier distinguishing this configuration
    // from other taint-tracking configurations.
    Configuration() { this = "PolynomialReDoS" }

    override predicate isSource(DataFlow::Node source) { source instanceof Source }

    override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }

    override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer }

    override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
      guard instanceof SanitizerGuard
    }
  }
}

View File

@@ -0,0 +1,212 @@
/**
* Provides default sources, sinks and sanitizers for detecting
* "polynomial regular expression denial of service (ReDoS)"
* vulnerabilities, as well as extension points for adding your own.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.DataFlow2
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
private import semmle.python.RegexTreeView
private import semmle.python.ApiGraphs
/**
* Provides default sources, sinks and sanitizers for detecting
* "polynomial regular expression denial of service (ReDoS)"
* vulnerabilities, as well as extension points for adding your own.
*/
module PolynomialReDoS {
  /**
   * A data flow source for "polynomial regular expression denial of service (ReDoS)" vulnerabilities.
   *
   * Extend this class to add your own sources.
   */
  abstract class Source extends DataFlow::Node { }

  /**
   * A data flow sink for "polynomial regular expression denial of service (ReDoS)" vulnerabilities.
   *
   * Extend this class to add your own sinks.
   */
  abstract class Sink extends DataFlow::Node {
    /** Gets the regex that is being executed by this node. */
    abstract RegExpTerm getRegExp();

    /**
     * Gets the node to highlight in the alert message.
     * Defaults to the sink itself; override to highlight a different node.
     */
    DataFlow::Node getHighlight() { result = this }
  }

  /**
   * A sanitizer for "polynomial regular expression denial of service (ReDoS)" vulnerabilities.
   *
   * Extend this class to add your own sanitizers.
   */
  abstract class Sanitizer extends DataFlow::Node { }

  /**
   * A sanitizer guard for "polynomial regular expression denial of service (ReDoS)" vulnerabilities.
   *
   * Extend this class to add your own sanitizer guards.
   */
  abstract class SanitizerGuard extends DataFlow::BarrierGuard { }

  /**
   * A source of remote user input, considered as a flow source.
   */
  class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }

  /**
   * The string searched by a regex execution, considered as a flow sink.
   *
   * Uses the `CompiledRegexes` helper module (below) to relate the executed
   * string to the root term of the regex pattern being run against it.
   */
  class RegexExecutionAsSink extends Sink {
    RegExpTerm t;

    RegexExecutionAsSink() {
      exists(CompiledRegexes::RegexExecution re |
        re.getRegexNode().asExpr() = t.getRegex() and
        this = re.getString()
      ) and
      // only report the root term of the pattern, not every sub-term
      t.isRootTerm()
    }

    /** Gets the regex that is being executed by this node. */
    override RegExpTerm getRegExp() { result = t }
  }

  /**
   * A comparison with a constant string, considered as a sanitizer-guard.
   */
  class StringConstCompareAsSanitizerGuard extends SanitizerGuard, StringConstCompare { }
}
/** Helper module for tracking compiled regexes. */
private module CompiledRegexes {
  // TODO: This module should be refactored and merged with the experimental work done on detecting
  // regex injections, such that this can be expressed from just using a concept.
  /** A data-flow configuration for finding uses of compiled regexes. */
  class RegexDefinitionConfiguration extends DataFlow2::Configuration {
    RegexDefinitionConfiguration() { this = "RegexDefinitionConfiguration" }

    override predicate isSource(DataFlow::Node source) { source instanceof RegexDefinitionSource }

    override predicate isSink(DataFlow::Node sink) { sink instanceof RegexDefinitionSink }
  }

  /**
   * A regex compilation: a call to `re.compile`.
   *
   * NOTE(review): renamed from the misspelled `RegexDefinitonSource`; the class is
   * private to this module, so the rename is not externally visible.
   */
  class RegexDefinitionSource extends DataFlow::CallCfgNode {
    DataFlow::Node regexNode;

    RegexDefinitionSource() {
      this = API::moduleImport("re").getMember("compile").getACall() and
      // the pattern may be passed positionally or by keyword
      regexNode in [this.getArg(0), this.getArgByName("pattern")]
    }

    /** Gets the regex that is being compiled by this node. */
    RegExpTerm getRegExp() { result.getRegex() = regexNode.asExpr() and result.isRootTerm() }

    /** Gets the data flow node for the regex being compiled by this node. */
    DataFlow::Node getRegexNode() { result = regexNode }
  }

  /**
   * A use of a compiled regex: the receiver of a method call such as
   * `pattern.match(...)`, where the method is one of `RegexExecutionMethod`.
   */
  class RegexDefinitionSink extends DataFlow::Node {
    RegexExecutionMethod method;
    DataFlow::CallCfgNode executingCall;

    RegexDefinitionSink() {
      exists(DataFlow::AttrRead reMethod |
        executingCall.getFunction() = reMethod and
        reMethod.getAttributeName() = method and
        this = reMethod.getObject()
      )
    }

    /** Gets the method used to execute the regex. */
    RegexExecutionMethod getMethod() { result = method }

    /** Gets the data flow node for the executing call. */
    DataFlow::CallCfgNode getExecutingCall() { result = executingCall }
  }

  /** A data flow node executing a regex. */
  abstract class RegexExecution extends DataFlow::Node {
    /** Gets the data flow node for the regex being compiled by this node. */
    abstract DataFlow::Node getRegexNode();

    /** Gets a dataflow node for the string to be searched or matched against. */
    abstract DataFlow::Node getString();
  }

  /** The name of a `re` method that executes a regex against a string. */
  private class RegexExecutionMethod extends string {
    RegexExecutionMethod() {
      this in ["match", "fullmatch", "search", "split", "findall", "finditer", "sub", "subn"]
    }
  }

  /**
   * Gets the index of the argument representing the string to be searched by a regex,
   * for a call to `re.<method>(pattern, ...)`.
   * For `sub`/`subn` the string comes after the replacement argument.
   */
  int stringArg(RegexExecutionMethod method) {
    method in ["match", "fullmatch", "search", "split", "findall", "finditer"] and
    result = 1
    or
    method in ["sub", "subn"] and
    result = 2
  }

  /**
   * A class to find `re` methods immediately executing an expression.
   *
   * See `RegexExecutionMethods`
   */
  class DirectRegex extends DataFlow::CallCfgNode, RegexExecution {
    RegexExecutionMethod method;

    DirectRegex() { this = API::moduleImport("re").getMember(method).getACall() }

    override DataFlow::Node getRegexNode() {
      result in [this.getArg(0), this.getArgByName("pattern")]
    }

    override DataFlow::Node getString() {
      result in [this.getArg(stringArg(method)), this.getArgByName("string")]
    }
  }

  /**
   * A class to find `re` methods immediately executing a compiled expression by `re.compile`.
   *
   * Given the following example:
   *
   * ```py
   * pattern = re.compile(input)
   * pattern.match(s)
   * ```
   *
   * This class will identify that `re.compile` compiles `input` and afterwards
   * executes `re`'s `match`. As a result, `this` will refer to `pattern.match(s)`
   * and `this.getRegexNode()` will return the node for `input` (`re.compile`'s first argument)
   *
   *
   * See `RegexExecutionMethods`
   *
   * See https://docs.python.org/3/library/re.html#regular-expression-objects
   */
  private class CompiledRegex extends DataFlow::CallCfgNode, RegexExecution {
    DataFlow::Node regexNode;
    RegexExecutionMethod method;

    CompiledRegex() {
      exists(
        RegexDefinitionConfiguration conf, RegexDefinitionSource source, RegexDefinitionSink sink
      |
        conf.hasFlow(source, sink) and
        regexNode = source.getRegexNode() and
        method = sink.getMethod() and
        this = sink.getExecutingCall()
      )
    }

    override DataFlow::Node getRegexNode() { result = regexNode }

    override DataFlow::Node getString() {
      // the pattern is bound to the compiled object, so positional arguments
      // are shifted one place to the left compared with the `re.<method>` form
      result in [this.getArg(stringArg(method) - 1), this.getArgByName("string")]
    }
  }
}

View File

@@ -1,32 +1,42 @@
/**
* Provides a taint-tracking configuration for detecting reflected server-side
* cross-site scripting vulnerabilities.
* Provides a taint-tracking configuration for detecting "reflected server-side cross-site scripting" vulnerabilities.
*
* Note, for performance reasons: only import this file if
* `ReflectedXSS::Configuration` is needed, otherwise
* `ReflectedXSSCustomizations` should be imported instead.
*/
import python
private import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.Concepts
import semmle.python.dataflow.new.RemoteFlowSources
import semmle.python.dataflow.new.BarrierGuards
/**
* A taint-tracking configuration for detecting reflected server-side cross-site
* scripting vulnerabilities.
* Provides a taint-tracking configuration for detecting "reflected server-side cross-site scripting" vulnerabilities.
*/
class ReflectedXssConfiguration extends TaintTracking::Configuration {
ReflectedXssConfiguration() { this = "ReflectedXssConfiguration" }
module ReflectedXSS {
import ReflectedXSSCustomizations::ReflectedXSS
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
/**
* A taint-tracking configuration for detecting "reflected server-side cross-site scripting" vulnerabilities.
*/
class Configuration extends TaintTracking::Configuration {
Configuration() { this = "ReflectedXSS" }
override predicate isSink(DataFlow::Node sink) {
exists(HTTP::Server::HttpResponse response |
response.getMimetype().toLowerCase() = "text/html" and
sink = response.getBody()
)
}
override predicate isSource(DataFlow::Node source) { source instanceof Source }
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof StringConstCompare
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer }
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof SanitizerGuard
}
}
}
/**
 * DEPRECATED: Don't extend this class for customization, since this will lead to bad
 * performance. Instead, use the new `ReflectedXSSCustomizations.qll` file and extend
 * its classes.
*/
deprecated class ReflectedXssConfiguration = ReflectedXSS::Configuration;

View File

@@ -0,0 +1,76 @@
/**
* Provides default sources, sinks and sanitizers for detecting
* "reflected server-side cross-site scripting"
* vulnerabilities, as well as extension points for adding your own.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
/**
* Provides default sources, sinks and sanitizers for detecting
* "reflected server-side cross-site scripting"
* vulnerabilities, as well as extension points for adding your own.
*/
module ReflectedXSS {
  /**
   * A data flow source for "reflected server-side cross-site scripting" vulnerabilities.
   *
   * Extend this class to add your own sources.
   */
  abstract class Source extends DataFlow::Node { }

  /**
   * A data flow sink for "reflected server-side cross-site scripting" vulnerabilities.
   *
   * Extend this class to add your own sinks.
   */
  abstract class Sink extends DataFlow::Node { }

  /**
   * A sanitizer for "reflected server-side cross-site scripting" vulnerabilities.
   *
   * Extend this class to add your own sanitizers.
   */
  abstract class Sanitizer extends DataFlow::Node { }

  /**
   * A sanitizer guard for "reflected server-side cross-site scripting" vulnerabilities.
   *
   * Extend this class to add your own sanitizer guards.
   */
  abstract class SanitizerGuard extends DataFlow::BarrierGuard { }

  /**
   * A source of remote user input, considered as a flow source.
   */
  class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }

  /**
   * The body of a HTTP response that will be returned from a server, considered as a flow sink.
   * Only responses with mimetype `text/html` are considered, since only those
   * are rendered as HTML by a browser.
   */
  class ServerHttpResponseBodyAsSink extends Sink {
    ServerHttpResponseBodyAsSink() {
      exists(HTTP::Server::HttpResponse response |
        response.getMimetype().toLowerCase() = "text/html" and
        this = response.getBody()
      )
    }
  }

  /**
   * An HTML escaping, considered as a sanitizer.
   */
  class HtmlEscapingAsSanitizer extends Sanitizer {
    HtmlEscapingAsSanitizer() {
      // TODO: For now, since there is not an `isSanitizingStep` member-predicate part of a
      // `TaintTracking::Configuration`, we treat the output as a taint-sanitizer. This
      // is slightly imprecise, which you can see in the `m_unsafe + SAFE` test-case in
      // python/ql/test/library-tests/frameworks/markupsafe/taint_test.py
      //
      // However, it is better than `getAnInput()`. Due to use-use flow, that would remove
      // the taint-flow to `SINK()` in `some_escape(tainted); SINK(tainted)`.
      this = any(HtmlEscaping esc).getOutput()
    }
  }

  /**
   * A comparison with a constant string, considered as a sanitizer-guard.
   */
  class StringConstCompareAsSanitizerGuard extends SanitizerGuard, StringConstCompare { }
}

View File

@@ -1,26 +1,42 @@
/**
* Provides a taint-tracking configuration for detecting SQL injection
* vulnerabilities.
* Provides a taint-tracking configuration for detecting "SQL injection" vulnerabilities.
*
* Note, for performance reasons: only import this file if
* `SqlInjection::Configuration` is needed, otherwise
* `SqlInjectionCustomizations` should be imported instead.
*/
import python
private import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.Concepts
import semmle.python.dataflow.new.RemoteFlowSources
import semmle.python.dataflow.new.BarrierGuards
/**
* A taint-tracking configuration for detecting SQL injection vulnerabilities.
* Provides a taint-tracking configuration for detecting "SQL injection" vulnerabilities.
*/
class SQLInjectionConfiguration extends TaintTracking::Configuration {
SQLInjectionConfiguration() { this = "SQLInjectionConfiguration" }
module SqlInjection {
import SqlInjectionCustomizations::SqlInjection
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
/**
* A taint-tracking configuration for detecting "SQL injection" vulnerabilities.
*/
class Configuration extends TaintTracking::Configuration {
Configuration() { this = "SqlInjection" }
override predicate isSink(DataFlow::Node sink) { sink = any(SqlExecution e).getSql() }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof StringConstCompare
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer }
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof SanitizerGuard
}
}
}
/**
 * DEPRECATED: Don't extend this class for customization, since this will lead to bad
 * performance. Instead, use the new `SqlInjectionCustomizations.qll` file and extend
 * its classes.
*/
deprecated class SQLInjectionConfiguration = SqlInjection::Configuration;

View File

@@ -0,0 +1,55 @@
/**
* Provides default sources, sinks and sanitizers for detecting
* "SQL injection"
* vulnerabilities, as well as extension points for adding your own.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
/**
* Provides default sources, sinks and sanitizers for detecting
* "SQL injection"
* vulnerabilities, as well as extension points for adding your own.
*/
module SqlInjection {
  /**
   * A data flow source of user input that may reach a SQL statement.
   *
   * Extend this class to add your own sources.
   */
  abstract class Source extends DataFlow::Node { }

  /**
   * A data flow sink where tainted data would be interpreted as SQL.
   *
   * Extend this class to add your own sinks.
   */
  abstract class Sink extends DataFlow::Node { }

  /**
   * A node that stops taint from propagating further towards a SQL sink.
   *
   * Extend this class to add your own sanitizers.
   */
  abstract class Sanitizer extends DataFlow::Node { }

  /**
   * A guard expression that blocks taint on one of its branches.
   *
   * Extend this class to add your own sanitizer guards.
   */
  abstract class SanitizerGuard extends DataFlow::BarrierGuard { }

  /**
   * Remote user input, treated as a default flow source.
   */
  class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }

  /**
   * The SQL statement argument of a SQL execution, treated as a default flow sink.
   */
  class SqlExecutionAsSink extends Sink {
    SqlExecutionAsSink() {
      exists(SqlExecution e | this = e.getSql())
    }
  }

  /**
   * An equality test against a constant string, treated as a default sanitizer-guard.
   */
  class StringConstCompareAsSanitizerGuard extends SanitizerGuard, StringConstCompare { }
}

View File

@@ -1,33 +1,51 @@
/**
* Provides a taint-tracking configuration for detecting stack trace exposure
* vulnerabilities.
* Provides a taint-tracking configuration for detecting "stack trace exposure" vulnerabilities.
*
* Note, for performance reasons: only import this file if
* `StackTraceExposure::Configuration` is needed, otherwise
* `StackTraceExposureCustomizations` should be imported instead.
*/
import python
private import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.Concepts
import semmle.python.dataflow.new.internal.Attributes
private import ExceptionInfo
/**
* A taint-tracking configuration for detecting stack trace exposure.
* Provides a taint-tracking configuration for detecting "stack trace exposure" vulnerabilities.
*/
class StackTraceExposureConfiguration extends TaintTracking::Configuration {
StackTraceExposureConfiguration() { this = "StackTraceExposureConfiguration" }
module StackTraceExposure {
import StackTraceExposureCustomizations::StackTraceExposure
override predicate isSource(DataFlow::Node source) { source instanceof ExceptionInfo }
/**
* A taint-tracking configuration for detecting "stack trace exposure" vulnerabilities.
*/
class Configuration extends TaintTracking::Configuration {
Configuration() { this = "StackTraceExposure" }
override predicate isSink(DataFlow::Node sink) {
sink = any(HTTP::Server::HttpResponse response).getBody()
}
override predicate isSource(DataFlow::Node source) { source instanceof Source }
// A stack trace is accessible as the `__traceback__` attribute of a caught exception.
// see https://docs.python.org/3/reference/datamodel.html#traceback-objects
override predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
exists(AttrRead attr | attr.getAttributeName() = "__traceback__" |
nodeFrom = attr.getObject() and
nodeTo = attr
)
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer }
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof SanitizerGuard
}
// A stack trace is accessible as the `__traceback__` attribute of a caught exception.
// see https://docs.python.org/3/reference/datamodel.html#traceback-objects
override predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
exists(DataFlow::AttrRead attr | attr.getAttributeName() = "__traceback__" |
nodeFrom = attr.getObject() and
nodeTo = attr
)
}
}
}
/**
* DEPRECATED: Don't extend this class for customization, since this will lead to bad
* performance, instead use the new `StackTraceExposureCustomizations.qll` file, and extend
* its classes.
*/
deprecated class StackTraceExposureConfiguration = StackTraceExposure::Configuration;

View File

@@ -0,0 +1,52 @@
/**
* Provides default sources, sinks and sanitizers for detecting
* "stack trace exposure"
* vulnerabilities, as well as extension points for adding your own.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.dataflow.new.BarrierGuards
private import ExceptionInfo
/**
* Provides default sources, sinks and sanitizers for detecting
* "stack trace exposure"
* vulnerabilities, as well as extension points for adding your own.
*/
module StackTraceExposure {
/**
* A data flow source for "stack trace exposure" vulnerabilities.
*/
abstract class Source extends DataFlow::Node { }
/**
* A data flow sink for "stack trace exposure" vulnerabilities.
*/
abstract class Sink extends DataFlow::Node { }
/**
* A sanitizer for "stack trace exposure" vulnerabilities.
*/
abstract class Sanitizer extends DataFlow::Node { }
/**
* A sanitizer guard for "stack trace exposure" vulnerabilities.
*/
abstract class SanitizerGuard extends DataFlow::BarrierGuard { }
/**
* A source of exception info, considered as a flow source.
*/
class ExceptionInfoAsSource extends Source {
ExceptionInfoAsSource() { this instanceof ExceptionInfo }
}
/**
* The body of an HTTP response that will be returned from a server, considered as a flow sink.
*/
class ServerHttpResponseBodyAsSink extends Sink {
ServerHttpResponseBodyAsSink() { this = any(HTTP::Server::HttpResponse response).getBody() }
}
}

View File

@@ -1,32 +1,42 @@
/**
* Provides a taint-tracking configuration for detecting arbitrary code execution
* vulnerabilities due to deserializing user-controlled data.
* Provides a taint-tracking configuration for detecting "code execution from deserialization" vulnerabilities.
*
* Note, for performance reasons: only import this file if
* `UnsafeDeserialization::Configuration` is needed, otherwise
* `UnsafeDeserializationCustomizations` should be imported instead.
*/
import python
private import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.Concepts
import semmle.python.dataflow.new.RemoteFlowSources
import semmle.python.dataflow.new.BarrierGuards
/**
* A taint-tracking configuration for detecting arbitrary code execution
* vulnerabilities due to deserializing user-controlled data.
* Provides a taint-tracking configuration for detecting "code execution from deserialization" vulnerabilities.
*/
class UnsafeDeserializationConfiguration extends TaintTracking::Configuration {
UnsafeDeserializationConfiguration() { this = "UnsafeDeserializationConfiguration" }
module UnsafeDeserialization {
import UnsafeDeserializationCustomizations::UnsafeDeserialization
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
/**
* A taint-tracking configuration for detecting "code execution from deserialization" vulnerabilities.
*/
class Configuration extends TaintTracking::Configuration {
Configuration() { this = "UnsafeDeserialization" }
override predicate isSink(DataFlow::Node sink) {
exists(Decoding d |
d.mayExecuteInput() and
sink = d.getAnInput()
)
}
override predicate isSource(DataFlow::Node source) { source instanceof Source }
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof StringConstCompare
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer }
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof SanitizerGuard
}
}
}
/**
* DEPRECATED: Don't extend this class for customization, since this will lead to bad
* performance, instead use the new `UnsafeDeserializationCustomizations.qll` file, and extend
* its classes.
*/
deprecated class UnsafeDeserializationConfiguration = UnsafeDeserialization::Configuration;

View File

@@ -0,0 +1,60 @@
/**
* Provides default sources, sinks and sanitizers for detecting
* "code execution from deserialization"
* vulnerabilities, as well as extension points for adding your own.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
/**
* Provides default sources, sinks and sanitizers for detecting
* "code execution from deserialization"
* vulnerabilities, as well as extension points for adding your own.
*/
module UnsafeDeserialization {
/**
* A data flow source for "code execution from deserialization" vulnerabilities.
*/
abstract class Source extends DataFlow::Node { }
/**
* A data flow sink for "code execution from deserialization" vulnerabilities.
*/
abstract class Sink extends DataFlow::Node { }
/**
* A sanitizer for "code execution from deserialization" vulnerabilities.
*/
abstract class Sanitizer extends DataFlow::Node { }
/**
* A sanitizer guard for "code execution from deserialization" vulnerabilities.
*/
abstract class SanitizerGuard extends DataFlow::BarrierGuard { }
/**
* A source of remote user input, considered as a flow source.
*/
class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
/**
* An input to a decoding operation that may execute (parts of) its input,
* considered as a flow sink.
*/
class InsecureDecodingAsSink extends Sink {
InsecureDecodingAsSink() {
exists(Decoding d |
d.mayExecuteInput() and
this = d.getAnInput()
)
}
}
/**
* A comparison with a constant string, considered as a sanitizer-guard.
*/
class StringConstCompareAsSanitizerGuard extends SanitizerGuard, StringConstCompare { }
}

View File

@@ -1,37 +1,42 @@
/**
* Provides a taint-tracking configuration for detecting URL redirection
* vulnerabilities.
* Provides a taint-tracking configuration for detecting "URL redirection" vulnerabilities.
*
* Note, for performance reasons: only import this file if
* `UrlRedirect::Configuration` is needed, otherwise
* `UrlRedirectCustomizations` should be imported instead.
*/
import python
private import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.Concepts
import semmle.python.dataflow.new.RemoteFlowSources
import semmle.python.dataflow.new.BarrierGuards
/**
* A taint-tracking configuration for detecting URL redirection vulnerabilities.
* Provides a taint-tracking configuration for detecting "URL redirection" vulnerabilities.
*/
class UrlRedirectConfiguration extends TaintTracking::Configuration {
UrlRedirectConfiguration() { this = "UrlRedirectConfiguration" }
module UrlRedirect {
import UrlRedirectCustomizations::UrlRedirect
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
/**
* A taint-tracking configuration for detecting "URL redirection" vulnerabilities.
*/
class Configuration extends TaintTracking::Configuration {
Configuration() { this = "UrlRedirect" }
override predicate isSink(DataFlow::Node sink) {
sink = any(HTTP::Server::HttpRedirectResponse e).getRedirectLocation()
}
override predicate isSource(DataFlow::Node source) { source instanceof Source }
override predicate isSanitizer(DataFlow::Node node) {
// Url redirection is a problem only if the user controls the prefix of the URL.
// TODO: This is a copy of the taint-sanitizer from the old points-to query, which doesn't
// cover formatting.
exists(BinaryExprNode string_concat | string_concat.getOp() instanceof Add |
string_concat.getRight() = node.asCfgNode()
)
}
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof StringConstCompare
override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer }
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof SanitizerGuard
}
}
}
/**
* DEPRECATED: Don't extend this class for customization, since this will lead to bad
* performance, instead use the new `UrlRedirectCustomizations.qll` file, and extend
* its classes.
*/
deprecated class UrlRedirectConfiguration = UrlRedirect::Configuration;

View File

@@ -0,0 +1,71 @@
/**
* Provides default sources, sinks and sanitizers for detecting
* "URL redirection"
* vulnerabilities, as well as extension points for adding your own.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
/**
* Provides default sources, sinks and sanitizers for detecting
* "URL redirection"
* vulnerabilities, as well as extension points for adding your own.
*/
module UrlRedirect {
/**
* A data flow source for "URL redirection" vulnerabilities.
*/
abstract class Source extends DataFlow::Node { }
/**
* A data flow sink for "URL redirection" vulnerabilities.
*/
abstract class Sink extends DataFlow::Node { }
/**
* A sanitizer for "URL redirection" vulnerabilities.
*/
abstract class Sanitizer extends DataFlow::Node { }
/**
* A sanitizer guard for "URL redirection" vulnerabilities.
*/
abstract class SanitizerGuard extends DataFlow::BarrierGuard { }
/**
* A source of remote user input, considered as a flow source.
*/
class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
/**
* The redirect location of an HTTP redirect response, considered as a flow sink.
*/
class RedirectLocationAsSink extends Sink {
RedirectLocationAsSink() {
this = any(HTTP::Server::HttpRedirectResponse e).getRedirectLocation()
}
}
/**
* The right-hand side of a string concatenation, considered as a sanitizer.
*
* URL redirection is only a problem if the user controls the prefix of the
* URL, so a value that is appended after some other string is treated as safe.
*/
class StringConcatAsSanitizer extends Sanitizer {
StringConcatAsSanitizer() {
// Url redirection is a problem only if the user controls the prefix of the URL.
// TODO: This is a copy of the taint-sanitizer from the old points-to query, which doesn't
// cover formatting.
exists(BinaryExprNode string_concat | string_concat.getOp() instanceof Add |
string_concat.getRight() = this.asCfgNode()
)
}
}
/**
* A comparison with a constant string, considered as a sanitizer-guard.
*/
class StringConstCompareAsSanitizerGuard extends SanitizerGuard, StringConstCompare { }
}

View File

@@ -0,0 +1,342 @@
/**
* This library implements the analysis described in the following two papers:
*
* James Kirrage, Asiri Rathnayake, Hayo Thielecke: Static Analysis for
* Regular Expression Denial-of-Service Attacks. NSS 2013.
* (http://www.cs.bham.ac.uk/~hxt/research/reg-exp-sec.pdf)
* Asiri Rathnayake, Hayo Thielecke: Static Analysis for Regular Expression
* Exponential Runtime via Substructural Logics. 2014.
* (https://www.cs.bham.ac.uk/~hxt/research/redos_full.pdf)
*
* The basic idea is to search for overlapping cycles in the NFA, that is,
* states `q` such that there are two distinct paths from `q` to itself
* that consume the same word `w`.
*
* For any such state `q`, an attack string can be constructed as follows:
* concatenate a prefix `v` that takes the NFA to `q` with `n` copies of
* the word `w` that leads back to `q` along two different paths, followed
* by a suffix `x` that is _not_ accepted in state `q`. A backtracking
* implementation will need to explore at least 2^n different ways of going
* from `q` back to itself while trying to match the `n` copies of `w`
* before finally giving up.
*
* Now in order to identify overlapping cycles, all we have to do is find
* pumpable forks, that is, states `q` that can transition to two different
* states `r1` and `r2` on the same input symbol `c`, such that there are
* paths from both `r1` and `r2` to `q` that consume the same word. The latter
* condition is equivalent to saying that `(q, q)` is reachable from `(r1, r2)`
* in the product NFA.
*
* This is what the library does. It makes a simple attempt to construct a
* prefix `v` leading into `q`, but only to improve the alert message.
* And the library tries to prove the existence of a suffix that ensures
* rejection. This check might fail, which can cause false positives.
*
* Finally, sometimes it depends on the translation whether the NFA generated
* for a regular expression has a pumpable fork or not. We implement one
* particular translation, which may result in false positives or negatives
* relative to some particular JavaScript engine.
*
* More precisely, the library constructs an NFA from a regular expression `r`
* as follows:
*
* * Every sub-term `t` gives rise to an NFA state `Match(t,i)`, representing
* the state of the automaton before attempting to match the `i`th character in `t`.
* * There is one accepting state `Accept(r)`.
* * There is a special `AcceptAnySuffix(r)` state, which accepts any suffix string
* by using an epsilon transition to `Accept(r)` and an any transition to itself.
* * Transitions between states may be labelled with epsilon, or an abstract
* input symbol.
* * Each abstract input symbol represents a set of concrete input characters:
* either a single character, a set of characters represented by a
* character class, or the set of all characters.
* * The product automaton is constructed lazily, starting with pair states
* `(q, q)` where `q` is a fork, and proceeding along an over-approximate
* step relation.
* * The over-approximate step relation allows transitions along pairs of
* abstract input symbols where the symbols have overlap in the characters they accept.
* * Once a trace of pairs of abstract input symbols that leads from a fork
* back to itself has been identified, we attempt to construct a concrete
* string corresponding to it, which may fail.
* * Lastly we ensure that any state reached by repeating `n` copies of `w` has
* a suffix `x` (possible empty) that is most likely __not__ accepted.
*/
import ReDoSUtil
/**
* Holds if state `s` might be inside a backtracking repetition.
*/
pragma[noinline]
private predicate stateInsideBacktracking(State s) {
s.getRepr().getParent*() instanceof MaybeBacktrackingRepetition
}
/**
* An infinitely repeating quantifier that might backtrack, that is, one
* containing an alternation or another quantifier as a proper descendant.
*/
private class MaybeBacktrackingRepetition extends InfiniteRepetitionQuantifier {
MaybeBacktrackingRepetition() {
exists(RegExpTerm child |
child instanceof RegExpAlt or
child instanceof RegExpQuantifier
|
child.getParent+() = this
)
}
}
/**
* A state in the product automaton.
*/
private newtype TStatePair =
/**
* We lazily only construct those states that we are actually
* going to need: `(q, q)` for every fork state `q`, and any
* pair of states that can be reached from a pair that we have
* already constructed. To cut down on the number of states,
* we only represent states `(q1, q2)` where `q1` is lexicographically
* no bigger than `q2`.
*
* States are only constructed if both states in the pair are
* inside a repetition that might backtrack.
*/
MkStatePair(State q1, State q2) {
isFork(q1, _, _, _, _) and q2 = q1
or
(step(_, _, _, q1, q2) or step(_, _, _, q2, q1)) and
rankState(q1) <= rankState(q2)
}
/**
* Gets a unique number for a `state`.
* Is used to create an ordering of states, where states with the same `toString()` will be ordered differently.
*/
private int rankState(State state) {
state =
rank[result](State s, Location l |
l = s.getRepr().getLocation()
|
s order by l.getStartLine(), l.getStartColumn(), s.toString()
)
}
/**
* A state in the product automaton.
*/
private class StatePair extends TStatePair {
State q1;
State q2;
StatePair() { this = MkStatePair(q1, q2) }
/** Gets a textual representation of this element. */
string toString() { result = "(" + q1 + ", " + q2 + ")" }
/** Gets the first component of the state pair. */
State getLeft() { result = q1 }
/** Gets the second component of the state pair. */
State getRight() { result = q2 }
}
/**
* Holds for all constructed state pairs.
*
* Used in `statePairDist`
*/
private predicate isStatePair(StatePair p) { any() }
/**
* Holds if there are transitions from the components of `q` to the corresponding
* components of `r`.
*
* Used in `statePairDist`
*/
private predicate delta2(StatePair q, StatePair r) { step(q, _, _, r) }
/**
* Gets the minimum length of a path from `q` to `r` in the
* product automaton.
*/
private int statePairDist(StatePair q, StatePair r) =
shortestDistances(isStatePair/1, delta2/2)(q, r, result)
/**
* Holds if there are transitions from `q` to `r1` and from `q` to `r2`
* labelled with `s1` and `s2`, respectively, where `s1` and `s2` do not
* trivially have an empty intersection.
*
* This predicate only holds for states associated with regular expressions
* that have at least one repetition quantifier in them (otherwise the
* expression cannot be vulnerable to ReDoS attacks anyway).
*/
pragma[noopt]
private predicate isFork(State q, InputSymbol s1, InputSymbol s2, State r1, State r2) {
stateInsideBacktracking(q) and
exists(State q1, State q2 |
q1 = epsilonSucc*(q) and
delta(q1, s1, r1) and
q2 = epsilonSucc*(q) and
delta(q2, s2, r2) and
// Use pragma[noopt] to prevent intersect(s1,s2) from being the starting point of the join.
// From (s1,s2) it would find a huge number of intermediate state pairs (q1,q2) originating from different literals,
// and discover at the end that no `q` can reach both `q1` and `q2` by epsilon transitions.
exists(intersect(s1, s2))
|
s1 != s2
or
r1 != r2
or
r1 = r2 and q1 != q2
or
// If q can reach itself by epsilon transitions, then there are two distinct paths to the q1/q2 state:
// one that uses the loop and one that doesn't. The engine will separately attempt to match with each path,
// despite ending in the same state. The "fork" thus arises from the choice of whether to use the loop or not.
// To avoid every state in the loop becoming a fork state,
// we arbitrarily pick the InfiniteRepetitionQuantifier state as the canonical fork state for the loop
// (every epsilon-loop must contain such a state).
//
// We additionally require that there exists another InfiniteRepetitionQuantifier `mid` on the path from `q` to itself.
// This is done to avoid flagging regular expressions such as `/(a?)*b/` - that only has polynomial runtime, and is detected by `js/polynomial-redos`.
// The below code is therefore a heuristic, that only flags regular expressions such as `/(a*)*b/`,
// and does not flag regular expressions such as `/(a?b?)c/`, but the latter pattern is not used frequently.
r1 = r2 and
q1 = q2 and
epsilonSucc+(q) = q and
exists(RegExpTerm term | term = q.getRepr() | term instanceof InfiniteRepetitionQuantifier) and
// One of the mid states is an infinite quantifier itself
exists(State mid, RegExpTerm term |
mid = epsilonSucc+(q) and
term = mid.getRepr() and
term instanceof InfiniteRepetitionQuantifier and
q = epsilonSucc+(mid) and
not mid = q
)
) and
stateInsideBacktracking(r1) and
stateInsideBacktracking(r2)
}
/**
* Gets the state pair `(q1, q2)` or `(q2, q1)`; note that only
* one or the other is defined.
*/
private StatePair mkStatePair(State q1, State q2) {
result = MkStatePair(q1, q2) or result = MkStatePair(q2, q1)
}
/**
* Holds if there are transitions from the components of `q` to the corresponding
* components of `r` labelled with `s1` and `s2`, respectively.
*/
private predicate step(StatePair q, InputSymbol s1, InputSymbol s2, StatePair r) {
exists(State r1, State r2 | step(q, s1, s2, r1, r2) and r = mkStatePair(r1, r2))
}
/**
* Holds if there are transitions from the components of `q` to `r1` and `r2`
* labelled with `s1` and `s2`, respectively.
*
* We only consider transitions where the resulting states `(r1, r2)` are both
* inside a repetition that might backtrack.
*/
pragma[noopt]
private predicate step(StatePair q, InputSymbol s1, InputSymbol s2, State r1, State r2) {
exists(State q1, State q2 | q.getLeft() = q1 and q.getRight() = q2 |
deltaClosed(q1, s1, r1) and
deltaClosed(q2, s2, r2) and
// use noopt to force the join on `intersect` to happen last.
exists(intersect(s1, s2))
) and
stateInsideBacktracking(r1) and
stateInsideBacktracking(r2)
}
private newtype TTrace =
Nil() or
Step(InputSymbol s1, InputSymbol s2, TTrace t) {
exists(StatePair p |
isReachableFromFork(_, p, t, _) and
step(p, s1, s2, _)
)
or
t = Nil() and isFork(_, s1, s2, _, _)
}
/**
* A list of pairs of input symbols that describe a path in the product automaton
* starting from some fork state.
*/
private class Trace extends TTrace {
/** Gets a textual representation of this element. */
string toString() {
this = Nil() and result = "Nil()"
or
exists(InputSymbol s1, InputSymbol s2, Trace t | this = Step(s1, s2, t) |
result = "Step(" + s1 + ", " + s2 + ", " + t + ")"
)
}
}
/**
* Gets a string corresponding to the trace `t`.
*/
private string concretise(Trace t) {
t = Nil() and result = ""
or
exists(InputSymbol s1, InputSymbol s2, Trace rest | t = Step(s1, s2, rest) |
result = concretise(rest) + intersect(s1, s2)
)
}
/**
* Holds if `r` is reachable from `(fork, fork)` under input `w`, and there is
* a path from `r` back to `(fork, fork)` with `rem` steps.
*/
private predicate isReachableFromFork(State fork, StatePair r, Trace w, int rem) {
// base case
exists(InputSymbol s1, InputSymbol s2, State q1, State q2 |
isFork(fork, s1, s2, q1, q2) and
r = MkStatePair(q1, q2) and
w = Step(s1, s2, Nil()) and
rem = statePairDist(r, MkStatePair(fork, fork))
)
or
// recursive case
exists(StatePair p, Trace v, InputSymbol s1, InputSymbol s2 |
isReachableFromFork(fork, p, v, rem + 1) and
step(p, s1, s2, r) and
w = Step(s1, s2, v) and
rem >= statePairDist(r, MkStatePair(fork, fork))
)
}
/**
* Gets a state in the product automaton from which `(fork, fork)` is
* reachable in zero or more epsilon transitions.
*/
private StatePair getAForkPair(State fork) {
isFork(fork, _, _, _, _) and
result = MkStatePair(epsilonPred*(fork), epsilonPred*(fork))
}
/**
* Holds if `fork` is a pumpable fork with word `w`.
*/
private predicate isPumpable(State fork, string w) {
exists(StatePair q, Trace t |
isReachableFromFork(fork, q, t, _) and
q = getAForkPair(fork) and
w = concretise(t)
)
}
/**
* An instantiation of `ReDoSConfiguration` for exponential backtracking.
*/
class ExponentialReDoSConfiguration extends ReDoSConfiguration {
ExponentialReDoSConfiguration() { this = "ExponentialReDoSConfiguration" }
override predicate isReDoSCandidate(State state, string pump) { isPumpable(state, pump) }
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,15 @@
/**
* This module should provide a class hierarchy corresponding to a parse tree of regular expressions.
*/
import python
import semmle.python.RegexTreeView
/**
* Holds if the regular expression should not be considered.
*
* As a pragmatic performance optimization (adopted from the JavaScript
* implementation of this analysis), we ignore regular expressions in files
* that were not extracted, i.e. files with no relative path in the database.
*/
predicate isExcluded(RegExpParent parent) {
not exists(parent.getRegex().getLocation().getFile().getRelativePath())
}

View File

@@ -0,0 +1,420 @@
/**
* Provides classes for working with regular expressions that can
* perform backtracking in superlinear time.
*/
import ReDoSUtil
/*
* This module implements the analysis described in the paper:
* Valentin Wustholz, Oswaldo Olivo, Marijn J. H. Heule, and Isil Dillig:
* Static Detection of DoS Vulnerabilities in
* Programs that use Regular Expressions
* (Extended Version).
* (https://arxiv.org/pdf/1701.04045.pdf)
*
* Theorem 3 from the paper describes the basic idea.
*
* The following explains the idea using variables and predicate names that are used in the implementation:
* We consider a pair of repetitions, which we will call `pivot` and `succ`.
*
* We create a product automaton of 3-tuples of states (see `StateTuple`).
* There exists a transition `(a,b,c) -> (d,e,f)` in the product automaton
* iff there exists three transitions in the NFA `a->d, b->e, c->f` where those three
* transitions all match a shared character `char`. (see `getAThreewayIntersect`)
*
* We start a search in the product automaton at `(pivot, pivot, succ)`,
* and search for a series of transitions (a `Trace`), such that we end
* at `(pivot, succ, succ)` (see `isReachableFromStartTuple`).
*
* For example, consider the regular expression `/^\d*5\w*$/`.
* The search will start at the tuple `(\d*, \d*, \w*)` and search
* for a path to `(\d*, \w*, \w*)`.
* This path exists, and consists of a single transition in the product automaton,
* where the three corresponding NFA edges all match the character `"5"`.
*
* The start-state in the NFA has an any-transition to itself, this allows us to
* flag regular expressions such as `/a*$/` - which does not have a start anchor -
* and can thus start matching anywhere.
*
* The implementation is not perfect.
* It has the same suffix detection issue as the `js/redos` query, which can cause false positives.
* It also doesn't find all transitions in the product automaton, which can cause false negatives.
*/
/**
* An instantiation of `ReDoSConfiguration` for superlinear ReDoS.
*/
class SuperLinearReDoSConfiguration extends ReDoSConfiguration {
SuperLinearReDoSConfiguration() { this = "SuperLinearReDoSConfiguration" }
override predicate isReDoSCandidate(State state, string pump) { isPumpable(_, state, pump) }
}
/**
* Gets any root (start) state of a regular expression.
*/
private State getRootState() { result = mkMatch(any(RegExpRoot r)) }
private newtype TStateTuple =
MkStateTuple(State q1, State q2, State q3) {
// starts at (pivot, pivot, succ)
isStartLoops(q1, q3) and q1 = q2
or
step(_, _, _, _, q1, q2, q3) and FeasibleTuple::isFeasibleTuple(q1, q2, q3)
}
/**
* A state in the product automaton.
* The product automaton contains 3-tuples of states.
*
* We lazily only construct those states that we are actually
* going to need.
* Either a start state `(pivot, pivot, succ)`, or a state
* where there exists a transition from an already existing state.
*
* The exponential variant of this query (`js/redos`) uses an optimization
* trick where `q1 <= q2`. This trick cannot be used here as the order
* of the elements matters.
*/
class StateTuple extends TStateTuple {
State q1;
State q2;
State q3;
StateTuple() { this = MkStateTuple(q1, q2, q3) }
/**
* Gets a string representation of this tuple.
*/
string toString() { result = "(" + q1 + ", " + q2 + ", " + q3 + ")" }
/**
* Holds if this tuple is `(r1, r2, r3)`.
*/
pragma[noinline]
predicate isTuple(State r1, State r2, State r3) { r1 = q1 and r2 = q2 and r3 = q3 }
}
/**
* A module for determining feasible tuples for the product automaton.
*
* The implementation is split into many predicates for performance reasons.
*/
private module FeasibleTuple {
/**
* Holds if the tuple `(r1, r2, r3)` might be on path from a start-state to an end-state in the product automaton.
*/
pragma[inline]
predicate isFeasibleTuple(State r1, State r2, State r3) {
// The first element is either inside a repetition (or the start state itself)
isRepetitionOrStart(r1) and
// The last element is inside a repetition
stateInsideRepetition(r3) and
// The states are reachable in the NFA in the order r1 -> r2 -> r3
delta+(r1) = r2 and
delta+(r2) = r3 and
// The first element can reach a beginning (the "pivot" state in a `(pivot, succ)` pair).
canReachABeginning(r1) and
// The last element can reach a target (the "succ" state in a `(pivot, succ)` pair).
canReachATarget(r3)
}
/**
* Holds if `s` is either inside a repetition, or is the start state (which is a repetition).
*/
pragma[noinline]
private predicate isRepetitionOrStart(State s) { stateInsideRepetition(s) or s = getRootState() }
/**
* Holds if state `s` might be inside a backtracking repetition.
*/
pragma[noinline]
private predicate stateInsideRepetition(State s) {
s.getRepr().getParent*() instanceof InfiniteRepetitionQuantifier
}
/**
* Holds if there exists a path in the NFA from `s` to a "pivot" state
* (from a `(pivot, succ)` pair that starts the search).
*/
pragma[noinline]
private predicate canReachABeginning(State s) {
delta+(s) = any(State pivot | isStartLoops(pivot, _))
}
/**
* Holds if there exists a path in the NFA from `s` to a "succ" state
* (from a `(pivot, succ)` pair that starts the search).
*/
pragma[noinline]
private predicate canReachATarget(State s) { delta+(s) = any(State succ | isStartLoops(_, succ)) }
}
/**
* Holds if `pivot` and `succ` are a pair of loops that could be the beginning of a quadratic blowup.
*
* There is a slight implementation difference compared to the paper: this predicate requires that `pivot != succ`.
* The case where `pivot = succ` causes exponential backtracking and is handled by the `js/redos` query.
*/
predicate isStartLoops(State pivot, State succ) {
pivot != succ and
succ.getRepr() instanceof InfiniteRepetitionQuantifier and
delta+(pivot) = succ and
(
pivot.getRepr() instanceof InfiniteRepetitionQuantifier
or
pivot = mkMatch(any(RegExpRoot root))
)
}
/**
* Gets a state for which there exists a transition in the NFA from `s`.
*/
State delta(State s) { delta(s, _, result) }
/**
* Holds if there are transitions from the components of `q` to the corresponding
* components of `r` labelled with `s1`, `s2`, and `s3`, respectively.
*/
pragma[noinline]
predicate step(StateTuple q, InputSymbol s1, InputSymbol s2, InputSymbol s3, StateTuple r) {
exists(State r1, State r2, State r3 |
step(q, s1, s2, s3, r1, r2, r3) and r = MkStateTuple(r1, r2, r3)
)
}
/**
 * Holds if there are transitions from the components of `q` to `r1`, `r2`, and `r3`
 * labelled with `s1`, `s2`, and `s3`, respectively.
 *
 * The three symbols must share at least one character, as witnessed by
 * `getAThreewayIntersect` — i.e. a single input character can drive all three
 * component states forward simultaneously.
 *
 * NOTE: `pragma[noopt]` disables optimizer reordering, so the conjunct order
 * below is deliberate — do not rearrange it.
 */
pragma[noopt]
predicate step(
  StateTuple q, InputSymbol s1, InputSymbol s2, InputSymbol s3, State r1, State r2, State r3
) {
  exists(State q1, State q2, State q3 | q.isTuple(q1, q2, q3) |
    deltaClosed(q1, s1, r1) and
    deltaClosed(q2, s2, r2) and
    deltaClosed(q3, s3, r3) and
    // use noopt to force the join on `getAThreewayIntersect` to happen last.
    exists(getAThreewayIntersect(s1, s2, s3))
  )
}
/**
 * Gets a char that is matched by all the edges `s1`, `s2`, and `s3`.
 *
 * Each disjunct anchors the result at the min/max of one pairwise intersection
 * and checks membership in the other two, which keeps the relation small.
 *
 * The result is not complete, and might miss some combination of edges that share some character.
 */
pragma[noinline]
string getAThreewayIntersect(InputSymbol s1, InputSymbol s2, InputSymbol s3) {
  result = minAndMaxIntersect(s1, s2) and
  (result = intersect(s2, s3) or result = intersect(s1, s3))
  or
  result = minAndMaxIntersect(s1, s3) and
  (result = intersect(s2, s3) or result = intersect(s1, s2))
  or
  result = minAndMaxIntersect(s2, s3) and
  (result = intersect(s1, s2) or result = intersect(s1, s3))
}
/**
 * Gets the minimum and maximum characters that intersect between `a` and `b`.
 * This predicate is used to limit the size of `getAThreewayIntersect`.
 */
pragma[noinline]
string minAndMaxIntersect(InputSymbol a, InputSymbol b) {
  result = min(intersect(a, b))
  or
  result = max(intersect(a, b))
}
/**
 * The internal representation of traces: either the empty trace, or a triple of
 * input symbols consed onto a shorter trace.
 *
 * The `Step` branch is restricted to triples that actually occur along a path
 * explored by the search: either extending a tuple already reached from a start
 * tuple (mutually recursive with `isReachableFromStartTuple`), or the very
 * first step out of a start tuple `(pivot, pivot, succ)`.
 */
private newtype TTrace =
  Nil() or
  Step(InputSymbol s1, InputSymbol s2, InputSymbol s3, TTrace t) {
    // extend an existing trace `t` ending at tuple `p` by one product-automaton step
    exists(StateTuple p |
      isReachableFromStartTuple(_, _, p, t, _) and
      step(p, s1, s2, s3, _)
    )
    or
    // the first step of a search, taken directly from a start tuple
    exists(State pivot, State succ | isStartLoops(pivot, succ) |
      t = Nil() and step(MkStateTuple(pivot, pivot, succ), s1, s2, s3, _)
    )
  }
/**
 * A list of tuples of input symbols that describe a path in the product automaton
 * starting from some start state.
 */
class Trace extends TTrace {
  /**
   * Gets a string representation of this Trace that can be used for debug purposes.
   */
  string toString() {
    this = Nil() and result = "Nil()"
    or
    exists(InputSymbol a, InputSymbol b, InputSymbol c, Trace rest | this = Step(a, b, c, rest) |
      result = "Step(" + a + ", " + b + ", " + c + ", " + rest + ")"
    )
  }
}
/**
 * Gets a string corresponding to the trace `t`: one shared character per step,
 * in the order the steps were taken.
 */
string concretise(Trace t) {
  t = Nil() and result = ""
  or
  exists(InputSymbol a, InputSymbol b, InputSymbol c, Trace prefix | t = Step(a, b, c, prefix) |
    result = concretise(prefix) + getAThreewayIntersect(a, b, c)
  )
}
/**
 * Holds if there exists a transition from `r` to `q` in the product automaton.
 * Notice that the arguments are flipped, and thus the direction is backwards.
 */
pragma[noinline]
predicate tupleDeltaBackwards(StateTuple q, StateTuple r) {
  exists(InputSymbol s1, InputSymbol s2, InputSymbol s3 | step(r, s1, s2, s3, q))
}
/**
 * Holds if `tuple` is an end state in our search.
 * That means there exists a pair of loops `(pivot, succ)` such that `tuple = (pivot, succ, succ)`.
 */
predicate isEndTuple(StateTuple tuple) {
  exists(State pivot, State succ | tuple = getAnEndTuple(pivot, succ))
}
/**
 * Gets the minimum length of a path from `r` to some end state `end`.
 *
 * The implementation searches backwards from the end-tuple (via
 * `tupleDeltaBackwards`, which flips the transition direction).
 * This approach was chosen because it is way more efficient if the first predicate given to `shortestDistances` is small.
 * The `end` argument must always be an end state.
 */
int distBackFromEnd(StateTuple r, StateTuple end) =
  shortestDistances(isEndTuple/1, tupleDeltaBackwards/2)(end, r, result)
/**
 * Holds if there exists a pair of repetitions `(pivot, succ)` in the regular expression such that:
 * `tuple` is reachable from `(pivot, pivot, succ)` in the product automaton,
 * and there is a distance of `dist` from `tuple` to the nearest end-tuple `(pivot, succ, succ)`,
 * and a path from a start-state to `tuple` follows the transitions in `trace`.
 *
 * The `dist` column decreases by exactly 1 per recursive step, so the search only
 * follows shortest paths towards the end tuple (as measured by `distBackFromEnd`).
 */
predicate isReachableFromStartTuple(State pivot, State succ, StateTuple tuple, Trace trace, int dist) {
  // base case. The first step is inlined to start the search after all possible 1-steps, and not just the ones with the shortest path.
  exists(InputSymbol s1, InputSymbol s2, InputSymbol s3, State q1, State q2, State q3 |
    isStartLoops(pivot, succ) and
    step(MkStateTuple(pivot, pivot, succ), s1, s2, s3, tuple) and
    tuple = MkStateTuple(q1, q2, q3) and
    trace = Step(s1, s2, s3, Nil()) and
    dist = distBackFromEnd(tuple, MkStateTuple(pivot, succ, succ))
  )
  or
  // recursive case: extend a known-reachable tuple `p` (at distance `dist + 1`)
  // by one step to `tuple` (at distance `dist`), recording the step in `trace`.
  exists(StateTuple p, Trace v, InputSymbol s1, InputSymbol s2, InputSymbol s3 |
    isReachableFromStartTuple(pivot, succ, p, v, dist + 1) and
    dist = isReachableFromStartTupleHelper(pivot, succ, tuple, p, s1, s2, s3) and
    trace = Step(s1, s2, s3, v)
  )
}
/**
 * Helper predicate for the recursive case in `isReachableFromStartTuple`:
 * takes one product-automaton step from `p` to `r` and gets the remaining
 * distance from `r` to the end tuple `(pivot, succ, succ)`.
 */
pragma[noinline]
private int isReachableFromStartTupleHelper(
  State pivot, State succ, StateTuple r, StateTuple p, InputSymbol s1, InputSymbol s2,
  InputSymbol s3
) {
  step(p, s1, s2, s3, r) and
  result = distBackFromEnd(r, MkStateTuple(pivot, succ, succ))
}
/**
 * Gets the tuple `(pivot, succ, succ)` from the product automaton,
 * provided `(pivot, succ)` is a valid start pair of loops.
 */
StateTuple getAnEndTuple(State pivot, State succ) {
  result = MkStateTuple(pivot, succ, succ) and
  isStartLoops(pivot, succ)
}
/**
 * Holds if matching repetitions of `pump` can:
 * 1) Transition from `pivot` back to `pivot`.
 * 2) Transition from `pivot` to `succ`.
 * 3) Transition from `succ` to `succ`.
 *
 * From theorem 3 in the paper linked in the top of this file we can therefore conclude that
 * the regular expression has polynomial backtracking - if a rejecting suffix exists.
 *
 * This predicate is used by `SuperLinearReDoSConfiguration`, and the final results are
 * available in the `hasReDoSResult` predicate.
 */
predicate isPumpable(State pivot, State succ, string pump) {
  exists(StateTuple endTuple, Trace trace |
    endTuple = getAnEndTuple(pivot, succ) and
    isReachableFromStartTuple(pivot, succ, endTuple, trace, _) and
    pump = concretise(trace)
  )
}
/**
 * Holds if repetitions of `pump` at `t` will cause polynomial backtracking,
 * where `prev` is the earlier loop that `pump` can also repeatedly match.
 *
 * NOTE(review): the predicate name misspells "polynomial"; renaming would
 * require updating all callers, so it is kept as-is here.
 */
predicate polynimalReDoS(RegExpTerm t, string pump, string prefixMsg, RegExpTerm prev) {
  exists(State pivot, State succ |
    isPumpable(pivot, succ, _) and
    hasReDoSResult(t, pump, succ, prefixMsg) and
    prev = pivot.getRepr()
  )
}
/**
 * Gets a message for why `term` can cause polynomial backtracking:
 * strings matching the prefix described by `prefixMsg`, followed by many
 * repetitions of `pump`, can start matching at many positions inside the
 * earlier loop `prev`.
 */
string getReasonString(RegExpTerm term, string pump, string prefixMsg, RegExpTerm prev) {
  polynimalReDoS(term, pump, prefixMsg, prev) and
  result =
    // fixed typo in the user-facing alert message: "preceeding" -> "preceding"
    "Strings " + prefixMsg + "with many repetitions of '" + pump +
      "' can start matching anywhere after the start of the preceding " + prev
}
/**
 * A term that may cause a regular expression engine to perform a
 * polynomial number of match attempts, relative to the input length.
 */
class PolynomialBackTrackingTerm extends InfiniteRepetitionQuantifier {
  // The (shortest) human-readable explanation for the polynomial backtracking.
  string reason;
  // The string whose repetition triggers the blowup.
  string pump;
  // Describes the prefix a matching string must start with.
  string prefixMsg;
  // The earlier loop that `pump` can also repeatedly match.
  RegExpTerm prev;

  PolynomialBackTrackingTerm() {
    reason = getReasonString(this, pump, prefixMsg, prev) and
    // there might be many reasons for this term to have polynomial backtracking - we pick the shortest one.
    // The `order by` ties `pump`/`prefixMsg`/`prev` to the chosen message, making them functional.
    reason = min(string msg | msg = getReasonString(this, _, _, _) | msg order by msg.length(), msg)
  }

  /**
   * Holds if all non-empty successors to the polynomial backtracking term matches the end of the line.
   */
  predicate isAtEndLine() {
    forall(RegExpTerm succ | this.getSuccessor+() = succ and not matchesEpsilon(succ) |
      succ instanceof RegExpDollar
    )
  }

  /**
   * Gets the string that should be repeated to cause this regular expression to perform polynomially.
   */
  string getPumpString() { result = pump }

  /**
   * Gets a message for which prefix a matching string must start with for this term to cause polynomial backtracking.
   */
  string getPrefixMessage() { result = prefixMsg }

  /**
   * Gets a predecessor to `this`, which also loops on the pump string, and thereby causes polynomial backtracking.
   */
  RegExpTerm getPreviousLoop() { result = prev }

  /**
   * Gets the reason for the number of match attempts.
   */
  string getReason() { result = reason }
}