mirror of
https://github.com/github/codeql.git
synced 2026-01-30 06:42:57 +01:00
Merge remote-tracking branch 'origin/main' into jorgectf/python/headerInjection
This commit is contained in:
@@ -0,0 +1,31 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
<qhelp>
|
||||
<overview>
|
||||
<p>If an LDAP query doesn't carry any kind of authentication, anonymous binds cause an empty or None-set password
|
||||
to result in a successful authentication.</p>
|
||||
</overview>
|
||||
|
||||
<recommendation>
|
||||
<p>Use a non-empty password while establishing an LDAP connection.</p>
|
||||
</recommendation>
|
||||
|
||||
<example>
|
||||
<p>In the following examples, the code builds an LDAP query whose execution carries no authentication or binds anonymously.</p>
|
||||
|
||||
<sample src="examples/auth_bad_2.py" />
|
||||
<sample src="examples/auth_bad_3.py" />
|
||||
|
||||
<p>In the third and fourth examples, the authentication is established using a password.</p>
|
||||
|
||||
<sample src="examples/auth_good_2.py" />
|
||||
<sample src="examples/auth_good_3.py" />
|
||||
</example>
|
||||
|
||||
<references>
|
||||
<li>SonarSource: <a href="https://rules.sonarsource.com/python/type/Vulnerability/RSPEC-4433">RSPEC-4433</a>.</li>
|
||||
<li>Python2: <a href="https://www.python-ldap.org/en/python-ldap-3.3.0/reference/ldap.html">LDAP Documentation</a>.</li>
|
||||
<li>Python3: <a href="https://ldap3.readthedocs.io/en/latest/">LDAP Documentation</a>.</li>
|
||||
</references>
|
||||
</qhelp>
|
||||
@@ -0,0 +1,31 @@
|
||||
/**
|
||||
* @name Improper LDAP Authentication
|
||||
* @description A user-controlled query carries no authentication
|
||||
* @kind problem
|
||||
* @problem.severity warning
|
||||
* @id py/improper-ldap-auth
|
||||
* @tags experimental
|
||||
* security
|
||||
* external/cwe/cwe-287
|
||||
*/
|
||||
|
||||
// Determine precision above
|
||||
import python
|
||||
import experimental.semmle.python.Concepts
|
||||
import semmle.python.dataflow.new.DataFlow
|
||||
|
||||
/**
 * Holds if `ldapBind` is performed without a usable password: either
 * `getPassword()` has no result, or a `None` literal or an empty string
 * literal reaches the password through local data flow.
 */
predicate authenticatesImproperly(LDAPBind ldapBind) {
  // No password is associated with the bind at all.
  not exists(ldapBind.getPassword())
  or
  // A `None` or `""` literal flows locally into the password.
  exists(Expr weakPassword |
    DataFlow::localFlow(DataFlow::exprNode(weakPassword), ldapBind.getPassword())
  |
    weakPassword instanceof None
    or
    weakPassword.(StrConst).getText() = ""
  )
}
|
||||
|
||||
// Report every bind operation that `authenticatesImproperly` holds for.
from LDAPBind bind
where authenticatesImproperly(bind)
select bind, "The following LDAP bind operation is executed without authentication"
|
||||
@@ -0,0 +1,14 @@
|
||||
from flask import request, Flask
|
||||
import ldap
|
||||
import ldap.filter
|
||||
import ldap.dn
|
||||
|
||||
|
||||
@app.route("/bind_example")
def bind_example():
    """Bind as ``cn=root`` with an empty password, then search (BAD)."""
    # The request parameters are escaped before being placed in the DN/filter.
    base_dn = "dc={}".format(ldap.dn.escape_dn_chars(request.args['dc']))
    query = "(user={})".format(ldap.filter.escape_filter_chars(request.args['search']))

    connection = ldap.initialize("ldap://127.0.0.1:1337")
    # BAD: the password is the empty string, so the bind carries no authentication.
    connection.bind('cn=root', "")
    user = connection.search_s(base_dn, ldap.SCOPE_SUBTREE, query)
|
||||
@@ -0,0 +1,13 @@
|
||||
from ldap3 import Server, Connection, ALL
|
||||
from flask import request, Flask
|
||||
from ldap3.utils.dn import escape_rdn
|
||||
from ldap3.utils.conv import escape_filter_chars
|
||||
|
||||
@app.route("/passwordNone")
def passwordNone():
    """Open an ldap3 connection with ``password=None``, then search (BAD)."""
    # The request parameters are escaped before being placed in the DN/filter.
    base_dn = "dc={}".format(escape_rdn(request.args['dc']))
    query = "(user={})".format(escape_filter_chars(request.args['search']))

    server = Server('servername', get_info=ALL)
    # BAD: the password is None, so the connection carries no authentication.
    connection = Connection(server, user='user_dn', password=None)
    status, result, response, _ = connection.search(base_dn, query)
|
||||
@@ -0,0 +1,14 @@
|
||||
from flask import request, Flask
|
||||
import ldap
|
||||
import ldap.filter
|
||||
import ldap.dn
|
||||
|
||||
|
||||
@app.route("/bind_example")
def bind_example():
    """Bind as ``cn=root`` with a non-empty password, then search (GOOD)."""
    # The request parameters are escaped before being placed in the DN/filter.
    base_dn = "dc={}".format(ldap.dn.escape_dn_chars(request.args['dc']))
    query = "(user={})".format(ldap.filter.escape_filter_chars(request.args['search']))

    connection = ldap.initialize("ldap://127.0.0.1:1337")
    # GOOD: a non-empty password is supplied for the bind.
    connection.bind('cn=root', "SecurePa$$!")
    user = connection.search_s(base_dn, ldap.SCOPE_SUBTREE, query)
|
||||
@@ -0,0 +1,14 @@
|
||||
from ldap3 import Server, Connection, ALL
|
||||
from flask import request, Flask
|
||||
from ldap3.utils.dn import escape_rdn
|
||||
from ldap3.utils.conv import escape_filter_chars
|
||||
|
||||
@app.route("/passwordFromEnv")
def passwordFromEnv():
    """Open an ldap3 connection with a non-empty password, then search (GOOD)."""
    # The request parameters are escaped before being placed in the DN/filter.
    base_dn = "dc={}".format(escape_rdn(request.args['dc']))
    query = "(user={})".format(escape_filter_chars(request.args['search']))

    server = Server('servername', get_info=ALL)
    # GOOD: a non-empty password is supplied for the connection.
    connection = Connection(server, user='user_dn', password="SecurePa$$!")
    status, result, response, _ = connection.search(base_dn, query)
|
||||
@@ -0,0 +1,6 @@
|
||||
import python
|
||||
import semmle.python.security.performance.SuperlinearBackTracking
|
||||
|
||||
// Restrict the results to terms located in the file named `KnownCVEs.py`.
from PolynomialBackTrackingTerm term
where term.getLocation().getFile().getBaseName() = "KnownCVEs.py"
select term.getRegex(), term, term.getReason()
|
||||
@@ -0,0 +1,108 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
|
||||
<qhelp>
|
||||
|
||||
<include src="ReDoSIntroduction.inc.qhelp" />
|
||||
|
||||
<example>
|
||||
<p>
|
||||
|
||||
Consider this use of a regular expression, which removes
|
||||
all leading and trailing whitespace in a string:
|
||||
|
||||
</p>
|
||||
|
||||
<sample language="python">
|
||||
re.sub(r"^\s+|\s+$", "", text) # BAD
|
||||
</sample>
|
||||
|
||||
<p>
|
||||
|
||||
The sub-expression <code>"\s+$"</code> will match the
|
||||
whitespace characters in <code>text</code> from left to right, but it
|
||||
can start matching anywhere within a whitespace sequence. This is
|
||||
problematic for strings that do <strong>not</strong> end with a whitespace
|
||||
character. Such a string will force the regular expression engine to
|
||||
process each whitespace sequence once per whitespace character in the
|
||||
sequence.
|
||||
|
||||
</p>
|
||||
|
||||
<p>
|
||||
|
||||
This ultimately means that the time cost of trimming a
|
||||
string is quadratic in the length of the string. So a string like
|
||||
<code>"a b"</code> will take milliseconds to process, but a similar
|
||||
string with a million spaces instead of just one will take several
|
||||
minutes.
|
||||
|
||||
</p>
|
||||
|
||||
<p>
|
||||
|
||||
Avoid this problem by rewriting the regular expression to
|
||||
not contain the ambiguity about when to start matching whitespace
|
||||
sequences. For instance, by using a negative look-behind
|
||||
(<code>^\s+|(?&lt;!\s)\s+$</code>), or just by using the built-in strip
|
||||
method (<code>text.strip()</code>).
|
||||
|
||||
</p>
|
||||
|
||||
<p>
|
||||
|
||||
Note that the sub-expression <code>"^\s+"</code> is
|
||||
<strong>not</strong> problematic as the <code>^</code> anchor restricts
|
||||
when that sub-expression can start matching, and as the regular
|
||||
expression engine matches from left to right.
|
||||
|
||||
</p>
|
||||
|
||||
</example>
|
||||
|
||||
<example>
|
||||
|
||||
<p>
|
||||
|
||||
As a similar, but slightly subtler problem, consider the
|
||||
regular expression that matches lines with numbers, possibly written
|
||||
using scientific notation:
|
||||
</p>
|
||||
|
||||
<sample language="python">
|
||||
^0\.\d+E?\d+$ # BAD
|
||||
</sample>
|
||||
|
||||
<p>
|
||||
|
||||
The problem with this regular expression is in the
|
||||
sub-expression <code>\d+E?\d+</code> because the second
|
||||
<code>\d+</code> can start matching digits anywhere after the first
|
||||
match of the first <code>\d+</code> if there is no <code>E</code> in
|
||||
the input string.
|
||||
|
||||
</p>
|
||||
|
||||
<p>
|
||||
|
||||
This is problematic for strings that do <strong>not</strong>
|
||||
end with a digit. Such a string will force the regular expression
|
||||
engine to process each digit sequence once per digit in the sequence,
|
||||
again leading to a quadratic time complexity.
|
||||
|
||||
</p>
|
||||
|
||||
<p>
|
||||
|
||||
To make the processing faster, the regular expression
|
||||
should be rewritten such that the two <code>\d+</code> sub-expressions
|
||||
do not have overlapping matches: <code>^0\.\d+(E\d+)?$</code>.
|
||||
|
||||
</p>
|
||||
|
||||
</example>
|
||||
|
||||
<include src="ReDoSReferences.inc.qhelp"/>
|
||||
|
||||
</qhelp>
|
||||
@@ -0,0 +1,33 @@
|
||||
/**
|
||||
* @name Polynomial regular expression used on uncontrolled data
|
||||
* @description A regular expression that can require polynomial time
|
||||
* to match may be vulnerable to denial-of-service attacks.
|
||||
* @kind path-problem
|
||||
* @problem.severity warning
|
||||
* @precision high
|
||||
* @id py/polynomial-redos
|
||||
* @tags security
|
||||
* external/cwe/cwe-730
|
||||
* external/cwe/cwe-400
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.security.performance.SuperlinearBackTracking
|
||||
import semmle.python.security.dataflow.PolynomialReDoS
|
||||
import DataFlow::PathGraph
|
||||
|
||||
from
  PolynomialReDoS::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink,
  PolynomialReDoS::Sink sinkNode, PolynomialBackTrackingTerm regexp
where
  // Tie the path sink to its regular expression and the backtracking term in it.
  sinkNode = sink.getNode() and
  regexp.getRootTerm() = sinkNode.getRegExp() and
  config.hasFlowPath(source, sink)
// Disabled exclusion, kept for reference:
// not (
//   source.getNode().(Source).getKind() = "url" and
//   regexp.isAtEndLine()
// )
select sinkNode.getHighlight(), source, sink,
  "This $@ that depends on $@ may run slow on strings " + regexp.getPrefixMessage() +
    "with many repetitions of '" + regexp.getPumpString() + "'.", regexp, "regular expression",
  source.getNode(), "a user-provided value"
|
||||
34
python/ql/src/experimental/Security/CWE-730/ReDoS.qhelp
Normal file
34
python/ql/src/experimental/Security/CWE-730/ReDoS.qhelp
Normal file
@@ -0,0 +1,34 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
|
||||
<qhelp>
|
||||
|
||||
<include src="ReDoSIntroduction.inc.qhelp" />
|
||||
|
||||
<example>
|
||||
<p>
|
||||
Consider this regular expression:
|
||||
</p>
|
||||
<sample language="python">
|
||||
^_(__|.)+_$
|
||||
</sample>
|
||||
<p>
|
||||
Its sub-expression <code>"(__|.)+"</code> can match the string <code>"__"</code> either by the
|
||||
first alternative <code>"__"</code> to the left of the <code>"|"</code> operator, or by two
|
||||
repetitions of the second alternative <code>"."</code> to the right. Thus, a string consisting
|
||||
of an odd number of underscores followed by some other character will cause the regular
|
||||
expression engine to run for an exponential amount of time before rejecting the input.
|
||||
</p>
|
||||
<p>
|
||||
This problem can be avoided by rewriting the regular expression to remove the ambiguity between
|
||||
the two branches of the alternative inside the repetition:
|
||||
</p>
|
||||
<sample language="python">
|
||||
^_(__|[^_])+_$
|
||||
</sample>
|
||||
</example>
|
||||
|
||||
<include src="ReDoSReferences.inc.qhelp"/>
|
||||
|
||||
</qhelp>
|
||||
25
python/ql/src/experimental/Security/CWE-730/ReDoS.ql
Normal file
25
python/ql/src/experimental/Security/CWE-730/ReDoS.ql
Normal file
@@ -0,0 +1,25 @@
|
||||
/**
|
||||
* @name Inefficient regular expression
|
||||
* @description A regular expression that requires exponential time to match certain inputs
|
||||
* can be a performance bottleneck, and may be vulnerable to denial-of-service
|
||||
* attacks.
|
||||
* @kind problem
|
||||
* @problem.severity error
|
||||
* @precision high
|
||||
* @id py/redos
|
||||
* @tags security
|
||||
* external/cwe/cwe-730
|
||||
* external/cwe/cwe-400
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.security.performance.ExponentialBackTracking
|
||||
|
||||
from RegExpTerm term, string pump, State state, string prefixMsg
where
  // exclude verbose mode regexes for now
  not term.getRegex().getAMode() = "VERBOSE" and
  hasReDoSResult(term, pump, state, prefixMsg)
select term,
  "This part of the regular expression may cause exponential backtracking on strings " + prefixMsg +
    "containing many repetitions of '" + pump + "'."
|
||||
@@ -0,0 +1,54 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
<qhelp>
|
||||
<overview>
|
||||
<p>
|
||||
|
||||
Some regular expressions take a long time to match certain
|
||||
input strings to the point where the time it takes to match a string
|
||||
of length <i>n</i> is proportional to <i>n<sup>k</sup></i> or even
|
||||
<i>2<sup>n</sup></i>. Such regular expressions can negatively affect
|
||||
performance, or even allow a malicious user to perform a Denial of
|
||||
Service ("DoS") attack by crafting an expensive input string for the
|
||||
regular expression to match.
|
||||
|
||||
</p>
|
||||
|
||||
<p>
|
||||
|
||||
The regular expression engine provided by Python uses a backtracking non-deterministic finite
|
||||
automaton to implement regular expression matching. While this approach
|
||||
is space-efficient and allows supporting advanced features like
|
||||
capture groups, it is not time-efficient in general. The worst-case
|
||||
time complexity of such an automaton can be polynomial or even
|
||||
exponential, meaning that for strings of a certain shape, increasing
|
||||
the input length by ten characters may make the automaton about 1000
|
||||
times slower.
|
||||
|
||||
</p>
|
||||
|
||||
<p>
|
||||
|
||||
Typically, a regular expression is affected by this
|
||||
problem if it contains a repetition of the form <code>r*</code> or
|
||||
<code>r+</code> where the sub-expression <code>r</code> is ambiguous
|
||||
in the sense that it can match some string in multiple ways. More
|
||||
information about the precise circumstances can be found in the
|
||||
references.
|
||||
|
||||
</p>
|
||||
</overview>
|
||||
|
||||
<recommendation>
|
||||
|
||||
<p>
|
||||
|
||||
Modify the regular expression to remove the ambiguity, or
|
||||
ensure that the strings matched with the regular expression are short
|
||||
enough that the time-complexity does not matter.
|
||||
|
||||
</p>
|
||||
|
||||
</recommendation>
|
||||
</qhelp>
|
||||
@@ -0,0 +1,16 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
<qhelp>
|
||||
<references>
|
||||
<li>
|
||||
OWASP:
|
||||
<a href="https://www.owasp.org/index.php/Regular_expression_Denial_of_Service_-_ReDoS">Regular expression Denial of Service - ReDoS</a>.
|
||||
</li>
|
||||
<li>Wikipedia: <a href="https://en.wikipedia.org/wiki/ReDoS">ReDoS</a>.</li>
|
||||
<li>Wikipedia: <a href="https://en.wikipedia.org/wiki/Time_complexity">Time complexity</a>.</li>
|
||||
<li>James Kirrage, Asiri Rathnayake, Hayo Thielecke:
|
||||
<a href="http://www.cs.bham.ac.uk/~hxt/research/reg-exp-sec.pdf">Static Analysis for Regular Expression Denial-of-Service Attack</a>.
|
||||
</li>
|
||||
</references>
|
||||
</qhelp>
|
||||
Reference in New Issue
Block a user