Merge remote-tracking branch 'upstream/main' into post-release-prep/codeql-cli-2.13.3

This commit is contained in:
Arthur Baars
2023-05-30 21:27:53 +02:00
360 changed files with 40677 additions and 18302 deletions

View File

@@ -15,8 +15,7 @@
</p>
<sample language="python">
re.sub(r"^\s+|\s+$", "", text) # BAD
</sample>
re.sub(r"^\s+|\s+$", "", text) # BAD</sample>
<p>
@@ -71,8 +70,7 @@
</p>
<sample language="python">
^0\.\d+E?\d+$ # BAD
</sample>
^0\.\d+E?\d+$ # BAD</sample>
<p>
@@ -103,6 +101,32 @@
</example>
<example>
<p>
Sometimes it is unclear how a regular expression can be rewritten to
avoid the problem. In such cases, it often suffices to limit the
length of the input string. For instance, the following
regular expression is used to match numbers, and on some non-number
inputs it can have quadratic time complexity:
</p>
<sample language="python">
match = re.search(r'^(\+|-)?(\d+|(\d*\.\d*))?(E|e)?([-+])?(\d+)?$', str) </sample>
<p>
It is not immediately obvious how to rewrite this regular expression
to avoid the problem. However, you can mitigate performance issues by limiting the length
of the input to 1000 characters, so that matching always finishes in a reasonable amount
of time.
</p>
<sample language="python">
if len(str) &gt; 1000:
raise ValueError("Input too long")
match = re.search(r'^(\+|-)?(\d+|(\d*\.\d*))?(E|e)?([-+])?(\d+)?$', str) </sample>
</example>
<include src="ReDoSReferences.inc.qhelp"/>
</qhelp>

View File

@@ -0,0 +1,32 @@
<!DOCTYPE qhelp PUBLIC "-//Semmle//qhelp//EN" "qhelp.dtd">
<qhelp>
<overview>
<p>Security checks bypass due to a Unicode transformation</p>
<p> If security checks or logical validation is performed before unicode normalization, the
security checks or logical validation could be bypassed due to a potential Unicode
character collision. The validations considered are: any character escaping, any regex
validation, and any string manipulation (such as <code>str.split</code>). </p>
</overview>
<recommendation>
<p> Perform Unicode normalization before the logical validation. </p>
</recommendation>
<example>
<p> The following example showcases the bypass of all checks performed by <code>
flask.escape()</code> because Unicode normalization is applied after the escaping.</p>
<p>For instance, the character U+FE64 (<code>﹤</code>) is not filtered out by the flask
escape function. However, the subsequent Unicode normalization transforms that character
into U+003C (<code>&lt;</code>).</p>
<sample src="escape-bypass.py" />
</example>
<references>
<li> Research study: <a
href="https://gosecure.github.io/presentations/2021-02-unicode-owasp-toronto/philippe_arteau_owasp_unicode_v4.pdf">
Unicode vulnerabilities that could bYte you
</a> and <a
href="https://gosecure.github.io/unicode-pentester-cheatsheet/">Unicode pentest
cheatsheet</a>. </li>
</references>
</qhelp>

View File

@@ -0,0 +1,24 @@
/**
* @name Bypass Logical Validation Using Unicode Characters
* @description A Unicode transformation is applied to remote user-controlled data. The transformation is a Unicode normalization using the algorithms "NFC" or "NFKC". In all cases, the security measures implemented or the logical validation performed to escape any injection characters, to validate using regex patterns or to perform string-based checks, before the Unicode transformation are **bypassable** by special Unicode characters.
* @kind path-problem
* @id py/unicode-bypass-validation
* @precision high
* @problem.severity error
* @tags security
* experimental
* external/cwe/cwe-176
* external/cwe/cwe-179
* external/cwe/cwe-180
*/
import python
import UnicodeBypassValidationQuery
import DataFlow::PathGraph
// Report every path found by the configuration, from a source path node to a
// sink path node. The alert is placed on the sink's data-flow node; the two
// `$@` placeholders in the message link to the sink node and the source node.
from Configuration cfg, DataFlow::PathNode src, DataFlow::PathNode snk
where cfg.hasFlowPath(src, snk)
select snk.getNode(), src, snk,
  "This $@ processes unsafely $@ and any logical validation in-between could be bypassed using special Unicode characters.",
  snk.getNode(), "Unicode transformation (Unicode normalization)", src.getNode(),
  "remote user-controlled data"

View File

@@ -0,0 +1,30 @@
/**
* Provides default sources, sinks and sanitizers for detecting
* "Unicode transformation"
* vulnerabilities, as well as extension points for adding your own.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
/**
* Provides extension points for detecting "Unicode transformation"
* vulnerabilities: the abstract classes `Source`, `Sink` and `Sanitizer`.
*
* No concrete subclasses are declared inside this module; extend the
* classes below to contribute your own sources, sinks or sanitizers.
*
* NOTE(review): the taint-tracking `Configuration` for this query defines its
* sources and sinks inline and does not visibly reference these classes —
* confirm whether they are meant to be wired in.
*/
module UnicodeBypassValidation {
/**
* A data flow source for "Unicode transformation" vulnerabilities.
*
* Abstract: it has no members of its own, only those of its subclasses.
*/
abstract class Source extends DataFlow::Node { }
/**
* A data flow sink for "Unicode transformation" vulnerabilities.
*
* Abstract: it has no members of its own, only those of its subclasses.
*/
abstract class Sink extends DataFlow::Node { }
/**
* A sanitizer for "Unicode transformation" vulnerabilities.
*
* Abstract: it has no members of its own, only those of its subclasses.
*/
abstract class Sanitizer extends DataFlow::Node { }
}

View File

@@ -0,0 +1,73 @@
/**
* Provides a taint-tracking configuration for detecting "Unicode transformation mishandling" vulnerabilities.
*/
private import python
import semmle.python.ApiGraphs
import semmle.python.Concepts
import semmle.python.dataflow.new.internal.DataFlowPublic
import semmle.python.dataflow.new.TaintTracking
import semmle.python.dataflow.new.internal.TaintTrackingPrivate
import semmle.python.dataflow.new.RemoteFlowSources
import UnicodeBypassValidationCustomizations::UnicodeBypassValidation
/**
* A flow state signifying that a logical validation has not been performed.
*
* This is the state in which `Configuration.isSource` starts flow, and the
* state that `Configuration.isAdditionalTaintStep` requires on the input side
* of a validation step.
*/
class PreValidation extends DataFlow::FlowState {
// The state is identified by this exact string value.
PreValidation() { this = "PreValidation" }
}
/**
* A flow state signifying that a logical validation has been performed.
*
* `Configuration.isAdditionalTaintStep` moves flow into this state, and
* `Configuration.isSink` only accepts flow that is in this state.
*/
class PostValidation extends DataFlow::FlowState {
// The state is identified by this exact string value.
PostValidation() { this = "PostValidation" }
}
/**
 * A taint-tracking configuration for detecting "Unicode transformation mishandling"
 * vulnerabilities.
 *
 * Two flow states are used. Flow begins at a remote flow source in state
 * `PreValidation`; a validation-like step (escaping, regex execution, or string
 * manipulation) switches it to `PostValidation`; and only flow in state
 * `PostValidation` is accepted at a sink. A result therefore means that the data
 * was validated first and handed to a Unicode transformation afterwards.
 */
class Configuration extends TaintTracking::Configuration {
  Configuration() { this = "UnicodeBypassValidation" }

  /** Holds if `source` is a remote flow source; flow from it starts in state `PreValidation`. */
  override predicate isSource(DataFlow::Node source, DataFlow::FlowState state) {
    state instanceof PreValidation and
    source instanceof RemoteFlowSource
  }

  /**
   * Holds if the step from `nodeFrom` to `nodeTo` counts as a logical validation,
   * taking the flow from state `PreValidation` to state `PostValidation`.
   */
  override predicate isAdditionalTaintStep(
    DataFlow::Node nodeFrom, DataFlow::FlowState stateFrom, DataFlow::Node nodeTo,
    DataFlow::FlowState stateTo
  ) {
    stateFrom instanceof PreValidation and
    stateTo instanceof PostValidation and
    (
      // from any input of an escaping operation to its output
      exists(Escaping esc | nodeTo = esc.getOutput() and nodeFrom = esc.getAnInput())
      or
      // from the string a regex is executed on to the regex execution itself
      exists(RegexExecution regexExec | nodeTo = regexExec and nodeFrom = regexExec.getString())
      or
      // any string manipulation step, unless its target is an `encode`/`decode` method call
      stringManipulation(nodeFrom, nodeTo) and
      not nodeTo.(DataFlow::MethodCallNode).getMethodName() in ["encode", "decode"]
    )
  }

  /**
   * Holds if `sink` is the relevant argument of a call to one of the modelled functions
   * from `unicodedata`, `unidecode`, `pyunormalize` or `textnorm`, and the flow is in
   * state `PostValidation`.
   *
   * The normalization form passed to `normalize` is not inspected here, and all four
   * `pyunormalize` form functions (`NFC`, `NFD`, `NFKC`, `NFKD`) are matched.
   */
  override predicate isSink(DataFlow::Node sink, DataFlow::FlowState state) {
    state instanceof PostValidation and
    exists(API::CallNode call |
      // calls for which the argument at index 1 is the sink
      (
        call = API::moduleImport("unicodedata").getMember("normalize").getACall()
        or
        call = API::moduleImport("pyunormalize").getMember("normalize").getACall()
      ) and
      sink = call.getArg(1)
      or
      // calls for which the argument at index 0 is the sink
      (
        call = API::moduleImport("unidecode").getMember("unidecode").getACall()
        or
        call = API::moduleImport("pyunormalize").getMember(["NFC", "NFD", "NFKC", "NFKD"]).getACall()
        or
        call = API::moduleImport("textnorm").getMember("normalize_unicode").getACall()
      ) and
      sink = call.getArg(0)
    )
  }
}

View File

@@ -0,0 +1,11 @@
import unicodedata
from flask import Flask, request, escape, render_template
app = Flask(__name__)


@app.route("/unsafe1")
def unsafe1():
    # BAD (intentionally, for documentation): the "ui" query parameter is
    # escaped first and only afterwards passed through NFKC normalization,
    # so the normalization runs on text that has already been checked.
    escaped = escape(request.args.get("ui"))
    normalized = unicodedata.normalize("NFKC", escaped)
    return render_template("result.html", normalized_user_input=normalized)

View File

@@ -0,0 +1,24 @@
/**
* @name Summarized callable call sites
* @description A call site for which we have a summarized callable
* @kind problem
* @problem.severity recommendation
* @id py/meta/summarized-callable-call-sites
* @tags meta
* @precision very-low
*/
import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.FlowSummary
import meta.MetaMetrics
// One result per use site of a summarized callable, labelled by whether the
// site is a call to it or a callback, skipping sites located in ignored files.
from DataFlow::Node site, SummarizedCallable callable, string siteKind
where
  (
    siteKind = "Call" and site = callable.getACall()
    or
    siteKind = "Callback" and site = callable.getACallback()
  ) and
  not site.getLocation().getFile() instanceof IgnoredFile
select site, siteKind + " to " + callable