mirror of
https://github.com/github/codeql.git
synced 2025-12-18 09:43:15 +01:00
Python: Remove promoted code:
- queries (`py/regex-injection`) - concepts (RegexExecution, RegexEscape) - library models (Stdlib::Re)
This commit is contained in:
@@ -1,45 +0,0 @@
|
|||||||
<!DOCTYPE qhelp PUBLIC
|
|
||||||
"-//Semmle//qhelp//EN"
|
|
||||||
"qhelp.dtd">
|
|
||||||
<qhelp>
|
|
||||||
<overview>
|
|
||||||
<p>
|
|
||||||
Constructing a regular expression with unsanitized user input is dangerous as a malicious user may
|
|
||||||
be able to modify the meaning of the expression. In particular, such a user may be able to provide
|
|
||||||
a regular expression fragment that takes exponential time in the worst case, and use that to
|
|
||||||
perform a Denial of Service attack.
|
|
||||||
</p>
|
|
||||||
</overview>
|
|
||||||
|
|
||||||
<recommendation>
|
|
||||||
<p>
|
|
||||||
Before embedding user input into a regular expression, use a sanitization function such as
|
|
||||||
<code>re.escape</code> to escape meta-characters that have a special meaning in
|
|
||||||
regular expression syntax.
|
|
||||||
</p>
|
|
||||||
</recommendation>
|
|
||||||
|
|
||||||
<example>
|
|
||||||
<p>
|
|
||||||
The following examples are based on a simple Flask web server environment.
|
|
||||||
</p>
|
|
||||||
<p>
|
|
||||||
The following example shows an HTTP request parameter that is used to construct a regular expression
|
|
||||||
without sanitizing it first:
|
|
||||||
</p>
|
|
||||||
<sample src="re_bad.py" />
|
|
||||||
<p>
|
|
||||||
Instead, the request parameter should be sanitized first, for example using the function
|
|
||||||
<code>re.escape</code>. This ensures that the user cannot insert characters which have a
|
|
||||||
special meaning in regular expressions.
|
|
||||||
</p>
|
|
||||||
<sample src="re_good.py" />
|
|
||||||
</example>
|
|
||||||
|
|
||||||
<references>
|
|
||||||
<li>OWASP: <a href="https://www.owasp.org/index.php/Regular_expression_Denial_of_Service_-_ReDoS">Regular expression Denial of Service - ReDoS</a>.</li>
|
|
||||||
<li>Wikipedia: <a href="https://en.wikipedia.org/wiki/ReDoS">ReDoS</a>.</li>
|
|
||||||
<li>Python docs: <a href="https://docs.python.org/3/library/re.html">re</a>.</li>
|
|
||||||
<li>SonarSource: <a href="https://rules.sonarsource.com/python/type/Vulnerability/RSPEC-2631">RSPEC-2631</a>.</li>
|
|
||||||
</references>
|
|
||||||
</qhelp>
|
|
||||||
@@ -1,29 +0,0 @@
|
|||||||
/**
|
|
||||||
* @name Regular expression injection
|
|
||||||
* @description User input should not be used in regular expressions without first being escaped,
|
|
||||||
* otherwise a malicious user may be able to inject an expression that could require
|
|
||||||
* exponential time on certain inputs.
|
|
||||||
* @kind path-problem
|
|
||||||
* @problem.severity error
|
|
||||||
* @id py/regex-injection
|
|
||||||
* @tags security
|
|
||||||
* external/cwe/cwe-730
|
|
||||||
* external/cwe/cwe-400
|
|
||||||
*/
|
|
||||||
|
|
||||||
// determine precision above
|
|
||||||
import python
|
|
||||||
import experimental.semmle.python.security.injection.RegexInjection
|
|
||||||
import DataFlow::PathGraph
|
|
||||||
|
|
||||||
from
  RegexInjectionFlowConfig cfg, DataFlow::PathNode source, DataFlow::PathNode sink,
  RegexInjectionSink regexSink, Attribute regexMethod
where
  // Tie the path's end point to the sink model so that the executing method and
  // module can be reported alongside the flow path.
  regexSink = sink.getNode() and
  regexMethod = regexSink.getRegexMethod() and
  cfg.hasFlowPath(source, sink)
select regexSink, source, sink,
  "$@ regular expression is constructed from a $@ and executed by $@.", regexSink, "This",
  source.getNode(), "user-provided value", regexMethod,
  regexSink.getRegexModule() + "." + regexMethod.getName()
|
|
||||||
@@ -1,15 +0,0 @@
|
|||||||
from flask import request, Flask
import re

# The route decorators below register on this application object; without it
# the module fails with a NameError as soon as it is imported.
app = Flask(__name__)


@app.route("/direct")
def direct():
    # BAD: the request parameter is used as a regular expression without escaping.
    unsafe_pattern = request.args["pattern"]
    re.search(unsafe_pattern, "")


@app.route("/compile")
def compile():
    # BAD: compiling the raw request parameter is just as unsafe as searching with it.
    unsafe_pattern = request.args["pattern"]
    compiled_pattern = re.compile(unsafe_pattern)
    compiled_pattern.search("")
|
|
||||||
@@ -1,17 +0,0 @@
|
|||||||
from flask import request, Flask
import re

# The route decorators below register on this application object; without it
# the module fails with a NameError as soon as it is imported.
app = Flask(__name__)


@app.route("/direct")
def direct():
    unsafe_pattern = request.args['pattern']
    # GOOD: re.escape neutralises regex meta-characters before the value is used.
    safe_pattern = re.escape(unsafe_pattern)
    re.search(safe_pattern, "")


@app.route("/compile")
def compile():
    unsafe_pattern = request.args['pattern']
    # GOOD: the pattern is escaped before it is compiled and executed.
    safe_pattern = re.escape(unsafe_pattern)
    compiled_pattern = re.compile(safe_pattern)
    compiled_pattern.search("")
|
|
||||||
@@ -14,73 +14,6 @@ private import semmle.python.dataflow.new.RemoteFlowSources
|
|||||||
private import semmle.python.dataflow.new.TaintTracking
|
private import semmle.python.dataflow.new.TaintTracking
|
||||||
private import experimental.semmle.python.Frameworks
|
private import experimental.semmle.python.Frameworks
|
||||||
|
|
||||||
/** Provides classes for modeling APIs that execute regular expressions. */
module RegexExecution {
  /**
   * A data-flow node that executes a regular expression.
   *
   * Extend this class to model new APIs. If you want to refine existing API models,
   * extend `RegexExecution` instead.
   */
  abstract class Range extends DataFlow::Node {
    /** Gets the library used to execute the regular expression. */
    abstract string getRegexModule();

    /** Gets the argument containing the executed expression. */
    abstract DataFlow::Node getRegexNode();
  }
}
|
|
||||||
|
|
||||||
/**
 * A data-flow node that executes a regular expression.
 *
 * Extend this class to refine existing API models. If you want to model new APIs,
 * extend `RegexExecution::Range` instead.
 */
class RegexExecution extends DataFlow::Node {
  RegexExecution::Range range;

  RegexExecution() { range = this }

  /** Gets the library used to execute the regular expression. */
  string getRegexModule() { range.getRegexModule() = result }

  /** Gets the argument containing the executed expression. */
  DataFlow::Node getRegexNode() { range.getRegexNode() = result }
}
|
|
||||||
|
|
||||||
/** Provides classes for modeling APIs that escape regular expressions. */
module RegexEscape {
  /**
   * A data-flow node that escapes a regular expression.
   *
   * Extend this class to model new APIs. If you want to refine existing API models,
   * extend `RegexEscape` instead.
   */
  abstract class Range extends DataFlow::Node {
    /** Gets the argument containing the escaped expression. */
    abstract DataFlow::Node getRegexNode();
  }
}
|
|
||||||
|
|
||||||
/**
 * A data-flow node that escapes a regular expression.
 *
 * Extend this class to refine existing API models. If you want to model new APIs,
 * extend `RegexEscape::Range` instead.
 */
class RegexEscape extends DataFlow::Node {
  RegexEscape::Range range;

  RegexEscape() { range = this }

  /** Gets the argument containing the escaped expression. */
  DataFlow::Node getRegexNode() { range.getRegexNode() = result }
}
|
|
||||||
|
|
||||||
/** Provides classes for modeling LDAP query execution-related APIs. */
|
/** Provides classes for modeling LDAP query execution-related APIs. */
|
||||||
module LDAPQuery {
|
module LDAPQuery {
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -9,91 +9,3 @@ private import semmle.python.dataflow.new.TaintTracking
|
|||||||
private import semmle.python.dataflow.new.RemoteFlowSources
|
private import semmle.python.dataflow.new.RemoteFlowSources
|
||||||
private import experimental.semmle.python.Concepts
|
private import experimental.semmle.python.Concepts
|
||||||
private import semmle.python.ApiGraphs
|
private import semmle.python.ApiGraphs
|
||||||
|
|
||||||
/**
|
|
||||||
* Provides models for Python's `re` library.
|
|
||||||
*
|
|
||||||
* See https://docs.python.org/3/library/re.html
|
|
||||||
*/
|
|
||||||
private module Re {
|
|
||||||
/**
 * The name of an `re` method that immediately executes an expression.
 *
 * See https://docs.python.org/3/library/re.html#module-contents
 */
private class RegexExecutionMethods extends string {
  RegexExecutionMethods() {
    this = ["match", "fullmatch", "search", "split", "findall", "finditer", "sub", "subn"]
  }
}
|
|
||||||
|
|
||||||
/**
 * A call to an `re` method that immediately executes an expression, such as
 * `re.search(pattern, string)`.
 *
 * The executed expression is the first positional argument or the `pattern`
 * keyword argument of the call.
 *
 * See `RegexExecutionMethods`
 */
private class DirectRegex extends DataFlow::CallCfgNode, RegexExecution::Range {
  DataFlow::Node regexNode;

  DirectRegex() {
    this = API::moduleImport("re").getMember(any(RegexExecutionMethods m)).getACall() and
    // The expression may be passed positionally or as `pattern=...`.
    regexNode in [this.getArg(0), this.getArgByName("pattern")]
  }

  override DataFlow::Node getRegexNode() { result = regexNode }

  override string getRegexModule() { result = "re" }
}
|
|
||||||
|
|
||||||
/**
 * A call to an execution method on a pattern object produced by `re.compile`.
 *
 * Given the following example:
 *
 * ```py
 * pattern = re.compile(input)
 * pattern.match(s)
 * ```
 *
 * `this` refers to `pattern.match(s)` and `this.getRegexNode()` is the node for
 * `input`, the expression handed to `re.compile` either as its first positional
 * argument or as its `pattern` keyword argument.
 *
 * See `RegexExecutionMethods`
 *
 * See https://docs.python.org/3/library/re.html#regular-expression-objects
 */
private class CompiledRegex extends DataFlow::MethodCallNode, RegexExecution::Range {
  DataFlow::Node regexNode;

  CompiledRegex() {
    exists(DataFlow::MethodCallNode patternCall |
      patternCall = API::moduleImport("re").getMember("compile").getACall() and
      patternCall.flowsTo(this.getObject()) and
      this.getMethodName() instanceof RegexExecutionMethods and
      // The expression may be passed positionally or as `pattern=...`.
      regexNode in [patternCall.getArg(0), patternCall.getArgByName("pattern")]
    )
  }

  override DataFlow::Node getRegexNode() { result = regexNode }

  override string getRegexModule() { result = "re" }
}
|
|
||||||
|
|
||||||
/**
 * A call to `re.escape`, which escapes an expression.
 *
 * The escaped expression is the first positional argument or the `pattern`
 * keyword argument of the call.
 *
 * See https://docs.python.org/3/library/re.html#re.escape
 */
class ReEscape extends DataFlow::CallCfgNode, RegexEscape::Range {
  DataFlow::Node regexNode;

  ReEscape() {
    this = API::moduleImport("re").getMember("escape").getACall() and
    // The expression may be passed positionally or as `pattern=...`.
    regexNode in [this.getArg(0), this.getArgByName("pattern")]
  }

  override DataFlow::Node getRegexNode() { result = regexNode }
}
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -1,53 +0,0 @@
|
|||||||
/**
|
|
||||||
* Provides a taint-tracking configuration for detecting regular expression injection
|
|
||||||
* vulnerabilities.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import python
|
|
||||||
import experimental.semmle.python.Concepts
|
|
||||||
import semmle.python.dataflow.new.DataFlow
|
|
||||||
import semmle.python.dataflow.new.TaintTracking
|
|
||||||
import semmle.python.dataflow.new.RemoteFlowSources
|
|
||||||
|
|
||||||
/**
 * A data-flow node holding the regular expression run by a `RegexExecution`
 * whose call target is an attribute access (for example `re.search`).
 *
 * See `RegexExecution`
 */
class RegexInjectionSink extends DataFlow::Node {
  string regexModule;
  Attribute regexMethod;

  RegexInjectionSink() {
    exists(RegexExecution reExec |
      this = reExec.getRegexNode() and
      regexModule = reExec.getRegexModule() and
      // Only executions written as `<object>.<method>(...)` have an attribute to report.
      regexMethod = reExec.(DataFlow::CallCfgNode).getFunction().asExpr().(Attribute)
    )
  }

  /**
   * Gets the library used to execute the regular expression.
   */
  string getRegexModule() { result = regexModule }

  /**
   * Gets the method used to execute the regular expression.
   */
  Attribute getRegexMethod() { result = regexMethod }
}
|
|
||||||
|
|
||||||
/**
 * A taint-tracking configuration for detecting regular expression injections:
 * remote flow sources are tracked to `RegexInjectionSink`s, and any node passed
 * to a `RegexEscape` is treated as a sanitizer.
 */
class RegexInjectionFlowConfig extends TaintTracking::Configuration {
  RegexInjectionFlowConfig() { this = "RegexInjectionFlowConfig" }

  override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }

  override predicate isSink(DataFlow::Node sink) { sink instanceof RegexInjectionSink }

  override predicate isSanitizer(DataFlow::Node sanitizer) {
    exists(RegexEscape escapeCall | sanitizer = escapeCall.getRegexNode())
  }
}
|
|
||||||
Reference in New Issue
Block a user