mirror of
https://github.com/github/codeql.git
synced 2025-12-17 17:23:36 +01:00
Python: Remove promoted code:
- queries (`py/regex-injection`) - concepts (RegexExecution, RegexEscape) - library models (Stdlib::Re)
This commit is contained in:
@@ -1,45 +0,0 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
<qhelp>
|
||||
<overview>
|
||||
<p>
|
||||
Constructing a regular expression with unsanitized user input is dangerous as a malicious user may
|
||||
be able to modify the meaning of the expression. In particular, such a user may be able to provide
|
||||
a regular expression fragment that takes exponential time in the worst case, and use that to
|
||||
perform a Denial of Service attack.
|
||||
</p>
|
||||
</overview>
|
||||
|
||||
<recommendation>
|
||||
<p>
|
||||
Before embedding user input into a regular expression, use a sanitization function such as
|
||||
<code>re.escape</code> to escape meta-characters that have a special meaning regarding
|
||||
regular expressions' syntax.
|
||||
</p>
|
||||
</recommendation>
|
||||
|
||||
<example>
|
||||
<p>
|
||||
The following examples are based on a simple Flask web server environment.
|
||||
</p>
|
||||
<p>
|
||||
The following example shows a HTTP request parameter that is used to construct a regular expression
|
||||
without sanitizing it first:
|
||||
</p>
|
||||
<sample src="re_bad.py" />
|
||||
<p>
|
||||
Instead, the request parameter should be sanitized first, for example using the function
|
||||
<code>re.escape</code>. This ensures that the user cannot insert characters which have a
|
||||
special meaning in regular expressions.
|
||||
</p>
|
||||
<sample src="re_good.py" />
|
||||
</example>
|
||||
|
||||
<references>
|
||||
<li>OWASP: <a href="https://www.owasp.org/index.php/Regular_expression_Denial_of_Service_-_ReDoS">Regular expression Denial of Service - ReDoS</a>.</li>
|
||||
<li>Wikipedia: <a href="https://en.wikipedia.org/wiki/ReDoS">ReDoS</a>.</li>
|
||||
<li>Python docs: <a href="https://docs.python.org/3/library/re.html">re</a>.</li>
|
||||
<li>SonarSource: <a href="https://rules.sonarsource.com/python/type/Vulnerability/RSPEC-2631">RSPEC-2631</a>.</li>
|
||||
</references>
|
||||
</qhelp>
|
||||
@@ -1,29 +0,0 @@
|
||||
/**
|
||||
* @name Regular expression injection
|
||||
* @description User input should not be used in regular expressions without first being escaped,
|
||||
* otherwise a malicious user may be able to inject an expression that could require
|
||||
* exponential time on certain inputs.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @id py/regex-injection
|
||||
* @tags security
|
||||
* external/cwe/cwe-730
|
||||
* external/cwe/cwe-400
|
||||
*/
|
||||
|
||||
// determine precision above
|
||||
import python
|
||||
import experimental.semmle.python.security.injection.RegexInjection
|
||||
import DataFlow::PathGraph
|
||||
|
||||
from
|
||||
RegexInjectionFlowConfig config, DataFlow::PathNode source, DataFlow::PathNode sink,
|
||||
RegexInjectionSink regexInjectionSink, Attribute methodAttribute
|
||||
where
|
||||
config.hasFlowPath(source, sink) and
|
||||
regexInjectionSink = sink.getNode() and
|
||||
methodAttribute = regexInjectionSink.getRegexMethod()
|
||||
select sink.getNode(), source, sink,
|
||||
"$@ regular expression is constructed from a $@ and executed by $@.", sink.getNode(), "This",
|
||||
source.getNode(), "user-provided value", methodAttribute,
|
||||
regexInjectionSink.getRegexModule() + "." + methodAttribute.getName()
|
||||
@@ -1,15 +0,0 @@
|
||||
from flask import request, Flask
|
||||
import re
|
||||
|
||||
|
||||
@app.route("/direct")
|
||||
def direct():
|
||||
unsafe_pattern = request.args["pattern"]
|
||||
re.search(unsafe_pattern, "")
|
||||
|
||||
|
||||
@app.route("/compile")
|
||||
def compile():
|
||||
unsafe_pattern = request.args["pattern"]
|
||||
compiled_pattern = re.compile(unsafe_pattern)
|
||||
compiled_pattern.search("")
|
||||
@@ -1,17 +0,0 @@
|
||||
from flask import request, Flask
|
||||
import re
|
||||
|
||||
|
||||
@app.route("/direct")
|
||||
def direct():
|
||||
unsafe_pattern = request.args['pattern']
|
||||
safe_pattern = re.escape(unsafe_pattern)
|
||||
re.search(safe_pattern, "")
|
||||
|
||||
|
||||
@app.route("/compile")
|
||||
def compile():
|
||||
unsafe_pattern = request.args['pattern']
|
||||
safe_pattern = re.escape(unsafe_pattern)
|
||||
compiled_pattern = re.compile(safe_pattern)
|
||||
compiled_pattern.search("")
|
||||
@@ -14,73 +14,6 @@ private import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import semmle.python.dataflow.new.TaintTracking
|
||||
private import experimental.semmle.python.Frameworks
|
||||
|
||||
/** Provides classes for modeling Regular Expression-related APIs. */
|
||||
module RegexExecution {
|
||||
/**
|
||||
* A data-flow node that executes a regular expression.
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `RegexExecution` instead.
|
||||
*/
|
||||
abstract class Range extends DataFlow::Node {
|
||||
/**
|
||||
* Gets the argument containing the executed expression.
|
||||
*/
|
||||
abstract DataFlow::Node getRegexNode();
|
||||
|
||||
/**
|
||||
* Gets the library used to execute the regular expression.
|
||||
*/
|
||||
abstract string getRegexModule();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A data-flow node that executes a regular expression.
|
||||
*
|
||||
* Extend this class to refine existing API models. If you want to model new APIs,
|
||||
* extend `RegexExecution::Range` instead.
|
||||
*/
|
||||
class RegexExecution extends DataFlow::Node {
|
||||
RegexExecution::Range range;
|
||||
|
||||
RegexExecution() { this = range }
|
||||
|
||||
DataFlow::Node getRegexNode() { result = range.getRegexNode() }
|
||||
|
||||
string getRegexModule() { result = range.getRegexModule() }
|
||||
}
|
||||
|
||||
/** Provides classes for modeling Regular Expression escape-related APIs. */
|
||||
module RegexEscape {
|
||||
/**
|
||||
* A data-flow node that escapes a regular expression.
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `RegexEscape` instead.
|
||||
*/
|
||||
abstract class Range extends DataFlow::Node {
|
||||
/**
|
||||
* Gets the argument containing the escaped expression.
|
||||
*/
|
||||
abstract DataFlow::Node getRegexNode();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A data-flow node that escapes a regular expression.
|
||||
*
|
||||
* Extend this class to refine existing API models. If you want to model new APIs,
|
||||
* extend `RegexEscape::Range` instead.
|
||||
*/
|
||||
class RegexEscape extends DataFlow::Node {
|
||||
RegexEscape::Range range;
|
||||
|
||||
RegexEscape() { this = range }
|
||||
|
||||
DataFlow::Node getRegexNode() { result = range.getRegexNode() }
|
||||
}
|
||||
|
||||
/** Provides classes for modeling LDAP query execution-related APIs. */
|
||||
module LDAPQuery {
|
||||
/**
|
||||
|
||||
@@ -9,91 +9,3 @@ private import semmle.python.dataflow.new.TaintTracking
|
||||
private import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import experimental.semmle.python.Concepts
|
||||
private import semmle.python.ApiGraphs
|
||||
|
||||
/**
|
||||
* Provides models for Python's `re` library.
|
||||
*
|
||||
* See https://docs.python.org/3/library/re.html
|
||||
*/
|
||||
private module Re {
|
||||
/**
|
||||
* List of `re` methods immediately executing an expression.
|
||||
*
|
||||
* See https://docs.python.org/3/library/re.html#module-contents
|
||||
*/
|
||||
private class RegexExecutionMethods extends string {
|
||||
RegexExecutionMethods() {
|
||||
this in ["match", "fullmatch", "search", "split", "findall", "finditer", "sub", "subn"]
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A class to find `re` methods immediately executing an expression.
|
||||
*
|
||||
* See `RegexExecutionMethods`
|
||||
*/
|
||||
private class DirectRegex extends DataFlow::CallCfgNode, RegexExecution::Range {
|
||||
DataFlow::Node regexNode;
|
||||
|
||||
DirectRegex() {
|
||||
this = API::moduleImport("re").getMember(any(RegexExecutionMethods m)).getACall() and
|
||||
regexNode = this.getArg(0)
|
||||
}
|
||||
|
||||
override DataFlow::Node getRegexNode() { result = regexNode }
|
||||
|
||||
override string getRegexModule() { result = "re" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A class to find `re` methods immediately executing a compiled expression by `re.compile`.
|
||||
*
|
||||
* Given the following example:
|
||||
*
|
||||
* ```py
|
||||
* pattern = re.compile(input)
|
||||
* pattern.match(s)
|
||||
* ```
|
||||
*
|
||||
* This class will identify that `re.compile` compiles `input` and afterwards
|
||||
* executes `re`'s `match`. As a result, `this` will refer to `pattern.match(s)`
|
||||
* and `this.getRegexNode()` will return the node for `input` (`re.compile`'s first argument)
|
||||
*
|
||||
*
|
||||
* See `RegexExecutionMethods`
|
||||
*
|
||||
* See https://docs.python.org/3/library/re.html#regular-expression-objects
|
||||
*/
|
||||
private class CompiledRegex extends DataFlow::MethodCallNode, RegexExecution::Range {
|
||||
DataFlow::Node regexNode;
|
||||
|
||||
CompiledRegex() {
|
||||
exists(DataFlow::MethodCallNode patternCall |
|
||||
patternCall = API::moduleImport("re").getMember("compile").getACall() and
|
||||
patternCall.flowsTo(this.getObject()) and
|
||||
this.getMethodName() instanceof RegexExecutionMethods and
|
||||
regexNode = patternCall.getArg(0)
|
||||
)
|
||||
}
|
||||
|
||||
override DataFlow::Node getRegexNode() { result = regexNode }
|
||||
|
||||
override string getRegexModule() { result = "re" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A class to find `re` methods escaping an expression.
|
||||
*
|
||||
* See https://docs.python.org/3/library/re.html#re.escape
|
||||
*/
|
||||
class ReEscape extends DataFlow::CallCfgNode, RegexEscape::Range {
|
||||
DataFlow::Node regexNode;
|
||||
|
||||
ReEscape() {
|
||||
this = API::moduleImport("re").getMember("escape").getACall() and
|
||||
regexNode = this.getArg(0)
|
||||
}
|
||||
|
||||
override DataFlow::Node getRegexNode() { result = regexNode }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,53 +0,0 @@
|
||||
/**
|
||||
* Provides a taint-tracking configuration for detecting regular expression injection
|
||||
* vulnerabilities.
|
||||
*/
|
||||
|
||||
import python
|
||||
import experimental.semmle.python.Concepts
|
||||
import semmle.python.dataflow.new.DataFlow
|
||||
import semmle.python.dataflow.new.TaintTracking
|
||||
import semmle.python.dataflow.new.RemoteFlowSources
|
||||
|
||||
/**
|
||||
* A class to find methods executing regular expressions.
|
||||
*
|
||||
* See `RegexExecution`
|
||||
*/
|
||||
class RegexInjectionSink extends DataFlow::Node {
|
||||
string regexModule;
|
||||
Attribute regexMethod;
|
||||
|
||||
RegexInjectionSink() {
|
||||
exists(RegexExecution reExec |
|
||||
this = reExec.getRegexNode() and
|
||||
regexModule = reExec.getRegexModule() and
|
||||
regexMethod = reExec.(DataFlow::CallCfgNode).getFunction().asExpr().(Attribute)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the argument containing the executed expression.
|
||||
*/
|
||||
string getRegexModule() { result = regexModule }
|
||||
|
||||
/**
|
||||
* Gets the method used to execute the regular expression.
|
||||
*/
|
||||
Attribute getRegexMethod() { result = regexMethod }
|
||||
}
|
||||
|
||||
/**
|
||||
* A taint-tracking configuration for detecting regular expression injections.
|
||||
*/
|
||||
class RegexInjectionFlowConfig extends TaintTracking::Configuration {
|
||||
RegexInjectionFlowConfig() { this = "RegexInjectionFlowConfig" }
|
||||
|
||||
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
|
||||
|
||||
override predicate isSink(DataFlow::Node sink) { sink instanceof RegexInjectionSink }
|
||||
|
||||
override predicate isSanitizer(DataFlow::Node sanitizer) {
|
||||
sanitizer = any(RegexEscape reEscape).getRegexNode()
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user