Python: Remove promoted code:

- queries (`py/regex-injection`)
- concepts (RegexExecution, RegexEscape)
- library models (Stdlib::Re)
This commit is contained in:
Rasmus Lerchedahl Petersen
2021-09-14 13:14:16 +02:00
parent abbd1d1dc5
commit 36e27f2aa4
7 changed files with 0 additions and 314 deletions

View File

@@ -1,45 +0,0 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
Constructing a regular expression with unsanitized user input is dangerous as a malicious user may
be able to modify the meaning of the expression. In particular, such a user may be able to provide
a regular expression fragment that takes exponential time in the worst case, and use that to
perform a Denial of Service attack.
</p>
</overview>
<recommendation>
<p>
Before embedding user input into a regular expression, use a sanitization function such as
<code>re.escape</code> to escape meta-characters that have a special meaning in
regular expression syntax.
</p>
</recommendation>
<example>
<p>
The following examples are based on a simple Flask web server environment.
</p>
<p>
The following example shows an HTTP request parameter that is used to construct a regular expression
without sanitizing it first:
</p>
<sample src="re_bad.py" />
<p>
Instead, the request parameter should be sanitized first, for example using the function
<code>re.escape</code>. This ensures that the user cannot insert characters which have a
special meaning in regular expressions.
</p>
<sample src="re_good.py" />
</example>
<references>
<li>OWASP: <a href="https://www.owasp.org/index.php/Regular_expression_Denial_of_Service_-_ReDoS">Regular expression Denial of Service - ReDoS</a>.</li>
<li>Wikipedia: <a href="https://en.wikipedia.org/wiki/ReDoS">ReDoS</a>.</li>
<li>Python docs: <a href="https://docs.python.org/3/library/re.html">re</a>.</li>
<li>SonarSource: <a href="https://rules.sonarsource.com/python/type/Vulnerability/RSPEC-2631">RSPEC-2631</a>.</li>
</references>
</qhelp>

View File

@@ -1,29 +0,0 @@
/**
* @name Regular expression injection
* @description User input should not be used in regular expressions without first being escaped,
* otherwise a malicious user may be able to inject an expression that could require
* exponential time on certain inputs.
* @kind path-problem
* @problem.severity error
* @id py/regex-injection
* @tags security
* external/cwe/cwe-730
* external/cwe/cwe-400
*/
// determine precision above
import python
import experimental.semmle.python.security.injection.RegexInjection
import DataFlow::PathGraph
// Reports every taint path from a remote flow source to the pattern argument of a
// regular-expression execution, naming the `<module>.<method>` that runs the pattern.
from
  RegexInjectionFlowConfig cfg, DataFlow::PathNode src, DataFlow::PathNode dst,
  RegexInjectionSink patternSink, Attribute execMethod
where
  cfg.hasFlowPath(src, dst) and
  // Recover the sink's metadata (executing module and method) from the path's end node.
  patternSink = dst.getNode() and
  execMethod = patternSink.getRegexMethod()
select patternSink, src, dst,
  "$@ regular expression is constructed from a $@ and executed by $@.", patternSink, "This",
  src.getNode(), "user-provided value", execMethod,
  patternSink.getRegexModule() + "." + execMethod.getName()

View File

@@ -1,15 +0,0 @@
from flask import request, Flask
import re

# The route decorators below need an application object. `Flask` was imported but
# never instantiated, so the module raised a NameError on `app` when imported.
app = Flask(__name__)


@app.route("/direct")
def direct():
    """Handle `/direct`: run the `pattern` request parameter as a regex (unsafe)."""
    unsafe_pattern = request.args["pattern"]
    # BAD: the user-controlled string is interpreted as a regular expression.
    re.search(unsafe_pattern, "")


@app.route("/compile")
def compile():
    """Handle `/compile`: compile the `pattern` request parameter, then run it (unsafe)."""
    unsafe_pattern = request.args["pattern"]
    # BAD: compiling first does not help; the pattern is still user-controlled.
    compiled_pattern = re.compile(unsafe_pattern)
    compiled_pattern.search("")

View File

@@ -1,17 +0,0 @@
from flask import request, Flask
import re

# The route decorators below need an application object. `Flask` was imported but
# never instantiated, so the module raised a NameError on `app` when imported.
app = Flask(__name__)


@app.route("/direct")
def direct():
    """Handle `/direct`: escape the `pattern` request parameter before running it."""
    unsafe_pattern = request.args['pattern']
    # GOOD: `re.escape` neutralises regex meta-characters in the user input.
    safe_pattern = re.escape(unsafe_pattern)
    re.search(safe_pattern, "")


@app.route("/compile")
def compile():
    """Handle `/compile`: escape the `pattern` request parameter before compiling it."""
    unsafe_pattern = request.args['pattern']
    # GOOD: only the escaped pattern ever reaches `re.compile`.
    safe_pattern = re.escape(unsafe_pattern)
    compiled_pattern = re.compile(safe_pattern)
    compiled_pattern.search("")

View File

@@ -14,73 +14,6 @@ private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.TaintTracking
private import experimental.semmle.python.Frameworks
/**
 * Provides classes for modeling Regular Expression-related APIs.
 *
 * New regular-expression execution APIs are modeled by extending
 * `RegexExecution::Range`.
 */
module RegexExecution {
/**
 * A data-flow node that executes a regular expression.
 *
 * Extend this class to model new APIs. If you want to refine existing API models,
 * extend `RegexExecution` instead.
 */
abstract class Range extends DataFlow::Node {
/**
 * Gets the argument containing the executed expression,
 * that is, the node holding the pattern that is run.
 */
abstract DataFlow::Node getRegexNode();
/**
 * Gets the library used to execute the regular expression,
 * as a string (for example `"re"`).
 */
abstract string getRegexModule();
}
}
/**
 * A data-flow node that executes a regular expression.
 *
 * Every `RegexExecution::Range` is a `RegexExecution`; the member predicates
 * below simply forward to the underlying `Range`.
 *
 * Extend this class to refine existing API models. If you want to model new APIs,
 * extend `RegexExecution::Range` instead.
 */
class RegexExecution extends DataFlow::Node {
// The modeling class this node is taken from; all predicates delegate to it.
RegexExecution::Range range;
RegexExecution() { this = range }
/** Gets the argument containing the executed expression. */
DataFlow::Node getRegexNode() { result = range.getRegexNode() }
/** Gets the library used to execute the regular expression. */
string getRegexModule() { result = range.getRegexModule() }
}
/**
 * Provides classes for modeling Regular Expression escape-related APIs.
 *
 * New escaping APIs are modeled by extending `RegexEscape::Range`.
 */
module RegexEscape {
/**
 * A data-flow node that escapes a regular expression.
 *
 * Extend this class to model new APIs. If you want to refine existing API models,
 * extend `RegexEscape` instead.
 */
abstract class Range extends DataFlow::Node {
/**
 * Gets the argument containing the escaped expression,
 * that is, the input handed to the escaping operation.
 */
abstract DataFlow::Node getRegexNode();
}
}
/**
 * A data-flow node that escapes a regular expression.
 *
 * Every `RegexEscape::Range` is a `RegexEscape`; the member predicate
 * below simply forwards to the underlying `Range`.
 *
 * Extend this class to refine existing API models. If you want to model new APIs,
 * extend `RegexEscape::Range` instead.
 */
class RegexEscape extends DataFlow::Node {
// The modeling class this node is taken from; the predicate below delegates to it.
RegexEscape::Range range;
RegexEscape() { this = range }
/** Gets the argument containing the escaped expression. */
DataFlow::Node getRegexNode() { result = range.getRegexNode() }
}
/** Provides classes for modeling LDAP query execution-related APIs. */
module LDAPQuery {
/**

View File

@@ -9,91 +9,3 @@ private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.dataflow.new.RemoteFlowSources
private import experimental.semmle.python.Concepts
private import semmle.python.ApiGraphs
/**
 * Provides models for Python's `re` library.
 *
 * See https://docs.python.org/3/library/re.html
 */
private module Re {
  /**
   * The name of a `re` function (or compiled-pattern method) that immediately
   * executes a regular expression.
   *
   * See https://docs.python.org/3/library/re.html#module-contents
   */
  private class RegexExecutionMethods extends string {
    RegexExecutionMethods() {
      this in ["match", "fullmatch", "search", "split", "findall", "finditer", "sub", "subn"]
    }
  }

  /**
   * A call to a `re` module function that immediately executes a pattern,
   * such as `re.search(pattern, s)`.
   *
   * See `RegexExecutionMethods`
   */
  private class DirectRegex extends DataFlow::CallCfgNode, RegexExecution::Range {
    DataFlow::Node regexNode;

    DirectRegex() {
      this = API::moduleImport("re").getMember(any(RegexExecutionMethods m)).getACall() and
      // The pattern is the first parameter of all of these functions; accept it
      // both positionally and as the `pattern` keyword argument.
      regexNode in [this.getArg(0), this.getArgByName("pattern")]
    }

    override DataFlow::Node getRegexNode() { result = regexNode }

    override string getRegexModule() { result = "re" }
  }

  /**
   * A call to an executing method on a pattern object created by `re.compile`.
   *
   * Given the following example:
   *
   * ```py
   * pattern = re.compile(input)
   * pattern.match(s)
   * ```
   *
   * `this` refers to `pattern.match(s)` and `this.getRegexNode()` is the node
   * for `input`, the pattern argument of the `re.compile` call whose result
   * flows to the receiver of the method call.
   *
   * See `RegexExecutionMethods`
   *
   * See https://docs.python.org/3/library/re.html#regular-expression-objects
   */
  private class CompiledRegex extends DataFlow::MethodCallNode, RegexExecution::Range {
    DataFlow::Node regexNode;

    CompiledRegex() {
      // `patternCall` is typed as a plain call node rather than a method call node, so
      // that `from re import compile; compile(x)` is recognized as well as `re.compile(x)`.
      exists(DataFlow::CallCfgNode patternCall |
        patternCall = API::moduleImport("re").getMember("compile").getACall() and
        patternCall.flowsTo(this.getObject()) and
        this.getMethodName() instanceof RegexExecutionMethods and
        // Accept the pattern positionally or as the `pattern` keyword argument.
        regexNode in [patternCall.getArg(0), patternCall.getArgByName("pattern")]
      )
    }

    override DataFlow::Node getRegexNode() { result = regexNode }

    override string getRegexModule() { result = "re" }
  }

  /**
   * A call to `re.escape`, which escapes the special characters in its argument.
   *
   * See https://docs.python.org/3/library/re.html#re.escape
   */
  class ReEscape extends DataFlow::CallCfgNode, RegexEscape::Range {
    DataFlow::Node regexNode;

    ReEscape() {
      this = API::moduleImport("re").getMember("escape").getACall() and
      // Accept the escaped text positionally or as the `pattern` keyword argument.
      regexNode in [this.getArg(0), this.getArgByName("pattern")]
    }

    override DataFlow::Node getRegexNode() { result = regexNode }
  }
}

View File

@@ -1,53 +0,0 @@
/**
* Provides a taint-tracking configuration for detecting regular expression injection
* vulnerabilities.
*/
import python
import experimental.semmle.python.Concepts
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.dataflow.new.RemoteFlowSources
/**
 * A sink for regular expression injection: the node holding the pattern that is
 * run by a `RegexExecution`.
 *
 * Only executions whose callee expression is an attribute access (for example
 * `re.search(...)` or `pattern.search(...)`) are covered, because the executing
 * method is recorded as an `Attribute`.
 *
 * See `RegexExecution`
 */
class RegexInjectionSink extends DataFlow::Node {
  // Name of the library that executes the pattern, as reported by the execution model.
  string regexModule;
  // The attribute expression (`<object>.<method>`) that is called to execute the pattern.
  Attribute regexMethod;

  RegexInjectionSink() {
    exists(RegexExecution reExec |
      this = reExec.getRegexNode() and
      regexModule = reExec.getRegexModule() and
      regexMethod = reExec.(DataFlow::CallCfgNode).getFunction().asExpr().(Attribute)
    )
  }

  /**
   * Gets the library used to execute the regular expression.
   */
  string getRegexModule() { result = regexModule }

  /**
   * Gets the method used to execute the regular expression.
   */
  Attribute getRegexMethod() { result = regexMethod }
}
/**
 * A taint-tracking configuration for detecting regular expression injections.
 *
 * Sources are remote flow sources, sinks are the pattern nodes of regular
 * expression executions, and regular expression escaping acts as a sanitizer.
 */
class RegexInjectionFlowConfig extends TaintTracking::Configuration {
  RegexInjectionFlowConfig() { this = "RegexInjectionFlowConfig" }

  override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }

  override predicate isSink(DataFlow::Node sink) { sink instanceof RegexInjectionSink }

  override predicate isSanitizer(DataFlow::Node sanitizer) {
    // Sanitize the escaping node itself (the value it produces), not its input.
    // Because of use-use flow, blocking the input would also cut the flow to any
    // later, unescaped use of the same value, for example
    // `re.escape(x); re.search(x, "")`, and so hide a real injection.
    // NOTE(review): assumes every `RegexEscape` node represents the escaped result,
    // as the `re.escape(...)` call model does; confirm for other models.
    sanitizer instanceof RegexEscape
  }
}