diff --git a/python/ql/src/experimental/Security/CWE-730/RegexInjection.qhelp b/python/ql/src/experimental/Security/CWE-730/RegexInjection.qhelp deleted file mode 100644 index f19f0744469..00000000000 --- a/python/ql/src/experimental/Security/CWE-730/RegexInjection.qhelp +++ /dev/null @@ -1,45 +0,0 @@ - - - -

-Constructing a regular expression with unsanitized user input is dangerous as a malicious user may -be able to modify the meaning of the expression. In particular, such a user may be able to provide -a regular expression fragment that takes exponential time in the worst case, and use that to -perform a Denial of Service attack. -

-
- - -

-Before embedding user input into a regular expression, use a sanitization function such as -re.escape to escape meta-characters that have a special meaning regarding -regular expressions' syntax. -

-
- - -

-The following examples are based on a simple Flask web server environment. -

-

-The following example shows a HTTP request parameter that is used to construct a regular expression -without sanitizing it first: -

- -

-Instead, the request parameter should be sanitized first, for example using the function -re.escape. This ensures that the user cannot insert characters which have a -special meaning in regular expressions. -

- -
- - -
  • OWASP: Regular expression Denial of Service - ReDoS.
  • -
  • Wikipedia: ReDoS.
  • -
  • Python docs: re.
  • -
  • SonarSource: RSPEC-2631.
  • -
    -
    diff --git a/python/ql/src/experimental/Security/CWE-730/RegexInjection.ql b/python/ql/src/experimental/Security/CWE-730/RegexInjection.ql deleted file mode 100644 index 7725f636eb0..00000000000 --- a/python/ql/src/experimental/Security/CWE-730/RegexInjection.ql +++ /dev/null @@ -1,29 +0,0 @@ -/** - * @name Regular expression injection - * @description User input should not be used in regular expressions without first being escaped, - * otherwise a malicious user may be able to inject an expression that could require - * exponential time on certain inputs. - * @kind path-problem - * @problem.severity error - * @id py/regex-injection - * @tags security - * external/cwe/cwe-730 - * external/cwe/cwe-400 - */ - -// determine precision above -import python -import experimental.semmle.python.security.injection.RegexInjection -import DataFlow::PathGraph - -from - RegexInjectionFlowConfig config, DataFlow::PathNode source, DataFlow::PathNode sink, - RegexInjectionSink regexInjectionSink, Attribute methodAttribute -where - config.hasFlowPath(source, sink) and - regexInjectionSink = sink.getNode() and - methodAttribute = regexInjectionSink.getRegexMethod() -select sink.getNode(), source, sink, - "$@ regular expression is constructed from a $@ and executed by $@.", sink.getNode(), "This", - source.getNode(), "user-provided value", methodAttribute, - regexInjectionSink.getRegexModule() + "." + methodAttribute.getName() diff --git a/python/ql/src/experimental/Security/CWE-730/re_bad.py b/python/ql/src/experimental/Security/CWE-730/re_bad.py deleted file mode 100644 index 3befaba9a01..00000000000 --- a/python/ql/src/experimental/Security/CWE-730/re_bad.py +++ /dev/null @@ -1,15 +0,0 @@ -from flask import request, Flask -import re - - -@app.route("/direct") -def direct(): - unsafe_pattern = request.args["pattern"] - re.search(unsafe_pattern, "") - - -@app.route("/compile") -def compile(): - unsafe_pattern = request.args["pattern"] - compiled_pattern = re.compile(unsafe_pattern) - compiled_pattern.search("") diff --git a/python/ql/src/experimental/Security/CWE-730/re_good.py b/python/ql/src/experimental/Security/CWE-730/re_good.py deleted file mode 100644 index cdc9a7ac158..00000000000 --- a/python/ql/src/experimental/Security/CWE-730/re_good.py +++ /dev/null @@ -1,17 +0,0 @@ -from flask import request, Flask -import re - - -@app.route("/direct") -def direct(): - unsafe_pattern = request.args['pattern'] - safe_pattern = re.escape(unsafe_pattern) - re.search(safe_pattern, "") - - -@app.route("/compile") -def compile(): - unsafe_pattern = request.args['pattern'] - safe_pattern = re.escape(unsafe_pattern) - compiled_pattern = re.compile(safe_pattern) - compiled_pattern.search("") diff --git a/python/ql/src/experimental/semmle/python/Concepts.qll b/python/ql/src/experimental/semmle/python/Concepts.qll index f87caa88497..cff74235f24 100644 --- a/python/ql/src/experimental/semmle/python/Concepts.qll +++ b/python/ql/src/experimental/semmle/python/Concepts.qll @@ -14,73 +14,6 @@ private import semmle.python.dataflow.new.RemoteFlowSources private import semmle.python.dataflow.new.TaintTracking private import experimental.semmle.python.Frameworks -/** Provides classes for modeling Regular Expression-related APIs. */ -module RegexExecution { - /** - * A data-flow node that executes a regular expression. - * - * Extend this class to model new APIs. If you want to refine existing API models, - * extend `RegexExecution` instead. - */ - abstract class Range extends DataFlow::Node { - /** - * Gets the argument containing the executed expression. - */ - abstract DataFlow::Node getRegexNode(); - - /** - * Gets the library used to execute the regular expression. - */ - abstract string getRegexModule(); - } -} - -/** - * A data-flow node that executes a regular expression. - * - * Extend this class to refine existing API models. If you want to model new APIs, - * extend `RegexExecution::Range` instead. - */ -class RegexExecution extends DataFlow::Node { - RegexExecution::Range range; - - RegexExecution() { this = range } - - DataFlow::Node getRegexNode() { result = range.getRegexNode() } - - string getRegexModule() { result = range.getRegexModule() } -} - -/** Provides classes for modeling Regular Expression escape-related APIs. */ -module RegexEscape { - /** - * A data-flow node that escapes a regular expression. - * - * Extend this class to model new APIs. If you want to refine existing API models, - * extend `RegexEscape` instead. - */ - abstract class Range extends DataFlow::Node { - /** - * Gets the argument containing the escaped expression. - */ - abstract DataFlow::Node getRegexNode(); - } -} - -/** - * A data-flow node that escapes a regular expression. - * - * Extend this class to refine existing API models. If you want to model new APIs, - * extend `RegexEscape::Range` instead. - */ -class RegexEscape extends DataFlow::Node { - RegexEscape::Range range; - - RegexEscape() { this = range } - - DataFlow::Node getRegexNode() { result = range.getRegexNode() } -} - /** Provides classes for modeling LDAP query execution-related APIs. */ module LDAPQuery { /** diff --git a/python/ql/src/experimental/semmle/python/frameworks/Stdlib.qll b/python/ql/src/experimental/semmle/python/frameworks/Stdlib.qll index b3b70f43394..420caf0d73b 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Stdlib.qll @@ -9,91 +9,3 @@ private import semmle.python.dataflow.new.TaintTracking private import semmle.python.dataflow.new.RemoteFlowSources private import experimental.semmle.python.Concepts private import semmle.python.ApiGraphs - -/** - * Provides models for Python's `re` library. - * - * See https://docs.python.org/3/library/re.html - */ -private module Re { - /** - * List of `re` methods immediately executing an expression. - * - * See https://docs.python.org/3/library/re.html#module-contents - */ - private class RegexExecutionMethods extends string { - RegexExecutionMethods() { - this in ["match", "fullmatch", "search", "split", "findall", "finditer", "sub", "subn"] - } - } - - /** - * A class to find `re` methods immediately executing an expression. - * - * See `RegexExecutionMethods` - */ - private class DirectRegex extends DataFlow::CallCfgNode, RegexExecution::Range { - DataFlow::Node regexNode; - - DirectRegex() { - this = API::moduleImport("re").getMember(any(RegexExecutionMethods m)).getACall() and - regexNode = this.getArg(0) - } - - override DataFlow::Node getRegexNode() { result = regexNode } - - override string getRegexModule() { result = "re" } - } - - /** - * A class to find `re` methods immediately executing a compiled expression by `re.compile`. - * - * Given the following example: - * - * ```py - * pattern = re.compile(input) - * pattern.match(s) - * ``` - * - * This class will identify that `re.compile` compiles `input` and afterwards - * executes `re`'s `match`. As a result, `this` will refer to `pattern.match(s)` - * and `this.getRegexNode()` will return the node for `input` (`re.compile`'s first argument) - * - * - * See `RegexExecutionMethods` - * - * See https://docs.python.org/3/library/re.html#regular-expression-objects - */ - private class CompiledRegex extends DataFlow::MethodCallNode, RegexExecution::Range { - DataFlow::Node regexNode; - - CompiledRegex() { - exists(DataFlow::MethodCallNode patternCall | - patternCall = API::moduleImport("re").getMember("compile").getACall() and - patternCall.flowsTo(this.getObject()) and - this.getMethodName() instanceof RegexExecutionMethods and - regexNode = patternCall.getArg(0) - ) - } - - override DataFlow::Node getRegexNode() { result = regexNode } - - override string getRegexModule() { result = "re" } - } - - /** - * A class to find `re` methods escaping an expression. - * - * See https://docs.python.org/3/library/re.html#re.escape - */ - class ReEscape extends DataFlow::CallCfgNode, RegexEscape::Range { - DataFlow::Node regexNode; - - ReEscape() { - this = API::moduleImport("re").getMember("escape").getACall() and - regexNode = this.getArg(0) - } - - override DataFlow::Node getRegexNode() { result = regexNode } - } -} diff --git a/python/ql/src/experimental/semmle/python/security/injection/RegexInjection.qll b/python/ql/src/experimental/semmle/python/security/injection/RegexInjection.qll deleted file mode 100644 index 7b7b08cacab..00000000000 --- a/python/ql/src/experimental/semmle/python/security/injection/RegexInjection.qll +++ /dev/null @@ -1,53 +0,0 @@ -/** - * Provides a taint-tracking configuration for detecting regular expression injection - * vulnerabilities. - */ - -import python -import experimental.semmle.python.Concepts -import semmle.python.dataflow.new.DataFlow -import semmle.python.dataflow.new.TaintTracking -import semmle.python.dataflow.new.RemoteFlowSources - -/** - * A class to find methods executing regular expressions. - * - * See `RegexExecution` - */ -class RegexInjectionSink extends DataFlow::Node { - string regexModule; - Attribute regexMethod; - - RegexInjectionSink() { - exists(RegexExecution reExec | - this = reExec.getRegexNode() and - regexModule = reExec.getRegexModule() and - regexMethod = reExec.(DataFlow::CallCfgNode).getFunction().asExpr().(Attribute) - ) - } - - /** - * Gets the argument containing the executed expression. - */ - string getRegexModule() { result = regexModule } - - /** - * Gets the method used to execute the regular expression. - */ - Attribute getRegexMethod() { result = regexMethod } -} - -/** - * A taint-tracking configuration for detecting regular expression injections. - */ -class RegexInjectionFlowConfig extends TaintTracking::Configuration { - RegexInjectionFlowConfig() { this = "RegexInjectionFlowConfig" } - - override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource } - - override predicate isSink(DataFlow::Node sink) { sink instanceof RegexInjectionSink } - - override predicate isSanitizer(DataFlow::Node sanitizer) { - sanitizer = any(RegexEscape reEscape).getRegexNode() - } -}