Python: Port regex concepts and adapt PolyRedos

This commit is contained in:
Rasmus Lerchedahl Petersen
2021-09-13 16:50:00 +02:00
parent 68ed3250e8
commit 8f152a5bfb
3 changed files with 199 additions and 135 deletions

View File

@@ -355,6 +355,41 @@ module SqlExecution {
}
}
/**
* A data-flow node that executes a regular expression.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `RegexExecution::Range` instead.
*/
class RegexExecution extends DataFlow::Node {
RegexExecution::Range range;
RegexExecution() { this = range }
/** Gets the data flow node for the regex being compiled by this node. */
DataFlow::Node getRegexNode() { result = range.getRegexNode() }
/** Gets a dataflow node for the string to be searched or matched against. */
DataFlow::Node getString() { result = range.getString() }
}
/** Provides classes for modeling new regular-expression execution APIs. */
module RegexExecution {
/**
* A data-flow node that executes a regular expression.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `RegexExecution` instead.
*/
abstract class Range extends DataFlow::Node {
/** Gets the data flow node for the regex being compiled by this node. */
abstract DataFlow::Node getRegexNode();
/** Gets a dataflow node for the string to be searched or matched against. */
abstract DataFlow::Node getString();
}
}
/**
* A data-flow node that escapes meta-characters, which could be used to prevent
* injection attacks.
@@ -411,6 +446,9 @@ module Escaping {
/** Gets the escape-kind for escaping a string so it can safely be included in HTML. */
string getHtmlKind() { result = "html" }
/** Gets the escape-kind for escaping a string so it can safely be included in HTML. */
string getRegexKind() { result = "regex" }
// TODO: If adding an XML kind, update the modeling of the `MarkupSafe` PyPI package.
//
// Technically it claims to escape for both HTML and XML, but for now we don't have
@@ -427,6 +465,14 @@ class HtmlEscaping extends Escaping {
HtmlEscaping() { range.getKind() = Escaping::getHtmlKind() }
}
/**
* An escape of a string so it can be safely included in
* the body of a regex.
*/
class RegexEscaping extends Escaping {
RegexEscaping() { range.getKind() = Escaping::getRegexKind() }
}
/** Provides classes for modeling HTTP-related APIs. */
module HTTP {
import semmle.python.web.HttpConstants

View File

@@ -1497,6 +1497,158 @@ private module StdlibPrivate {
}
}
// ---------------------------------------------------------------------------
// re
// ---------------------------------------------------------------------------
/**
* List of methods in the `re` module immediately executing a regular expression.
*
* See https://docs.python.org/3/library/re.html#module-contents
*/
private class RegexExecutionMethod extends string {
RegexExecutionMethod() {
this in ["match", "fullmatch", "search", "split", "findall", "finditer", "sub", "subn"]
}
}
/** Gets the index of the argument representing the string to be searched by a regex. */
int stringArg(RegexExecutionMethod method) {
method in ["match", "fullmatch", "search", "split", "findall", "finditer"] and
result = 1
or
method in ["sub", "subn"] and
result = 2
}
/**
* A a call to a method from the `re` module immediately executing a regular expression.
*
* See `RegexExecutionMethods`
*/
private class DirectRegex extends DataFlow::CallCfgNode, RegexExecution::Range {
RegexExecutionMethod method;
DirectRegex() { this = API::moduleImport("re").getMember(method).getACall() }
override DataFlow::Node getRegexNode() {
result in [this.getArg(0), this.getArgByName("pattern")]
}
override DataFlow::Node getString() {
result in [this.getArg(stringArg(method)), this.getArgByName("string")]
}
}
/** Helper module for tracking compiled regexes. */
private module CompiledRegexes {
private import semmle.python.dataflow.new.DataFlow2
private import semmle.python.RegexTreeView
// TODO: This module should be refactored once API graphs are more expressinve.
/** A configuration for finding uses of compiled regexes. */
class RegexDefinitionConfiguration extends DataFlow2::Configuration {
RegexDefinitionConfiguration() { this = "RegexDefinitionConfiguration" }
override predicate isSource(DataFlow::Node source) { source instanceof RegexDefinitonSource }
override predicate isSink(DataFlow::Node sink) { sink instanceof RegexDefinitionSink }
}
/** A regex compilation. */
class RegexDefinitonSource extends DataFlow::CallCfgNode {
DataFlow::Node regexNode;
RegexDefinitonSource() {
this = API::moduleImport("re").getMember("compile").getACall() and
regexNode in [this.getArg(0), this.getArgByName("pattern")]
}
/** Gets the data flow node for the regex being compiled by this node. */
DataFlow::Node getRegexNode() { result = regexNode }
}
/** A use of a compiled regex. */
class RegexDefinitionSink extends DataFlow::Node {
RegexExecutionMethod method;
DataFlow::CallCfgNode executingCall;
RegexDefinitionSink() {
executingCall =
API::moduleImport("re").getMember("compile").getReturn().getMember(method).getACall() and
this = executingCall.getFunction().(DataFlow::AttrRead).getObject()
}
/** Gets the method used to execute the regex. */
RegexExecutionMethod getMethod() { result = method }
/** Gets the data flow node for the executing call. */
DataFlow::CallCfgNode getExecutingCall() { result = executingCall }
}
}
private import CompiledRegexes
/**
* A call on compiled regular expression (obtained via `re.compile`) executing a
* regular expression.
*
* Given the following example:
*
* ```py
* pattern = re.compile(input)
* pattern.match(s)
* ```
*
* This class will identify that `re.compile` compiles `input` and afterwards
* executes `re`'s `match`. As a result, `this` will refer to `pattern.match(s)`
* and `this.getRegexNode()` will return the node for `input` (`re.compile`'s first argument).
*
*
* See `RegexExecutionMethods`
*
* See https://docs.python.org/3/library/re.html#regular-expression-objects
*/
private class CompiledRegex extends DataFlow::CallCfgNode, RegexExecution {
DataFlow::Node regexNode;
RegexExecutionMethod method;
CompiledRegex() {
exists(
RegexDefinitionConfiguration conf, RegexDefinitonSource source, RegexDefinitionSink sink
|
conf.hasFlow(source, sink) and
regexNode = source.getRegexNode() and
method = sink.getMethod() and
this = sink.getExecutingCall()
)
}
override DataFlow::Node getRegexNode() { result = regexNode }
override DataFlow::Node getString() {
result in [this.getArg(stringArg(method) - 1), this.getArgByName("string")]
}
}
/**
* A call to 're.escape'.
* See https://docs.python.org/3/library/re.html#re.escape
*/
private class ReEscapeCall extends Escaping::Range, DataFlow::CallCfgNode {
DataFlow::Node regexNode;
ReEscapeCall() {
this = API::moduleImport("re").getMember("escape").getACall() and
regexNode in [this.getArg(0), this.getArgByName("pattern")]
}
override DataFlow::Node getAnInput() { result = regexNode }
override DataFlow::Node getOutput() { result = this }
override string getKind() { result = Escaping::getRegexKind() }
}
// ---------------------------------------------------------------------------
// OTHER
// ---------------------------------------------------------------------------

View File

@@ -60,7 +60,7 @@ module PolynomialReDoS {
RegExpTerm t;
RegexExecutionAsSink() {
exists(CompiledRegexes::RegexExecution re |
exists(RegexExecution re |
re.getRegexNode().asExpr() = t.getRegex() and
this = re.getString()
) and
@@ -76,137 +76,3 @@ module PolynomialReDoS {
*/
class StringConstCompareAsSanitizerGuard extends SanitizerGuard, StringConstCompare { }
}
/** Helper module for tracking compiled regexes. */
private module CompiledRegexes {
// TODO: This module should be refactored and merged with the experimental work done on detecting
// regex injections, such that this can be expressed from just using a concept.
/** A configuration for finding uses of compiled regexes. */
class RegexDefinitionConfiguration extends DataFlow2::Configuration {
RegexDefinitionConfiguration() { this = "RegexDefinitionConfiguration" }
override predicate isSource(DataFlow::Node source) { source instanceof RegexDefinitonSource }
override predicate isSink(DataFlow::Node sink) { sink instanceof RegexDefinitionSink }
}
/** A regex compilation. */
class RegexDefinitonSource extends DataFlow::CallCfgNode {
DataFlow::Node regexNode;
RegexDefinitonSource() {
this = API::moduleImport("re").getMember("compile").getACall() and
regexNode in [this.getArg(0), this.getArgByName("pattern")]
}
/** Gets the regex that is being compiled by this node. */
RegExpTerm getRegExp() { result.getRegex() = regexNode.asExpr() and result.isRootTerm() }
/** Gets the data flow node for the regex being compiled by this node. */
DataFlow::Node getRegexNode() { result = regexNode }
}
/** A use of a compiled regex. */
class RegexDefinitionSink extends DataFlow::Node {
RegexExecutionMethod method;
DataFlow::CallCfgNode executingCall;
RegexDefinitionSink() {
exists(DataFlow::AttrRead reMethod |
executingCall.getFunction() = reMethod and
reMethod.getAttributeName() = method and
this = reMethod.getObject()
)
}
/** Gets the method used to execute the regex. */
RegexExecutionMethod getMethod() { result = method }
/** Gets the data flow node for the executing call. */
DataFlow::CallCfgNode getExecutingCall() { result = executingCall }
}
/** A data flow node executing a regex. */
abstract class RegexExecution extends DataFlow::Node {
/** Gets the data flow node for the regex being compiled by this node. */
abstract DataFlow::Node getRegexNode();
/** Gets a dataflow node for the string to be searched or matched against. */
abstract DataFlow::Node getString();
}
private class RegexExecutionMethod extends string {
RegexExecutionMethod() {
this in ["match", "fullmatch", "search", "split", "findall", "finditer", "sub", "subn"]
}
}
/** Gets the index of the argument representing the string to be searched by a regex. */
int stringArg(RegexExecutionMethod method) {
method in ["match", "fullmatch", "search", "split", "findall", "finditer"] and
result = 1
or
method in ["sub", "subn"] and
result = 2
}
/**
* A class to find `re` methods immediately executing an expression.
*
* See `RegexExecutionMethods`
*/
class DirectRegex extends DataFlow::CallCfgNode, RegexExecution {
RegexExecutionMethod method;
DirectRegex() { this = API::moduleImport("re").getMember(method).getACall() }
override DataFlow::Node getRegexNode() {
result in [this.getArg(0), this.getArgByName("pattern")]
}
override DataFlow::Node getString() {
result in [this.getArg(stringArg(method)), this.getArgByName("string")]
}
}
/**
* A class to find `re` methods immediately executing a compiled expression by `re.compile`.
*
* Given the following example:
*
* ```py
* pattern = re.compile(input)
* pattern.match(s)
* ```
*
* This class will identify that `re.compile` compiles `input` and afterwards
* executes `re`'s `match`. As a result, `this` will refer to `pattern.match(s)`
* and `this.getRegexNode()` will return the node for `input` (`re.compile`'s first argument)
*
*
* See `RegexExecutionMethods`
*
* See https://docs.python.org/3/library/re.html#regular-expression-objects
*/
private class CompiledRegex extends DataFlow::CallCfgNode, RegexExecution {
DataFlow::Node regexNode;
RegexExecutionMethod method;
CompiledRegex() {
exists(
RegexDefinitionConfiguration conf, RegexDefinitonSource source, RegexDefinitionSink sink
|
conf.hasFlow(source, sink) and
regexNode = source.getRegexNode() and
method = sink.getMethod() and
this = sink.getExecutingCall()
)
}
override DataFlow::Node getRegexNode() { result = regexNode }
override DataFlow::Node getString() {
result in [this.getArg(stringArg(method) - 1), this.getArgByName("string")]
}
}
}