mirror of
https://github.com/github/codeql.git
synced 2026-03-17 04:56:58 +01:00
Merge pull request #6693 from yoff/python/promote-regex-injection
Python: Promote `py/regex-injection`
This commit is contained in:
@@ -355,6 +355,53 @@ module SqlExecution {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A data-flow node that executes a regular expression.
|
||||
*
|
||||
* Extend this class to refine existing API models. If you want to model new APIs,
|
||||
* extend `RegexExecution::Range` instead.
|
||||
*/
|
||||
class RegexExecution extends DataFlow::Node {
|
||||
RegexExecution::Range range;
|
||||
|
||||
RegexExecution() { this = range }
|
||||
|
||||
/** Gets the data flow node for the regex being executed by this node. */
|
||||
DataFlow::Node getRegex() { result = range.getRegex() }
|
||||
|
||||
/** Gets a dataflow node for the string to be searched or matched against. */
|
||||
DataFlow::Node getString() { result = range.getString() }
|
||||
|
||||
/**
|
||||
* Gets the name of this regex execution, typically the name of an executing method.
|
||||
* This is used for nice alert messages and should include the module if possible.
|
||||
*/
|
||||
string getName() { result = range.getName() }
|
||||
}
|
||||
|
||||
/** Provides classes for modeling new regular-expression execution APIs. */
|
||||
module RegexExecution {
|
||||
/**
|
||||
* A data-flow node that executes a regular expression.
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `RegexExecution` instead.
|
||||
*/
|
||||
abstract class Range extends DataFlow::Node {
|
||||
/** Gets the data flow node for the regex being executed by this node. */
|
||||
abstract DataFlow::Node getRegex();
|
||||
|
||||
/** Gets a dataflow node for the string to be searched or matched against. */
|
||||
abstract DataFlow::Node getString();
|
||||
|
||||
/**
|
||||
* Gets the name of this regex execution, typically the name of an executing method.
|
||||
* This is used for nice alert messages and should include the module if possible.
|
||||
*/
|
||||
abstract string getName();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A data-flow node that escapes meta-characters, which could be used to prevent
|
||||
* injection attacks.
|
||||
@@ -411,6 +458,9 @@ module Escaping {
|
||||
|
||||
/** Gets the escape-kind for escaping a string so it can safely be included in HTML. */
|
||||
string getHtmlKind() { result = "html" }
|
||||
|
||||
/** Gets the escape-kind for escaping a string so it can safely be included in HTML. */
|
||||
string getRegexKind() { result = "regex" }
|
||||
// TODO: If adding an XML kind, update the modeling of the `MarkupSafe` PyPI package.
|
||||
//
|
||||
// Technically it claims to escape for both HTML and XML, but for now we don't have
|
||||
@@ -427,6 +477,14 @@ class HtmlEscaping extends Escaping {
|
||||
HtmlEscaping() { range.getKind() = Escaping::getHtmlKind() }
|
||||
}
|
||||
|
||||
/**
|
||||
* An escape of a string so it can be safely included in
|
||||
* the body of a regex.
|
||||
*/
|
||||
class RegexEscaping extends Escaping {
|
||||
RegexEscaping() { range.getKind() = Escaping::getRegexKind() }
|
||||
}
|
||||
|
||||
/** Provides classes for modeling HTTP-related APIs. */
|
||||
module HTTP {
|
||||
import semmle.python.web.HttpConstants
|
||||
|
||||
@@ -49,6 +49,7 @@ newtype TRegExpParent =
|
||||
* or another regular expression term.
|
||||
*/
|
||||
class RegExpParent extends TRegExpParent {
|
||||
/** Gets a textual representation of this element. */
|
||||
string toString() { result = "RegExpParent" }
|
||||
|
||||
/** Gets the `i`th child term. */
|
||||
@@ -72,14 +73,18 @@ class RegExpLiteral extends TRegExpLiteral, RegExpParent {
|
||||
|
||||
override RegExpTerm getChild(int i) { i = 0 and result.getRegex() = re and result.isRootTerm() }
|
||||
|
||||
/** Holds if dot, `.`, matches all characters, including newlines. */
|
||||
predicate isDotAll() { re.getAMode() = "DOTALL" }
|
||||
|
||||
/** Holds if this regex matching is case-insensitive for this regex. */
|
||||
predicate isIgnoreCase() { re.getAMode() = "IGNORECASE" }
|
||||
|
||||
/** Get a string representing all modes for this regex. */
|
||||
string getFlags() { result = concat(string mode | mode = re.getAMode() | mode, " | ") }
|
||||
|
||||
override Regex getRegex() { result = re }
|
||||
|
||||
/** Gets the primary QL class for this regex. */
|
||||
string getPrimaryQLClass() { result = "RegExpLiteral" }
|
||||
}
|
||||
|
||||
@@ -246,8 +251,10 @@ class RegExpQuantifier extends RegExpTerm, TRegExpQuantifier {
|
||||
result.getEnd() = part_end
|
||||
}
|
||||
|
||||
/** Hols if this term may match an unlimited number of times. */
|
||||
predicate mayRepeatForever() { may_repeat_forever = true }
|
||||
|
||||
/** Gets the qualifier for this term. That is e.g "?" for "a?". */
|
||||
string getQualifier() { result = re.getText().substring(part_end, end) }
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpQuantifier" }
|
||||
@@ -322,8 +329,10 @@ class RegExpRange extends RegExpQuantifier {
|
||||
|
||||
RegExpRange() { re.multiples(part_end, end, lower, upper) }
|
||||
|
||||
/** Gets the string defining the upper bound of this range, if any. */
|
||||
string getUpper() { result = upper }
|
||||
|
||||
/** Gets the string defining the lower bound of this range, if any. */
|
||||
string getLower() { result = lower }
|
||||
|
||||
/**
|
||||
@@ -465,11 +474,13 @@ class RegExpEscape extends RegExpNormalChar {
|
||||
result = this.getUnicode()
|
||||
}
|
||||
|
||||
/** Holds if this terms name is given by the part following the escape character. */
|
||||
predicate isIdentityEscape() { not this.getUnescaped() in ["n", "r", "t", "f"] }
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpEscape" }
|
||||
|
||||
string getUnescaped() { result = this.getText().suffix(1) }
|
||||
/** Gets the part of the term following the escape character. That is e.g. "w" if the term is "\w". */
|
||||
private string getUnescaped() { result = this.getText().suffix(1) }
|
||||
|
||||
/**
|
||||
* Gets the text for this escape. That is e.g. "\w".
|
||||
@@ -536,15 +547,8 @@ private int toHex(string hex) {
|
||||
* ```
|
||||
*/
|
||||
class RegExpCharacterClassEscape extends RegExpEscape {
|
||||
// string value;
|
||||
RegExpCharacterClassEscape() {
|
||||
// value = re.getText().substring(start + 1, end) and
|
||||
// value in ["d", "D", "s", "S", "w", "W"]
|
||||
this.getValue() in ["d", "D", "s", "S", "w", "W"]
|
||||
}
|
||||
RegExpCharacterClassEscape() { this.getValue() in ["d", "D", "s", "S", "w", "W"] }
|
||||
|
||||
/** Gets the name of the character class; for example, `w` for `\w`. */
|
||||
// override string getValue() { result = value }
|
||||
override RegExpTerm getChild(int i) { none() }
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpCharacterClassEscape" }
|
||||
@@ -563,10 +567,13 @@ class RegExpCharacterClassEscape extends RegExpEscape {
|
||||
class RegExpCharacterClass extends RegExpTerm, TRegExpCharacterClass {
|
||||
RegExpCharacterClass() { this = TRegExpCharacterClass(re, start, end) }
|
||||
|
||||
/** Holds if this character class is inverted, matching the opposite of its content. */
|
||||
predicate isInverted() { re.getChar(start + 1) = "^" }
|
||||
|
||||
/** Gets the `i`th char inside this charater class. */
|
||||
string getCharThing(int i) { result = re.getChar(i + start) }
|
||||
|
||||
/** Holds if this character class can match anything. */
|
||||
predicate isUniversalClass() {
|
||||
// [^]
|
||||
this.isInverted() and not exists(this.getAChild())
|
||||
@@ -620,6 +627,7 @@ class RegExpCharacterRange extends RegExpTerm, TRegExpCharacterRange {
|
||||
re.charRange(_, start, lower_end, upper_start, end)
|
||||
}
|
||||
|
||||
/** Holds if this range goes from `lo` to `hi`, in effect is `lo-hi`. */
|
||||
predicate isRange(string lo, string hi) {
|
||||
lo = re.getText().substring(start, lower_end) and
|
||||
hi = re.getText().substring(upper_start, end)
|
||||
@@ -653,8 +661,13 @@ class RegExpCharacterRange extends RegExpTerm, TRegExpCharacterRange {
|
||||
class RegExpNormalChar extends RegExpTerm, TRegExpNormalChar {
|
||||
RegExpNormalChar() { this = TRegExpNormalChar(re, start, end) }
|
||||
|
||||
/**
|
||||
* Holds if this constant represents a valid Unicode character (as opposed
|
||||
* to a surrogate code point that does not correspond to a character by itself.)
|
||||
*/
|
||||
predicate isCharacter() { any() }
|
||||
|
||||
/** Gets the string representation of the char matched by this term. */
|
||||
string getValue() { result = re.getText().substring(start, end) }
|
||||
|
||||
override RegExpTerm getChild(int i) { none() }
|
||||
@@ -684,15 +697,15 @@ class RegExpConstant extends RegExpTerm {
|
||||
qstart <= start and end <= qend
|
||||
) and
|
||||
value = this.(RegExpNormalChar).getValue()
|
||||
// This will never hold
|
||||
// or
|
||||
// this = TRegExpSpecialChar(re, start, end) and
|
||||
// re.inCharSet(start) and
|
||||
// value = this.(RegExpSpecialChar).getChar()
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if this constant represents a valid Unicode character (as opposed
|
||||
* to a surrogate code point that does not correspond to a character by itself.)
|
||||
*/
|
||||
predicate isCharacter() { any() }
|
||||
|
||||
/** Gets the string matched by this constant term. */
|
||||
string getValue() { result = value }
|
||||
|
||||
override RegExpTerm getChild(int i) { none() }
|
||||
@@ -731,10 +744,6 @@ class RegExpGroup extends RegExpTerm, TRegExpGroup {
|
||||
/** Gets the name of this capture group, if any. */
|
||||
string getName() { result = re.getGroupName(start, end) }
|
||||
|
||||
predicate isCharacter() { any() }
|
||||
|
||||
string getValue() { result = re.getText().substring(start, end) }
|
||||
|
||||
override RegExpTerm getChild(int i) {
|
||||
result.getRegex() = re and
|
||||
i = 0 and
|
||||
@@ -762,8 +771,13 @@ class RegExpSpecialChar extends RegExpTerm, TRegExpSpecialChar {
|
||||
re.specialCharacter(start, end, char)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if this constant represents a valid Unicode character (as opposed
|
||||
* to a surrogate code point that does not correspond to a character by itself.)
|
||||
*/
|
||||
predicate isCharacter() { any() }
|
||||
|
||||
/** Gets the char for this term. */
|
||||
string getChar() { result = char }
|
||||
|
||||
override RegExpTerm getChild(int i) { none() }
|
||||
@@ -828,8 +842,6 @@ class RegExpCaret extends RegExpSpecialChar {
|
||||
class RegExpZeroWidthMatch extends RegExpGroup {
|
||||
RegExpZeroWidthMatch() { re.zeroWidthMatch(start, end) }
|
||||
|
||||
override predicate isCharacter() { any() }
|
||||
|
||||
override RegExpTerm getChild(int i) { none() }
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpZeroWidthMatch" }
|
||||
|
||||
@@ -1636,6 +1636,119 @@ private module StdlibPrivate {
|
||||
result = this.getArg(any(int i | i >= msgIndex))
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// re
|
||||
// ---------------------------------------------------------------------------
|
||||
/**
|
||||
* List of methods in the `re` module immediately executing a regular expression.
|
||||
*
|
||||
* See https://docs.python.org/3/library/re.html#module-contents
|
||||
*/
|
||||
private class RegexExecutionMethod extends string {
|
||||
RegexExecutionMethod() {
|
||||
this in ["match", "fullmatch", "search", "split", "findall", "finditer", "sub", "subn"]
|
||||
}
|
||||
|
||||
/** Gets the index of the argument representing the string to be searched by a regex. */
|
||||
int getStringArgIndex() {
|
||||
this in ["match", "fullmatch", "search", "split", "findall", "finditer"] and
|
||||
result = 1
|
||||
or
|
||||
this in ["sub", "subn"] and
|
||||
result = 2
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A a call to a method from the `re` module immediately executing a regular expression.
|
||||
*
|
||||
* See `RegexExecutionMethods`
|
||||
*/
|
||||
private class DirectRegexExecution extends DataFlow::CallCfgNode, RegexExecution::Range {
|
||||
RegexExecutionMethod method;
|
||||
|
||||
DirectRegexExecution() { this = API::moduleImport("re").getMember(method).getACall() }
|
||||
|
||||
override DataFlow::Node getRegex() { result in [this.getArg(0), this.getArgByName("pattern")] }
|
||||
|
||||
override DataFlow::Node getString() {
|
||||
result in [this.getArg(method.getStringArgIndex()), this.getArgByName("string")]
|
||||
}
|
||||
|
||||
override string getName() { result = "re." + method }
|
||||
}
|
||||
|
||||
/** Helper module for tracking compiled regexes. */
|
||||
private module CompiledRegexes {
|
||||
private DataFlow::TypeTrackingNode compiledRegex(DataFlow::TypeTracker t, DataFlow::Node regex) {
|
||||
t.start() and
|
||||
result = API::moduleImport("re").getMember("compile").getACall() and
|
||||
regex in [
|
||||
result.(DataFlow::CallCfgNode).getArg(0),
|
||||
result.(DataFlow::CallCfgNode).getArgByName("pattern")
|
||||
]
|
||||
or
|
||||
exists(DataFlow::TypeTracker t2 | result = compiledRegex(t2, regex).track(t2, t))
|
||||
}
|
||||
|
||||
DataFlow::Node compiledRegex(DataFlow::Node regex) {
|
||||
compiledRegex(DataFlow::TypeTracker::end(), regex).flowsTo(result)
|
||||
}
|
||||
}
|
||||
|
||||
private import CompiledRegexes
|
||||
|
||||
/**
|
||||
* A call on compiled regular expression (obtained via `re.compile`) executing a
|
||||
* regular expression.
|
||||
*
|
||||
* Given the following example:
|
||||
*
|
||||
* ```py
|
||||
* pattern = re.compile(input)
|
||||
* pattern.match(s)
|
||||
* ```
|
||||
*
|
||||
* This class will identify that `re.compile` compiles `input` and afterwards
|
||||
* executes `re`'s `match`. As a result, `this` will refer to `pattern.match(s)`
|
||||
* and `this.getRegexNode()` will return the node for `input` (`re.compile`'s first argument).
|
||||
*
|
||||
*
|
||||
* See `RegexExecutionMethods`
|
||||
*
|
||||
* See https://docs.python.org/3/library/re.html#regular-expression-objects
|
||||
*/
|
||||
private class CompiledRegexExecution extends DataFlow::MethodCallNode, RegexExecution::Range {
|
||||
DataFlow::Node regexNode;
|
||||
RegexExecutionMethod method;
|
||||
|
||||
CompiledRegexExecution() { this.calls(compiledRegex(regexNode), method) }
|
||||
|
||||
override DataFlow::Node getRegex() { result = regexNode }
|
||||
|
||||
override DataFlow::Node getString() {
|
||||
result in [this.getArg(method.getStringArgIndex() - 1), this.getArgByName("string")]
|
||||
}
|
||||
|
||||
override string getName() { result = "re." + method }
|
||||
}
|
||||
|
||||
/**
|
||||
* A call to 're.escape'.
|
||||
* See https://docs.python.org/3/library/re.html#re.escape
|
||||
*/
|
||||
private class ReEscapeCall extends Escaping::Range, DataFlow::CallCfgNode {
|
||||
ReEscapeCall() { this = API::moduleImport("re").getMember("escape").getACall() }
|
||||
|
||||
override DataFlow::Node getAnInput() {
|
||||
result in [this.getArg(0), this.getArgByName("pattern")]
|
||||
}
|
||||
|
||||
override DataFlow::Node getOutput() { result = this }
|
||||
|
||||
override string getKind() { result = Escaping::getRegexKind() }
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
@@ -60,8 +60,8 @@ module PolynomialReDoS {
|
||||
RegExpTerm t;
|
||||
|
||||
RegexExecutionAsSink() {
|
||||
exists(CompiledRegexes::RegexExecution re |
|
||||
re.getRegexNode().asExpr() = t.getRegex() and
|
||||
exists(RegexExecution re |
|
||||
re.getRegex().asExpr() = t.getRegex() and
|
||||
this = re.getString()
|
||||
) and
|
||||
t.isRootTerm()
|
||||
@@ -76,137 +76,3 @@ module PolynomialReDoS {
|
||||
*/
|
||||
class StringConstCompareAsSanitizerGuard extends SanitizerGuard, StringConstCompare { }
|
||||
}
|
||||
|
||||
/** Helper module for tracking compiled regexes. */
|
||||
private module CompiledRegexes {
|
||||
// TODO: This module should be refactored and merged with the experimental work done on detecting
|
||||
// regex injections, such that this can be expressed from just using a concept.
|
||||
/** A configuration for finding uses of compiled regexes. */
|
||||
class RegexDefinitionConfiguration extends DataFlow2::Configuration {
|
||||
RegexDefinitionConfiguration() { this = "RegexDefinitionConfiguration" }
|
||||
|
||||
override predicate isSource(DataFlow::Node source) { source instanceof RegexDefinitonSource }
|
||||
|
||||
override predicate isSink(DataFlow::Node sink) { sink instanceof RegexDefinitionSink }
|
||||
}
|
||||
|
||||
/** A regex compilation. */
|
||||
class RegexDefinitonSource extends DataFlow::CallCfgNode {
|
||||
DataFlow::Node regexNode;
|
||||
|
||||
RegexDefinitonSource() {
|
||||
this = API::moduleImport("re").getMember("compile").getACall() and
|
||||
regexNode in [this.getArg(0), this.getArgByName("pattern")]
|
||||
}
|
||||
|
||||
/** Gets the regex that is being compiled by this node. */
|
||||
RegExpTerm getRegExp() { result.getRegex() = regexNode.asExpr() and result.isRootTerm() }
|
||||
|
||||
/** Gets the data flow node for the regex being compiled by this node. */
|
||||
DataFlow::Node getRegexNode() { result = regexNode }
|
||||
}
|
||||
|
||||
/** A use of a compiled regex. */
|
||||
class RegexDefinitionSink extends DataFlow::Node {
|
||||
RegexExecutionMethod method;
|
||||
DataFlow::CallCfgNode executingCall;
|
||||
|
||||
RegexDefinitionSink() {
|
||||
exists(DataFlow::AttrRead reMethod |
|
||||
executingCall.getFunction() = reMethod and
|
||||
reMethod.getAttributeName() = method and
|
||||
this = reMethod.getObject()
|
||||
)
|
||||
}
|
||||
|
||||
/** Gets the method used to execute the regex. */
|
||||
RegexExecutionMethod getMethod() { result = method }
|
||||
|
||||
/** Gets the data flow node for the executing call. */
|
||||
DataFlow::CallCfgNode getExecutingCall() { result = executingCall }
|
||||
}
|
||||
|
||||
/** A data flow node executing a regex. */
|
||||
abstract class RegexExecution extends DataFlow::Node {
|
||||
/** Gets the data flow node for the regex being compiled by this node. */
|
||||
abstract DataFlow::Node getRegexNode();
|
||||
|
||||
/** Gets a dataflow node for the string to be searched or matched against. */
|
||||
abstract DataFlow::Node getString();
|
||||
}
|
||||
|
||||
private class RegexExecutionMethod extends string {
|
||||
RegexExecutionMethod() {
|
||||
this in ["match", "fullmatch", "search", "split", "findall", "finditer", "sub", "subn"]
|
||||
}
|
||||
}
|
||||
|
||||
/** Gets the index of the argument representing the string to be searched by a regex. */
|
||||
int stringArg(RegexExecutionMethod method) {
|
||||
method in ["match", "fullmatch", "search", "split", "findall", "finditer"] and
|
||||
result = 1
|
||||
or
|
||||
method in ["sub", "subn"] and
|
||||
result = 2
|
||||
}
|
||||
|
||||
/**
|
||||
* A class to find `re` methods immediately executing an expression.
|
||||
*
|
||||
* See `RegexExecutionMethods`
|
||||
*/
|
||||
class DirectRegex extends DataFlow::CallCfgNode, RegexExecution {
|
||||
RegexExecutionMethod method;
|
||||
|
||||
DirectRegex() { this = API::moduleImport("re").getMember(method).getACall() }
|
||||
|
||||
override DataFlow::Node getRegexNode() {
|
||||
result in [this.getArg(0), this.getArgByName("pattern")]
|
||||
}
|
||||
|
||||
override DataFlow::Node getString() {
|
||||
result in [this.getArg(stringArg(method)), this.getArgByName("string")]
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A class to find `re` methods immediately executing a compiled expression by `re.compile`.
|
||||
*
|
||||
* Given the following example:
|
||||
*
|
||||
* ```py
|
||||
* pattern = re.compile(input)
|
||||
* pattern.match(s)
|
||||
* ```
|
||||
*
|
||||
* This class will identify that `re.compile` compiles `input` and afterwards
|
||||
* executes `re`'s `match`. As a result, `this` will refer to `pattern.match(s)`
|
||||
* and `this.getRegexNode()` will return the node for `input` (`re.compile`'s first argument)
|
||||
*
|
||||
*
|
||||
* See `RegexExecutionMethods`
|
||||
*
|
||||
* See https://docs.python.org/3/library/re.html#regular-expression-objects
|
||||
*/
|
||||
private class CompiledRegex extends DataFlow::CallCfgNode, RegexExecution {
|
||||
DataFlow::Node regexNode;
|
||||
RegexExecutionMethod method;
|
||||
|
||||
CompiledRegex() {
|
||||
exists(
|
||||
RegexDefinitionConfiguration conf, RegexDefinitonSource source, RegexDefinitionSink sink
|
||||
|
|
||||
conf.hasFlow(source, sink) and
|
||||
regexNode = source.getRegexNode() and
|
||||
method = sink.getMethod() and
|
||||
this = sink.getExecutingCall()
|
||||
)
|
||||
}
|
||||
|
||||
override DataFlow::Node getRegexNode() { result = regexNode }
|
||||
|
||||
override DataFlow::Node getString() {
|
||||
result in [this.getArg(stringArg(method) - 1), this.getArgByName("string")]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -59,7 +59,7 @@ module ReflectedXSS {
|
||||
class HtmlEscapingAsSanitizer extends Sanitizer {
|
||||
HtmlEscapingAsSanitizer() {
|
||||
// TODO: For now, since there is not an `isSanitizingStep` member-predicate part of a
|
||||
// `TaintTracking::Configuration`, we use treat the output is a taint-sanitizer. This
|
||||
// `TaintTracking::Configuration`, we treat the output as a taint-sanitizer. This
|
||||
// is slightly imprecise, which you can see in the `m_unsafe + SAFE` test-case in
|
||||
// python/ql/test/library-tests/frameworks/markupsafe/taint_test.py
|
||||
//
|
||||
|
||||
@@ -0,0 +1,37 @@
|
||||
/**
|
||||
* Provides a taint-tracking configuration for detecting regular expression injection
|
||||
* vulnerabilities.
|
||||
*
|
||||
* Note, for performance reasons: only import this file if
|
||||
* `RegexInjection::Configuration` is needed, otherwise
|
||||
* `RegexInjectionCustomizations` should be imported instead.
|
||||
*/
|
||||
|
||||
private import python
|
||||
import semmle.python.dataflow.new.DataFlow
|
||||
import semmle.python.dataflow.new.TaintTracking
|
||||
|
||||
/**
|
||||
* Provides a taint-tracking configuration for detecting regular expression injection
|
||||
* vulnerabilities.
|
||||
*/
|
||||
module RegexInjection {
|
||||
import RegexInjectionCustomizations::RegexInjection
|
||||
|
||||
/**
|
||||
* A taint-tracking configuration for detecting "reflected server-side cross-site scripting" vulnerabilities.
|
||||
*/
|
||||
class Configuration extends TaintTracking::Configuration {
|
||||
Configuration() { this = "RegexInjection" }
|
||||
|
||||
override predicate isSource(DataFlow::Node source) { source instanceof Source }
|
||||
|
||||
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
|
||||
|
||||
override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer }
|
||||
|
||||
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
|
||||
guard instanceof SanitizerGuard
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,62 @@
|
||||
/**
|
||||
* Provides default sources, sinks and sanitizers for detecting
|
||||
* "regular expression injection"
|
||||
* vulnerabilities, as well as extension points for adding your own.
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.Concepts
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.dataflow.new.TaintTracking
|
||||
private import semmle.python.dataflow.new.RemoteFlowSources
|
||||
|
||||
/**
|
||||
* Provides default sources, sinks and sanitizers for detecting
|
||||
* "regular expression injection"
|
||||
* vulnerabilities, as well as extension points for adding your own.
|
||||
*/
|
||||
module RegexInjection {
|
||||
/**
|
||||
* A data flow source for "regular expression injection" vulnerabilities.
|
||||
*/
|
||||
abstract class Source extends DataFlow::Node { }
|
||||
|
||||
/**
|
||||
* A sink for "regular expression injection" vulnerabilities is the execution of a regular expression.
|
||||
* If you have a custom way to execute regular expressions, you can extend `RegexExecution::Range`.
|
||||
*/
|
||||
class Sink extends DataFlow::Node {
|
||||
RegexExecution regexExecution;
|
||||
|
||||
Sink() { this = regexExecution.getRegex() }
|
||||
|
||||
/** Gets the call that executes the regular expression marked by this sink. */
|
||||
RegexExecution getRegexExecution() { result = regexExecution }
|
||||
}
|
||||
|
||||
/**
|
||||
* A sanitizer for "regular expression injection" vulnerabilities.
|
||||
*/
|
||||
abstract class Sanitizer extends DataFlow::Node { }
|
||||
|
||||
/**
|
||||
* A sanitizer guard for "regular expression injection" vulnerabilities.
|
||||
*/
|
||||
abstract class SanitizerGuard extends DataFlow::BarrierGuard { }
|
||||
|
||||
/**
|
||||
* A source of remote user input, considered as a flow source.
|
||||
*/
|
||||
class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
|
||||
|
||||
/**
|
||||
* A regex escaping, considered as a sanitizer.
|
||||
*/
|
||||
class RegexEscapingAsSanitizer extends Sanitizer {
|
||||
RegexEscapingAsSanitizer() {
|
||||
// Due to use-use flow, we want the output rather than an input
|
||||
// (so the input can still flow to other sinks).
|
||||
this = any(RegexEscaping esc).getOutput()
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user