mirror of
https://github.com/github/codeql.git
synced 2025-12-17 01:03:14 +01:00
Ruby: refactor regex libraries
This commit is contained in:
@@ -552,7 +552,7 @@ class RegExpWordBoundary extends RegExpSpecialChar {
|
||||
|
||||
/**
|
||||
* A character class escape in a regular expression.
|
||||
* That is, an escaped charachter that denotes multiple characters.
|
||||
* That is, an escaped character that denotes multiple characters.
|
||||
*
|
||||
* Examples:
|
||||
*
|
||||
|
||||
@@ -186,7 +186,7 @@ abstract class RegexString extends Expr {
|
||||
)
|
||||
}
|
||||
|
||||
/** Hold is a character set starts between `start` and `end`. */
|
||||
/** Holds if a character set starts between `start` and `end`. */
|
||||
predicate char_set_start(int start, int end) {
|
||||
this.char_set_start(start) = true and
|
||||
(
|
||||
@@ -314,8 +314,10 @@ abstract class RegexString extends Expr {
|
||||
result = this.(Bytes).getS()
|
||||
}
|
||||
|
||||
/** Gets the `i`th character of this regex */
|
||||
string getChar(int i) { result = this.getText().charAt(i) }
|
||||
|
||||
/** Gets the `i`th character of this regex, unless it is part of a character escape sequence. */
|
||||
string nonEscapedCharAt(int i) {
|
||||
result = this.getText().charAt(i) and
|
||||
not exists(int x, int y | this.escapedCharacter(x, y) and i in [x .. y - 1])
|
||||
@@ -327,6 +329,9 @@ abstract class RegexString extends Expr {
|
||||
|
||||
private predicate isGroupStart(int i) { this.nonEscapedCharAt(i) = "(" and not this.inCharSet(i) }
|
||||
|
||||
/**
|
||||
* Holds if the `i`th character could not be parsed.
|
||||
*/
|
||||
predicate failedToParse(int i) {
|
||||
exists(this.getChar(i)) and
|
||||
not exists(int start, int end |
|
||||
@@ -415,6 +420,9 @@ abstract class RegexString extends Expr {
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if a simple or escaped character is found between `start` and `end`.
|
||||
*/
|
||||
predicate character(int start, int end) {
|
||||
(
|
||||
this.simpleCharacter(start, end) and
|
||||
@@ -426,12 +434,18 @@ abstract class RegexString extends Expr {
|
||||
not exists(int x, int y | this.backreference(x, y) and x <= start and y >= end)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if a normal character is found between `start` and `end`.
|
||||
*/
|
||||
predicate normalCharacter(int start, int end) {
|
||||
end = start + 1 and
|
||||
this.character(start, end) and
|
||||
not this.specialCharacter(start, end, _)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if a special character is found between `start` and `end`.
|
||||
*/
|
||||
predicate specialCharacter(int start, int end, string char) {
|
||||
not this.inCharSet(start) and
|
||||
this.character(start, end) and
|
||||
@@ -490,7 +504,7 @@ abstract class RegexString extends Expr {
|
||||
this.specialCharacter(start, end, _)
|
||||
}
|
||||
|
||||
/** Whether the text in the range start,end is a group */
|
||||
/** Whether the text in the range `start,end` is a group */
|
||||
predicate group(int start, int end) {
|
||||
this.groupContents(start, end, _, _)
|
||||
or
|
||||
@@ -609,6 +623,7 @@ abstract class RegexString extends Expr {
|
||||
this.simple_group_start(start, end)
|
||||
}
|
||||
|
||||
/** Matches the start of a non-capturing group, e.g. `(?:` */
|
||||
private predicate non_capturing_group_start(int start, int end) {
|
||||
this.isGroupStart(start) and
|
||||
this.getChar(start + 1) = "?" and
|
||||
@@ -616,12 +631,18 @@ abstract class RegexString extends Expr {
|
||||
end = start + 3
|
||||
}
|
||||
|
||||
/** Matches the start of a simple group, e.g. `(a+)`. */
|
||||
private predicate simple_group_start(int start, int end) {
|
||||
this.isGroupStart(start) and
|
||||
this.getChar(start + 1) != "?" and
|
||||
end = start + 1
|
||||
}
|
||||
|
||||
/**
|
||||
* Matches the start of a named group, such as:
|
||||
* - `(?<name>\w+)`
|
||||
* - `(?'name'\w+)`
|
||||
*/
|
||||
private predicate named_group_start(int start, int end) {
|
||||
this.isGroupStart(start) and
|
||||
this.getChar(start + 1) = "?" and
|
||||
@@ -673,6 +694,7 @@ abstract class RegexString extends Expr {
|
||||
)
|
||||
}
|
||||
|
||||
/** Matches the start of a positive lookahead assertion, i.e. `(?=`. */
|
||||
private predicate lookahead_assertion_start(int start, int end) {
|
||||
this.isGroupStart(start) and
|
||||
this.getChar(start + 1) = "?" and
|
||||
@@ -680,6 +702,7 @@ abstract class RegexString extends Expr {
|
||||
end = start + 3
|
||||
}
|
||||
|
||||
/** Matches the start of a negative lookahead assertion, i.e. `(?!`. */
|
||||
private predicate negative_lookahead_assertion_start(int start, int end) {
|
||||
this.isGroupStart(start) and
|
||||
this.getChar(start + 1) = "?" and
|
||||
@@ -687,6 +710,7 @@ abstract class RegexString extends Expr {
|
||||
end = start + 3
|
||||
}
|
||||
|
||||
/** Matches the start of a positive lookbehind assertion, i.e. `(?<=`. */
|
||||
private predicate lookbehind_assertion_start(int start, int end) {
|
||||
this.isGroupStart(start) and
|
||||
this.getChar(start + 1) = "?" and
|
||||
@@ -695,6 +719,7 @@ abstract class RegexString extends Expr {
|
||||
end = start + 4
|
||||
}
|
||||
|
||||
/** Matches the start of a negative lookbehind assertion, i.e. `(?<!`. */
|
||||
private predicate negative_lookbehind_assertion_start(int start, int end) {
|
||||
this.isGroupStart(start) and
|
||||
this.getChar(start + 1) = "?" and
|
||||
@@ -703,6 +728,7 @@ abstract class RegexString extends Expr {
|
||||
end = start + 4
|
||||
}
|
||||
|
||||
/** Matches the start of a comment group, i.e. `(?#`. */
|
||||
private predicate comment_group_start(int start, int end) {
|
||||
this.isGroupStart(start) and
|
||||
this.getChar(start + 1) = "?" and
|
||||
@@ -710,6 +736,7 @@ abstract class RegexString extends Expr {
|
||||
end = start + 3
|
||||
}
|
||||
|
||||
/** Matches the contents of a group. */
|
||||
predicate groupContents(int start, int end, int in_start, int in_end) {
|
||||
this.group_start(start, in_start) and
|
||||
end = in_end + 1 and
|
||||
@@ -717,12 +744,14 @@ abstract class RegexString extends Expr {
|
||||
this.isGroupEnd(in_end)
|
||||
}
|
||||
|
||||
/** Matches a named backreference, e.g. `\k<foo>`. */
|
||||
private predicate named_backreference(int start, int end, string name) {
|
||||
this.named_backreference_start(start, start + 4) and
|
||||
end = min(int i | i > start + 4 and this.getChar(i) = ")") + 1 and
|
||||
name = this.getText().substring(start + 4, end - 2)
|
||||
}
|
||||
|
||||
/** Matches a numbered backreference, e.g. `\1`. */
|
||||
private predicate numbered_backreference(int start, int end, int value) {
|
||||
this.escapingChar(start) and
|
||||
// starting with 0 makes it an octal escape
|
||||
@@ -747,7 +776,7 @@ abstract class RegexString extends Expr {
|
||||
)
|
||||
}
|
||||
|
||||
/** Whether the text in the range start,end is a back reference */
|
||||
/** Whether the text in the range `start,end` is a back reference */
|
||||
predicate backreference(int start, int end) {
|
||||
this.numbered_backreference(start, end, _)
|
||||
or
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import codeql.ruby.security.performance.RegExpTreeView
|
||||
import codeql.ruby.Regexp
|
||||
|
||||
query predicate nonUniqueChild(RegExpParent parent, int i, RegExpTerm child) {
|
||||
child = parent.getChild(i) and
|
||||
|
||||
143
ruby/ql/lib/codeql/ruby/Regexp.qll
Normal file
143
ruby/ql/lib/codeql/ruby/Regexp.qll
Normal file
@@ -0,0 +1,143 @@
|
||||
/**
|
||||
* Provides classes for working with regular expressions.
|
||||
*
|
||||
* Regular expression literals are represented as an abstract syntax tree of regular expression
|
||||
* terms.
|
||||
*/
|
||||
|
||||
import regexp.RegExpTreeView // re-export
|
||||
private import regexp.ParseRegExp
|
||||
private import codeql.ruby.ast.Literal as AST
|
||||
private import codeql.ruby.DataFlow
|
||||
private import codeql.ruby.controlflow.CfgNodes
|
||||
private import codeql.ruby.ApiGraphs
|
||||
private import codeql.ruby.dataflow.internal.tainttrackingforlibraries.TaintTrackingImpl
|
||||
|
||||
/**
|
||||
* Provides utility predicates related to regular expressions.
|
||||
*/
|
||||
module RegExpPatterns {
|
||||
/**
|
||||
* Gets a pattern that matches common top-level domain names in lower case.
|
||||
*/
|
||||
string getACommonTld() {
|
||||
// according to ranking by http://google.com/search?q=site:.<<TLD>>
|
||||
result = "(?:com|org|edu|gov|uk|net|io)(?![a-z0-9])"
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A node whose value may flow to a position where it is interpreted
|
||||
* as a part of a regular expression.
|
||||
*/
|
||||
abstract class RegExpPatternSource extends DataFlow::Node {
|
||||
/**
|
||||
* Gets a node where the pattern of this node is parsed as a part of
|
||||
* a regular expression.
|
||||
*/
|
||||
abstract DataFlow::Node getAParse();
|
||||
|
||||
/**
|
||||
* Gets the root term of the regular expression parsed from this pattern.
|
||||
*/
|
||||
abstract RegExpTerm getRegExpTerm();
|
||||
}
|
||||
|
||||
/**
|
||||
* A regular expression literal, viewed as the pattern source for itself.
|
||||
*/
|
||||
private class RegExpLiteralPatternSource extends RegExpPatternSource {
|
||||
private AST::RegExpLiteral astNode;
|
||||
|
||||
RegExpLiteralPatternSource() { astNode = this.asExpr().getExpr() }
|
||||
|
||||
override DataFlow::Node getAParse() { result = this }
|
||||
|
||||
override RegExpTerm getRegExpTerm() { result = astNode.getParsed() }
|
||||
}
|
||||
|
||||
/**
|
||||
* A node whose string value may flow to a position where it is interpreted
|
||||
* as a part of a regular expression.
|
||||
*/
|
||||
private class StringRegExpPatternSource extends RegExpPatternSource {
|
||||
private DataFlow::Node parse;
|
||||
|
||||
StringRegExpPatternSource() { this = regExpSource(parse) }
|
||||
|
||||
override DataFlow::Node getAParse() { result = parse }
|
||||
|
||||
override RegExpTerm getRegExpTerm() { result.getRegExp() = this.asExpr().getExpr() }
|
||||
}
|
||||
|
||||
private class RegExpLiteralRegExp extends RegExp, AST::RegExpLiteral {
|
||||
override predicate isDotAll() { this.hasMultilineFlag() }
|
||||
|
||||
override predicate isIgnoreCase() { this.hasCaseInsensitiveFlag() }
|
||||
|
||||
override string getFlags() { result = this.getFlagString() }
|
||||
}
|
||||
|
||||
private class ParsedStringRegExp extends RegExp {
|
||||
private DataFlow::Node parse;
|
||||
|
||||
ParsedStringRegExp() { this = regExpSource(parse).asExpr().getExpr() }
|
||||
|
||||
DataFlow::Node getAParse() { result = parse }
|
||||
|
||||
override predicate isDotAll() { none() }
|
||||
|
||||
override predicate isIgnoreCase() { none() }
|
||||
|
||||
override string getFlags() { none() }
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `source` may be interpreted as a regular expression.
|
||||
*/
|
||||
private predicate isInterpretedAsRegExp(DataFlow::Node source) {
|
||||
// The first argument to an invocation of `Regexp.new` or `Regexp.compile`.
|
||||
source = API::getTopLevelMember("Regexp").getAMethodCall(["compile", "new"]).getArgument(0)
|
||||
or
|
||||
// The argument of a call that coerces the argument to a regular expression.
|
||||
exists(DataFlow::CallNode mce |
|
||||
mce.getMethodName() = ["match", "match?"] and
|
||||
source = mce.getArgument(0) and
|
||||
// exclude https://ruby-doc.org/core-2.4.0/Regexp.html#method-i-match
|
||||
not mce.getReceiver().asExpr().getExpr() instanceof AST::RegExpLiteral
|
||||
)
|
||||
}
|
||||
|
||||
private class RegExpConfiguration extends Configuration {
|
||||
RegExpConfiguration() { this = "RegExpConfiguration" }
|
||||
|
||||
override predicate isSource(DataFlow::Node source) {
|
||||
source.asExpr() =
|
||||
any(ExprCfgNode e |
|
||||
e.getConstantValue().isString(_) and
|
||||
not e instanceof ExprNodes::VariableReadAccessCfgNode and
|
||||
not e instanceof ExprNodes::ConstantReadAccessCfgNode
|
||||
)
|
||||
}
|
||||
|
||||
override predicate isSink(DataFlow::Node sink) { isInterpretedAsRegExp(sink) }
|
||||
|
||||
override predicate isSanitizer(DataFlow::Node node) {
|
||||
// stop flow if `node` is receiver of
|
||||
// https://ruby-doc.org/core-2.4.0/String.html#method-i-match
|
||||
exists(DataFlow::CallNode mce |
|
||||
mce.getMethodName() = ["match", "match?"] and
|
||||
node = mce.getReceiver() and
|
||||
mce.getArgument(0).asExpr().getExpr() instanceof AST::RegExpLiteral
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a node whose value may flow (inter-procedurally) to `re`, where it is interpreted
|
||||
* as a part of a regular expression.
|
||||
*/
|
||||
cached
|
||||
DataFlow::Node regExpSource(DataFlow::Node re) {
|
||||
exists(RegExpConfiguration c | c.hasFlow(result, re))
|
||||
}
|
||||
@@ -1,5 +1,5 @@
|
||||
private import codeql.ruby.AST
|
||||
private import codeql.ruby.security.performance.RegExpTreeView as RETV
|
||||
private import codeql.ruby.Regexp as RE
|
||||
private import internal.AST
|
||||
private import internal.Constant
|
||||
private import internal.Literal
|
||||
@@ -594,7 +594,7 @@ class RegExpLiteral extends StringlikeLiteral, TRegExpLiteral {
|
||||
final predicate hasFreeSpacingFlag() { this.getFlagString().charAt(_) = "x" }
|
||||
|
||||
/** Returns the root node of the parse tree of this regular expression. */
|
||||
final RETV::RegExpTerm getParsed() { result = RETV::getParsedRegExp(this) }
|
||||
final RE::RegExpTerm getParsed() { result = RE::getParsedRegExp(this) }
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
*/
|
||||
|
||||
private import AST
|
||||
private import codeql.ruby.security.performance.RegExpTreeView as RETV
|
||||
private import codeql.ruby.Regexp as RE
|
||||
private import codeql.ruby.ast.internal.Synthesis
|
||||
|
||||
/**
|
||||
@@ -37,7 +37,7 @@ private predicate shouldPrintAstEdge(AstNode parent, string edgeName, AstNode ch
|
||||
|
||||
newtype TPrintNode =
|
||||
TPrintRegularAstNode(AstNode n) { shouldPrintNode(n) } or
|
||||
TPrintRegExpNode(RETV::RegExpTerm term) {
|
||||
TPrintRegExpNode(RE::RegExpTerm term) {
|
||||
exists(RegExpLiteral literal |
|
||||
shouldPrintNode(literal) and
|
||||
term.getRootTerm() = literal.getParsed()
|
||||
@@ -107,7 +107,7 @@ class PrintRegularAstNode extends PrintAstNode, TPrintRegularAstNode {
|
||||
or
|
||||
// If this AST node is a regexp literal, add the parsed regexp tree as a
|
||||
// child.
|
||||
exists(RETV::RegExpTerm t | t = astNode.(RegExpLiteral).getParsed() |
|
||||
exists(RE::RegExpTerm t | t = astNode.(RegExpLiteral).getParsed() |
|
||||
result = TPrintRegExpNode(t) and edgeName = "getParsed"
|
||||
)
|
||||
}
|
||||
@@ -134,7 +134,7 @@ class PrintRegularAstNode extends PrintAstNode, TPrintRegularAstNode {
|
||||
|
||||
/** A parsed regexp node in the output tree. */
|
||||
class PrintRegExpNode extends PrintAstNode, TPrintRegExpNode {
|
||||
RETV::RegExpTerm regexNode;
|
||||
RE::RegExpTerm regexNode;
|
||||
|
||||
PrintRegExpNode() { this = TPrintRegExpNode(regexNode) }
|
||||
|
||||
@@ -147,7 +147,7 @@ class PrintRegExpNode extends PrintAstNode, TPrintRegExpNode {
|
||||
exists(int i | result = TPrintRegExpNode(regexNode.getChild(i)) and edgeName = i.toString())
|
||||
}
|
||||
|
||||
override int getOrder() { exists(RETV::RegExpTerm p | p.getChild(result) = regexNode) }
|
||||
override int getOrder() { exists(RE::RegExpTerm p | p.getChild(result) = regexNode) }
|
||||
|
||||
override predicate hasLocationInfo(
|
||||
string filepath, int startline, int startcolumn, int endline, int endcolumn
|
||||
|
||||
@@ -7,10 +7,6 @@
|
||||
|
||||
private import codeql.ruby.ast.Literal as AST
|
||||
private import codeql.Locations
|
||||
private import codeql.ruby.DataFlow
|
||||
private import codeql.ruby.controlflow.CfgNodes
|
||||
private import codeql.ruby.ApiGraphs
|
||||
private import codeql.ruby.dataflow.internal.tainttrackingforlibraries.TaintTrackingImpl
|
||||
|
||||
/**
|
||||
* A `StringlikeLiteral` containing a regular expression term, that is, either
|
||||
@@ -116,6 +112,7 @@ abstract class RegExp extends AST::StringlikeLiteral {
|
||||
)
|
||||
}
|
||||
|
||||
/** Holds if a character set starts between `start` and `end`. */
|
||||
predicate charSetStart(int start, int end) {
|
||||
this.charSetStart(start) = true and
|
||||
(
|
||||
@@ -145,14 +142,21 @@ abstract class RegExp extends AST::StringlikeLiteral {
|
||||
)
|
||||
}
|
||||
|
||||
predicate charSetToken(int charsetStart, int index, int tokenStart, int tokenEnd) {
|
||||
/**
|
||||
* Holds if the character set starting at `charsetStart` contains either
|
||||
* a character or a `-` found between `tokenStart` and `tokenEnd`, as its `index`th token.
|
||||
*/
|
||||
private predicate charSetToken(int charsetStart, int index, int tokenStart, int tokenEnd) {
|
||||
tokenStart =
|
||||
rank[index](int start, int end | this.charSetToken(charsetStart, start, end) | start) and
|
||||
this.charSetToken(charsetStart, tokenStart, tokenEnd)
|
||||
}
|
||||
|
||||
/** Either a char or a - */
|
||||
predicate charSetToken(int charsetStart, int start, int end) {
|
||||
/**
|
||||
* Holds if the character set starting at `charsetStart` contains either
|
||||
* a character or a `-` found between `start` and `end`.
|
||||
*/
|
||||
private predicate charSetToken(int charsetStart, int start, int end) {
|
||||
this.charSetStart(charsetStart, start) and
|
||||
(
|
||||
this.escapedCharacter(start, end)
|
||||
@@ -174,6 +178,10 @@ abstract class RegExp extends AST::StringlikeLiteral {
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if the character set starting at `charsetStart` contains either
|
||||
* a character or a range found between `start` and `end`.
|
||||
*/
|
||||
predicate charSetChild(int charsetStart, int start, int end) {
|
||||
this.charSetToken(charsetStart, start, end) and
|
||||
not exists(int rangeStart, int rangeEnd |
|
||||
@@ -185,6 +193,11 @@ abstract class RegExp extends AST::StringlikeLiteral {
|
||||
this.charRange(charsetStart, start, _, _, end)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if the character set starting at `charsetStart` contains a character range
|
||||
* with lower bound found between `start` and `lowerEnd`
|
||||
* and upper bound found between `upperStart` and `end`.
|
||||
*/
|
||||
predicate charRange(int charsetStart, int start, int lowerEnd, int upperStart, int end) {
|
||||
exists(int index |
|
||||
this.charRangeEnd(charsetStart, index) = true and
|
||||
@@ -193,6 +206,13 @@ abstract class RegExp extends AST::StringlikeLiteral {
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper predicate for `charRange`.
|
||||
* We can determine where character ranges end by a left to right sweep.
|
||||
*
|
||||
* To avoid negative recursion we return a boolean. See `escaping`,
|
||||
* the helper for `escapingChar`, for a clean use of this pattern.
|
||||
*/
|
||||
private boolean charRangeEnd(int charsetStart, int index) {
|
||||
this.charSetToken(charsetStart, index, _, _) and
|
||||
(
|
||||
@@ -216,8 +236,15 @@ abstract class RegExp extends AST::StringlikeLiteral {
|
||||
)
|
||||
}
|
||||
|
||||
/** Holds if the character at `pos` is a "\" that is actually escaping what comes after. */
|
||||
predicate escapingChar(int pos) { this.escaping(pos) = true }
|
||||
|
||||
/**
|
||||
* Helper predicate for `escapingChar`.
|
||||
* In order to avoid negative recursion, we return a boolean.
|
||||
* This way, we can refer to `escaping(pos - 1).booleanNot()`
|
||||
* rather than to a negated version of `escaping(pos)`.
|
||||
*/
|
||||
private boolean escaping(int pos) {
|
||||
pos = -1 and result = false
|
||||
or
|
||||
@@ -229,8 +256,10 @@ abstract class RegExp extends AST::StringlikeLiteral {
|
||||
/** Gets the text of this regex */
|
||||
string getText() { result = this.getConstantValue().getString() }
|
||||
|
||||
/** Gets the `i`th character of this regex */
|
||||
string getChar(int i) { result = this.getText().charAt(i) }
|
||||
|
||||
/** Gets the `i`th character of this regex, unless it is part of a character escape sequence. */
|
||||
string nonEscapedCharAt(int i) {
|
||||
result = this.getText().charAt(i) and
|
||||
not exists(int x, int y | this.escapedCharacter(x, y) and i in [x .. y - 1])
|
||||
@@ -242,6 +271,9 @@ abstract class RegExp extends AST::StringlikeLiteral {
|
||||
|
||||
private predicate isGroupStart(int i) { this.nonEscapedCharAt(i) = "(" and not this.inCharSet(i) }
|
||||
|
||||
/**
|
||||
* Holds if the `i`th character could not be parsed.
|
||||
*/
|
||||
predicate failedToParse(int i) {
|
||||
exists(this.getChar(i)) and
|
||||
not exists(int start, int end |
|
||||
@@ -331,6 +363,11 @@ abstract class RegExp extends AST::StringlikeLiteral {
|
||||
this.getChar(start + 3) = "^"
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if an escaped character is found between `start` and `end`.
|
||||
* Escaped characters include hex values, octal values and named escapes,
|
||||
* but excludes backreferences.
|
||||
*/
|
||||
predicate escapedCharacter(int start, int end) {
|
||||
this.escapingChar(start) and
|
||||
not this.numberedBackreference(start, _, _) and
|
||||
@@ -350,17 +387,25 @@ abstract class RegExp extends AST::StringlikeLiteral {
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if the character at `index` is inside a character set.
|
||||
*/
|
||||
predicate inCharSet(int index) {
|
||||
exists(int x, int y | this.charSet(x, y) and index in [x + 1 .. y - 2])
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if the character at `index` is inside a posix bracket.
|
||||
*/
|
||||
predicate inPosixBracket(int index) {
|
||||
exists(int x, int y |
|
||||
this.posixStyleNamedCharacterProperty(x, y, _) and index in [x + 1 .. y - 2]
|
||||
)
|
||||
}
|
||||
|
||||
/** 'Simple' characters are any that don't alter the parsing of the regex. */
|
||||
/**
|
||||
* 'simple' characters are any that don't alter the parsing of the regex.
|
||||
*/
|
||||
private predicate simpleCharacter(int start, int end) {
|
||||
end = start + 1 and
|
||||
not this.charSet(start, _) and
|
||||
@@ -391,6 +436,9 @@ abstract class RegExp extends AST::StringlikeLiteral {
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if a simple or escaped character is found between `start` and `end`.
|
||||
*/
|
||||
predicate character(int start, int end) {
|
||||
(
|
||||
this.simpleCharacter(start, end) and
|
||||
@@ -406,12 +454,18 @@ abstract class RegExp extends AST::StringlikeLiteral {
|
||||
not exists(int x, int y | this.multiples(x, y, _, _) and x <= start and y >= end)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if a normal character is found between `start` and `end`.
|
||||
*/
|
||||
predicate normalCharacter(int start, int end) {
|
||||
end = start + 1 and
|
||||
this.character(start, end) and
|
||||
not this.specialCharacter(start, end, _)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if a special character is found between `start` and `end`.
|
||||
*/
|
||||
predicate specialCharacter(int start, int end, string char) {
|
||||
this.character(start, end) and
|
||||
not this.inCharSet(start) and
|
||||
@@ -505,6 +559,7 @@ abstract class RegExp extends AST::StringlikeLiteral {
|
||||
this.positiveLookbehindAssertionGroup(start, end)
|
||||
}
|
||||
|
||||
/** Holds if an empty group is found between `start` and `end`. */
|
||||
predicate emptyGroup(int start, int end) {
|
||||
exists(int endm1 | end = endm1 + 1 |
|
||||
this.groupStart(start, endm1) and
|
||||
@@ -538,24 +593,28 @@ abstract class RegExp extends AST::StringlikeLiteral {
|
||||
)
|
||||
}
|
||||
|
||||
/** Holds if a negative lookahead is found between `start` and `end` */
|
||||
predicate negativeLookaheadAssertionGroup(int start, int end) {
|
||||
exists(int inStart | this.negativeLookaheadAssertionStart(start, inStart) |
|
||||
this.groupContents(start, end, inStart, _)
|
||||
)
|
||||
}
|
||||
|
||||
/** Holds if a negative lookbehind is found between `start` and `end` */
|
||||
predicate negativeLookbehindAssertionGroup(int start, int end) {
|
||||
exists(int inStart | this.negativeLookbehindAssertionStart(start, inStart) |
|
||||
this.groupContents(start, end, inStart, _)
|
||||
)
|
||||
}
|
||||
|
||||
/** Holds if a positive lookahead is found between `start` and `end` */
|
||||
predicate positiveLookaheadAssertionGroup(int start, int end) {
|
||||
exists(int inStart | this.lookaheadAssertionStart(start, inStart) |
|
||||
this.groupContents(start, end, inStart, _)
|
||||
)
|
||||
}
|
||||
|
||||
/** Holds if a positive lookbehind is found between `start` and `end` */
|
||||
predicate positiveLookbehindAssertionGroup(int start, int end) {
|
||||
exists(int inStart | this.lookbehindAssertionStart(start, inStart) |
|
||||
this.groupContents(start, end, inStart, _)
|
||||
@@ -661,6 +720,7 @@ abstract class RegExp extends AST::StringlikeLiteral {
|
||||
end = start + 3
|
||||
}
|
||||
|
||||
/** Matches the contents of a group. */
|
||||
predicate groupContents(int start, int end, int inStart, int inEnd) {
|
||||
this.groupStart(start, inStart) and
|
||||
end = inEnd + 1 and
|
||||
@@ -747,6 +807,11 @@ abstract class RegExp extends AST::StringlikeLiteral {
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if a repetition quantifier is found between `start` and `end`,
|
||||
* with the given lower and upper bounds. If a bound is omitted, the corresponding
|
||||
* string is empty.
|
||||
*/
|
||||
predicate multiples(int start, int end, string lower, string upper) {
|
||||
exists(string text, string match, string inner |
|
||||
text = this.getText() and
|
||||
@@ -774,6 +839,13 @@ abstract class RegExp extends AST::StringlikeLiteral {
|
||||
this.qualifiedPart(start, _, end, maybeEmpty, mayRepeatForever)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if a qualified part is found between `start` and `partEnd` and the qualifier is
|
||||
* found between `partEnd` and `end`.
|
||||
*
|
||||
* `maybeEmpty` is true if the part is optional.
|
||||
* `mayRepeatForever` is true if the part may be repeated unboundedly.
|
||||
*/
|
||||
predicate qualifiedPart(
|
||||
int start, int partEnd, int end, boolean maybeEmpty, boolean mayRepeatForever
|
||||
) {
|
||||
@@ -781,6 +853,7 @@ abstract class RegExp extends AST::StringlikeLiteral {
|
||||
this.qualifier(partEnd, end, maybeEmpty, mayRepeatForever)
|
||||
}
|
||||
|
||||
/** Holds if the range `start`, `end` contains a character, a quantifier, a character set or a group. */
|
||||
predicate item(int start, int end) {
|
||||
this.qualifiedItem(start, end, _, _)
|
||||
or
|
||||
@@ -960,75 +1033,3 @@ abstract class RegExp extends AST::StringlikeLiteral {
|
||||
this.lastPart(start, end)
|
||||
}
|
||||
}
|
||||
|
||||
private class RegExpLiteralRegExp extends RegExp, AST::RegExpLiteral {
|
||||
override predicate isDotAll() { this.hasMultilineFlag() }
|
||||
|
||||
override predicate isIgnoreCase() { this.hasCaseInsensitiveFlag() }
|
||||
|
||||
override string getFlags() { result = this.getFlagString() }
|
||||
}
|
||||
|
||||
private class ParsedStringRegExp extends RegExp {
|
||||
private DataFlow::Node parse;
|
||||
|
||||
ParsedStringRegExp() { this = regExpSource(parse).asExpr().getExpr() }
|
||||
|
||||
DataFlow::Node getAParse() { result = parse }
|
||||
|
||||
override predicate isDotAll() { none() }
|
||||
|
||||
override predicate isIgnoreCase() { none() }
|
||||
|
||||
override string getFlags() { none() }
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `source` may be interpreted as a regular expression.
|
||||
*/
|
||||
private predicate isInterpretedAsRegExp(DataFlow::Node source) {
|
||||
// The first argument to an invocation of `Regexp.new` or `Regexp.compile`.
|
||||
source = API::getTopLevelMember("Regexp").getAMethodCall(["compile", "new"]).getArgument(0)
|
||||
or
|
||||
// The argument of a call that coerces the argument to a regular expression.
|
||||
exists(DataFlow::CallNode mce |
|
||||
mce.getMethodName() = ["match", "match?"] and
|
||||
source = mce.getArgument(0) and
|
||||
// exclude https://ruby-doc.org/core-2.4.0/Regexp.html#method-i-match
|
||||
not mce.getReceiver().asExpr().getExpr() instanceof AST::RegExpLiteral
|
||||
)
|
||||
}
|
||||
|
||||
private class RegExpConfiguration extends Configuration {
|
||||
RegExpConfiguration() { this = "RegExpConfiguration" }
|
||||
|
||||
override predicate isSource(DataFlow::Node source) {
|
||||
source.asExpr() =
|
||||
any(ExprCfgNode e |
|
||||
e.getConstantValue().isString(_) and
|
||||
not e instanceof ExprNodes::VariableReadAccessCfgNode and
|
||||
not e instanceof ExprNodes::ConstantReadAccessCfgNode
|
||||
)
|
||||
}
|
||||
|
||||
override predicate isSink(DataFlow::Node sink) { isInterpretedAsRegExp(sink) }
|
||||
|
||||
override predicate isSanitizer(DataFlow::Node node) {
|
||||
// stop flow if `node` is receiver of
|
||||
// https://ruby-doc.org/core-2.4.0/String.html#method-i-match
|
||||
exists(DataFlow::CallNode mce |
|
||||
mce.getMethodName() = ["match", "match?"] and
|
||||
node = mce.getReceiver() and
|
||||
mce.getArgument(0).asExpr().getExpr() instanceof AST::RegExpLiteral
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a node whose value may flow (inter-procedurally) to `re`, where it is interpreted
|
||||
* as a part of a regular expression.
|
||||
*/
|
||||
cached
|
||||
DataFlow::Node regExpSource(DataFlow::Node re) {
|
||||
exists(RegExpConfiguration c | c.hasFlow(result, re))
|
||||
}
|
||||
1036
ruby/ql/lib/codeql/ruby/regexp/RegExpTreeView.qll
Normal file
1036
ruby/ql/lib/codeql/ruby/regexp/RegExpTreeView.qll
Normal file
File diff suppressed because it is too large
Load Diff
@@ -8,8 +8,7 @@ private import codeql.ruby.AST as AST
|
||||
private import codeql.ruby.CFG
|
||||
private import codeql.ruby.DataFlow
|
||||
private import codeql.ruby.dataflow.RemoteFlowSources
|
||||
private import codeql.ruby.security.performance.ParseRegExp as RegExp
|
||||
private import codeql.ruby.security.performance.RegExpTreeView
|
||||
private import codeql.ruby.Regexp
|
||||
private import codeql.ruby.security.performance.SuperlinearBackTracking
|
||||
|
||||
module PolynomialReDoS {
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
private import codeql.ruby.ast.Literal as AST
|
||||
private import ParseRegExp
|
||||
private import codeql.NumberUtils
|
||||
/**
|
||||
* This module should provide a class hierarchy corresponding to a parse tree of regular expressions.
|
||||
*/
|
||||
|
||||
import codeql.ruby.regexp.RegExpTreeView
|
||||
import codeql.Locations
|
||||
private import codeql.ruby.DataFlow
|
||||
private import codeql.ruby.ast.Literal as AST
|
||||
|
||||
/**
|
||||
* Holds if `term` is an escape class representing e.g. `\d`.
|
||||
@@ -59,776 +61,3 @@ module RegExpFlags {
|
||||
root.getLiteral().isDotAll()
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Provides utility predicates related to regular expressions.
|
||||
*/
|
||||
module RegExpPatterns {
|
||||
/**
|
||||
* Gets a pattern that matches common top-level domain names in lower case.
|
||||
*/
|
||||
string getACommonTld() {
|
||||
// according to ranking by http://google.com/search?q=site:.<<TLD>>
|
||||
result = "(?:com|org|edu|gov|uk|net|io)(?![a-z0-9])"
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* An element containing a regular expression term, that is, either
|
||||
* a string literal (parsed as a regular expression)
|
||||
* or another regular expression term.
|
||||
*/
|
||||
class RegExpParent extends TRegExpParent {
|
||||
string toString() { result = "RegExpParent" }
|
||||
|
||||
RegExpTerm getChild(int i) { none() }
|
||||
|
||||
final RegExpTerm getAChild() { result = this.getChild(_) }
|
||||
|
||||
int getNumChild() { result = count(this.getAChild()) }
|
||||
|
||||
/**
|
||||
* Gets the name of a primary CodeQL class to which this regular
|
||||
* expression term belongs.
|
||||
*/
|
||||
string getAPrimaryQlClass() { result = "RegExpParent" }
|
||||
|
||||
/**
|
||||
* Gets a comma-separated list of the names of the primary CodeQL classes to
|
||||
* which this regular expression term belongs.
|
||||
*/
|
||||
final string getPrimaryQlClasses() { result = concat(this.getAPrimaryQlClass(), ",") }
|
||||
}
|
||||
|
||||
/**
 * A whole regular expression, viewed as the parent of its root term.
 */
class RegExpLiteral extends TRegExpLiteral, RegExpParent {
  RegExp re;

  RegExpLiteral() { this = TRegExpLiteral(re) }

  /** Gets the single child of this literal: the root term of `re`, at index 0. */
  override RegExpTerm getChild(int i) {
    result.isRootTerm() and
    re = result.getRegExp() and
    i = 0
  }

  /** Holds if the underlying regular expression reports the dot-all flag. */
  predicate isDotAll() { re.isDotAll() }

  /** Holds if the underlying regular expression reports the ignore-case flag. */
  predicate isIgnoreCase() { re.isIgnoreCase() }

  /** Gets the flags of the underlying regular expression. */
  string getFlags() { re.getFlags() = result }

  override string getAPrimaryQlClass() { "RegExpLiteral" = result }
}
|
||||
|
||||
/**
 * A term in the parse tree of a regular expression.
 *
 * A term is identified by the regular expression `re` it occurs in, together
 * with the offsets `start` (inclusive) and `end` (exclusive) of its text
 * within `re.getText()`.
 */
class RegExpTerm extends RegExpParent {
  RegExp re;
  int start;
  int end;

  RegExpTerm() {
    this = TRegExpAlt(re, start, end)
    or
    this = TRegExpBackRef(re, start, end)
    or
    this = TRegExpCharacterClass(re, start, end)
    or
    this = TRegExpCharacterRange(re, start, end)
    or
    this = TRegExpNormalChar(re, start, end)
    or
    this = TRegExpGroup(re, start, end)
    or
    this = TRegExpQuantifier(re, start, end)
    or
    this = TRegExpSequence(re, start, end) and
    exists(seqChild(re, start, end, 1)) // if a sequence does not have more than one element, it should be treated as that element instead.
    or
    this = TRegExpSpecialChar(re, start, end)
    or
    this = TRegExpNamedCharacterProperty(re, start, end)
  }

  /** Gets the root term of the regular expression that this term is part of. */
  RegExpTerm getRootTerm() {
    this.isRootTerm() and result = this
    or
    result = this.getParent().(RegExpTerm).getRootTerm()
  }

  /** Holds for every term. */
  predicate isUsedAsRegExp() { any() }

  /** Holds if this term spans the entire text of its regular expression. */
  predicate isRootTerm() { start = 0 and end = re.getText().length() }

  /** Gets the `i`th child of this term, as defined by the kind of term this is. */
  override RegExpTerm getChild(int i) {
    result = this.(RegExpAlt).getChild(i)
    or
    result = this.(RegExpBackRef).getChild(i)
    or
    result = this.(RegExpCharacterClass).getChild(i)
    or
    result = this.(RegExpCharacterRange).getChild(i)
    or
    result = this.(RegExpNormalChar).getChild(i)
    or
    result = this.(RegExpGroup).getChild(i)
    or
    result = this.(RegExpQuantifier).getChild(i)
    or
    result = this.(RegExpSequence).getChild(i)
    or
    result = this.(RegExpSpecialChar).getChild(i)
    or
    result = this.(RegExpNamedCharacterProperty).getChild(i)
  }

  /** Gets the element that has this term as a child. */
  RegExpParent getParent() { result.getAChild() = this }

  /** Gets the regular expression that this term is part of. */
  RegExp getRegExp() { result = re }

  /** Gets the offset within the regular expression text at which this term starts. */
  int getStart() { result = start }

  /** Gets the offset within the regular expression text just past the end of this term. */
  int getEnd() { result = end }

  /** Gets the source text of this term. */
  override string toString() { result = re.getText().substring(start, end) }

  override string getAPrimaryQlClass() { result = "RegExpTerm" }

  /** Gets the location of the regular expression that this term is part of. */
  Location getLocation() { result = re.getLocation() }

  /** Holds if the `i`th component of `re` is at the specified location. */
  pragma[noinline]
  private predicate componentHasLocationInfo(
    int i, string filepath, int startline, int startcolumn, int endline, int endcolumn
  ) {
    re.getComponent(i)
        .getLocation()
        .hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
  }

  /**
   * Holds if this term is at the specified location.
   *
   * The file, start line and column base are taken from the first component
   * of `re`, and the end line from its last component. Both columns are
   * computed by offsetting the start column of the first component by the
   * offsets of this term.
   */
  predicate hasLocationInfo(
    string filepath, int startline, int startcolumn, int endline, int endcolumn
  ) {
    exists(int re_start |
      this.componentHasLocationInfo(0, filepath, startline, re_start, _, _) and
      // only the end line of the last component is needed; its columns are ignored
      this.componentHasLocationInfo(re.getNumberOfComponents() - 1, filepath, _, _, endline, _) and
      startcolumn = re_start + start and
      endcolumn = re_start + end - 1
    )
  }

  /** Gets the file containing the regular expression that this term is part of. */
  File getFile() { result = this.getLocation().getFile() }

  /** Gets the source text of this term; this is the same as `toString()`. */
  string getRawValue() { result = this.toString() }

  /** Gets the literal element wrapping the regular expression that this term is part of. */
  RegExpLiteral getLiteral() { result = TRegExpLiteral(re) }

  /** Gets the regular expression term that is matched (textually) before this one, if any. */
  RegExpTerm getPredecessor() {
    exists(RegExpTerm parent | parent = this.getParent() |
      result = parent.(RegExpSequence).previousElement(this)
      or
      // no earlier sibling: continue with the parent, but do not leave a sub-pattern
      not exists(parent.(RegExpSequence).previousElement(this)) and
      not parent instanceof RegExpSubPattern and
      result = parent.getPredecessor()
    )
  }

  /** Gets the regular expression term that is matched (textually) after this one, if any. */
  RegExpTerm getSuccessor() {
    exists(RegExpTerm parent | parent = this.getParent() |
      result = parent.(RegExpSequence).nextElement(this)
      or
      // no later sibling: continue with the parent, but do not leave a sub-pattern
      not exists(parent.(RegExpSequence).nextElement(this)) and
      not parent instanceof RegExpSubPattern and
      result = parent.getSuccessor()
    )
  }
}
|
||||
|
||||
/**
 * The algebraic type underlying `RegExpParent`: one branch for a whole
 * regular expression and one branch per kind of term. Each term branch is
 * keyed by its regular expression and the `start`/`end` offsets for which the
 * corresponding parser predicate holds.
 */
newtype TRegExpParent =
  /** A whole regular expression. */
  TRegExpLiteral(RegExp re) or
  /** A quantified term. */
  TRegExpQuantifier(RegExp re, int start, int end) { re.qualifiedItem(start, end, _, _) } or
  /** A sequence of terms. */
  TRegExpSequence(RegExp re, int start, int end) { re.sequence(start, end) } or
  /** An alternation. */
  TRegExpAlt(RegExp re, int start, int end) { re.alternation(start, end) } or
  /** A character class. */
  TRegExpCharacterClass(RegExp re, int start, int end) { re.charSet(start, end) } or
  /** A character range inside a character class. */
  TRegExpCharacterRange(RegExp re, int start, int end) { re.charRange(_, start, _, _, end) } or
  /** A group. */
  TRegExpGroup(RegExp re, int start, int end) { re.group(start, end) } or
  /** A special character. */
  TRegExpSpecialChar(RegExp re, int start, int end) { re.specialCharacter(start, end, _) } or
  /** A normal character sequence, or an escaped character that is not special. */
  TRegExpNormalChar(RegExp re, int start, int end) {
    re.normalCharacterSequence(start, end)
    or
    not re.specialCharacter(start, end, _) and
    re.escapedCharacter(start, end)
  } or
  /** A back reference. */
  TRegExpBackRef(RegExp re, int start, int end) { re.backreference(start, end) } or
  /** A named character property. */
  TRegExpNamedCharacterProperty(RegExp re, int start, int end) {
    re.namedCharacterProperty(start, end, _)
  }
|
||||
|
||||
/**
 * A quantified regular expression term.
 *
 * The quantified sub-term occupies the offsets from `start` to `part_end`;
 * the quantifier text itself occupies the offsets from `part_end` to `end`.
 */
class RegExpQuantifier extends RegExpTerm, TRegExpQuantifier {
  int part_end;
  boolean may_repeat_forever;

  RegExpQuantifier() {
    re.qualifiedPart(start, part_end, end, _, may_repeat_forever) and
    this = TRegExpQuantifier(re, start, end)
  }

  /** Gets the quantified sub-term, which is the only child (at index 0). */
  override RegExpTerm getChild(int i) {
    i = 0 and
    re = result.getRegExp() and
    start = result.getStart() and
    part_end = result.getEnd()
  }

  /** Holds if the parser reports that this quantifier may repeat without bound. */
  predicate mayRepeatForever() { may_repeat_forever = true }

  /** Gets the text of the quantifier, that is, the part after the quantified sub-term. */
  string getQualifier() { re.getText().substring(part_end, end) = result }

  override string getAPrimaryQlClass() { "RegExpQuantifier" = result }
}
|
||||
|
||||
/**
 * A quantifier for which `mayRepeatForever()` holds.
 */
class InfiniteRepetitionQuantifier extends RegExpQuantifier {
  InfiniteRepetitionQuantifier() { this.mayRepeatForever() }

  override string getAPrimaryQlClass() { "InfiniteRepetitionQuantifier" = result }
}
|
||||
|
||||
/**
 * An unbounded quantifier whose text starts with `*`.
 */
class RegExpStar extends InfiniteRepetitionQuantifier {
  RegExpStar() { "*" = this.getQualifier().charAt(0) }

  override string getAPrimaryQlClass() { "RegExpStar" = result }
}
|
||||
|
||||
/**
 * An unbounded quantifier whose text starts with `+`.
 */
class RegExpPlus extends InfiniteRepetitionQuantifier {
  RegExpPlus() { "+" = this.getQualifier().charAt(0) }

  override string getAPrimaryQlClass() { "RegExpPlus" = result }
}
|
||||
|
||||
/**
 * A quantifier whose text starts with `?`.
 */
class RegExpOpt extends RegExpQuantifier {
  RegExpOpt() { "?" = this.getQualifier().charAt(0) }

  override string getAPrimaryQlClass() { "RegExpOpt" = result }
}
|
||||
|
||||
/**
 * A quantifier with explicit repetition bounds, as recognized by
 * `RegExp::multiples`.
 */
class RegExpRange extends RegExpQuantifier {
  string upper;
  string lower;

  RegExpRange() { re.multiples(part_end, end, lower, upper) }

  /** Gets the text of the upper bound, as reported by the parser. */
  string getUpper() { upper = result }

  /** Gets the text of the lower bound, as reported by the parser. */
  string getLower() { lower = result }

  /**
   * Gets the upper bound of the range, if any.
   *
   * When there is no upper bound, any number of repetitions is allowed.
   * For a term of the form `r{lo}`, the lower and the upper bound are
   * both `lo`.
   */
  int getUpperBound() { this.getUpper().toInt() = result }

  /** Gets the lower bound of the range. */
  int getLowerBound() { this.getLower().toInt() = result }

  override string getAPrimaryQlClass() { "RegExpRange" = result }
}
|
||||
|
||||
/**
 * A sequence of two or more regular expression terms matched one after
 * another.
 */
class RegExpSequence extends RegExpTerm, TRegExpSequence {
  RegExpSequence() {
    // a "sequence" with fewer than two elements is represented by its only element instead
    exists(seqChild(re, start, end, 1)) and
    this = TRegExpSequence(re, start, end)
  }

  /** Gets the `i`th element of this sequence. */
  override RegExpTerm getChild(int i) { seqChild(re, start, end, i) = result }

  /** Gets the element preceding `element` in this sequence. */
  RegExpTerm previousElement(RegExpTerm element) { this.nextElement(result) = element }

  /** Gets the element following `element` in this sequence. */
  RegExpTerm nextElement(RegExpTerm element) {
    exists(int pos | this.getChild(pos) = element | result = this.getChild(pos + 1))
  }

  override string getAPrimaryQlClass() { "RegExpSequence" = result }
}
|
||||
|
||||
/**
 * Gets the end offset of the `i`th element of the sequence that spans
 * `start` to `end` in `re`.
 */
pragma[nomagic]
private int seqChildEnd(RegExp re, int start, int end, int i) {
  exists(RegExpTerm child | child = seqChild(re, start, end, i) | result = child.getEnd())
}
|
||||
|
||||
/**
 * Gets the `i`th element of the sequence that spans `start` to `end` in `re`.
 *
 * This lives outside `RegExpSequence` so that the characteristic predicates
 * of the term classes can refer to it.
 */
private RegExpTerm seqChild(RegExp re, int start, int end, int i) {
  re.sequence(start, end) and
  (
    // the first element is the item that begins where the sequence begins
    i = 0 and
    result.getRegExp() = re and
    result.getStart() = start and
    exists(int firstEnd |
      re.item(start, firstEnd) and
      result.getEnd() = firstEnd
    )
    or
    // every later element is the item that begins where the previous element ends
    i > 0 and
    result.getRegExp() = re and
    exists(int prevEnd | prevEnd = seqChildEnd(re, start, end, i - 1) |
      result.getStart() = prevEnd and
      re.item(prevEnd, result.getEnd())
    )
  )
}
|
||||
|
||||
/**
 * An alternation term, that is, a list of options separated by `|`.
 */
class RegExpAlt extends RegExpTerm, TRegExpAlt {
  RegExpAlt() { this = TRegExpAlt(re, start, end) }

  /** Gets the `i`th option of this alternation. */
  override RegExpTerm getChild(int i) {
    result.getRegExp() = re and
    exists(int optionStart |
      // the first option begins where the alternation itself begins
      i = 0 and optionStart = start
      or
      // later options begin one past the end of the previous option, skipping the `|`
      i > 0 and optionStart = this.getChild(i - 1).getEnd() + 1
    |
      result.getStart() = optionStart and
      re.alternationOption(start, end, optionStart, result.getEnd())
    )
  }

  override string getAPrimaryQlClass() { "RegExpAlt" = result }
}
|
||||
|
||||
/** An alias for `RegExpEscape`. */
class RegExpCharEscape = RegExpEscape;
|
||||
|
||||
/**
 * An escaped character in a regular expression that is not a special
 * character, for example `\n` or `\u0061`.
 */
class RegExpEscape extends RegExpNormalChar {
  RegExpEscape() { re.escapedCharacter(start, end) }

  /**
   * Gets the value of this escape.
   *
   * For `\n`, `\r` and `\t` this is the newline, carriage-return or tab
   * character; for a unicode escape it is the denoted character; for any
   * other escape it is the escape text without its first character (for
   * example, `w` for `\w`).
   * TODO: Handle named escapes.
   */
  override string getValue() {
    this.isIdentityEscape() and result = this.getUnescaped()
    or
    exists(string name | name = this.getUnescaped() |
      name = "n" and result = "\n"
      or
      name = "r" and result = "\r"
      or
      name = "t" and result = "\t"
    )
    or
    // `getUnicode` only has a result for unicode escapes
    result = this.getUnicode()
  }

  /**
   * Holds if this escape is neither `\n`, `\r`, `\t` nor a unicode escape, so
   * that its value is simply the text following the first character.
   */
  predicate isIdentityEscape() {
    not (
      this.getUnescaped() in ["n", "r", "t"]
      or
      this.isUnicode()
    )
  }

  /**
   * Gets the source text of this escape, e.g. "\w".
   */
  private string getText() { re.getText().substring(start, end) = result }

  /**
   * Holds if the text of this escape begins with `\u` or `\U`.
   */
  private predicate isUnicode() { this.getText().prefix(2) in ["\\u", "\\U"] }

  /**
   * Gets the character denoted by this unicode escape.
   * For example, the result for `\u0061` is "a".
   */
  private string getUnicode() {
    this.isUnicode() and
    exists(string hexDigits | hexDigits = this.getText().suffix(2) |
      result = parseHexInt(hexDigits).toUnicode()
    )
  }

  /** Gets the text of this escape without its first character. */
  string getUnescaped() { this.getText().suffix(1) = result }

  override string getAPrimaryQlClass() { "RegExpEscape" = result }
}
|
||||
|
||||
/**
 * A word boundary assertion, written `\b` in a regular expression.
 */
class RegExpWordBoundary extends RegExpSpecialChar {
  RegExpWordBoundary() { "\\b" = this.getChar() }
}
|
||||
|
||||
/**
 * A character class escape in a regular expression.
 * That is, an escaped character that denotes multiple characters.
 *
 * The name of the character class (for example, `w` for `\w`) is available
 * through the inherited `getValue()`.
 *
 * Examples:
 *
 * ```
 * \w
 * \S
 * ```
 */
class RegExpCharacterClassEscape extends RegExpEscape {
  RegExpCharacterClassEscape() { this.getValue() in ["d", "D", "s", "S", "w", "W", "h", "H"] }

  /** Holds for no `i`: a character class escape has no child terms. */
  override RegExpTerm getChild(int i) { none() }

  override string getAPrimaryQlClass() { result = "RegExpCharacterClassEscape" }
}
|
||||
|
||||
/**
 * A character class in a regular expression.
 *
 * Examples:
 *
 * ```rb
 * /[a-fA-F0-9]/
 * /[^abc]/
 * ```
 */
class RegExpCharacterClass extends RegExpTerm, TRegExpCharacterClass {
  RegExpCharacterClass() { this = TRegExpCharacterClass(re, start, end) }

  /** Holds if the character right after the opening bracket is `^`. */
  predicate isInverted() { "^" = re.getChar(start + 1) }

  /**
   * Holds if this class is recognized as matching every character: either it
   * is inverted and has no children, or it is not inverted and contains two
   * character class escapes whose names differ only in case.
   */
  predicate isUniversalClass() {
    // [^]
    not exists(this.getAChild()) and this.isInverted()
    or
    // [\w\W] and similar
    not this.isInverted() and
    exists(string first, string second |
      first = this.getAChild().(RegExpCharacterClassEscape).getValue() and
      second = this.getAChild().(RegExpCharacterClassEscape).getValue()
    |
      first.toLowerCase() = second.toLowerCase() and first != second
    )
  }

  /** Gets the `i`th element of this character class. */
  override RegExpTerm getChild(int i) {
    result.getRegExp() = re and
    exists(int childStart |
      // the first element begins at the position reported by `charSetStart`
      i = 0 and re.charSetStart(start, childStart)
      or
      // later elements begin where the previous element ends
      i > 0 and childStart = this.getChild(i - 1).getEnd()
    |
      result.getStart() = childStart and
      re.charSetChild(start, childStart, result.getEnd())
    )
  }

  override string getAPrimaryQlClass() { "RegExpCharacterClass" = result }
}
|
||||
|
||||
/**
 * A character range inside a character class.
 *
 * The lower end of the range occupies the offsets from `start` to
 * `lower_end`, the upper end the offsets from `upper_start` to `end`.
 */
class RegExpCharacterRange extends RegExpTerm, TRegExpCharacterRange {
  int lower_end;
  int upper_start;

  RegExpCharacterRange() {
    re.charRange(_, start, lower_end, upper_start, end) and
    this = TRegExpCharacterRange(re, start, end)
  }

  /** Holds if `lo` is the source text of the lower end and `hi` that of the upper end. */
  predicate isRange(string lo, string hi) {
    exists(string text | text = re.getText() |
      lo = text.substring(start, lower_end) and
      hi = text.substring(upper_start, end)
    )
  }

  /** Gets the lower end of the range (index 0) or its upper end (index 1). */
  override RegExpTerm getChild(int i) {
    result.getRegExp() = re and
    (
      i = 0 and
      result.getStart() = start and
      result.getEnd() = lower_end
      or
      i = 1 and
      result.getStart() = upper_start and
      result.getEnd() = end
    )
  }

  override string getAPrimaryQlClass() { "RegExpCharacterRange" = result }
}
|
||||
|
||||
/**
 * A normal character sequence, or an escaped character that is not a
 * special character.
 */
class RegExpNormalChar extends RegExpTerm, TRegExpNormalChar {
  RegExpNormalChar() { this = TRegExpNormalChar(re, start, end) }

  /** Holds for every term of this class. */
  predicate isCharacter() { any() }

  /** Gets the source text of this term. */
  string getValue() { re.getText().substring(start, end) = result }

  /** Holds for no `i`: a term of this class has no child terms. */
  override RegExpTerm getChild(int i) { none() }

  override string getAPrimaryQlClass() { "RegExpNormalChar" = result }
}
|
||||
|
||||
/**
 * A term with a constant string value: either a normal character term that is
 * not a character class escape and does not lie inside a quantifier's text,
 * or a special character that occurs inside a character class.
 */
class RegExpConstant extends RegExpTerm {
  string value;

  RegExpConstant() {
    this = TRegExpNormalChar(re, start, end) and
    not this instanceof RegExpCharacterClassEscape and
    // exclude chars in qualifiers
    // TODO: push this into regex library
    not exists(int quantStart, int quantEnd | re.qualifiedPart(_, quantStart, quantEnd, _, _) |
      start >= quantStart and quantEnd >= end
    ) and
    this.(RegExpNormalChar).getValue() = value
    or
    this = TRegExpSpecialChar(re, start, end) and
    re.inCharSet(start) and
    this.(RegExpSpecialChar).getChar() = value
  }

  /** Holds for every term of this class. */
  predicate isCharacter() { any() }

  /** Gets the string value of this constant. */
  string getValue() { value = result }

  /** Holds for no `i`: a constant has no child terms. */
  override RegExpTerm getChild(int i) { none() }

  override string getAPrimaryQlClass() { "RegExpConstant" = result }
}
|
||||
|
||||
/**
 * A group in a regular expression.
 */
class RegExpGroup extends RegExpTerm, TRegExpGroup {
  RegExpGroup() { this = TRegExpGroup(re, start, end) }

  /**
   * Gets the index of this capture group within the enclosing regular
   * expression literal.
   *
   * For example, in `/((a?).)(?:b)/` the group `((a?).)` has index 1 and the
   * group `(a?)` nested inside it has index 2. The group `(?:b)` has no
   * index, because it is not a capture group.
   */
  int getNumber() { re.getGroupNumber(start, end) = result }

  /** Holds if this group has a number, that is, if it is a capture group. */
  predicate isCapture() { exists(this.getNumber()) }

  /** Holds if this group has a name, that is, if it is a named capture group. */
  predicate isNamed() { exists(this.getName()) }

  /** Gets the name of this capture group, if any. */
  string getName() { re.getGroupName(start, end) = result }

  /** Holds for every group. */
  predicate isCharacter() { any() }

  /** Gets the source text of this group. */
  string getValue() { re.getText().substring(start, end) = result }

  /** Gets the contents of this group, which is its only child (at index 0). */
  override RegExpTerm getChild(int i) {
    i = 0 and
    re = result.getRegExp() and
    re.groupContents(start, end, result.getStart(), result.getEnd())
  }

  override string getAPrimaryQlClass() { "RegExpGroup" = result }
}
|
||||
|
||||
/**
 * A special character in a regular expression, as recognized by
 * `RegExp::specialCharacter`.
 */
class RegExpSpecialChar extends RegExpTerm, TRegExpSpecialChar {
  string char;

  RegExpSpecialChar() {
    re.specialCharacter(start, end, char) and
    this = TRegExpSpecialChar(re, start, end)
  }

  /** Holds for every special character. */
  predicate isCharacter() { any() }

  /** Gets the text that the parser reports for this special character. */
  string getChar() { char = result }

  /** Holds for no `i`: a special character has no child terms. */
  override RegExpTerm getChild(int i) { none() }

  override string getAPrimaryQlClass() { "RegExpSpecialChar" = result }
}
|
||||
|
||||
/**
 * The special character `.`.
 */
class RegExpDot extends RegExpSpecialChar {
  RegExpDot() { "." = this.getChar() }

  override string getAPrimaryQlClass() { "RegExpDot" = result }
}
|
||||
|
||||
/**
 * One of the special characters `$`, `\Z` or `\z`.
 */
class RegExpDollar extends RegExpSpecialChar {
  RegExpDollar() { this.getChar() in ["$", "\\Z", "\\z"] }

  override string getAPrimaryQlClass() { "RegExpDollar" = result }
}
|
||||
|
||||
/**
 * One of the special characters `^` or `\A`.
 */
class RegExpCaret extends RegExpSpecialChar {
  RegExpCaret() { this.getChar() in ["^", "\\A"] }

  override string getAPrimaryQlClass() { "RegExpCaret" = result }
}
|
||||
|
||||
/**
 * A group that the parser reports as a zero-width match.
 */
class RegExpZeroWidthMatch extends RegExpGroup {
  RegExpZeroWidthMatch() { re.zeroWidthMatch(start, end) }

  /** Holds for every zero-width match. */
  override predicate isCharacter() { any() }

  /** Holds for no `i`: a zero-width match exposes no child terms. */
  override RegExpTerm getChild(int i) { none() }

  override string getAPrimaryQlClass() { "RegExpZeroWidthMatch" = result }
}
|
||||
|
||||
/**
 * A zero-width lookahead or lookbehind assertion.
 *
 * Examples:
 *
 * ```
 * (?=\w)
 * (?!\n)
 * (?<=\.)
 * (?<!\\)
 * ```
 */
class RegExpSubPattern extends RegExpZeroWidthMatch {
  RegExpSubPattern() { not re.emptyGroup(start, end) }

  /** Gets the term inside this assertion, that is, the contents of the group. */
  RegExpTerm getOperand() {
    re = result.getRegExp() and
    re.groupContents(start, end, result.getStart(), result.getEnd())
  }
}
|
||||
|
||||
/** A lookahead assertion; the concrete subclasses cover the positive and the negative form. */
abstract class RegExpLookahead extends RegExpSubPattern { }
|
||||
|
||||
/**
 * A lookahead assertion that the parser reports as a positive lookahead group.
 */
class RegExpPositiveLookahead extends RegExpLookahead {
  RegExpPositiveLookahead() { re.positiveLookaheadAssertionGroup(start, end) }

  override string getAPrimaryQlClass() { "RegExpPositiveLookahead" = result }
}
|
||||
|
||||
/**
 * A lookahead assertion that the parser reports as a negative lookahead group.
 */
class RegExpNegativeLookahead extends RegExpLookahead {
  RegExpNegativeLookahead() { re.negativeLookaheadAssertionGroup(start, end) }

  override string getAPrimaryQlClass() { "RegExpNegativeLookahead" = result }
}
|
||||
|
||||
/** A lookbehind assertion; the concrete subclasses cover the positive and the negative form. */
abstract class RegExpLookbehind extends RegExpSubPattern { }
|
||||
|
||||
/**
 * A lookbehind assertion that the parser reports as a positive lookbehind group.
 */
class RegExpPositiveLookbehind extends RegExpLookbehind {
  RegExpPositiveLookbehind() { re.positiveLookbehindAssertionGroup(start, end) }

  override string getAPrimaryQlClass() { "RegExpPositiveLookbehind" = result }
}
|
||||
|
||||
/**
 * A lookbehind assertion that the parser reports as a negative lookbehind group.
 */
class RegExpNegativeLookbehind extends RegExpLookbehind {
  RegExpNegativeLookbehind() { re.negativeLookbehindAssertionGroup(start, end) }

  override string getAPrimaryQlClass() { "RegExpNegativeLookbehind" = result }
}
|
||||
|
||||
/**
 * A back reference to a capture group, by number or by name.
 */
class RegExpBackRef extends RegExpTerm, TRegExpBackRef {
  RegExpBackRef() { this = TRegExpBackRef(re, start, end) }

  /**
   * Gets the number of the capture group that this back reference refers to, if any.
   */
  int getNumber() { re.getBackRefNumber(start, end) = result }

  /**
   * Gets the name of the capture group that this back reference refers to, if any.
   */
  string getName() { re.getBackRefName(start, end) = result }

  /**
   * Gets the capture group that this back reference refers to: a group in
   * the same literal with the same number or the same name.
   */
  RegExpGroup getGroup() {
    exists(RegExpLiteral lit | lit = this.getLiteral() and lit = result.getLiteral()) and
    (
      this.getNumber() = result.getNumber()
      or
      this.getName() = result.getName()
    )
  }

  /** Holds for no `i`: a back reference has no child terms. */
  override RegExpTerm getChild(int i) { none() }

  override string getAPrimaryQlClass() { "RegExpBackRef" = result }
}
|
||||
|
||||
/**
 * A named character property, such as the POSIX bracket expression
 * `[[:digit:]]`.
 */
class RegExpNamedCharacterProperty extends RegExpTerm, TRegExpNamedCharacterProperty {
  RegExpNamedCharacterProperty() { this = TRegExpNamedCharacterProperty(re, start, end) }

  /** Holds for no `i`: a named character property has no child terms. */
  override RegExpTerm getChild(int i) { none() }

  override string getAPrimaryQlClass() { "RegExpNamedCharacterProperty" = result }

  /**
   * Gets the name of the property. For example, the result for `\p{Space}`
   * is `"Space"`.
   */
  string getName() { re.getCharacterPropertyName(start, end) = result }

  /**
   * Holds if the property is inverted, as in `\p{^Digit}`, which matches
   * non-digits.
   */
  predicate isInverted() { re.namedCharacterPropertyIsInverted(start, end) }
}
|
||||
|
||||
/** Gets the root term of the parse tree of the regular expression literal `re`. */
RegExpTerm getParsedRegExp(AST::RegExpLiteral re) {
  result.isRootTerm() and
  re = result.getRegExp()
}
|
||||
|
||||
/**
 * A data-flow node whose value may reach a position where it is
 * interpreted as (part of) a regular expression.
 */
abstract class RegExpPatternSource extends DataFlow::Node {
  /**
   * Gets a node at which the pattern of this node is parsed as (part of) a
   * regular expression.
   */
  abstract DataFlow::Node getAParse();

  /**
   * Gets the root term of the regular expression that is parsed from this
   * pattern.
   */
  abstract RegExpTerm getRegExpTerm();
}
|
||||
|
||||
/**
 * A regular expression literal, which acts as the pattern source for itself.
 */
private class RegExpLiteralPatternSource extends RegExpPatternSource {
  private AST::RegExpLiteral astNode;

  RegExpLiteralPatternSource() { this.asExpr().getExpr() = astNode }

  /** Gets this node itself: the literal is parsed where it occurs. */
  override DataFlow::Node getAParse() { this = result }

  /** Gets the parsed form of the underlying literal. */
  override RegExpTerm getRegExpTerm() { astNode.getParsed() = result }
}
|
||||
|
||||
/**
 * A data-flow node whose string value may reach a position where it is
 * interpreted as (part of) a regular expression.
 */
private class StringRegExpPatternSource extends RegExpPatternSource {
  private DataFlow::Node parse;

  StringRegExpPatternSource() { regExpSource(parse) = this }

  /** Gets the node at which this string is parsed as a regular expression. */
  override DataFlow::Node getAParse() { parse = result }

  /** Gets a term whose regular expression is the expression underlying this node. */
  override RegExpTerm getRegExpTerm() { this.asExpr().getExpr() = result.getRegExp() }
}
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
import codeql.ruby.security.performance.RegExpTreeView
|
||||
import codeql.ruby.Regexp
|
||||
import codeql.ruby.DataFlow
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
|
||||
import codeql.ruby.security.performance.ExponentialBackTracking
|
||||
import codeql.ruby.security.performance.ReDoSUtil
|
||||
import codeql.ruby.security.performance.RegExpTreeView
|
||||
import codeql.ruby.Regexp
|
||||
|
||||
from RegExpTerm t, string pump, State s, string prefixMsg
|
||||
where hasReDoSResult(t, pump, s, prefixMsg)
|
||||
|
||||
@@ -3,9 +3,9 @@
|
||||
*/
|
||||
|
||||
import codeql.Locations
|
||||
import codeql.ruby.security.performance.RegExpTreeView as RETV
|
||||
import codeql.ruby.Regexp as RE
|
||||
|
||||
query predicate nodes(RETV::RegExpTerm n, string attr, string val) {
|
||||
query predicate nodes(RE::RegExpTerm n, string attr, string val) {
|
||||
attr = "semmle.label" and
|
||||
val = "[" + concat(n.getAPrimaryQlClass(), ", ") + "] " + n.toString()
|
||||
or
|
||||
@@ -13,7 +13,7 @@ query predicate nodes(RETV::RegExpTerm n, string attr, string val) {
|
||||
val =
|
||||
any(int i |
|
||||
n =
|
||||
rank[i](RETV::RegExpTerm t, string fp, int sl, int sc, int el, int ec |
|
||||
rank[i](RE::RegExpTerm t, string fp, int sl, int sc, int el, int ec |
|
||||
t.hasLocationInfo(fp, sl, sc, el, ec)
|
||||
|
|
||||
t order by fp, sl, sc, el, ec, t.toString()
|
||||
@@ -21,7 +21,7 @@ query predicate nodes(RETV::RegExpTerm n, string attr, string val) {
|
||||
).toString()
|
||||
}
|
||||
|
||||
query predicate edges(RETV::RegExpTerm pred, RETV::RegExpTerm succ, string attr, string val) {
|
||||
query predicate edges(RE::RegExpTerm pred, RE::RegExpTerm succ, string attr, string val) {
|
||||
attr in ["semmle.label", "semmle.order"] and
|
||||
val = any(int i | succ = pred.getChild(i)).toString()
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import codeql.ruby.security.performance.RegExpTreeView
|
||||
import codeql.ruby.Regexp
|
||||
|
||||
query predicate groupName(RegExpGroup g, string name) { name = g.getName() }
|
||||
|
||||
|
||||
@@ -33,7 +33,9 @@
|
||||
| tst.rb:137:11:137:17 | (\\w\|G)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'G'. |
|
||||
| tst.rb:143:11:143:18 | (\\d\|\\w)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
|
||||
| tst.rb:146:11:146:17 | (\\d\|5)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '5'. |
|
||||
| tst.rb:155:11:155:20 | (\\f\|[\\f])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'f'. |
|
||||
| tst.rb:149:11:149:20 | (\\s\|[\\f])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\u000c'. |
|
||||
| tst.rb:152:11:152:24 | (\\s\|[\\v]\|\\\\v)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\u000b'. |
|
||||
| tst.rb:155:11:155:20 | (\\f\|[\\f])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\u000c'. |
|
||||
| tst.rb:158:11:158:18 | (\\W\|\\D)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ' '. |
|
||||
| tst.rb:161:11:161:18 | (\\S\|\\w)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
|
||||
| tst.rb:164:11:164:20 | (\\S\|[\\w])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
|
||||
|
||||
Reference in New Issue
Block a user