Ruby: refactor regex libraries

This commit is contained in:
Arthur Baars
2022-03-18 17:49:41 +01:00
parent 496aab78a7
commit 74aea81fe3
15 changed files with 1318 additions and 879 deletions

View File

@@ -552,7 +552,7 @@ class RegExpWordBoundary extends RegExpSpecialChar {
/**
* A character class escape in a regular expression.
* That is, an escaped charachter that denotes multiple characters.
* That is, an escaped character that denotes multiple characters.
*
* Examples:
*

View File

@@ -186,7 +186,7 @@ abstract class RegexString extends Expr {
)
}
/** Hold is a character set starts between `start` and `end`. */
/** Holds if a character set starts between `start` and `end`. */
predicate char_set_start(int start, int end) {
this.char_set_start(start) = true and
(
@@ -314,8 +314,10 @@ abstract class RegexString extends Expr {
result = this.(Bytes).getS()
}
/** Gets the `i`th character of this regex */
string getChar(int i) { result = this.getText().charAt(i) }
/** Gets the `i`th character of this regex, unless it is part of a character escape sequence. */
string nonEscapedCharAt(int i) {
result = this.getText().charAt(i) and
not exists(int x, int y | this.escapedCharacter(x, y) and i in [x .. y - 1])
@@ -327,6 +329,9 @@ abstract class RegexString extends Expr {
private predicate isGroupStart(int i) { this.nonEscapedCharAt(i) = "(" and not this.inCharSet(i) }
/**
* Holds if the `i`th character could not be parsed.
*/
predicate failedToParse(int i) {
exists(this.getChar(i)) and
not exists(int start, int end |
@@ -415,6 +420,9 @@ abstract class RegexString extends Expr {
)
}
/**
* Holds if a simple or escaped character is found between `start` and `end`.
*/
predicate character(int start, int end) {
(
this.simpleCharacter(start, end) and
@@ -426,12 +434,18 @@ abstract class RegexString extends Expr {
not exists(int x, int y | this.backreference(x, y) and x <= start and y >= end)
}
/**
* Holds if a normal character is found between `start` and `end`.
*/
predicate normalCharacter(int start, int end) {
end = start + 1 and
this.character(start, end) and
not this.specialCharacter(start, end, _)
}
/**
* Holds if a special character is found between `start` and `end`.
*/
predicate specialCharacter(int start, int end, string char) {
not this.inCharSet(start) and
this.character(start, end) and
@@ -490,7 +504,7 @@ abstract class RegexString extends Expr {
this.specialCharacter(start, end, _)
}
/** Whether the text in the range start,end is a group */
/** Whether the text in the range `start,end` is a group */
predicate group(int start, int end) {
this.groupContents(start, end, _, _)
or
@@ -609,6 +623,7 @@ abstract class RegexString extends Expr {
this.simple_group_start(start, end)
}
/** Matches the start of a non-capturing group, e.g. `(?:` */
private predicate non_capturing_group_start(int start, int end) {
this.isGroupStart(start) and
this.getChar(start + 1) = "?" and
@@ -616,12 +631,18 @@ abstract class RegexString extends Expr {
end = start + 3
}
/** Matches the start of a simple group, e.g. `(a+)`. */
private predicate simple_group_start(int start, int end) {
this.isGroupStart(start) and
this.getChar(start + 1) != "?" and
end = start + 1
}
/**
* Matches the start of a named group, such as:
* - `(?<name>\w+)`
* - `(?'name'\w+)`
*/
private predicate named_group_start(int start, int end) {
this.isGroupStart(start) and
this.getChar(start + 1) = "?" and
@@ -673,6 +694,7 @@ abstract class RegexString extends Expr {
)
}
/** Matches the start of a positive lookahead assertion, i.e. `(?=`. */
private predicate lookahead_assertion_start(int start, int end) {
this.isGroupStart(start) and
this.getChar(start + 1) = "?" and
@@ -680,6 +702,7 @@ abstract class RegexString extends Expr {
end = start + 3
}
/** Matches the start of a negative lookahead assertion, i.e. `(?!`. */
private predicate negative_lookahead_assertion_start(int start, int end) {
this.isGroupStart(start) and
this.getChar(start + 1) = "?" and
@@ -687,6 +710,7 @@ abstract class RegexString extends Expr {
end = start + 3
}
/** Matches the start of a positive lookbehind assertion, i.e. `(?<=`. */
private predicate lookbehind_assertion_start(int start, int end) {
this.isGroupStart(start) and
this.getChar(start + 1) = "?" and
@@ -695,6 +719,7 @@ abstract class RegexString extends Expr {
end = start + 4
}
/** Matches the start of a negative lookbehind assertion, i.e. `(?<!`. */
private predicate negative_lookbehind_assertion_start(int start, int end) {
this.isGroupStart(start) and
this.getChar(start + 1) = "?" and
@@ -703,6 +728,7 @@ abstract class RegexString extends Expr {
end = start + 4
}
/** Matches the start of a comment group, i.e. `(?#`. */
private predicate comment_group_start(int start, int end) {
this.isGroupStart(start) and
this.getChar(start + 1) = "?" and
@@ -710,6 +736,7 @@ abstract class RegexString extends Expr {
end = start + 3
}
/** Matches the contents of a group. */
predicate groupContents(int start, int end, int in_start, int in_end) {
this.group_start(start, in_start) and
end = in_end + 1 and
@@ -717,12 +744,14 @@ abstract class RegexString extends Expr {
this.isGroupEnd(in_end)
}
/**
* Matches a named backreference between `start` and `end`, binding `name` to the
* referenced group name.
*
* The opener occupies `start` to `start + 4` and the reference is closed by the first
* `)` that follows, which presumably corresponds to the `(?P=foo)` form; confirm
* against `named_backreference_start`.
*/
private predicate named_backreference(int start, int end, string name) {
this.named_backreference_start(start, start + 4) and
end = min(int i | i > start + 4 and this.getChar(i) = ")") + 1 and
// `end` is one past the closing `)`, which therefore sits at `end - 1`.
// `substring` excludes its end index, so the name runs up to `end - 1`;
// using `end - 2` would drop the last character of the name.
name = this.getText().substring(start + 4, end - 1)
}
/** Matches a numbered backreference, e.g. `\1`. */
private predicate numbered_backreference(int start, int end, int value) {
this.escapingChar(start) and
// starting with 0 makes it an octal escape
@@ -747,7 +776,7 @@ abstract class RegexString extends Expr {
)
}
/** Whether the text in the range start,end is a back reference */
/** Whether the text in the range `start,end` is a back reference */
predicate backreference(int start, int end) {
this.numbered_backreference(start, end, _)
or

View File

@@ -1,4 +1,4 @@
import codeql.ruby.security.performance.RegExpTreeView
import codeql.ruby.Regexp
query predicate nonUniqueChild(RegExpParent parent, int i, RegExpTerm child) {
child = parent.getChild(i) and

View File

@@ -0,0 +1,143 @@
/**
* Provides classes for working with regular expressions.
*
* Regular expression literals are represented as an abstract syntax tree of regular expression
* terms.
*/
import regexp.RegExpTreeView // re-export
private import regexp.ParseRegExp
private import codeql.ruby.ast.Literal as AST
private import codeql.ruby.DataFlow
private import codeql.ruby.controlflow.CfgNodes
private import codeql.ruby.ApiGraphs
private import codeql.ruby.dataflow.internal.tainttrackingforlibraries.TaintTrackingImpl
/**
* Provides utility predicates related to regular expressions.
*/
module RegExpPatterns {
/**
* Gets a pattern that matches common top-level domain names in lower case.
*/
string getACommonTld() {
// according to ranking by http://google.com/search?q=site:.<<TLD>>
result = "(?:com|org|edu|gov|uk|net|io)(?![a-z0-9])"
}
}
/**
* A node whose value may flow to a position where it is interpreted
* as a part of a regular expression.
*/
abstract class RegExpPatternSource extends DataFlow::Node {
/**
* Gets a node where the pattern of this node is parsed as a part of
* a regular expression.
*/
abstract DataFlow::Node getAParse();
/**
* Gets the root term of the regular expression parsed from this pattern.
*/
abstract RegExpTerm getRegExpTerm();
}
/**
* A regular expression literal, viewed as the pattern source for itself.
*/
private class RegExpLiteralPatternSource extends RegExpPatternSource {
/** The regular expression literal that is the expression of this node. */
private AST::RegExpLiteral astNode;
RegExpLiteralPatternSource() { astNode = this.asExpr().getExpr() }
/** Gets this node itself: a literal is its own parse position. */
override DataFlow::Node getAParse() { result = this }
/** Gets the parse tree that the underlying literal exposes through `getParsed()`. */
override RegExpTerm getRegExpTerm() { result = astNode.getParsed() }
}
/**
* A node whose string value may flow to a position where it is interpreted
* as a part of a regular expression.
*/
private class StringRegExpPatternSource extends RegExpPatternSource {
/** A node that this source flows to, as computed by `regExpSource`. */
private DataFlow::Node parse;
StringRegExpPatternSource() { this = regExpSource(parse) }
/** Gets the node that this source flows to. */
override DataFlow::Node getAParse() { result = parse }
/** Gets a term whose `getRegExp()` is the expression underlying this node. */
override RegExpTerm getRegExpTerm() { result.getRegExp() = this.asExpr().getExpr() }
}
/**
* A regular expression literal, viewed as a `RegExp`.
* Its flag predicates are answered directly from the literal.
*/
private class RegExpLiteralRegExp extends RegExp, AST::RegExpLiteral {
/** Holds if the literal has the multiline flag. */
override predicate isDotAll() { this.hasMultilineFlag() }
/** Holds if the literal has the case-insensitive flag. */
override predicate isIgnoreCase() { this.hasCaseInsensitiveFlag() }
/** Gets the flag string of the literal. */
override string getFlags() { result = this.getFlagString() }
}
/**
* An expression that is a `regExpSource` of some node, i.e. whose value may flow
* to a position where it is interpreted as a regular expression.
* No flags are reported for it.
*/
private class ParsedStringRegExp extends RegExp {
/** A node that this expression flows to, per `regExpSource`. */
private DataFlow::Node parse;
ParsedStringRegExp() { this = regExpSource(parse).asExpr().getExpr() }
/** Gets a node that this expression flows to. */
DataFlow::Node getAParse() { result = parse }
/** Never holds. */
override predicate isDotAll() { none() }
/** Never holds. */
override predicate isIgnoreCase() { none() }
/** Has no result. */
override string getFlags() { none() }
}
/**
* Holds if `source` may be interpreted as a regular expression.
*/
private predicate isInterpretedAsRegExp(DataFlow::Node source) {
// The first argument to an invocation of `Regexp.new` or `Regexp.compile`.
source = API::getTopLevelMember("Regexp").getAMethodCall(["compile", "new"]).getArgument(0)
or
// The argument of a call that coerces the argument to a regular expression.
exists(DataFlow::CallNode mce |
mce.getMethodName() = ["match", "match?"] and
source = mce.getArgument(0) and
// exclude https://ruby-doc.org/core-2.4.0/Regexp.html#method-i-match
not mce.getReceiver().asExpr().getExpr() instanceof AST::RegExpLiteral
)
}
/**
* A flow configuration that tracks constant strings to positions where they are
* interpreted as regular expressions.
*/
private class RegExpConfiguration extends Configuration {
RegExpConfiguration() { this = "RegExpConfiguration" }
/**
* Holds if `source` is an expression with a constant string value that is
* neither a variable read nor a constant read.
*/
override predicate isSource(DataFlow::Node source) {
source.asExpr() =
any(ExprCfgNode e |
e.getConstantValue().isString(_) and
not e instanceof ExprNodes::VariableReadAccessCfgNode and
not e instanceof ExprNodes::ConstantReadAccessCfgNode
)
}
/** Holds if `sink` is a node accepted by `isInterpretedAsRegExp`. */
override predicate isSink(DataFlow::Node sink) { isInterpretedAsRegExp(sink) }
/**
* Holds if `node` is the receiver of a `match` or `match?` call whose first
* argument is a regular expression literal.
*/
override predicate isSanitizer(DataFlow::Node node) {
// stop flow if `node` is receiver of
// https://ruby-doc.org/core-2.4.0/String.html#method-i-match
exists(DataFlow::CallNode mce |
mce.getMethodName() = ["match", "match?"] and
node = mce.getReceiver() and
mce.getArgument(0).asExpr().getExpr() instanceof AST::RegExpLiteral
)
}
}
/**
* Gets a node whose value may flow (inter-procedurally) to `re`, where it is interpreted
* as a part of a regular expression.
*/
cached
DataFlow::Node regExpSource(DataFlow::Node re) {
exists(RegExpConfiguration c | c.hasFlow(result, re))
}

View File

@@ -1,5 +1,5 @@
private import codeql.ruby.AST
private import codeql.ruby.security.performance.RegExpTreeView as RETV
private import codeql.ruby.Regexp as RE
private import internal.AST
private import internal.Constant
private import internal.Literal
@@ -594,7 +594,7 @@ class RegExpLiteral extends StringlikeLiteral, TRegExpLiteral {
final predicate hasFreeSpacingFlag() { this.getFlagString().charAt(_) = "x" }
/** Returns the root node of the parse tree of this regular expression. */
final RETV::RegExpTerm getParsed() { result = RETV::getParsedRegExp(this) }
final RE::RegExpTerm getParsed() { result = RE::getParsedRegExp(this) }
}
/**

View File

@@ -7,7 +7,7 @@
*/
private import AST
private import codeql.ruby.security.performance.RegExpTreeView as RETV
private import codeql.ruby.Regexp as RE
private import codeql.ruby.ast.internal.Synthesis
/**
@@ -37,7 +37,7 @@ private predicate shouldPrintAstEdge(AstNode parent, string edgeName, AstNode ch
newtype TPrintNode =
TPrintRegularAstNode(AstNode n) { shouldPrintNode(n) } or
TPrintRegExpNode(RETV::RegExpTerm term) {
TPrintRegExpNode(RE::RegExpTerm term) {
exists(RegExpLiteral literal |
shouldPrintNode(literal) and
term.getRootTerm() = literal.getParsed()
@@ -107,7 +107,7 @@ class PrintRegularAstNode extends PrintAstNode, TPrintRegularAstNode {
or
// If this AST node is a regexp literal, add the parsed regexp tree as a
// child.
exists(RETV::RegExpTerm t | t = astNode.(RegExpLiteral).getParsed() |
exists(RE::RegExpTerm t | t = astNode.(RegExpLiteral).getParsed() |
result = TPrintRegExpNode(t) and edgeName = "getParsed"
)
}
@@ -134,7 +134,7 @@ class PrintRegularAstNode extends PrintAstNode, TPrintRegularAstNode {
/** A parsed regexp node in the output tree. */
class PrintRegExpNode extends PrintAstNode, TPrintRegExpNode {
RETV::RegExpTerm regexNode;
RE::RegExpTerm regexNode;
PrintRegExpNode() { this = TPrintRegExpNode(regexNode) }
@@ -147,7 +147,7 @@ class PrintRegExpNode extends PrintAstNode, TPrintRegExpNode {
exists(int i | result = TPrintRegExpNode(regexNode.getChild(i)) and edgeName = i.toString())
}
override int getOrder() { exists(RETV::RegExpTerm p | p.getChild(result) = regexNode) }
override int getOrder() { exists(RE::RegExpTerm p | p.getChild(result) = regexNode) }
override predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn

View File

@@ -7,10 +7,6 @@
private import codeql.ruby.ast.Literal as AST
private import codeql.Locations
private import codeql.ruby.DataFlow
private import codeql.ruby.controlflow.CfgNodes
private import codeql.ruby.ApiGraphs
private import codeql.ruby.dataflow.internal.tainttrackingforlibraries.TaintTrackingImpl
/**
* A `StringlikeLiteral` containing a regular expression term, that is, either
@@ -116,6 +112,7 @@ abstract class RegExp extends AST::StringlikeLiteral {
)
}
/** Holds if a character set starts between `start` and `end`. */
predicate charSetStart(int start, int end) {
this.charSetStart(start) = true and
(
@@ -145,14 +142,21 @@ abstract class RegExp extends AST::StringlikeLiteral {
)
}
predicate charSetToken(int charsetStart, int index, int tokenStart, int tokenEnd) {
/**
* Holds if the `index`th token (ordered by start position) of the character set
* starting at `charsetStart` is a character or a `-` found between
* `tokenStart` and `tokenEnd`.
*/
private predicate charSetToken(int charsetStart, int index, int tokenStart, int tokenEnd) {
tokenStart =
rank[index](int start, int end | this.charSetToken(charsetStart, start, end) | start) and
this.charSetToken(charsetStart, tokenStart, tokenEnd)
}
/** Either a char or a - */
predicate charSetToken(int charsetStart, int start, int end) {
/**
* Holds if the character set starting at `charsetStart` contains either
* a character or a `-` found between `start` and `end`.
*/
private predicate charSetToken(int charsetStart, int start, int end) {
this.charSetStart(charsetStart, start) and
(
this.escapedCharacter(start, end)
@@ -174,6 +178,10 @@ abstract class RegExp extends AST::StringlikeLiteral {
)
}
/**
* Holds if the character set starting at `charsetStart` contains either
* a character or a range found between `start` and `end`.
*/
predicate charSetChild(int charsetStart, int start, int end) {
this.charSetToken(charsetStart, start, end) and
not exists(int rangeStart, int rangeEnd |
@@ -185,6 +193,11 @@ abstract class RegExp extends AST::StringlikeLiteral {
this.charRange(charsetStart, start, _, _, end)
}
/**
* Holds if the character set starting at `charsetStart` contains a character range
* with lower bound found between `start` and `lowerEnd`
* and upper bound found between `upperStart` and `end`.
*/
predicate charRange(int charsetStart, int start, int lowerEnd, int upperStart, int end) {
exists(int index |
this.charRangeEnd(charsetStart, index) = true and
@@ -193,6 +206,13 @@ abstract class RegExp extends AST::StringlikeLiteral {
)
}
/**
* Helper predicate for `charRange`.
* We can determine where character ranges end by a left to right sweep.
*
* To avoid negative recursion we return a boolean. See `escaping`,
* the helper for `escapingChar`, for a clean use of this pattern.
*/
private boolean charRangeEnd(int charsetStart, int index) {
this.charSetToken(charsetStart, index, _, _) and
(
@@ -216,8 +236,15 @@ abstract class RegExp extends AST::StringlikeLiteral {
)
}
/** Holds if the character at `pos` is a "\" that is actually escaping what comes after. */
predicate escapingChar(int pos) { this.escaping(pos) = true }
/**
* Helper predicate for `escapingChar`.
* In order to avoid negative recursion, we return a boolean.
* This way, we can refer to `escaping(pos - 1).booleanNot()`
* rather than to a negated version of `escaping(pos)`.
*/
private boolean escaping(int pos) {
pos = -1 and result = false
or
@@ -229,8 +256,10 @@ abstract class RegExp extends AST::StringlikeLiteral {
/** Gets the text of this regex */
string getText() { result = this.getConstantValue().getString() }
/** Gets the `i`th character of this regex */
string getChar(int i) { result = this.getText().charAt(i) }
/** Gets the `i`th character of this regex, unless it is part of a character escape sequence. */
string nonEscapedCharAt(int i) {
result = this.getText().charAt(i) and
not exists(int x, int y | this.escapedCharacter(x, y) and i in [x .. y - 1])
@@ -242,6 +271,9 @@ abstract class RegExp extends AST::StringlikeLiteral {
private predicate isGroupStart(int i) { this.nonEscapedCharAt(i) = "(" and not this.inCharSet(i) }
/**
* Holds if the `i`th character could not be parsed.
*/
predicate failedToParse(int i) {
exists(this.getChar(i)) and
not exists(int start, int end |
@@ -331,6 +363,11 @@ abstract class RegExp extends AST::StringlikeLiteral {
this.getChar(start + 3) = "^"
}
/**
* Holds if an escaped character is found between `start` and `end`.
* Escaped characters include hex values, octal values and named escapes,
* but excludes backreferences.
*/
predicate escapedCharacter(int start, int end) {
this.escapingChar(start) and
not this.numberedBackreference(start, _, _) and
@@ -350,17 +387,25 @@ abstract class RegExp extends AST::StringlikeLiteral {
)
}
/**
* Holds if the character at `index` is inside a character set.
*/
predicate inCharSet(int index) {
exists(int x, int y | this.charSet(x, y) and index in [x + 1 .. y - 2])
}
/**
* Holds if the character at `index` is inside a posix bracket.
*/
predicate inPosixBracket(int index) {
exists(int x, int y |
this.posixStyleNamedCharacterProperty(x, y, _) and index in [x + 1 .. y - 2]
)
}
/** 'Simple' characters are any that don't alter the parsing of the regex. */
/**
* 'simple' characters are any that don't alter the parsing of the regex.
*/
private predicate simpleCharacter(int start, int end) {
end = start + 1 and
not this.charSet(start, _) and
@@ -391,6 +436,9 @@ abstract class RegExp extends AST::StringlikeLiteral {
)
}
/**
* Holds if a simple or escaped character is found between `start` and `end`.
*/
predicate character(int start, int end) {
(
this.simpleCharacter(start, end) and
@@ -406,12 +454,18 @@ abstract class RegExp extends AST::StringlikeLiteral {
not exists(int x, int y | this.multiples(x, y, _, _) and x <= start and y >= end)
}
/**
* Holds if a normal character is found between `start` and `end`.
*/
predicate normalCharacter(int start, int end) {
end = start + 1 and
this.character(start, end) and
not this.specialCharacter(start, end, _)
}
/**
* Holds if a special character is found between `start` and `end`.
*/
predicate specialCharacter(int start, int end, string char) {
this.character(start, end) and
not this.inCharSet(start) and
@@ -505,6 +559,7 @@ abstract class RegExp extends AST::StringlikeLiteral {
this.positiveLookbehindAssertionGroup(start, end)
}
/** Holds if an empty group is found between `start` and `end`. */
predicate emptyGroup(int start, int end) {
exists(int endm1 | end = endm1 + 1 |
this.groupStart(start, endm1) and
@@ -538,24 +593,28 @@ abstract class RegExp extends AST::StringlikeLiteral {
)
}
/** Holds if a negative lookahead is found between `start` and `end` */
predicate negativeLookaheadAssertionGroup(int start, int end) {
exists(int inStart | this.negativeLookaheadAssertionStart(start, inStart) |
this.groupContents(start, end, inStart, _)
)
}
/** Holds if a negative lookbehind is found between `start` and `end` */
predicate negativeLookbehindAssertionGroup(int start, int end) {
exists(int inStart | this.negativeLookbehindAssertionStart(start, inStart) |
this.groupContents(start, end, inStart, _)
)
}
/** Holds if a positive lookahead is found between `start` and `end` */
predicate positiveLookaheadAssertionGroup(int start, int end) {
exists(int inStart | this.lookaheadAssertionStart(start, inStart) |
this.groupContents(start, end, inStart, _)
)
}
/** Holds if a positive lookbehind is found between `start` and `end` */
predicate positiveLookbehindAssertionGroup(int start, int end) {
exists(int inStart | this.lookbehindAssertionStart(start, inStart) |
this.groupContents(start, end, inStart, _)
@@ -661,6 +720,7 @@ abstract class RegExp extends AST::StringlikeLiteral {
end = start + 3
}
/** Matches the contents of a group. */
predicate groupContents(int start, int end, int inStart, int inEnd) {
this.groupStart(start, inStart) and
end = inEnd + 1 and
@@ -747,6 +807,11 @@ abstract class RegExp extends AST::StringlikeLiteral {
)
}
/**
* Holds if a repetition quantifier is found between `start` and `end`,
* with the given lower and upper bounds. If a bound is omitted, the corresponding
* string is empty.
*/
predicate multiples(int start, int end, string lower, string upper) {
exists(string text, string match, string inner |
text = this.getText() and
@@ -774,6 +839,13 @@ abstract class RegExp extends AST::StringlikeLiteral {
this.qualifiedPart(start, _, end, maybeEmpty, mayRepeatForever)
}
/**
* Holds if a qualified part is found between `start` and `partEnd` and the qualifier is
* found between `partEnd` and `end`.
*
* `maybeEmpty` is true if the part is optional.
* `mayRepeatForever` is true if the part may be repeated unboundedly.
*/
predicate qualifiedPart(
int start, int partEnd, int end, boolean maybeEmpty, boolean mayRepeatForever
) {
@@ -781,6 +853,7 @@ abstract class RegExp extends AST::StringlikeLiteral {
this.qualifier(partEnd, end, maybeEmpty, mayRepeatForever)
}
/** Holds if the range `start`, `end` contains a character, a quantifier, a character set or a group. */
predicate item(int start, int end) {
this.qualifiedItem(start, end, _, _)
or
@@ -960,75 +1033,3 @@ abstract class RegExp extends AST::StringlikeLiteral {
this.lastPart(start, end)
}
}
private class RegExpLiteralRegExp extends RegExp, AST::RegExpLiteral {
override predicate isDotAll() { this.hasMultilineFlag() }
override predicate isIgnoreCase() { this.hasCaseInsensitiveFlag() }
override string getFlags() { result = this.getFlagString() }
}
private class ParsedStringRegExp extends RegExp {
private DataFlow::Node parse;
ParsedStringRegExp() { this = regExpSource(parse).asExpr().getExpr() }
DataFlow::Node getAParse() { result = parse }
override predicate isDotAll() { none() }
override predicate isIgnoreCase() { none() }
override string getFlags() { none() }
}
/**
* Holds if `source` may be interpreted as a regular expression.
*/
private predicate isInterpretedAsRegExp(DataFlow::Node source) {
// The first argument to an invocation of `Regexp.new` or `Regexp.compile`.
source = API::getTopLevelMember("Regexp").getAMethodCall(["compile", "new"]).getArgument(0)
or
// The argument of a call that coerces the argument to a regular expression.
exists(DataFlow::CallNode mce |
mce.getMethodName() = ["match", "match?"] and
source = mce.getArgument(0) and
// exclude https://ruby-doc.org/core-2.4.0/Regexp.html#method-i-match
not mce.getReceiver().asExpr().getExpr() instanceof AST::RegExpLiteral
)
}
private class RegExpConfiguration extends Configuration {
RegExpConfiguration() { this = "RegExpConfiguration" }
override predicate isSource(DataFlow::Node source) {
source.asExpr() =
any(ExprCfgNode e |
e.getConstantValue().isString(_) and
not e instanceof ExprNodes::VariableReadAccessCfgNode and
not e instanceof ExprNodes::ConstantReadAccessCfgNode
)
}
override predicate isSink(DataFlow::Node sink) { isInterpretedAsRegExp(sink) }
override predicate isSanitizer(DataFlow::Node node) {
// stop flow if `node` is receiver of
// https://ruby-doc.org/core-2.4.0/String.html#method-i-match
exists(DataFlow::CallNode mce |
mce.getMethodName() = ["match", "match?"] and
node = mce.getReceiver() and
mce.getArgument(0).asExpr().getExpr() instanceof AST::RegExpLiteral
)
}
}
/**
* Gets a node whose value may flow (inter-procedurally) to `re`, where it is interpreted
* as a part of a regular expression.
*/
cached
DataFlow::Node regExpSource(DataFlow::Node re) {
exists(RegExpConfiguration c | c.hasFlow(result, re))
}

File diff suppressed because it is too large Load Diff

View File

@@ -8,8 +8,7 @@ private import codeql.ruby.AST as AST
private import codeql.ruby.CFG
private import codeql.ruby.DataFlow
private import codeql.ruby.dataflow.RemoteFlowSources
private import codeql.ruby.security.performance.ParseRegExp as RegExp
private import codeql.ruby.security.performance.RegExpTreeView
private import codeql.ruby.Regexp
private import codeql.ruby.security.performance.SuperlinearBackTracking
module PolynomialReDoS {

View File

@@ -1,8 +1,10 @@
private import codeql.ruby.ast.Literal as AST
private import ParseRegExp
private import codeql.NumberUtils
/**
* This module should provide a class hierarchy corresponding to a parse tree of regular expressions.
*/
import codeql.ruby.regexp.RegExpTreeView
import codeql.Locations
private import codeql.ruby.DataFlow
private import codeql.ruby.ast.Literal as AST
/**
* Holds if `term` is an escape class representing e.g. `\d`.
@@ -59,776 +61,3 @@ module RegExpFlags {
root.getLiteral().isDotAll()
}
}
/**
* Provides utility predicates related to regular expressions.
*/
module RegExpPatterns {
/**
* Gets a pattern that matches common top-level domain names in lower case.
*/
string getACommonTld() {
// according to ranking by http://google.com/search?q=site:.<<TLD>>
result = "(?:com|org|edu|gov|uk|net|io)(?![a-z0-9])"
}
}
/**
* An element containing a regular expression term, that is, either
* a string literal (parsed as a regular expression)
* or another regular expression term.
*/
class RegExpParent extends TRegExpParent {
/** Gets a textual representation of this element. */
string toString() { result = "RegExpParent" }
/** Gets the `i`th child term. Has no result in this base class. */
RegExpTerm getChild(int i) { none() }
/** Gets a child term of this element, at any index. */
final RegExpTerm getAChild() { result = this.getChild(_) }
/** Gets the number of distinct child terms of this element. */
int getNumChild() { result = count(this.getAChild()) }
/**
* Gets the name of a primary CodeQL class to which this regular
* expression term belongs.
*/
string getAPrimaryQlClass() { result = "RegExpParent" }
/**
* Gets a comma-separated list of the names of the primary CodeQL classes to
* which this regular expression term belongs.
*/
final string getPrimaryQlClasses() { result = concat(this.getAPrimaryQlClass(), ",") }
}
/**
* A regular expression `re` as a whole, acting as the parent of its root term.
*/
class RegExpLiteral extends TRegExpLiteral, RegExpParent {
RegExp re;
RegExpLiteral() { this = TRegExpLiteral(re) }
/** Gets the root term of `re`, which is the only child and sits at index 0. */
override RegExpTerm getChild(int i) { i = 0 and result.getRegExp() = re and result.isRootTerm() }
/** Holds if `re` reports the dot-all flag. */
predicate isDotAll() { re.isDotAll() }
/** Holds if `re` reports the ignore-case flag. */
predicate isIgnoreCase() { re.isIgnoreCase() }
/** Gets the flags reported by `re`. */
string getFlags() { result = re.getFlags() }
override string getAPrimaryQlClass() { result = "RegExpLiteral" }
}
/**
* A term of a regular expression: the part of the text of `re` between offsets
* `start` and `end`, classified as one of the `TRegExp*` term kinds.
*/
class RegExpTerm extends RegExpParent {
RegExp re;
int start;
int end;
RegExpTerm() {
this = TRegExpAlt(re, start, end)
or
this = TRegExpBackRef(re, start, end)
or
this = TRegExpCharacterClass(re, start, end)
or
this = TRegExpCharacterRange(re, start, end)
or
this = TRegExpNormalChar(re, start, end)
or
this = TRegExpGroup(re, start, end)
or
this = TRegExpQuantifier(re, start, end)
or
this = TRegExpSequence(re, start, end) and
exists(seqChild(re, start, end, 1)) // if a sequence does not have more than one element, it should be treated as that element instead.
or
this = TRegExpSpecialChar(re, start, end)
or
this = TRegExpNamedCharacterProperty(re, start, end)
}
/**
* Gets the root term of the tree containing this term: this term itself if it
* is a root term, otherwise the root term of its parent.
*/
RegExpTerm getRootTerm() {
this.isRootTerm() and result = this
or
result = this.getParent().(RegExpTerm).getRootTerm()
}
/** Holds for every term. */
predicate isUsedAsRegExp() { any() }
/** Holds if this term spans the entire text of `re`. */
predicate isRootTerm() { start = 0 and end = re.getText().length() }
/** Gets the `i`th child of this term, as defined by the specific term class it belongs to. */
override RegExpTerm getChild(int i) {
result = this.(RegExpAlt).getChild(i)
or
result = this.(RegExpBackRef).getChild(i)
or
result = this.(RegExpCharacterClass).getChild(i)
or
result = this.(RegExpCharacterRange).getChild(i)
or
result = this.(RegExpNormalChar).getChild(i)
or
result = this.(RegExpGroup).getChild(i)
or
result = this.(RegExpQuantifier).getChild(i)
or
result = this.(RegExpSequence).getChild(i)
or
result = this.(RegExpSpecialChar).getChild(i)
or
result = this.(RegExpNamedCharacterProperty).getChild(i)
}
/** Gets an element that has this term as a child. */
RegExpParent getParent() { result.getAChild() = this }
/** Gets the regular expression that this term is part of. */
RegExp getRegExp() { result = re }
/** Gets the offset in the text of `re` at which this term starts. */
int getStart() { result = start }
/** Gets the offset in the text of `re` at which this term ends. */
int getEnd() { result = end }
/** Gets the text of `re` between `start` and `end`. */
override string toString() { result = re.getText().substring(start, end) }
override string getAPrimaryQlClass() { result = "RegExpTerm" }
/** Gets the location of the enclosing regular expression `re` (not of this term alone). */
Location getLocation() { result = re.getLocation() }
/** Holds if the `i`th component of `re` has the given location. */
pragma[noinline]
private predicate componentHasLocationInfo(
int i, string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
re.getComponent(i)
.getLocation()
.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
}
/**
* Holds if this term is at the given location.
*
* The start line comes from the first component of `re` and the end line from
* the last one. Both columns are computed from the start column of the first
* component, offset by `start` and `end - 1` respectively.
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
exists(int re_start, int re_end |
this.componentHasLocationInfo(0, filepath, startline, re_start, _, _) and
this.componentHasLocationInfo(re.getNumberOfComponents() - 1, filepath, _, _, endline, re_end) and
startcolumn = re_start + start and
endcolumn = re_start + end - 1
)
}
/** Gets the file of this term's location. */
File getFile() { result = this.getLocation().getFile() }
/** Gets the same text as `toString()`. */
string getRawValue() { result = this.toString() }
/** Gets the `RegExpLiteral` element for the regular expression this term is part of. */
RegExpLiteral getLiteral() { result = TRegExpLiteral(re) }
/** Gets the regular expression term that is matched (textually) before this one, if any. */
RegExpTerm getPredecessor() {
exists(RegExpTerm parent | parent = this.getParent() |
result = parent.(RegExpSequence).previousElement(this)
or
not exists(parent.(RegExpSequence).previousElement(this)) and
not parent instanceof RegExpSubPattern and
result = parent.getPredecessor()
)
}
/** Gets the regular expression term that is matched (textually) after this one, if any. */
RegExpTerm getSuccessor() {
exists(RegExpTerm parent | parent = this.getParent() |
result = parent.(RegExpSequence).nextElement(this)
or
not exists(parent.(RegExpSequence).nextElement(this)) and
not parent instanceof RegExpSubPattern and
result = parent.getSuccessor()
)
}
}
/**
* The algebraic type underlying `RegExpParent`: one branch for a regular
* expression as a whole, plus one branch per kind of term. Each term branch is
* keyed by the `RegExp` and the `start`/`end` offsets for which the corresponding
* parser predicate holds.
*/
newtype TRegExpParent =
TRegExpLiteral(RegExp re) or
TRegExpQuantifier(RegExp re, int start, int end) { re.qualifiedItem(start, end, _, _) } or
TRegExpSequence(RegExp re, int start, int end) { re.sequence(start, end) } or
TRegExpAlt(RegExp re, int start, int end) { re.alternation(start, end) } or
TRegExpCharacterClass(RegExp re, int start, int end) { re.charSet(start, end) } or
TRegExpCharacterRange(RegExp re, int start, int end) { re.charRange(_, start, _, _, end) } or
TRegExpGroup(RegExp re, int start, int end) { re.group(start, end) } or
TRegExpSpecialChar(RegExp re, int start, int end) { re.specialCharacter(start, end, _) } or
// A normal character is either a run of normal characters or an escaped
// character that is not also a special character.
TRegExpNormalChar(RegExp re, int start, int end) {
re.normalCharacterSequence(start, end)
or
re.escapedCharacter(start, end) and
not re.specialCharacter(start, end, _)
} or
TRegExpBackRef(RegExp re, int start, int end) { re.backreference(start, end) } or
TRegExpNamedCharacterProperty(RegExp re, int start, int end) {
re.namedCharacterProperty(start, end, _)
}
/**
* A quantified term: a sub-term spanning `start` to `part_end`, followed by the
* qualifier text between `part_end` and `end`.
*/
class RegExpQuantifier extends RegExpTerm, TRegExpQuantifier {
int part_end;
boolean may_repeat_forever;
RegExpQuantifier() {
this = TRegExpQuantifier(re, start, end) and
re.qualifiedPart(start, part_end, end, _, may_repeat_forever)
}
/** Gets the quantified sub-term (the term from `start` to `part_end`) as the child at index 0. */
override RegExpTerm getChild(int i) {
i = 0 and
result.getRegExp() = re and
result.getStart() = start and
result.getEnd() = part_end
}
/** Holds if `qualifiedPart` reports that the quantified part may repeat forever. */
predicate mayRepeatForever() { may_repeat_forever = true }
/** Gets the text of the qualifier, i.e. the text between `part_end` and `end`. */
string getQualifier() { result = re.getText().substring(part_end, end) }
override string getAPrimaryQlClass() { result = "RegExpQuantifier" }
}
/** A quantifier for which `mayRepeatForever()` holds. */
class InfiniteRepetitionQuantifier extends RegExpQuantifier {
InfiniteRepetitionQuantifier() { this.mayRepeatForever() }
override string getAPrimaryQlClass() { result = "InfiniteRepetitionQuantifier" }
}
/**
 * An unbounded quantifier whose quantifier text starts with `*`.
 */
class RegExpStar extends InfiniteRepetitionQuantifier {
  RegExpStar() { "*" = this.getQualifier().charAt(0) }

  override string getAPrimaryQlClass() { "RegExpStar" = result }
}
/**
 * An unbounded quantifier whose quantifier text starts with `+`.
 */
class RegExpPlus extends InfiniteRepetitionQuantifier {
  RegExpPlus() { "+" = this.getQualifier().charAt(0) }

  override string getAPrimaryQlClass() { "RegExpPlus" = result }
}
/**
 * A quantifier whose quantifier text starts with `?`.
 */
class RegExpOpt extends RegExpQuantifier {
  RegExpOpt() { "?" = this.getQualifier().charAt(0) }

  override string getAPrimaryQlClass() { "RegExpOpt" = result }
}
/**
 * A quantifier whose quantifier text is recognised by `multiples`, which
 * supplies the textual lower and upper bounds.
 */
class RegExpRange extends RegExpQuantifier {
  /** The upper bound, as text. */
  string upper;
  /** The lower bound, as text. */
  string lower;

  RegExpRange() { re.multiples(part_end, end, lower, upper) }

  /** Gets the upper bound as text. */
  string getUpper() { upper = result }

  /** Gets the lower bound as text. */
  string getLower() { lower = result }

  /**
   * Gets the upper bound of the range, if any.
   *
   * If there is no upper bound, any number of repetitions is allowed.
   * For a term of the form `r{lo}`, both the lower and the upper bound
   * are `lo`.
   */
  int getUpperBound() { this.getUpper().toInt() = result }

  /** Gets the lower bound of the range. */
  int getLowerBound() { this.getLower().toInt() = result }

  override string getAPrimaryQlClass() { "RegExpRange" = result }
}
/**
 * A sequence of two or more terms that follow one another.
 */
class RegExpSequence extends RegExpTerm, TRegExpSequence {
  RegExpSequence() {
    // A sequence with only one element is not a sequence term:
    // it should be treated as that element instead.
    exists(seqChild(re, start, end, 1)) and
    this = TRegExpSequence(re, start, end)
  }

  /** Gets the `i`th element of this sequence. */
  override RegExpTerm getChild(int i) { seqChild(re, start, end, i) = result }

  /** Gets the element preceding `element` in this sequence. */
  RegExpTerm previousElement(RegExpTerm element) { this.nextElement(result) = element }

  /** Gets the element following `element` in this sequence. */
  RegExpTerm nextElement(RegExpTerm element) {
    exists(int pos | this.getChild(pos) = element | this.getChild(pos + 1) = result)
  }

  override string getAPrimaryQlClass() { "RegExpSequence" = result }
}
/**
 * Gets the end offset of the `i`th element of the sequence spanning
 * `start` to `end` in `re`.
 */
pragma[nomagic]
private int seqChildEnd(RegExp re, int start, int end, int i) {
  exists(RegExpTerm element | element = seqChild(re, start, end, i) | result = element.getEnd())
}
/**
 * Gets the `i`th element of the sequence spanning `start` to `end` in `re`.
 *
 * The first element is the item starting at `start`; each later element is
 * the item starting where the previous element ends.
 */
// Kept outside `RegExpSequence` so that it can be used in its characteristic predicate.
private RegExpTerm seqChild(RegExp re, int start, int end, int i) {
  re.sequence(start, end) and
  result.getRegExp() = re and
  (
    i = 0 and
    result.getStart() = start and
    re.item(start, result.getEnd())
    or
    i > 0 and
    exists(int prevEnd | prevEnd = seqChildEnd(re, start, end, i - 1) |
      result.getStart() = prevEnd and
      re.item(prevEnd, result.getEnd())
    )
  )
}
/**
 * An alternation term, whose children are the options separated by `|`.
 */
class RegExpAlt extends RegExpTerm, TRegExpAlt {
  RegExpAlt() { this = TRegExpAlt(re, start, end) }

  /** Gets the `i`th option of this alternation. */
  override RegExpTerm getChild(int i) {
    result.getRegExp() = re and
    (
      // the first option begins where the alternation begins
      i = 0 and
      result.getStart() = start and
      re.alternationOption(start, end, start, result.getEnd())
      or
      i > 0 and
      exists(int optionStart |
        // step over the `|` that follows the previous option
        optionStart = this.getChild(i - 1).getEnd() + 1
      |
        result.getStart() = optionStart and
        re.alternationOption(start, end, optionStart, result.getEnd())
      )
    )
  }

  override string getAPrimaryQlClass() { "RegExpAlt" = result }
}
/** An alias for `RegExpEscape`. */
class RegExpCharEscape = RegExpEscape;
/**
 * An escaped character in a regular expression, that is, a normal character
 * term for which `escapedCharacter` holds.
 */
class RegExpEscape extends RegExpNormalChar {
  RegExpEscape() { re.escapedCharacter(start, end) }

  /**
   * Gets the value denoted by this escape.
   *
   * For the control escapes `\n`, `\r`, `\t`, `\f` and `\v` this is the
   * corresponding control character, for unicode escapes it is the encoded
   * character, and for every other (identity) escape it is the text following
   * the backslash; for example, `w` for `\w`.
   * TODO: Handle named escapes.
   */
  override string getValue() {
    this.isIdentityEscape() and result = this.getUnescaped()
    or
    this.getUnescaped() = "n" and result = "\n"
    or
    this.getUnescaped() = "r" and result = "\r"
    or
    this.getUnescaped() = "t" and result = "\t"
    or
    // `\f` is a form feed (U+000C), not the letter `f`
    this.getUnescaped() = "f" and result = 12.toUnicode()
    or
    // `\v` is a vertical tab (U+000B), not the letter `v`
    this.getUnescaped() = "v" and result = 11.toUnicode()
    or
    this.isUnicode() and
    result = this.getUnicode()
  }

  /**
   * Holds if the value of this escape is simply the text following the
   * backslash, that is, it is neither a control escape nor a unicode escape.
   */
  predicate isIdentityEscape() {
    not this.getUnescaped() in ["n", "r", "t", "f", "v"] and not this.isUnicode()
  }

  /**
   * Gets the text for this escape. That is e.g. "\w".
   */
  private string getText() { result = re.getText().substring(start, end) }

  /**
   * Holds if this is a unicode escape.
   */
  private predicate isUnicode() { this.getText().prefix(2) = ["\\u", "\\U"] }

  /**
   * Gets the unicode char for this escape.
   * E.g. for `\u0061` this returns "a".
   */
  private string getUnicode() {
    this.isUnicode() and
    result = parseHexInt(this.getText().suffix(2)).toUnicode()
  }

  /** Gets the text of this escape without the leading backslash. */
  string getUnescaped() { result = this.getText().suffix(1) }

  override string getAPrimaryQlClass() { result = "RegExpEscape" }
}
/**
 * A word boundary assertion, written `\b` in a regular expression.
 */
class RegExpWordBoundary extends RegExpSpecialChar {
  RegExpWordBoundary() { "\\b" = this.getChar() }
}
/**
 * An escape that stands for a whole class of characters rather than a
 * single character.
 *
 * Examples:
 *
 * ```
 * \w
 * \S
 * ```
 */
class RegExpCharacterClassEscape extends RegExpEscape {
  RegExpCharacterClassEscape() { this.getValue() in ["d", "D", "h", "H", "s", "S", "w", "W"] }

  /** Character class escapes are leaves: they have no children. */
  override RegExpTerm getChild(int i) { none() }

  override string getAPrimaryQlClass() { "RegExpCharacterClassEscape" = result }
}
/**
 * A bracketed character class.
 *
 * Examples:
 *
 * ```rb
 * /[a-fA-F0-9]/
 * /[^abc]/
 * ```
 */
class RegExpCharacterClass extends RegExpTerm, TRegExpCharacterClass {
  RegExpCharacterClass() { this = TRegExpCharacterClass(re, start, end) }

  /** Holds if the character right after the opening bracket is `^`. */
  predicate isInverted() { "^" = re.getChar(start + 1) }

  /** Holds if this class is written in a way that matches every character. */
  predicate isUniversalClass() {
    // [^]
    this.isInverted() and not exists(this.getAChild())
    or
    // [\w\W] and similar: two escapes that differ only in case
    not this.isInverted() and
    exists(string v1, string v2 |
      v1 = this.getAChild().(RegExpCharacterClassEscape).getValue() and
      v2 = this.getAChild().(RegExpCharacterClassEscape).getValue() and
      v1.toLowerCase() = v2.toLowerCase() and
      v1 != v2
    )
  }

  /** Gets the `i`th member of this character class. */
  override RegExpTerm getChild(int i) {
    result.getRegExp() = re and
    (
      // the first member begins at the offset reported by `charSetStart`
      i = 0 and
      exists(int firstStart |
        re.charSetStart(start, firstStart) and
        result.getStart() = firstStart and
        re.charSetChild(start, firstStart, result.getEnd())
      )
      or
      // each later member begins where the previous one ends
      i > 0 and
      exists(int prevEnd | prevEnd = this.getChild(i - 1).getEnd() |
        result.getStart() = prevEnd and
        re.charSetChild(start, prevEnd, result.getEnd())
      )
    )
  }

  override string getAPrimaryQlClass() { "RegExpCharacterClass" = result }
}
/**
 * A character range as reported by `charRange`, with a lower endpoint
 * spanning `start` to `lower_end` and an upper endpoint spanning
 * `upper_start` to `end`.
 */
class RegExpCharacterRange extends RegExpTerm, TRegExpCharacterRange {
  /** The offset where the lower endpoint ends. */
  int lower_end;
  /** The offset where the upper endpoint begins. */
  int upper_start;

  RegExpCharacterRange() {
    re.charRange(_, start, lower_end, upper_start, end) and
    this = TRegExpCharacterRange(re, start, end)
  }

  /** Holds if `lo` and `hi` are the source texts of the lower and upper endpoints. */
  predicate isRange(string lo, string hi) {
    re.getText().substring(upper_start, end) = hi and
    re.getText().substring(start, lower_end) = lo
  }

  /** Gets the lower endpoint (index 0) or the upper endpoint (index 1). */
  override RegExpTerm getChild(int i) {
    result.getRegExp() = re and
    (
      i = 0 and result.getStart() = start and result.getEnd() = lower_end
      or
      i = 1 and result.getStart() = upper_start and result.getEnd() = end
    )
  }

  override string getAPrimaryQlClass() { "RegExpCharacterRange" = result }
}
/**
 * A normal character term: a `TRegExpNormalChar` node.
 */
class RegExpNormalChar extends RegExpTerm, TRegExpNormalChar {
  RegExpNormalChar() { this = TRegExpNormalChar(re, start, end) }

  /** Holds for every normal character term. */
  predicate isCharacter() { any() }

  /** Gets the source text of this term. */
  string getValue() { re.getText().substring(start, end) = result }

  /** Normal characters are leaves: they have no children. */
  override RegExpTerm getChild(int i) { none() }

  override string getAPrimaryQlClass() { "RegExpNormalChar" = result }
}
/**
 * A term with a constant value: either a normal character term that is not
 * a character class escape and does not lie inside quantifier text, or a
 * special character that occurs inside a character set.
 */
class RegExpConstant extends RegExpTerm {
  /** The constant value of this term. */
  string value;

  RegExpConstant() {
    this = TRegExpNormalChar(re, start, end) and
    value = this.(RegExpNormalChar).getValue() and
    not this instanceof RegExpCharacterClassEscape and
    // leave out characters that belong to the text of a quantifier
    // TODO: push this into regex library
    not exists(int qstart, int qend |
      re.qualifiedPart(_, qstart, qend, _, _) and
      qstart <= start and
      end <= qend
    )
    or
    this = TRegExpSpecialChar(re, start, end) and
    value = this.(RegExpSpecialChar).getChar() and
    re.inCharSet(start)
  }

  /** Holds for every constant term. */
  predicate isCharacter() { any() }

  /** Gets the constant value of this term. */
  string getValue() { value = result }

  /** Constants are leaves: they have no children. */
  override RegExpTerm getChild(int i) { none() }

  override string getAPrimaryQlClass() { "RegExpConstant" = result }
}
/**
 * A group term: a `TRegExpGroup` node.
 */
class RegExpGroup extends RegExpTerm, TRegExpGroup {
  RegExpGroup() { this = TRegExpGroup(re, start, end) }

  /**
   * Gets the index of this capture group within the enclosing regular
   * expression literal.
   *
   * For example, in the regular expression `/((a?).)(?:b)/`, the
   * group `((a?).)` has index 1, the group `(a?)` nested inside it
   * has index 2, and the group `(?:b)` has no index, since it is
   * not a capture group.
   */
  int getNumber() { re.getGroupNumber(start, end) = result }

  /** Holds if this is a capture group, that is, if it has a number. */
  predicate isCapture() { exists(this.getNumber()) }

  /** Holds if this is a named capture group, that is, if it has a name. */
  predicate isNamed() { exists(this.getName()) }

  /** Gets the name of this capture group, if any. */
  string getName() { re.getGroupName(start, end) = result }

  /** Holds for every group. */
  predicate isCharacter() { any() }

  /** Gets the source text of this group. */
  string getValue() { re.getText().substring(start, end) = result }

  /** Gets the contents of this group, which is its only child (index 0). */
  override RegExpTerm getChild(int i) {
    i = 0 and
    re.groupContents(start, end, result.getStart(), result.getEnd()) and
    result.getRegExp() = re
  }

  override string getAPrimaryQlClass() { "RegExpGroup" = result }
}
/**
 * A special character term, as reported by `specialCharacter`.
 */
class RegExpSpecialChar extends RegExpTerm, TRegExpSpecialChar {
  /** The text that `specialCharacter` reports for this term. */
  string char;

  RegExpSpecialChar() {
    re.specialCharacter(start, end, char) and
    this = TRegExpSpecialChar(re, start, end)
  }

  /** Holds for every special character term. */
  predicate isCharacter() { any() }

  /** Gets the text of this special character. */
  string getChar() { char = result }

  /** Special characters are leaves: they have no children. */
  override RegExpTerm getChild(int i) { none() }

  override string getAPrimaryQlClass() { "RegExpSpecialChar" = result }
}
/**
 * The special character `.`.
 */
class RegExpDot extends RegExpSpecialChar {
  RegExpDot() { "." = this.getChar() }

  override string getAPrimaryQlClass() { "RegExpDot" = result }
}
/**
 * One of the special characters `$`, `\Z` or `\z`.
 */
class RegExpDollar extends RegExpSpecialChar {
  RegExpDollar() { this.getChar() in ["$", "\\Z", "\\z"] }

  override string getAPrimaryQlClass() { "RegExpDollar" = result }
}
/**
 * One of the special characters `^` or `\A`.
 */
class RegExpCaret extends RegExpSpecialChar {
  RegExpCaret() { this.getChar() in ["^", "\\A"] }

  override string getAPrimaryQlClass() { "RegExpCaret" = result }
}
/**
 * A group for which `zeroWidthMatch` holds.
 */
class RegExpZeroWidthMatch extends RegExpGroup {
RegExpZeroWidthMatch() { re.zeroWidthMatch(start, end) }
/** Holds for every zero-width match. */
override predicate isCharacter() { any() }
/** Zero-width matches do not expose any children. */
override RegExpTerm getChild(int i) { none() }
override string getAPrimaryQlClass() { result = "RegExpZeroWidthMatch" }
}
/**
 * A zero-width lookahead or lookbehind assertion, that is, a zero-width
 * match that is not an empty group.
 *
 * Examples:
 *
 * ```
 * (?=\w)
 * (?!\n)
 * (?<=\.)
 * (?<!\\)
 * ```
 */
class RegExpSubPattern extends RegExpZeroWidthMatch {
  RegExpSubPattern() { not re.emptyGroup(start, end) }

  /** Gets the term inside this assertion, that is, the contents of the group. */
  RegExpTerm getOperand() {
    result.getRegExp() = re and
    re.groupContents(start, end, result.getStart(), result.getEnd())
  }
}
/** A lookahead assertion; the concrete kinds are declared as subclasses. */
abstract class RegExpLookahead extends RegExpSubPattern { }
/** A lookahead for which `positiveLookaheadAssertionGroup` holds. */
class RegExpPositiveLookahead extends RegExpLookahead {
RegExpPositiveLookahead() { re.positiveLookaheadAssertionGroup(start, end) }
override string getAPrimaryQlClass() { result = "RegExpPositiveLookahead" }
}
/** A lookahead for which `negativeLookaheadAssertionGroup` holds. */
class RegExpNegativeLookahead extends RegExpLookahead {
RegExpNegativeLookahead() { re.negativeLookaheadAssertionGroup(start, end) }
override string getAPrimaryQlClass() { result = "RegExpNegativeLookahead" }
}
/** A lookbehind assertion; the concrete kinds are declared as subclasses. */
abstract class RegExpLookbehind extends RegExpSubPattern { }
/** A lookbehind for which `positiveLookbehindAssertionGroup` holds. */
class RegExpPositiveLookbehind extends RegExpLookbehind {
RegExpPositiveLookbehind() { re.positiveLookbehindAssertionGroup(start, end) }
override string getAPrimaryQlClass() { result = "RegExpPositiveLookbehind" }
}
/** A lookbehind for which `negativeLookbehindAssertionGroup` holds. */
class RegExpNegativeLookbehind extends RegExpLookbehind {
RegExpNegativeLookbehind() { re.negativeLookbehindAssertionGroup(start, end) }
override string getAPrimaryQlClass() { result = "RegExpNegativeLookbehind" }
}
/**
 * A back reference to a capture group, identified by number or by name.
 */
class RegExpBackRef extends RegExpTerm, TRegExpBackRef {
  RegExpBackRef() { this = TRegExpBackRef(re, start, end) }

  /**
   * Gets the number of the capture group this back reference refers to, if any.
   */
  int getNumber() { re.getBackRefNumber(start, end) = result }

  /**
   * Gets the name of the capture group this back reference refers to, if any.
   */
  string getName() { re.getBackRefName(start, end) = result }

  /**
   * Gets the capture group this back reference refers to: a group in the
   * same literal that has the same number or the same name.
   */
  RegExpGroup getGroup() {
    (
      this.getName() = result.getName()
      or
      this.getNumber() = result.getNumber()
    ) and
    this.getLiteral() = result.getLiteral()
  }

  /** Back references are leaves: they have no children. */
  override RegExpTerm getChild(int i) { none() }

  override string getAPrimaryQlClass() { "RegExpBackRef" = result }
}
/**
 * A named character property. For example, the POSIX bracket expression
 * `[[:digit:]]`.
 */
class RegExpNamedCharacterProperty extends RegExpTerm, TRegExpNamedCharacterProperty {
  RegExpNamedCharacterProperty() { this = TRegExpNamedCharacterProperty(re, start, end) }

  /** Named character properties are leaves: they have no children. */
  override RegExpTerm getChild(int i) { none() }

  override string getAPrimaryQlClass() { "RegExpNamedCharacterProperty" = result }

  /**
   * Gets the name of the property. For example, the result is `"Space"`
   * for `\p{Space}`.
   */
  string getName() { re.getCharacterPropertyName(start, end) = result }

  /**
   * Holds if the property is negated. For example, it holds for `\p{^Digit}`,
   * which matches non-digits.
   */
  predicate isInverted() { re.namedCharacterPropertyIsInverted(start, end) }
}
/** Gets the root term of the parse tree of the regular expression literal `re`. */
RegExpTerm getParsedRegExp(AST::RegExpLiteral re) {
  result.isRootTerm() and
  re = result.getRegExp()
}
/**
 * A data-flow node whose value may flow to a position where it is
 * interpreted as a part of a regular expression.
 */
abstract class RegExpPatternSource extends DataFlow::Node {
/**
 * Gets a node where the pattern of this node is parsed as a part of
 * a regular expression.
 */
abstract DataFlow::Node getAParse();
/**
 * Gets the root term of the regular expression parsed from this pattern.
 */
abstract RegExpTerm getRegExpTerm();
}
/**
 * A regular expression literal, seen as the source of its own pattern.
 */
private class RegExpLiteralPatternSource extends RegExpPatternSource {
  private AST::RegExpLiteral astNode;

  RegExpLiteralPatternSource() { this.asExpr().getExpr() = astNode }

  /** A literal is parsed where it occurs, so the parse node is this node itself. */
  override DataFlow::Node getAParse() { this = result }

  /** Gets the parsed form of the underlying literal. */
  override RegExpTerm getRegExpTerm() { astNode.getParsed() = result }
}
/**
 * A node whose string value may flow to a position where it is interpreted
 * as a part of a regular expression, as determined by `regExpSource`.
 */
private class StringRegExpPatternSource extends RegExpPatternSource {
  /** The node that `regExpSource` pairs with this source. */
  private DataFlow::Node parse;

  StringRegExpPatternSource() { regExpSource(parse) = this }

  override DataFlow::Node getAParse() { parse = result }

  // NOTE(review): this yields every term whose regular expression is the
  // expression of this node, not only the root term - confirm whether a
  // restriction to the root term is intended here.
  override RegExpTerm getRegExpTerm() { this.asExpr().getExpr() = result.getRegExp() }
}

View File

@@ -1,2 +1,2 @@
import codeql.ruby.security.performance.RegExpTreeView
import codeql.ruby.Regexp
import codeql.ruby.DataFlow

View File

@@ -16,7 +16,7 @@
import codeql.ruby.security.performance.ExponentialBackTracking
import codeql.ruby.security.performance.ReDoSUtil
import codeql.ruby.security.performance.RegExpTreeView
import codeql.ruby.Regexp
from RegExpTerm t, string pump, State s, string prefixMsg
where hasReDoSResult(t, pump, s, prefixMsg)

View File

@@ -3,9 +3,9 @@
*/
import codeql.Locations
import codeql.ruby.security.performance.RegExpTreeView as RETV
import codeql.ruby.Regexp as RE
query predicate nodes(RETV::RegExpTerm n, string attr, string val) {
query predicate nodes(RE::RegExpTerm n, string attr, string val) {
attr = "semmle.label" and
val = "[" + concat(n.getAPrimaryQlClass(), ", ") + "] " + n.toString()
or
@@ -13,7 +13,7 @@ query predicate nodes(RETV::RegExpTerm n, string attr, string val) {
val =
any(int i |
n =
rank[i](RETV::RegExpTerm t, string fp, int sl, int sc, int el, int ec |
rank[i](RE::RegExpTerm t, string fp, int sl, int sc, int el, int ec |
t.hasLocationInfo(fp, sl, sc, el, ec)
|
t order by fp, sl, sc, el, ec, t.toString()
@@ -21,7 +21,7 @@ query predicate nodes(RETV::RegExpTerm n, string attr, string val) {
).toString()
}
query predicate edges(RETV::RegExpTerm pred, RETV::RegExpTerm succ, string attr, string val) {
query predicate edges(RE::RegExpTerm pred, RE::RegExpTerm succ, string attr, string val) {
attr in ["semmle.label", "semmle.order"] and
val = any(int i | succ = pred.getChild(i)).toString()
}

View File

@@ -1,4 +1,4 @@
import codeql.ruby.security.performance.RegExpTreeView
import codeql.ruby.Regexp
query predicate groupName(RegExpGroup g, string name) { name = g.getName() }

View File

@@ -33,7 +33,9 @@
| tst.rb:137:11:137:17 | (\\w\|G)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'G'. |
| tst.rb:143:11:143:18 | (\\d\|\\w)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
| tst.rb:146:11:146:17 | (\\d\|5)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '5'. |
| tst.rb:155:11:155:20 | (\\f\|[\\f])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'f'. |
| tst.rb:149:11:149:20 | (\\s\|[\\f])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\u000c'. |
| tst.rb:152:11:152:24 | (\\s\|[\\v]\|\\\\v)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\u000b'. |
| tst.rb:155:11:155:20 | (\\f\|[\\f])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\u000c'. |
| tst.rb:158:11:158:18 | (\\W\|\\D)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ' '. |
| tst.rb:161:11:161:18 | (\\S\|\\w)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
| tst.rb:164:11:164:20 | (\\S\|[\\w])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |