diff --git a/ql/src/codeql_ruby/regexp/ReDoS.ql b/ql/src/codeql_ruby/regexp/ExponentialBackTracking.qll similarity index 94% rename from ql/src/codeql_ruby/regexp/ReDoS.ql rename to ql/src/codeql_ruby/regexp/ExponentialBackTracking.qll index f9125d2b2cd..f490febf609 100644 --- a/ql/src/codeql_ruby/regexp/ReDoS.ql +++ b/ql/src/codeql_ruby/regexp/ExponentialBackTracking.qll @@ -1,18 +1,3 @@ -/** - * @name Inefficient regular expression - * @description A regular expression that requires exponential time to match certain inputs - * can be a performance bottleneck, and may be vulnerable to denial-of-service - * attacks. - * @kind problem - * @problem.severity error - * @precision high - * @id rb/redos - * @tags security - * external/cwe/cwe-1333 - * external/cwe/cwe-730 - * external/cwe/cwe-400 - */ - import ReDoSUtil /* @@ -336,9 +321,3 @@ class ExponentialReDoSConfiguration extends ReDoSConfiguration { override predicate isReDoSCandidate(State state, string pump) { isPumpable(state, pump) } } - -from RegExpTerm t, string pump, State s, string prefixMsg -where hasReDoSResult(t, pump, s, prefixMsg) -select t, - "This part of the regular expression may cause exponential backtracking on strings " + prefixMsg + - "containing many repetitions of '" + pump + "'." diff --git a/ql/src/codeql_ruby/regexp/ParseRegExp.qll b/ql/src/codeql_ruby/regexp/ParseRegExp.qll index ff52b04931c..841ebecd6c8 100644 --- a/ql/src/codeql_ruby/regexp/ParseRegExp.qll +++ b/ql/src/codeql_ruby/regexp/ParseRegExp.qll @@ -75,9 +75,9 @@ class RegExp extends AST::RegExpLiteral { not this.charSetStart(_, start) | end = innerEnd + 1 and - innerEnd > innerStart and innerEnd = min(int e | + e > innerStart and this.nonEscapedCharAt(e) = "]" and not exists(int x, int y | this.posixStyleNamedCharacterProperty(x, y, _) and e >= x and e < y diff --git a/ql/src/queries/security/cwe-1333/ReDoS.qhelp b/ql/src/queries/security/cwe-1333/ReDoS.qhelp new file mode 100644 index 00000000000..4c85702a0d3 --- /dev/null +++ b/ql/src/queries/security/cwe-1333/ReDoS.qhelp @@ -0,0 +1,28 @@ + + + + +

Consider this regular expression:

+ + /^_(__|.)+_$/ + +

+ Its sub-expression "(__|.)+?" can match the string + "__" either by the first alternative "__" to the + left of the "|" operator, or by two repetitions of the second + alternative "." to the right. Thus, a string consisting of an + odd number of underscores followed by some other character will cause the + regular expression engine to run for an exponential amount of time before + rejecting the input. +

+

+ This problem can be avoided by rewriting the regular expression to remove + the ambiguity between the two branches of the alternative inside the + repetition: +

+ + /^_(__|[^_])+_$/ + +
+ +
diff --git a/ql/src/queries/security/cwe-1333/ReDoS.ql b/ql/src/queries/security/cwe-1333/ReDoS.ql new file mode 100644 index 00000000000..322a18d78db --- /dev/null +++ b/ql/src/queries/security/cwe-1333/ReDoS.ql @@ -0,0 +1,22 @@ +/** + * @name Inefficient regular expression + * @description A regular expression that requires exponential time to match certain inputs + * can be a performance bottleneck, and may be vulnerable to denial-of-service + * attacks. + * @kind problem + * @problem.severity error + * @precision high + * @id rb/redos + * @tags security + * external/cwe/cwe-1333 + * external/cwe/cwe-730 + * external/cwe/cwe-400 + */ + +import codeql_ruby.regexp.ExponentialBackTracking + +from RegExpTerm t, string pump, State s, string prefixMsg +where hasReDoSResult(t, pump, s, prefixMsg) +select t, + "This part of the regular expression may cause exponential backtracking on strings " + prefixMsg + + "containing many repetitions of '" + pump + "'." diff --git a/ql/src/queries/security/cwe-1333/ReDoSIntroduction.inc.qhelp b/ql/src/queries/security/cwe-1333/ReDoSIntroduction.inc.qhelp new file mode 100644 index 00000000000..fd3c39bfaa7 --- /dev/null +++ b/ql/src/queries/security/cwe-1333/ReDoSIntroduction.inc.qhelp @@ -0,0 +1,37 @@ + + + +

+ Some regular expressions take a long time to match certain input strings + to the point where the time it takes to match a string of length n + is proportional to nk or even 2n. + Such regular expressions can negatively affect performance, or even allow + a malicious user to perform a Denial of Service ("DoS") attack by crafting + an expensive input string for the regular expression to match. +

+

+ The regular expression engine used by the Ruby interpreter (MRI) uses + backtracking non-deterministic finite automata to implement regular + expression matching. While this approach is space-efficient and allows + supporting advanced features like capture groups, it is not time-efficient + in general. The worst-case time complexity of such an automaton can be + polynomial or even exponential, meaning that for strings of a certain + shape, increasing the input length by ten characters may make the + automaton about 1000 times slower. +

+

+ Typically, a regular expression is affected by this problem if it contains + a repetition of the form r* or r+ where the + sub-expression r is ambiguous in the sense that it can match + some string in multiple ways. More information about the precise + circumstances can be found in the references. +

+
+ +

+ Modify the regular expression to remove the ambiguity, or ensure that the + strings matched with the regular expression are short enough that the + time-complexity does not matter. +

+
+
\ No newline at end of file diff --git a/ql/src/queries/security/cwe-1333/ReDoSReferences.inc.qhelp b/ql/src/queries/security/cwe-1333/ReDoSReferences.inc.qhelp new file mode 100644 index 00000000000..d8bb9eb9ee0 --- /dev/null +++ b/ql/src/queries/security/cwe-1333/ReDoSReferences.inc.qhelp @@ -0,0 +1,13 @@ + + + +
  • OWASP: + Regular expression Denial of Service - ReDoS. +
  • +
  • Wikipedia: ReDoS.
  • +
  • Wikipedia: Time complexity.
  • +
  • James Kirrage, Asiri Rathnayake, Hayo Thielecke: + Static Analysis for Regular Expression Denial-of-Service Attack. +
  • +
    +