Split ReDoS query into .ql and .qll, and add .qhelp

2026-04-29 18:55:14 +02:00 · 2021-06-24 16:32:45 +01:00
parent c784e37089
commit a6dd2fa0a1
6 changed files with 101 additions and 22 deletions
--- a/ql/src/codeql_ruby/regexp/ExponentialBackTracking.qll
+++ b/ql/src/codeql_ruby/regexp/ExponentialBackTracking.qll
@@ -1,18 +1,3 @@
-/**
- * @name Inefficient regular expression
- * @description A regular expression that requires exponential time to match certain inputs
- *              can be a performance bottleneck, and may be vulnerable to denial-of-service
- *              attacks.
- * @kind problem
- * @problem.severity error
- * @precision high
- * @id rb/redos
- * @tags security
- *       external/cwe/cwe-1333
- *       external/cwe/cwe-730
- *       external/cwe/cwe-400
- */
-
 import ReDoSUtil

 /*
@@ -336,9 +321,3 @@ class ExponentialReDoSConfiguration extends ReDoSConfiguration {

  override predicate isReDoSCandidate(State state, string pump) { isPumpable(state, pump) }
 }
-
-from RegExpTerm t, string pump, State s, string prefixMsg
-where hasReDoSResult(t, pump, s, prefixMsg)
-select t,
-  "This part of the regular expression may cause exponential backtracking on strings " + prefixMsg +
-    "containing many repetitions of '" + pump + "'."
--- a/ql/src/codeql_ruby/regexp/ParseRegExp.qll
+++ b/ql/src/codeql_ruby/regexp/ParseRegExp.qll
@@ -75,9 +75,9 @@ class RegExp extends AST::RegExpLiteral {
      not this.charSetStart(_, start)
    |
      end = innerEnd + 1 and
-      innerEnd > innerStart and
      innerEnd =
        min(int e |
+          e > innerStart and
          this.nonEscapedCharAt(e) = "]" and
          not exists(int x, int y |
            this.posixStyleNamedCharacterProperty(x, y, _) and e >= x and e < y
--- a/ql/src/queries/security/cwe-1333/ReDoS.qhelp
+++ b/ql/src/queries/security/cwe-1333/ReDoS.qhelp
@@ -0,0 +1,28 @@
+<!DOCTYPE qhelp PUBLIC "-//Semmle//qhelp//EN" "qhelp.dtd">
+<qhelp>
+  <include src="ReDoSIntroduction.inc.qhelp" />
+  <example>
+    <p>Consider this regular expression:</p>
+    <sample language="ruby">
+      /^_(__|.)+_$/
+    </sample>
+    <p>
+      Its sub-expression <code>"(__|.)+"</code> can match the string
+      <code>"__"</code> either by the first alternative <code>"__"</code> to the
+      left of the <code>"|"</code> operator, or by two repetitions of the second
+      alternative <code>"."</code> to the right. Thus, a string consisting of an
+      odd number of underscores followed by some other character will cause the
+      regular expression engine to run for an exponential amount of time before
+      rejecting the input.
+    </p>
+    <p>
+      This problem can be avoided by rewriting the regular expression to remove
+      the ambiguity between the two branches of the alternative inside the
+      repetition:
+    </p>
+    <sample language="ruby">
+      /^_(__|[^_])+_$/
+    </sample>
+  </example>
+  <include src="ReDoSReferences.inc.qhelp"/>
+</qhelp>
--- a/ql/src/queries/security/cwe-1333/ReDoS.ql
+++ b/ql/src/queries/security/cwe-1333/ReDoS.ql
@@ -0,0 +1,22 @@
+/**
+ * @name Inefficient regular expression
+ * @description A regular expression that requires exponential time to match certain inputs
+ *              can be a performance bottleneck, and may be vulnerable to denial-of-service
+ *              attacks.
+ * @kind problem
+ * @problem.severity error
+ * @precision high
+ * @id rb/redos
+ * @tags security
+ *       external/cwe/cwe-1333
+ *       external/cwe/cwe-730
+ *       external/cwe/cwe-400
+ */
+
+import codeql_ruby.regexp.ExponentialBackTracking
+
+from RegExpTerm t, string pump, State s, string prefixMsg
+where hasReDoSResult(t, pump, s, prefixMsg)
+select t,
+  "This part of the regular expression may cause exponential backtracking on strings " + prefixMsg +
+    "containing many repetitions of '" + pump + "'."
--- a/ql/src/queries/security/cwe-1333/ReDoSIntroduction.inc.qhelp
+++ b/ql/src/queries/security/cwe-1333/ReDoSIntroduction.inc.qhelp
@@ -0,0 +1,37 @@
+<!DOCTYPE qhelp PUBLIC "-//Semmle//qhelp//EN" "qhelp.dtd">
+<qhelp>
+  <overview>
+    <p>
+      Some regular expressions take a long time to match certain input strings
+      to the point where the time it takes to match a string of length <i>n</i>
+      is proportional to <i>n<sup>k</sup></i> or even <i>2<sup>n</sup></i>.
+      Such regular expressions can negatively affect performance, or even allow
+      a malicious user to perform a Denial of Service ("DoS") attack by crafting
+      an expensive input string for the regular expression to match.
+    </p>
+    <p>
+      The regular expression engine used by the Ruby interpreter (MRI) uses
+      backtracking non-deterministic finite automata to implement regular
+      expression matching. While this approach is space-efficient and allows
+      supporting advanced features like capture groups, it is not time-efficient
+      in general. The worst-case time complexity of such an automaton can be
+      polynomial or even exponential, meaning that for strings of a certain
+      shape, increasing the input length by ten characters may make the
+      automaton about 1000 times slower.
+    </p>
+    <p>
+      Typically, a regular expression is affected by this problem if it contains
+      a repetition of the form <code>r*</code> or <code>r+</code> where the
+      sub-expression <code>r</code> is ambiguous in the sense that it can match
+      some string in multiple ways. More information about the precise
+      circumstances can be found in the references.
+    </p>
+  </overview>
+  <recommendation>
+    <p>
+      Modify the regular expression to remove the ambiguity, or ensure that the
+      strings matched with the regular expression are short enough that the
+      time-complexity does not matter.
+    </p>
+  </recommendation>
+</qhelp>
--- a/ql/src/queries/security/cwe-1333/ReDoSReferences.inc.qhelp
+++ b/ql/src/queries/security/cwe-1333/ReDoSReferences.inc.qhelp
@@ -0,0 +1,13 @@
+<!DOCTYPE qhelp PUBLIC "-//Semmle//qhelp//EN" "qhelp.dtd">
+<qhelp>
+  <references>
+    <li>OWASP:
+      <a href="https://owasp.org/www-community/attacks/Regular_expression_Denial_of_Service_-_ReDoS">Regular expression Denial of Service - ReDoS</a>.
+    </li>
+    <li>Wikipedia: <a href="https://en.wikipedia.org/wiki/ReDoS">ReDoS</a>.</li>
+    <li>Wikipedia: <a href="https://en.wikipedia.org/wiki/Time_complexity">Time complexity</a>.</li>
+    <li>James Kirrage, Asiri Rathnayake, Hayo Thielecke:
+      <a href="http://www.cs.bham.ac.uk/~hxt/research/reg-exp-sec.pdf">Static Analysis for Regular Expression Denial-of-Service Attacks</a>.
+    </li>
+  </references>
+</qhelp>