diff --git a/java/ql/lib/change-notes/2023-07-20-regex-parse-modes.md b/java/ql/lib/change-notes/2023-07-20-regex-parse-modes.md new file mode 100644 index 00000000000..2d676227491 --- /dev/null +++ b/java/ql/lib/change-notes/2023-07-20-regex-parse-modes.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* Regular expressions containing multiple parse mode flags are now interpretted correctly. For example `"(?is)abc.*"` with both the `i` and `s` flags. diff --git a/java/ql/lib/change-notes/2023-09-12-regex-mode-flag-groups.md b/java/ql/lib/change-notes/2023-09-12-regex-mode-flag-groups.md new file mode 100644 index 00000000000..d13350726a8 --- /dev/null +++ b/java/ql/lib/change-notes/2023-09-12-regex-mode-flag-groups.md @@ -0,0 +1,4 @@ +--- +category: fix +--- +* The regular expressions library no longer incorrectly matches mode flag characters against the input. diff --git a/java/ql/lib/semmle/code/java/regex/regex.qll b/java/ql/lib/semmle/code/java/regex/regex.qll index aca51b74a03..a131ac0deb5 100644 --- a/java/ql/lib/semmle/code/java/regex/regex.qll +++ b/java/ql/lib/semmle/code/java/regex/regex.qll @@ -472,12 +472,48 @@ abstract class RegexString extends StringLiteral { ) } - private predicate flagGroupStart(int start, int end, string c) { + /** + * Holds if the initial part of a parse mode, not containing any + * mode characters is between `start` and `end`. + */ + private predicate flagGroupStartNoModes(int start, int end) { this.isGroupStart(start) and this.getChar(start + 1) = "?" and - end = start + 3 and - c = this.getChar(start + 2) and - c in ["i", "m", "s", "u", "x", "U"] + this.getChar(start + 2) in ["i", "m", "s", "u", "x", "U"] and + end = start + 2 + } + + /** + * Holds if `pos` contains a mode character from the + * flag group starting at `start`. + */ + private predicate modeCharacter(int start, int pos) { + this.flagGroupStartNoModes(start, pos) + or + this.modeCharacter(start, pos - 1) and + this.getChar(pos) in ["i", "m", "s", "u", "x", "U"] + } + + /** + * Holds if a parse mode group is between `start` and `end`. + */ + private predicate flagGroupStart(int start, int end) { + this.flagGroupStartNoModes(start, _) and + end = max(int i | this.modeCharacter(start, i) | i + 1) + } + + /** + * Holds if a parse mode group of this regex includes the mode flag `c`. + * For example the following parse mode group, with mode flag `i`: + * ``` + * (?i) + * ``` + */ + private predicate flag(string c) { + exists(int pos | + this.modeCharacter(_, pos) and + this.getChar(pos) = c + ) } /** @@ -485,7 +521,7 @@ abstract class RegexString extends StringLiteral { * it is defined by a prefix. */ string getModeFromPrefix() { - exists(string c | this.flagGroupStart(_, _, c) | + exists(string c | this.flag(c) | c = "i" and result = "IGNORECASE" or c = "m" and result = "MULTILINE" @@ -540,7 +576,7 @@ abstract class RegexString extends StringLiteral { private predicate groupStart(int start, int end) { this.nonCapturingGroupStart(start, end) or - this.flagGroupStart(start, end, _) + this.flagGroupStart(start, end) or this.namedGroupStart(start, end) or diff --git a/java/ql/test/query-tests/security/CWE-730/ExpRedosTest.java b/java/ql/test/query-tests/security/CWE-730/ExpRedosTest.java index e7e876cb696..56017ee12e8 100644 --- a/java/ql/test/query-tests/security/CWE-730/ExpRedosTest.java +++ b/java/ql/test/query-tests/security/CWE-730/ExpRedosTest.java @@ -86,6 +86,9 @@ class ExpRedosTest { // NOT GOOD; attack: "\n".repeat(100) + "." "(?s)(.|\\n)*!", // $ hasExpRedos + // NOT GOOD; attack: "\n".repeat(100) + "." + "(?is)(.|\\n)*!", // $ hasExpRedos + // GOOD "([\\w.]+)*", @@ -120,7 +123,7 @@ class ExpRedosTest { "\"((?:\\\\[\\x00-\\x7f]|[^\\x00-\\x08\\x0a-\\x1f\\x7f\"])*)\"", // $ MISSING: hasExpRedos // GOOD - "\"((?:\\\\[\\x00-\\x7f]|[^\\x00-\\x08\\x0a-\\x1f\\x7f\"\\\\])*)\"", + "\"((?:\\\\[\\x00-\\x7f]|[^\\x00-\\x08\\x0a-\\x1f\\x7f\"\\\\])*)\"", // NOT GOOD "(([a-z]|[d-h])*)\"", // $ hasExpRedos @@ -428,7 +431,10 @@ class ExpRedosTest { "(a*)*b", // $ hasExpRedos // BAD - but not detected due to the way possessive quantifiers are approximated - "((aa|a*+)b)*c" // $ MISSING: hasExpRedos + "((aa|a*+)b)*c", // $ MISSING: hasExpRedos + + // BAD - testing mode flag groups + "(?is)(a|aa?)*b" // $ hasExpRedos hasPrefixMsg= hasPump=a }; void test() { diff --git a/java/ql/test/query-tests/security/CWE-730/ReDoS.ql b/java/ql/test/query-tests/security/CWE-730/ReDoS.ql index 97719364f94..4011946318e 100644 --- a/java/ql/test/query-tests/security/CWE-730/ReDoS.ql +++ b/java/ql/test/query-tests/security/CWE-730/ReDoS.ql @@ -4,8 +4,13 @@ private import semmle.code.java.regex.RegexTreeView::RegexTreeView as TreeView import codeql.regex.nfa.ExponentialBackTracking::Make as ExponentialBackTracking import semmle.code.java.regex.regex +bindingset[s] +string quote(string s) { if s.matches("% %") then result = "\"" + s + "\"" else result = s } + module HasExpRedos implements TestSig { - string getARelevantTag() { result = ["hasExpRedos", "hasParseFailure"] } + string getARelevantTag() { + result = ["hasExpRedos", "hasParseFailure", "hasPump", "hasPrefixMsg"] + } predicate hasActualResult(Location location, string element, string tag, string value) { tag = "hasExpRedos" and @@ -25,6 +30,22 @@ module HasExpRedos implements TestSig { element = r.toString() ) } + + predicate hasOptionalResult(Location location, string element, string tag, string value) { + exists(TreeView::RegExpTerm t, Regex r, string pump, string prefixMsg | + ExponentialBackTracking::hasReDoSResult(t, pump, _, prefixMsg) and + t.occursInRegex(r, _, _) and + ( + tag = "hasPrefixMsg" and + value = quote(prefixMsg) + or + tag = "hasPump" and + value = pump + ) and + location = r.getLocation() and + element = r.toString() + ) + } } import MakeTest