From 32c10885d491242dac2018f7ab8f4ba13e7f10c6 Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Thu, 20 Jul 2023 11:43:11 +0100 Subject: [PATCH 1/8] Java: Add test case. --- java/ql/test/query-tests/security/CWE-730/ExpRedosTest.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/java/ql/test/query-tests/security/CWE-730/ExpRedosTest.java b/java/ql/test/query-tests/security/CWE-730/ExpRedosTest.java index e7e876cb696..51eec6e74e7 100644 --- a/java/ql/test/query-tests/security/CWE-730/ExpRedosTest.java +++ b/java/ql/test/query-tests/security/CWE-730/ExpRedosTest.java @@ -86,6 +86,9 @@ class ExpRedosTest { // NOT GOOD; attack: "\n".repeat(100) + "." "(?s)(.|\\n)*!", // $ hasExpRedos + // NOT GOOD; attack: "\n".repeat(100) + "." + "(?is)(.|\\n)*!", // $ MISSING: hasExpRedos + // GOOD "([\\w.]+)*", @@ -120,7 +123,7 @@ class ExpRedosTest { "\"((?:\\\\[\\x00-\\x7f]|[^\\x00-\\x08\\x0a-\\x1f\\x7f\"])*)\"", // $ MISSING: hasExpRedos // GOOD - "\"((?:\\\\[\\x00-\\x7f]|[^\\x00-\\x08\\x0a-\\x1f\\x7f\"\\\\])*)\"", + "\"((?:\\\\[\\x00-\\x7f]|[^\\x00-\\x08\\x0a-\\x1f\\x7f\"\\\\])*)\"", // NOT GOOD "(([a-z]|[d-h])*)\"", // $ hasExpRedos From 369f88beda15c5b8f201e75cfc804118bb8487d6 Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Thu, 20 Jul 2023 11:41:10 +0100 Subject: [PATCH 2/8] Java: Fix for multiple parse mode flags. --- java/ql/lib/semmle/code/java/regex/regex.qll | 19 +++++++++++++------ .../security/CWE-730/ExpRedosTest.java | 2 +- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/java/ql/lib/semmle/code/java/regex/regex.qll b/java/ql/lib/semmle/code/java/regex/regex.qll index aca51b74a03..7c24892ff2e 100644 --- a/java/ql/lib/semmle/code/java/regex/regex.qll +++ b/java/ql/lib/semmle/code/java/regex/regex.qll @@ -472,12 +472,19 @@ abstract class RegexString extends StringLiteral { ) } - private predicate flagGroupStart(int start, int end, string c) { + private predicate flagGroupStart(int start, int end) { this.isGroupStart(start) and this.getChar(start + 1) = "?" and - end = start + 3 and - c = this.getChar(start + 2) and - c in ["i", "m", "s", "u", "x", "U"] + this.getChar(start + 2) in ["i", "m", "s", "u", "x", "U"] and + end = start + 2 + } + + private predicate flagGroup(int start, int end, string c) { + exists(int inStart, int inEnd | + this.flagGroupStart(start, inStart) and + this.groupContents(start, end, inStart, inEnd) and + this.getChar([inStart .. inEnd - 1]) = c + ) } /** @@ -485,7 +492,7 @@ abstract class RegexString extends StringLiteral { * it is defined by a prefix. */ string getModeFromPrefix() { - exists(string c | this.flagGroupStart(_, _, c) | + exists(string c | this.flagGroup(_, _, c) | c = "i" and result = "IGNORECASE" or c = "m" and result = "MULTILINE" @@ -540,7 +547,7 @@ abstract class RegexString extends StringLiteral { private predicate groupStart(int start, int end) { this.nonCapturingGroupStart(start, end) or - this.flagGroupStart(start, end, _) + this.flagGroupStart(start, end) or this.namedGroupStart(start, end) or diff --git a/java/ql/test/query-tests/security/CWE-730/ExpRedosTest.java b/java/ql/test/query-tests/security/CWE-730/ExpRedosTest.java index 51eec6e74e7..4a31060850b 100644 --- a/java/ql/test/query-tests/security/CWE-730/ExpRedosTest.java +++ b/java/ql/test/query-tests/security/CWE-730/ExpRedosTest.java @@ -87,7 +87,7 @@ class ExpRedosTest { "(?s)(.|\\n)*!", // $ hasExpRedos // NOT GOOD; attack: "\n".repeat(100) + "." - "(?is)(.|\\n)*!", // $ MISSING: hasExpRedos + "(?is)(.|\\n)*!", // $ hasExpRedos // GOOD "([\\w.]+)*", From 80cb386ffd43e4ef3e20323282ee5dc4f1b90fff Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Thu, 20 Jul 2023 11:52:04 +0100 Subject: [PATCH 3/8] Java: Change note. --- java/ql/lib/change-notes/2023-07-20-regex-parse-modes.md | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 java/ql/lib/change-notes/2023-07-20-regex-parse-modes.md diff --git a/java/ql/lib/change-notes/2023-07-20-regex-parse-modes.md b/java/ql/lib/change-notes/2023-07-20-regex-parse-modes.md new file mode 100644 index 00000000000..2d676227491 --- /dev/null +++ b/java/ql/lib/change-notes/2023-07-20-regex-parse-modes.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* Regular expressions containing multiple parse mode flags are now interpretted correctly. For example `"(?is)abc.*"` with both the `i` and `s` flags. From 45a9d5bc7d834fe1026e227101971bfc8791c02b Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Thu, 20 Jul 2023 11:53:52 +0100 Subject: [PATCH 4/8] Java: QLDoc. --- java/ql/lib/semmle/code/java/regex/regex.qll | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/java/ql/lib/semmle/code/java/regex/regex.qll b/java/ql/lib/semmle/code/java/regex/regex.qll index 7c24892ff2e..1533f549f89 100644 --- a/java/ql/lib/semmle/code/java/regex/regex.qll +++ b/java/ql/lib/semmle/code/java/regex/regex.qll @@ -472,6 +472,9 @@ abstract class RegexString extends StringLiteral { ) } + /** + * Holds if a parse mode starts between `start` and `end`. + */ private predicate flagGroupStart(int start, int end) { this.isGroupStart(start) and this.getChar(start + 1) = "?" and @@ -479,6 +482,13 @@ abstract class RegexString extends StringLiteral { end = start + 2 } + /** + * Holds if a parse mode group is between `start` and `end`, and includes the + * mode flag `c`. For example the following span, with mode flag `i`: + * ``` + * (?i) + * ``` + */ private predicate flagGroup(int start, int end, string c) { exists(int inStart, int inEnd | this.flagGroupStart(start, inStart) and From 657642a1228170c61ed5c263332d9de2f2281e0d Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Mon, 14 Aug 2023 14:12:07 +0100 Subject: [PATCH 5/8] Java: Expose parts of the vquery message in the test. --- .../security/CWE-730/ExpRedosTest.java | 5 +++- .../query-tests/security/CWE-730/ReDoS.ql | 23 ++++++++++++++++++- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/java/ql/test/query-tests/security/CWE-730/ExpRedosTest.java b/java/ql/test/query-tests/security/CWE-730/ExpRedosTest.java index 4a31060850b..28742c161e6 100644 --- a/java/ql/test/query-tests/security/CWE-730/ExpRedosTest.java +++ b/java/ql/test/query-tests/security/CWE-730/ExpRedosTest.java @@ -431,7 +431,10 @@ class ExpRedosTest { "(a*)*b", // $ hasExpRedos // BAD - but not detected due to the way possessive quantifiers are approximated - "((aa|a*+)b)*c" // $ MISSING: hasExpRedos + "((aa|a*+)b)*c", // $ MISSING: hasExpRedos + + // BAD - testsing + "(?is)(a|aa?)*b" // $ hasExpRedos hasPrefixMsg="starting with 'is' and " hasPump=a }; void test() { diff --git a/java/ql/test/query-tests/security/CWE-730/ReDoS.ql b/java/ql/test/query-tests/security/CWE-730/ReDoS.ql index 97719364f94..4011946318e 100644 --- a/java/ql/test/query-tests/security/CWE-730/ReDoS.ql +++ b/java/ql/test/query-tests/security/CWE-730/ReDoS.ql @@ -4,8 +4,13 @@ private import semmle.code.java.regex.RegexTreeView::RegexTreeView as TreeView import codeql.regex.nfa.ExponentialBackTracking::Make as ExponentialBackTracking import semmle.code.java.regex.regex +bindingset[s] +string quote(string s) { if s.matches("% %") then result = "\"" + s + "\"" else result = s } + module HasExpRedos implements TestSig { - string getARelevantTag() { result = ["hasExpRedos", "hasParseFailure"] } + string getARelevantTag() { + result = ["hasExpRedos", "hasParseFailure", "hasPump", "hasPrefixMsg"] + } predicate hasActualResult(Location location, string element, string tag, string value) { tag = "hasExpRedos" and @@ -25,6 +30,22 @@ module HasExpRedos implements TestSig { element = r.toString() ) } + + predicate hasOptionalResult(Location location, string element, string tag, string value) { + exists(TreeView::RegExpTerm t, Regex r, string pump, string prefixMsg | + ExponentialBackTracking::hasReDoSResult(t, pump, _, prefixMsg) and + t.occursInRegex(r, _, _) and + ( + tag = "hasPrefixMsg" and + value = quote(prefixMsg) + or + tag = "hasPump" and + value = pump + ) and + location = r.getLocation() and + element = r.toString() + ) + } } import MakeTest From 8c3e778be65d7c1b01cd732c64c746f89f359c55 Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Tue, 12 Sep 2023 23:29:34 +0100 Subject: [PATCH 6/8] Java: Port regex mode flag character fix from Python. --- java/ql/lib/semmle/code/java/regex/regex.qll | 39 ++++++++++++++----- .../security/CWE-730/ExpRedosTest.java | 2 +- 2 files changed, 30 insertions(+), 11 deletions(-) diff --git a/java/ql/lib/semmle/code/java/regex/regex.qll b/java/ql/lib/semmle/code/java/regex/regex.qll index 1533f549f89..a131ac0deb5 100644 --- a/java/ql/lib/semmle/code/java/regex/regex.qll +++ b/java/ql/lib/semmle/code/java/regex/regex.qll @@ -473,9 +473,10 @@ abstract class RegexString extends StringLiteral { } /** - * Holds if a parse mode starts between `start` and `end`. + * Holds if the initial part of a parse mode, not containing any + * mode characters is between `start` and `end`. */ - private predicate flagGroupStart(int start, int end) { + private predicate flagGroupStartNoModes(int start, int end) { this.isGroupStart(start) and this.getChar(start + 1) = "?" and this.getChar(start + 2) in ["i", "m", "s", "u", "x", "U"] and @@ -483,17 +484,35 @@ abstract class RegexString extends StringLiteral { } /** - * Holds if a parse mode group is between `start` and `end`, and includes the - * mode flag `c`. For example the following span, with mode flag `i`: + * Holds if `pos` contains a mode character from the + * flag group starting at `start`. + */ + private predicate modeCharacter(int start, int pos) { + this.flagGroupStartNoModes(start, pos) + or + this.modeCharacter(start, pos - 1) and + this.getChar(pos) in ["i", "m", "s", "u", "x", "U"] + } + + /** + * Holds if a parse mode group is between `start` and `end`. + */ + private predicate flagGroupStart(int start, int end) { + this.flagGroupStartNoModes(start, _) and + end = max(int i | this.modeCharacter(start, i) | i + 1) + } + + /** + * Holds if a parse mode group of this regex includes the mode flag `c`. + * For example the following parse mode group, with mode flag `i`: * ``` * (?i) * ``` */ - private predicate flagGroup(int start, int end, string c) { - exists(int inStart, int inEnd | - this.flagGroupStart(start, inStart) and - this.groupContents(start, end, inStart, inEnd) and - this.getChar([inStart .. inEnd - 1]) = c + private predicate flag(string c) { + exists(int pos | + this.modeCharacter(_, pos) and + this.getChar(pos) = c ) } @@ -502,7 +521,7 @@ abstract class RegexString extends StringLiteral { * it is defined by a prefix. */ string getModeFromPrefix() { - exists(string c | this.flagGroup(_, _, c) | + exists(string c | this.flag(c) | c = "i" and result = "IGNORECASE" or c = "m" and result = "MULTILINE" diff --git a/java/ql/test/query-tests/security/CWE-730/ExpRedosTest.java b/java/ql/test/query-tests/security/CWE-730/ExpRedosTest.java index 28742c161e6..c9e66e69f59 100644 --- a/java/ql/test/query-tests/security/CWE-730/ExpRedosTest.java +++ b/java/ql/test/query-tests/security/CWE-730/ExpRedosTest.java @@ -434,7 +434,7 @@ class ExpRedosTest { "((aa|a*+)b)*c", // $ MISSING: hasExpRedos // BAD - testsing - "(?is)(a|aa?)*b" // $ hasExpRedos hasPrefixMsg="starting with 'is' and " hasPump=a + "(?is)(a|aa?)*b" // $ hasExpRedos hasPrefixMsg= hasPump=a }; void test() { From 1c81bd52e61175e56ed9b114f1fccc365926c75f Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Wed, 13 Sep 2023 09:21:52 +0100 Subject: [PATCH 7/8] Java: Change note. --- java/ql/lib/change-notes/2023-09-12-regex-mode-flag-groups.md | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 java/ql/lib/change-notes/2023-09-12-regex-mode-flag-groups.md diff --git a/java/ql/lib/change-notes/2023-09-12-regex-mode-flag-groups.md b/java/ql/lib/change-notes/2023-09-12-regex-mode-flag-groups.md new file mode 100644 index 00000000000..d13350726a8 --- /dev/null +++ b/java/ql/lib/change-notes/2023-09-12-regex-mode-flag-groups.md @@ -0,0 +1,4 @@ +--- +category: fix +--- +* The regular expressions library no longer incorrectly matches mode flag characters against the input. From af3d8c88bb9f7f8274b94c5cc69dfaa522a7a76b Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Wed, 13 Sep 2023 17:58:31 +0100 Subject: [PATCH 8/8] Java: Fix test comment. --- java/ql/test/query-tests/security/CWE-730/ExpRedosTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/ql/test/query-tests/security/CWE-730/ExpRedosTest.java b/java/ql/test/query-tests/security/CWE-730/ExpRedosTest.java index c9e66e69f59..56017ee12e8 100644 --- a/java/ql/test/query-tests/security/CWE-730/ExpRedosTest.java +++ b/java/ql/test/query-tests/security/CWE-730/ExpRedosTest.java @@ -433,7 +433,7 @@ class ExpRedosTest { // BAD - but not detected due to the way possessive quantifiers are approximated "((aa|a*+)b)*c", // $ MISSING: hasExpRedos - // BAD - testsing + // BAD - testing mode flag groups "(?is)(a|aa?)*b" // $ hasExpRedos hasPrefixMsg= hasPump=a };