From 8c3e778be65d7c1b01cd732c64c746f89f359c55 Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Tue, 12 Sep 2023 23:29:34 +0100 Subject: [PATCH] Java: Port regex mode flag character fix from Python. --- java/ql/lib/semmle/code/java/regex/regex.qll | 39 ++++++++++++++----- .../security/CWE-730/ExpRedosTest.java | 2 +- 2 files changed, 30 insertions(+), 11 deletions(-) diff --git a/java/ql/lib/semmle/code/java/regex/regex.qll b/java/ql/lib/semmle/code/java/regex/regex.qll index 1533f549f89..a131ac0deb5 100644 --- a/java/ql/lib/semmle/code/java/regex/regex.qll +++ b/java/ql/lib/semmle/code/java/regex/regex.qll @@ -473,9 +473,10 @@ abstract class RegexString extends StringLiteral { } /** - * Holds if a parse mode starts between `start` and `end`. + * Holds if the initial part of a parse mode, not containing any + * mode characters, is between `start` and `end`. */ - private predicate flagGroupStart(int start, int end) { + private predicate flagGroupStartNoModes(int start, int end) { this.isGroupStart(start) and this.getChar(start + 1) = "?" and this.getChar(start + 2) in ["i", "m", "s", "u", "x", "U"] and @@ -483,17 +484,35 @@ abstract class RegexString extends StringLiteral { } /** - * Holds if a parse mode group is between `start` and `end`, and includes the - * mode flag `c`. For example the following span, with mode flag `i`: + * Holds if `pos` contains a mode character from the + * flag group starting at `start`. + */ + private predicate modeCharacter(int start, int pos) { + this.flagGroupStartNoModes(start, pos) + or + this.modeCharacter(start, pos - 1) and + this.getChar(pos) in ["i", "m", "s", "u", "x", "U"] + } + + /** + * Holds if a parse mode group is between `start` and `end`.
+ */ + private predicate flagGroupStart(int start, int end) { + this.flagGroupStartNoModes(start, _) and + end = max(int i | this.modeCharacter(start, i) | i + 1) + } + + /** + * Holds if a parse mode group of this regex includes the mode flag `c`. + * For example the following parse mode group, with mode flag `i`: * ``` * (?i) * ``` */ - private predicate flagGroup(int start, int end, string c) { - exists(int inStart, int inEnd | - this.flagGroupStart(start, inStart) and - this.groupContents(start, end, inStart, inEnd) and - this.getChar([inStart .. inEnd - 1]) = c + private predicate flag(string c) { + exists(int pos | + this.modeCharacter(_, pos) and + this.getChar(pos) = c ) } @@ -502,7 +521,7 @@ abstract class RegexString extends StringLiteral { * it is defined by a prefix. */ string getModeFromPrefix() { - exists(string c | this.flagGroup(_, _, c) | + exists(string c | this.flag(c) | c = "i" and result = "IGNORECASE" or c = "m" and result = "MULTILINE" diff --git a/java/ql/test/query-tests/security/CWE-730/ExpRedosTest.java b/java/ql/test/query-tests/security/CWE-730/ExpRedosTest.java index 28742c161e6..c9e66e69f59 100644 --- a/java/ql/test/query-tests/security/CWE-730/ExpRedosTest.java +++ b/java/ql/test/query-tests/security/CWE-730/ExpRedosTest.java @@ -434,7 +434,7 @@ class ExpRedosTest { "((aa|a*+)b)*c", // $ MISSING: hasExpRedos // BAD - testsing - "(?is)(a|aa?)*b" // $ hasExpRedos hasPrefixMsg="starting with 'is' and " hasPump=a + "(?is)(a|aa?)*b" // $ hasExpRedos hasPrefixMsg= hasPump=a }; void test() {