diff --git a/java/ql/lib/semmle/code/java/regex/RegexTreeView.qll b/java/ql/lib/semmle/code/java/regex/RegexTreeView.qll index 1b6013b26a0..89242eea4d6 100644 --- a/java/ql/lib/semmle/code/java/regex/RegexTreeView.qll +++ b/java/ql/lib/semmle/code/java/regex/RegexTreeView.qll @@ -40,6 +40,8 @@ newtype TRegExpParent = TRegExpSpecialChar(Regex re, int start, int end) { re.specialCharacter(start, end, _) } or /** A normal character */ TRegExpNormalChar(Regex re, int start, int end) { re.normalCharacter(start, end) } or + /** A quoted sequence */ + TRegExpQuote(Regex re, int start, int end) { re.quote(start, end) } or /** A back reference */ TRegExpBackRef(Regex re, int start, int end) { re.backreference(start, end) } @@ -107,6 +109,8 @@ class RegExpTerm extends RegExpParent { or this = TRegExpNormalChar(re, start, end) or + this = TRegExpQuote(re, start, end) + or this = TRegExpGroup(re, start, end) or this = TRegExpQuantifier(re, start, end) @@ -675,9 +679,34 @@ class RegExpNormalChar extends RegExpTerm, TRegExpNormalChar { override string getPrimaryQLClass() { result = "RegExpNormalChar" } } +/** + * A quoted sequence. + * + * Example: + * ``` + * \Qabc\E + * ``` + */ +class RegExpQuote extends RegExpTerm, TRegExpQuote { + string value; + + RegExpQuote() { + exists(int inner_start, int inner_end | + this = TRegExpQuote(re, start, end) and + re.quote(start, end, inner_start, inner_end) and + value = re.getText().substring(inner_start, inner_end) + ) + } + + /** Gets the string matched by this quote term. */ + string getValue() { result = value } + + override string getPrimaryQLClass() { result = "RegExpQuote" } +} + /** * A constant regular expression term, that is, a regular expression - * term matching a single string. Currently, this will always be a single character. + * term matching a single string. This can be a single character or a quoted sequence. * * Example: * @@ -689,14 +718,14 @@ class RegExpConstant extends RegExpTerm { string value; RegExpConstant() { - this = TRegExpNormalChar(re, start, end) and + (this = TRegExpNormalChar(re, start, end) or this = TRegExpQuote(re, start, end)) and not this instanceof RegExpCharacterClassEscape and // exclude chars in qualifiers // TODO: push this into regex library not exists(int qstart, int qend | re.qualifiedPart(_, qstart, qend, _, _) | qstart <= start and end <= qend ) and - value = this.(RegExpNormalChar).getValue() + (value = this.(RegExpNormalChar).getValue() or value = this.(RegExpQuote).getValue()) } /** diff --git a/java/ql/lib/semmle/code/java/regex/regex.qll b/java/ql/lib/semmle/code/java/regex/regex.qll index 4ad795cdd39..185a4981b00 100644 --- a/java/ql/lib/semmle/code/java/regex/regex.qll +++ b/java/ql/lib/semmle/code/java/regex/regex.qll @@ -189,13 +189,17 @@ abstract class RegexString extends Expr { } /** Holds if the character at `pos` is a "\" that is actually escaping what comes after. */ - predicate escapingChar(int pos) { this.escaping(pos) = true } + predicate escapingChar(int pos) { + this.escaping(pos) = true and + not exists(int x, int y | this.quote(x, y) and pos in [x .. y - 1]) + } /** * Helper predicate for `escapingChar`. * In order to avoid negative recusrion, we return a boolean. * This way, we can refer to `escaping(pos - 1).booleanNot()` * rather than to a negated version of `escaping(pos)`. + * Does not take into account escape characters inside quote sequences. */ private boolean escaping(int pos) { pos = -1 and result = false @@ -205,6 +209,53 @@ abstract class RegexString extends Expr { this.getChar(pos) != "\\" and result = false } + /** + * Helper predicate for `quoteSequence`. + * Holds if the char at `pos` could be the beginning of a quote delimiter, i.e. `\Q` (non-escaped) or `\E` (escaping not checked, as quote sequences turn off escapes). + * Result is `true` for `\Q` and `false` for `\E`. + */ + private boolean quote_delimiter(int pos) { + result = true and + this.escaping(pos) = true and + this.getChar(pos + 1) = "Q" + or + result = false and + this.getChar(pos) = "\\" and + this.getChar(pos + 1) = "E" + } + + /** + * Helper predicate for `quoteSequence`. + * Holds if the char at `pos` is the one-based `index`th occourence of a quote delimiter (`\Q` or `\E`) + * Result is `true` for `\Q` and `false` for `\E`. + */ + private boolean quote_delimiter(int index, int pos) { + result = this.quote_delimiter(pos) and + pos = rank[index](int p | this.quote_delimiter(p) = [true, false]) + } + + /** Holds if a quoted sequence is found between `start` and `end` */ + predicate quote(int start, int end) { this.quote(start, end, _, _) } + + /** Holds if a quoted sequence is fund between `start` and `end`, with ontent found between `inner_start` and `inner_end`. */ + predicate quote(int start, int end, int inner_start, int inner_end) { + exists(int index | + this.quote_delimiter(index, start) = true and + ( + index = 1 + or + this.quote_delimiter(index - 1, _) = false + ) and + inner_start = start + 2 and + inner_end = end - 2 and + inner_end > inner_start and + this.quote_delimiter(inner_end) = false and + not exists(int mid | + this.quote_delimiter(mid) = false and mid in [inner_start .. inner_end - 1] + ) + ) + } + /** Gets the text of this regex */ string getText() { result = this.(StringLiteral).getValue() } @@ -212,7 +263,8 @@ abstract class RegexString extends Expr { string nonEscapedCharAt(int i) { result = this.getText().charAt(i) and - not exists(int x, int y | this.escapedCharacter(x, y) and i in [x .. y - 1]) + not exists(int x, int y | this.escapedCharacter(x, y) and i in [x .. y - 1]) and + not exists(int x, int y | this.quote(x, y) and i in [x .. y - 1]) } private predicate isOptionDivider(int i) { this.nonEscapedCharAt(i) = "|" } @@ -728,7 +780,8 @@ abstract class RegexString extends Expr { this.character(start, _) or this.isGroupStart(start) or this.charSet(start, _) or - this.backreference(start, _) + this.backreference(start, _) or + this.quote(start, _) } private predicate item_end(int end) { @@ -739,6 +792,8 @@ abstract class RegexString extends Expr { this.charSet(_, end) or this.qualifier(_, end, _, _) + or + this.quote(_, end) } private predicate top_level(int start, int end) { @@ -846,6 +901,8 @@ abstract class RegexString extends Expr { this.qualifiedItem(start, end, _, _) or this.charSet(start, end) + or + this.quote(start, end) ) and this.firstPart(start, end) } @@ -861,6 +918,8 @@ abstract class RegexString extends Expr { this.qualifiedItem(start, end, _, _) or this.charSet(start, end) + or + this.quote(start, end) ) and this.lastPart(start, end) }