From 8270bf5bb9b350cacc0b8d497163f5fdef528069 Mon Sep 17 00:00:00 2001
From: Erik Krogh Kristensen
Date: Wed, 18 Nov 2020 11:55:49 +0100
Subject: [PATCH] make the character search skip unencodable characters

---
Reviewer notes (this region is not part of the commit message):
  * ReDoS.ql: adds RegexpCharacterConstant, the subset of RegExpConstant
    for which isCharacter() holds, and uses it in place of RegExpConstant
    at every site in this diff that reads getValue() / charAt(...) to
    build input symbols, NFA states, or transitions.
  * tst.js: adds good28 / good29, two regexes built from the escapes
    \uDC66..\uDC69 (unpaired low-surrogate code units) that are expected
    to stay unflagged ("GOOD").

 javascript/ql/src/Performance/ReDoS.ql        | 23 +++++++++++++------
 .../test/query-tests/Performance/ReDoS/tst.js |  8 ++++++-
 2 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/javascript/ql/src/Performance/ReDoS.ql b/javascript/ql/src/Performance/ReDoS.ql
index cc0516fb24a..3f782e437b0 100644
--- a/javascript/ql/src/Performance/ReDoS.ql
+++ b/javascript/ql/src/Performance/ReDoS.ql
@@ -122,6 +122,13 @@ class RegExpRepetition extends RegExpParent {
   }
 }
 
+/**
+ * A constant in a regular expression that represents valid Unicode character(s).
+ */
+class RegexpCharacterConstant extends RegExpConstant {
+  RegexpCharacterConstant() { this.isCharacter() }
+}
+
 /**
  * Gets the root containing the given term, that is, the root of the literal,
  * or a branch of the root disjunction.
@@ -136,7 +143,9 @@ RegExpRoot getRoot(RegExpTerm term) {
  */
 newtype TInputSymbol =
   /** An input symbol corresponding to character `c`. */
-  Char(string c) { c = any(RegExpConstant cc | getRoot(cc).isRelevant()).getValue().charAt(_) } or
+  Char(string c) {
+    c = any(RegexpCharacterConstant cc | getRoot(cc).isRelevant()).getValue().charAt(_)
+  } or
   /**
    * An input symbol representing all characters matched by
    * (non-universal) character class `recc`.
@@ -173,7 +182,7 @@ private predicate sharesRoot(TInputSymbol a, TInputSymbol b) {
  */
 private predicate belongsTo(TInputSymbol a, RegExpRoot root) {
   exists(RegExpTerm term | getRoot(term) = root |
-    a = Char(term.(RegExpConstant).getValue().charAt(_))
+    a = Char(term.(RegexpCharacterConstant).getValue().charAt(_))
     or
     a = CharClass(term)
   )
@@ -236,7 +245,7 @@ private module CharacterClasses {
   predicate hasChildThatMatches(RegExpCharacterClass cc, string char) {
     exists(CharClass(cc)) and
     exists(RegExpTerm child | child = cc.getAChild() |
-      char = child.(RegExpConstant).getValue()
+      char = child.(RegexpCharacterConstant).getValue()
       or
       rangeMatchesOnLetterOrDigits(child, char)
       or
@@ -300,7 +309,7 @@ private module CharacterClasses {
   private string getARelevantChar() {
     exists(ascii(result))
     or
-    exists(RegExpConstant c | result = c.getValue().charAt(_))
+    exists(RegexpCharacterConstant c | result = c.getValue().charAt(_))
     or
     classEscapeMatches(_, result)
   }
@@ -310,7 +319,7 @@ private module CharacterClasses {
    */
   private string getAMentionedChar(RegExpCharacterClass c) {
     exists(RegExpTerm child | child = c.getAChild() |
-      result = child.(RegExpConstant).getValue()
+      result = child.(RegexpCharacterConstant).getValue()
       or
       child.(RegExpCharacterRange).isRange(result, _)
       or
@@ -439,7 +448,7 @@ newtype TState =
     (
       i = 0
       or
-      exists(t.(RegExpConstant).getValue().charAt(i))
+      exists(t.(RegexpCharacterConstant).getValue().charAt(i))
     )
   } or
   Accept(RegExpRoot l) { l.isRelevant() }
@@ -511,7 +520,7 @@ State after(RegExpTerm t) {
  * Holds if the NFA has a transition from `q1` to `q2` labelled with `lbl`.
  */
 predicate delta(State q1, EdgeLabel lbl, State q2) {
-  exists(RegExpConstant s, int i |
+  exists(RegexpCharacterConstant s, int i |
     q1 = Match(s, i) and
     lbl = Char(s.getValue().charAt(i)) and
     (
diff --git a/javascript/ql/test/query-tests/Performance/ReDoS/tst.js b/javascript/ql/test/query-tests/Performance/ReDoS/tst.js
index 2e2585cf417..1c1c51b1d54 100644
--- a/javascript/ql/test/query-tests/Performance/ReDoS/tst.js
+++ b/javascript/ql/test/query-tests/Performance/ReDoS/tst.js
@@ -272,4 +272,10 @@ var bad60 = /(.thisisagoddamnlongstringforstresstestingthequery|\sthisisagoddamn
 var bad61 = /(thisisagoddamnlongstringforstresstestingthequery|this\w+query)*-/
 
 // GOOD
-var good27 = /(thisisagoddamnlongstringforstresstestingthequery|imanotherbutunrelatedstringcomparedtotheotherstring)*-/
\ No newline at end of file
+var good27 = /(thisisagoddamnlongstringforstresstestingthequery|imanotherbutunrelatedstringcomparedtotheotherstring)*-/
+
+// GOOD
+var good28 = /foo([\uDC66\uDC67]|[\uDC68\uDC69])*foo/
+
+// GOOD
+var good29 = /foo((\uDC66|\uDC67)|(\uDC68|\uDC69))*foo/
\ No newline at end of file