From c38062ce93065137ac8730c0d642d572230839b8 Mon Sep 17 00:00:00 2001 From: erik-krogh Date: Fri, 2 Sep 2022 20:26:31 +0200 Subject: [PATCH 1/4] convert RelevantState to a class in the PrefixConstruction module --- .../code/java/security/regexp/NfaUtils.qll | 49 +++++++++---------- .../javascript/security/regexp/NfaUtils.qll | 49 +++++++++---------- .../python/security/regexp/NfaUtils.qll | 49 +++++++++---------- .../codeql/ruby/security/regexp/NfaUtils.qll | 49 +++++++++---------- 4 files changed, 88 insertions(+), 108 deletions(-) diff --git a/java/ql/lib/semmle/code/java/security/regexp/NfaUtils.qll b/java/ql/lib/semmle/code/java/security/regexp/NfaUtils.qll index 033b8aa8cfd..1c806c305ba 100644 --- a/java/ql/lib/semmle/code/java/security/regexp/NfaUtils.qll +++ b/java/ql/lib/semmle/code/java/security/regexp/NfaUtils.qll @@ -887,11 +887,10 @@ module PrefixConstruction { /** * Holds if `state` is the textually last start state for the regular expression. */ - private predicate lastStartState(State state) { + private predicate lastStartState(RelevantState state) { exists(RegExpRoot root | state = max(State s, Location l | - s = stateInRelevantRegexp() and isStartState(s) and getRoot(s.getRepr()) = root and l = s.getRepr().getLocation() @@ -963,10 +962,13 @@ module PrefixConstruction { min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next)) } - /** Gets a state within a regular expression that contains a candidate state. */ - pragma[noinline] - State stateInRelevantRegexp() { - exists(State s | isCandidate(s) | getRoot(s.getRepr()) = getRoot(result.getRepr())) + /** A state within a regular expression that contains a candidate state. */ + class RelevantState instanceof State { + RelevantState() { + exists(State s | isCandidate(s) | getRoot(s.getRepr()) = getRoot(this.getRepr())) + } + + string toString() { result = "RelevantState" } } } @@ -1007,6 +1009,8 @@ module ReDoSPruning { import PrefixConstruction as Prefix + class RelevantState = Prefix::RelevantState; + /** * Predicates for testing the presence of a rejecting suffix. * @@ -1040,32 +1044,26 @@ module ReDoSPruning { * This predicate might find impossible suffixes when searching for suffixes of length > 1, which can cause FPs. */ pragma[noinline] - private predicate isLikelyRejectable(State s) { - s = Prefix::stateInRelevantRegexp() and - ( - // exists a reject edge with some char. - hasRejectEdge(s) - or - hasEdgeToLikelyRejectable(s) - or - // stopping here is rejection - isRejectState(s) - ) + private predicate isLikelyRejectable(RelevantState s) { + // exists a reject edge with some char. + hasRejectEdge(s) + or + hasEdgeToLikelyRejectable(s) + or + // stopping here is rejection + isRejectState(s) } /** * Holds if `s` is not an accept state, and there is no epsilon transition to an accept state. */ - predicate isRejectState(State s) { - s = Prefix::stateInRelevantRegexp() and not epsilonSucc*(s) = Accept(_) - } + predicate isRejectState(RelevantState s) { not epsilonSucc*(s) = Accept(_) } /** * Holds if there is likely a non-empty suffix leading to rejection starting in `s`. */ pragma[noopt] - predicate hasEdgeToLikelyRejectable(State s) { - s = Prefix::stateInRelevantRegexp() and + predicate hasEdgeToLikelyRejectable(RelevantState s) { // all edges (at least one) with some char leads to another state that is rejectable. // the `next` states might not share a common suffix, which can cause FPs. exists(string char | char = hasEdgeToLikelyRejectableHelper(s) | @@ -1080,8 +1078,7 @@ module ReDoSPruning { * and `s` has not been found to be rejectable by `hasRejectEdge` or `isRejectState`. */ pragma[noinline] - private string hasEdgeToLikelyRejectableHelper(State s) { - s = Prefix::stateInRelevantRegexp() and + private string hasEdgeToLikelyRejectableHelper(RelevantState s) { not hasRejectEdge(s) and not isRejectState(s) and deltaClosedChar(s, result, _) @@ -1092,9 +1089,7 @@ module ReDoSPruning { * along epsilon edges, such that there is a transition from * `prev` to `next` that the character symbol `char`. */ - predicate deltaClosedChar(State prev, string char, State next) { - prev = Prefix::stateInRelevantRegexp() and - next = Prefix::stateInRelevantRegexp() and + predicate deltaClosedChar(RelevantState prev, string char, RelevantState next) { deltaClosed(prev, getAnInputSymbolMatchingRelevant(char), next) } diff --git a/javascript/ql/lib/semmle/javascript/security/regexp/NfaUtils.qll b/javascript/ql/lib/semmle/javascript/security/regexp/NfaUtils.qll index 033b8aa8cfd..1c806c305ba 100644 --- a/javascript/ql/lib/semmle/javascript/security/regexp/NfaUtils.qll +++ b/javascript/ql/lib/semmle/javascript/security/regexp/NfaUtils.qll @@ -887,11 +887,10 @@ module PrefixConstruction { /** * Holds if `state` is the textually last start state for the regular expression. */ - private predicate lastStartState(State state) { + private predicate lastStartState(RelevantState state) { exists(RegExpRoot root | state = max(State s, Location l | - s = stateInRelevantRegexp() and isStartState(s) and getRoot(s.getRepr()) = root and l = s.getRepr().getLocation() @@ -963,10 +962,13 @@ module PrefixConstruction { min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next)) } - /** Gets a state within a regular expression that contains a candidate state. */ - pragma[noinline] - State stateInRelevantRegexp() { - exists(State s | isCandidate(s) | getRoot(s.getRepr()) = getRoot(result.getRepr())) + /** A state within a regular expression that contains a candidate state. */ + class RelevantState instanceof State { + RelevantState() { + exists(State s | isCandidate(s) | getRoot(s.getRepr()) = getRoot(this.getRepr())) + } + + string toString() { result = "RelevantState" } } } @@ -1007,6 +1009,8 @@ module ReDoSPruning { import PrefixConstruction as Prefix + class RelevantState = Prefix::RelevantState; + /** * Predicates for testing the presence of a rejecting suffix. * @@ -1040,32 +1044,26 @@ module ReDoSPruning { * This predicate might find impossible suffixes when searching for suffixes of length > 1, which can cause FPs. */ pragma[noinline] - private predicate isLikelyRejectable(State s) { - s = Prefix::stateInRelevantRegexp() and - ( - // exists a reject edge with some char. - hasRejectEdge(s) - or - hasEdgeToLikelyRejectable(s) - or - // stopping here is rejection - isRejectState(s) - ) + private predicate isLikelyRejectable(RelevantState s) { + // exists a reject edge with some char. + hasRejectEdge(s) + or + hasEdgeToLikelyRejectable(s) + or + // stopping here is rejection + isRejectState(s) } /** * Holds if `s` is not an accept state, and there is no epsilon transition to an accept state. */ - predicate isRejectState(State s) { - s = Prefix::stateInRelevantRegexp() and not epsilonSucc*(s) = Accept(_) - } + predicate isRejectState(RelevantState s) { not epsilonSucc*(s) = Accept(_) } /** * Holds if there is likely a non-empty suffix leading to rejection starting in `s`. */ pragma[noopt] - predicate hasEdgeToLikelyRejectable(State s) { - s = Prefix::stateInRelevantRegexp() and + predicate hasEdgeToLikelyRejectable(RelevantState s) { // all edges (at least one) with some char leads to another state that is rejectable. // the `next` states might not share a common suffix, which can cause FPs. exists(string char | char = hasEdgeToLikelyRejectableHelper(s) | @@ -1080,8 +1078,7 @@ module ReDoSPruning { * and `s` has not been found to be rejectable by `hasRejectEdge` or `isRejectState`. */ pragma[noinline] - private string hasEdgeToLikelyRejectableHelper(State s) { - s = Prefix::stateInRelevantRegexp() and + private string hasEdgeToLikelyRejectableHelper(RelevantState s) { not hasRejectEdge(s) and not isRejectState(s) and deltaClosedChar(s, result, _) @@ -1092,9 +1089,7 @@ module ReDoSPruning { * along epsilon edges, such that there is a transition from * `prev` to `next` that the character symbol `char`. */ - predicate deltaClosedChar(State prev, string char, State next) { - prev = Prefix::stateInRelevantRegexp() and - next = Prefix::stateInRelevantRegexp() and + predicate deltaClosedChar(RelevantState prev, string char, RelevantState next) { deltaClosed(prev, getAnInputSymbolMatchingRelevant(char), next) } diff --git a/python/ql/lib/semmle/python/security/regexp/NfaUtils.qll b/python/ql/lib/semmle/python/security/regexp/NfaUtils.qll index 033b8aa8cfd..1c806c305ba 100644 --- a/python/ql/lib/semmle/python/security/regexp/NfaUtils.qll +++ b/python/ql/lib/semmle/python/security/regexp/NfaUtils.qll @@ -887,11 +887,10 @@ module PrefixConstruction { /** * Holds if `state` is the textually last start state for the regular expression. */ - private predicate lastStartState(State state) { + private predicate lastStartState(RelevantState state) { exists(RegExpRoot root | state = max(State s, Location l | - s = stateInRelevantRegexp() and isStartState(s) and getRoot(s.getRepr()) = root and l = s.getRepr().getLocation() @@ -963,10 +962,13 @@ module PrefixConstruction { min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next)) } - /** Gets a state within a regular expression that contains a candidate state. */ - pragma[noinline] - State stateInRelevantRegexp() { - exists(State s | isCandidate(s) | getRoot(s.getRepr()) = getRoot(result.getRepr())) + /** A state within a regular expression that contains a candidate state. */ + class RelevantState instanceof State { + RelevantState() { + exists(State s | isCandidate(s) | getRoot(s.getRepr()) = getRoot(this.getRepr())) + } + + string toString() { result = "RelevantState" } } } @@ -1007,6 +1009,8 @@ module ReDoSPruning { import PrefixConstruction as Prefix + class RelevantState = Prefix::RelevantState; + /** * Predicates for testing the presence of a rejecting suffix. * @@ -1040,32 +1044,26 @@ module ReDoSPruning { * This predicate might find impossible suffixes when searching for suffixes of length > 1, which can cause FPs. */ pragma[noinline] - private predicate isLikelyRejectable(State s) { - s = Prefix::stateInRelevantRegexp() and - ( - // exists a reject edge with some char. - hasRejectEdge(s) - or - hasEdgeToLikelyRejectable(s) - or - // stopping here is rejection - isRejectState(s) - ) + private predicate isLikelyRejectable(RelevantState s) { + // exists a reject edge with some char. + hasRejectEdge(s) + or + hasEdgeToLikelyRejectable(s) + or + // stopping here is rejection + isRejectState(s) } /** * Holds if `s` is not an accept state, and there is no epsilon transition to an accept state. */ - predicate isRejectState(State s) { - s = Prefix::stateInRelevantRegexp() and not epsilonSucc*(s) = Accept(_) - } + predicate isRejectState(RelevantState s) { not epsilonSucc*(s) = Accept(_) } /** * Holds if there is likely a non-empty suffix leading to rejection starting in `s`. */ pragma[noopt] - predicate hasEdgeToLikelyRejectable(State s) { - s = Prefix::stateInRelevantRegexp() and + predicate hasEdgeToLikelyRejectable(RelevantState s) { // all edges (at least one) with some char leads to another state that is rejectable. // the `next` states might not share a common suffix, which can cause FPs. exists(string char | char = hasEdgeToLikelyRejectableHelper(s) | @@ -1080,8 +1078,7 @@ module ReDoSPruning { * and `s` has not been found to be rejectable by `hasRejectEdge` or `isRejectState`. */ pragma[noinline] - private string hasEdgeToLikelyRejectableHelper(State s) { - s = Prefix::stateInRelevantRegexp() and + private string hasEdgeToLikelyRejectableHelper(RelevantState s) { not hasRejectEdge(s) and not isRejectState(s) and deltaClosedChar(s, result, _) @@ -1092,9 +1089,7 @@ module ReDoSPruning { * along epsilon edges, such that there is a transition from * `prev` to `next` that the character symbol `char`. */ - predicate deltaClosedChar(State prev, string char, State next) { - prev = Prefix::stateInRelevantRegexp() and - next = Prefix::stateInRelevantRegexp() and + predicate deltaClosedChar(RelevantState prev, string char, RelevantState next) { deltaClosed(prev, getAnInputSymbolMatchingRelevant(char), next) } diff --git a/ruby/ql/lib/codeql/ruby/security/regexp/NfaUtils.qll b/ruby/ql/lib/codeql/ruby/security/regexp/NfaUtils.qll index 033b8aa8cfd..1c806c305ba 100644 --- a/ruby/ql/lib/codeql/ruby/security/regexp/NfaUtils.qll +++ b/ruby/ql/lib/codeql/ruby/security/regexp/NfaUtils.qll @@ -887,11 +887,10 @@ module PrefixConstruction { /** * Holds if `state` is the textually last start state for the regular expression. */ - private predicate lastStartState(State state) { + private predicate lastStartState(RelevantState state) { exists(RegExpRoot root | state = max(State s, Location l | - s = stateInRelevantRegexp() and isStartState(s) and getRoot(s.getRepr()) = root and l = s.getRepr().getLocation() @@ -963,10 +962,13 @@ module PrefixConstruction { min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next)) } - /** Gets a state within a regular expression that contains a candidate state. */ - pragma[noinline] - State stateInRelevantRegexp() { - exists(State s | isCandidate(s) | getRoot(s.getRepr()) = getRoot(result.getRepr())) + /** A state within a regular expression that contains a candidate state. */ + class RelevantState instanceof State { + RelevantState() { + exists(State s | isCandidate(s) | getRoot(s.getRepr()) = getRoot(this.getRepr())) + } + + string toString() { result = "RelevantState" } } } @@ -1007,6 +1009,8 @@ module ReDoSPruning { import PrefixConstruction as Prefix + class RelevantState = Prefix::RelevantState; + /** * Predicates for testing the presence of a rejecting suffix. * @@ -1040,32 +1044,26 @@ module ReDoSPruning { * This predicate might find impossible suffixes when searching for suffixes of length > 1, which can cause FPs. */ pragma[noinline] - private predicate isLikelyRejectable(State s) { - s = Prefix::stateInRelevantRegexp() and - ( - // exists a reject edge with some char. - hasRejectEdge(s) - or - hasEdgeToLikelyRejectable(s) - or - // stopping here is rejection - isRejectState(s) - ) + private predicate isLikelyRejectable(RelevantState s) { + // exists a reject edge with some char. + hasRejectEdge(s) + or + hasEdgeToLikelyRejectable(s) + or + // stopping here is rejection + isRejectState(s) } /** * Holds if `s` is not an accept state, and there is no epsilon transition to an accept state. */ - predicate isRejectState(State s) { - s = Prefix::stateInRelevantRegexp() and not epsilonSucc*(s) = Accept(_) - } + predicate isRejectState(RelevantState s) { not epsilonSucc*(s) = Accept(_) } /** * Holds if there is likely a non-empty suffix leading to rejection starting in `s`. */ pragma[noopt] - predicate hasEdgeToLikelyRejectable(State s) { - s = Prefix::stateInRelevantRegexp() and + predicate hasEdgeToLikelyRejectable(RelevantState s) { // all edges (at least one) with some char leads to another state that is rejectable. // the `next` states might not share a common suffix, which can cause FPs. exists(string char | char = hasEdgeToLikelyRejectableHelper(s) | @@ -1080,8 +1078,7 @@ module ReDoSPruning { * and `s` has not been found to be rejectable by `hasRejectEdge` or `isRejectState`. */ pragma[noinline] - private string hasEdgeToLikelyRejectableHelper(State s) { - s = Prefix::stateInRelevantRegexp() and + private string hasEdgeToLikelyRejectableHelper(RelevantState s) { not hasRejectEdge(s) and not isRejectState(s) and deltaClosedChar(s, result, _) @@ -1092,9 +1089,7 @@ module ReDoSPruning { * along epsilon edges, such that there is a transition from * `prev` to `next` that the character symbol `char`. */ - predicate deltaClosedChar(State prev, string char, State next) { - prev = Prefix::stateInRelevantRegexp() and - next = Prefix::stateInRelevantRegexp() and + predicate deltaClosedChar(RelevantState prev, string char, RelevantState next) { deltaClosed(prev, getAnInputSymbolMatchingRelevant(char), next) } From 0162bc3c773182e967bc542c5caa69e71d68a2cd Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Mon, 5 Sep 2022 11:22:12 +0200 Subject: [PATCH 2/4] use `RelevantState` inside the `lastStartState` predicate Co-authored-by: Arthur Baars --- java/ql/lib/semmle/code/java/security/regexp/NfaUtils.qll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/ql/lib/semmle/code/java/security/regexp/NfaUtils.qll b/java/ql/lib/semmle/code/java/security/regexp/NfaUtils.qll index 1c806c305ba..8706b827a6f 100644 --- a/java/ql/lib/semmle/code/java/security/regexp/NfaUtils.qll +++ b/java/ql/lib/semmle/code/java/security/regexp/NfaUtils.qll @@ -890,7 +890,7 @@ module PrefixConstruction { private predicate lastStartState(RelevantState state) { exists(RegExpRoot root | state = - max(State s, Location l | + max(RelevantState s, Location l | isStartState(s) and getRoot(s.getRepr()) = root and l = s.getRepr().getLocation() From 3f1cb04f3e8b450f5508781bf572b13f029a3e50 Mon Sep 17 00:00:00 2001 From: erik-krogh Date: Mon, 5 Sep 2022 11:22:34 +0200 Subject: [PATCH 3/4] sync files --- .../ql/lib/semmle/javascript/security/regexp/NfaUtils.qll | 2 +- python/ql/lib/semmle/python/security/regexp/NfaUtils.qll | 2 +- ruby/ql/lib/codeql/ruby/security/regexp/NfaUtils.qll | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/javascript/ql/lib/semmle/javascript/security/regexp/NfaUtils.qll b/javascript/ql/lib/semmle/javascript/security/regexp/NfaUtils.qll index 1c806c305ba..8706b827a6f 100644 --- a/javascript/ql/lib/semmle/javascript/security/regexp/NfaUtils.qll +++ b/javascript/ql/lib/semmle/javascript/security/regexp/NfaUtils.qll @@ -890,7 +890,7 @@ module PrefixConstruction { private predicate lastStartState(RelevantState state) { exists(RegExpRoot root | state = - max(State s, Location l | + max(RelevantState s, Location l | isStartState(s) and getRoot(s.getRepr()) = root and l = s.getRepr().getLocation() diff --git a/python/ql/lib/semmle/python/security/regexp/NfaUtils.qll b/python/ql/lib/semmle/python/security/regexp/NfaUtils.qll index 1c806c305ba..8706b827a6f 100644 --- a/python/ql/lib/semmle/python/security/regexp/NfaUtils.qll +++ b/python/ql/lib/semmle/python/security/regexp/NfaUtils.qll @@ -890,7 +890,7 @@ module PrefixConstruction { private predicate lastStartState(RelevantState state) { exists(RegExpRoot root | state = - max(State s, Location l | + max(RelevantState s, Location l | isStartState(s) and getRoot(s.getRepr()) = root and l = s.getRepr().getLocation() diff --git a/ruby/ql/lib/codeql/ruby/security/regexp/NfaUtils.qll b/ruby/ql/lib/codeql/ruby/security/regexp/NfaUtils.qll index 1c806c305ba..8706b827a6f 100644 --- a/ruby/ql/lib/codeql/ruby/security/regexp/NfaUtils.qll +++ b/ruby/ql/lib/codeql/ruby/security/regexp/NfaUtils.qll @@ -890,7 +890,7 @@ module PrefixConstruction { private predicate lastStartState(RelevantState state) { exists(RegExpRoot root | state = - max(State s, Location l | + max(RelevantState s, Location l | isStartState(s) and getRoot(s.getRepr()) = root and l = s.getRepr().getLocation() From a86a940df7b26ccc57bd44a25b9d7774cf1d8100 Mon Sep 17 00:00:00 2001 From: erik-krogh Date: Mon, 5 Sep 2022 13:27:34 +0200 Subject: [PATCH 4/4] add getRepr() and toString() on RelevantState --- java/ql/lib/semmle/code/java/security/regexp/NfaUtils.qll | 6 +++++- .../ql/lib/semmle/javascript/security/regexp/NfaUtils.qll | 6 +++++- python/ql/lib/semmle/python/security/regexp/NfaUtils.qll | 6 +++++- ruby/ql/lib/codeql/ruby/security/regexp/NfaUtils.qll | 6 +++++- 4 files changed, 20 insertions(+), 4 deletions(-) diff --git a/java/ql/lib/semmle/code/java/security/regexp/NfaUtils.qll b/java/ql/lib/semmle/code/java/security/regexp/NfaUtils.qll index 8706b827a6f..5112bdad11e 100644 --- a/java/ql/lib/semmle/code/java/security/regexp/NfaUtils.qll +++ b/java/ql/lib/semmle/code/java/security/regexp/NfaUtils.qll @@ -968,7 +968,11 @@ module PrefixConstruction { exists(State s | isCandidate(s) | getRoot(s.getRepr()) = getRoot(this.getRepr())) } - string toString() { result = "RelevantState" } + /** Gets a string representation for this state in a regular expression. */ + string toString() { result = State.super.toString() } + + /** Gets the term represented by this state. */ + RegExpTerm getRepr() { result = State.super.getRepr() } } } diff --git a/javascript/ql/lib/semmle/javascript/security/regexp/NfaUtils.qll b/javascript/ql/lib/semmle/javascript/security/regexp/NfaUtils.qll index 8706b827a6f..5112bdad11e 100644 --- a/javascript/ql/lib/semmle/javascript/security/regexp/NfaUtils.qll +++ b/javascript/ql/lib/semmle/javascript/security/regexp/NfaUtils.qll @@ -968,7 +968,11 @@ module PrefixConstruction { exists(State s | isCandidate(s) | getRoot(s.getRepr()) = getRoot(this.getRepr())) } - string toString() { result = "RelevantState" } + /** Gets a string representation for this state in a regular expression. */ + string toString() { result = State.super.toString() } + + /** Gets the term represented by this state. */ + RegExpTerm getRepr() { result = State.super.getRepr() } } } diff --git a/python/ql/lib/semmle/python/security/regexp/NfaUtils.qll b/python/ql/lib/semmle/python/security/regexp/NfaUtils.qll index 8706b827a6f..5112bdad11e 100644 --- a/python/ql/lib/semmle/python/security/regexp/NfaUtils.qll +++ b/python/ql/lib/semmle/python/security/regexp/NfaUtils.qll @@ -968,7 +968,11 @@ module PrefixConstruction { exists(State s | isCandidate(s) | getRoot(s.getRepr()) = getRoot(this.getRepr())) } - string toString() { result = "RelevantState" } + /** Gets a string representation for this state in a regular expression. */ + string toString() { result = State.super.toString() } + + /** Gets the term represented by this state. */ + RegExpTerm getRepr() { result = State.super.getRepr() } } } diff --git a/ruby/ql/lib/codeql/ruby/security/regexp/NfaUtils.qll b/ruby/ql/lib/codeql/ruby/security/regexp/NfaUtils.qll index 8706b827a6f..5112bdad11e 100644 --- a/ruby/ql/lib/codeql/ruby/security/regexp/NfaUtils.qll +++ b/ruby/ql/lib/codeql/ruby/security/regexp/NfaUtils.qll @@ -968,7 +968,11 @@ module PrefixConstruction { exists(State s | isCandidate(s) | getRoot(s.getRepr()) = getRoot(this.getRepr())) } - string toString() { result = "RelevantState" } + /** Gets a string representation for this state in a regular expression. */ + string toString() { result = State.super.toString() } + + /** Gets the term represented by this state. */ + RegExpTerm getRepr() { result = State.super.getRepr() } } }