diff --git a/java/ql/lib/semmle/code/java/security/regexp/NfaUtils.qll b/java/ql/lib/semmle/code/java/security/regexp/NfaUtils.qll index fdcb10c2f85..033b8aa8cfd 100644 --- a/java/ql/lib/semmle/code/java/security/regexp/NfaUtils.qll +++ b/java/ql/lib/semmle/code/java/security/regexp/NfaUtils.qll @@ -93,8 +93,6 @@ class RegExpRoot extends RegExpTerm { * Holds if this root term is relevant to the ReDoS analysis. */ predicate isRelevant() { - // there is at least one repetition - getRoot(any(InfiniteRepetitionQuantifier q)) = this and // is actually used as a RegExp this.isUsedAsRegExp() and // not excluded for library specific reasons @@ -877,6 +875,101 @@ predicate isStartState(State state) { */ signature predicate isCandidateSig(State state, string pump); +/** + * Holds if `state` is a candidate for ReDoS. + */ +signature predicate isCandidateSig(State state); + +/** + * Predicates for constructing a prefix string that leads to a given state. + */ +module PrefixConstruction { + /** + * Holds if `state` is the textually last start state for the regular expression. + */ + private predicate lastStartState(State state) { + exists(RegExpRoot root | + state = + max(State s, Location l | + s = stateInRelevantRegexp() and + isStartState(s) and + getRoot(s.getRepr()) = root and + l = s.getRepr().getLocation() + | + s + order by + l.getStartLine(), l.getStartColumn(), s.getRepr().toString(), l.getEndColumn(), + l.getEndLine() + ) + ) + } + + /** + * Holds if there exists any transition (Epsilon() or other) from `a` to `b`. + */ + private predicate existsTransition(State a, State b) { delta(a, _, b) } + + /** + * Gets the minimum number of transitions it takes to reach `state` from the `start` state. + */ + int prefixLength(State start, State state) = + shortestDistances(lastStartState/1, existsTransition/2)(start, state, result) + + /** + * Gets the minimum number of transitions it takes to reach `state` from the start state. + */ + private int lengthFromStart(State state) { result = prefixLength(_, state) } + + /** + * Gets a string for which the regular expression will reach `state`. + * + * Has at most one result for any given `state`. + * This predicate will not always have a result even if there is a ReDoS issue in + * the regular expression. + */ + string prefix(State state) { + lastStartState(state) and + result = "" + or + // the search stops past the last redos candidate state. + lengthFromStart(state) <= max(lengthFromStart(any(State s | isCandidate(s)))) and + exists(State prev | + // select a unique predecessor (by an arbitrary measure) + prev = + min(State s, Location loc | + lengthFromStart(s) = lengthFromStart(state) - 1 and + loc = s.getRepr().getLocation() and + delta(s, _, state) + | + s + order by + loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn(), + s.getRepr().toString() + ) + | + // greedy search for the shortest prefix + result = prefix(prev) and delta(prev, Epsilon(), state) + or + not delta(prev, Epsilon(), state) and + result = prefix(prev) + getCanonicalEdgeChar(prev, state) + ) + } + + /** + * Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA. + */ + private string getCanonicalEdgeChar(State prev, State next) { + result = + min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next)) + } + + /** Gets a state within a regular expression that contains a candidate state. */ + pragma[noinline] + State stateInRelevantRegexp() { + exists(State s | isCandidate(s) | getRoot(s.getRepr()) = getRoot(result.getRepr())) + } +} + /** * A module for pruning candidate ReDoS states. * The candidates are specified by the `isCandidate` signature predicate. @@ -910,95 +1003,9 @@ module ReDoSPruning { /** Gets a state that can reach the `accept-any` state using only epsilon steps. */ private State acceptsAnySuffix() { epsilonSucc*(result) = AcceptAnySuffix(_) } - /** - * Predicates for constructing a prefix string that leads to a given state. - */ - private module PrefixConstruction { - /** - * Holds if `state` is the textually last start state for the regular expression. - */ - private predicate lastStartState(State state) { - exists(RegExpRoot root | - state = - max(State s, Location l | - s = stateInPumpableRegexp() and - isStartState(s) and - getRoot(s.getRepr()) = root and - l = s.getRepr().getLocation() - | - s - order by - l.getStartLine(), l.getStartColumn(), s.getRepr().toString(), l.getEndColumn(), - l.getEndLine() - ) - ) - } + predicate isCandidateState(State s) { isReDoSCandidate(s, _) } - /** - * Holds if there exists any transition (Epsilon() or other) from `a` to `b`. - */ - private predicate existsTransition(State a, State b) { delta(a, _, b) } - - /** - * Gets the minimum number of transitions it takes to reach `state` from the `start` state. - */ - int prefixLength(State start, State state) = - shortestDistances(lastStartState/1, existsTransition/2)(start, state, result) - - /** - * Gets the minimum number of transitions it takes to reach `state` from the start state. - */ - private int lengthFromStart(State state) { result = prefixLength(_, state) } - - /** - * Gets a string for which the regular expression will reach `state`. - * - * Has at most one result for any given `state`. - * This predicate will not always have a result even if there is a ReDoS issue in - * the regular expression. - */ - string prefix(State state) { - lastStartState(state) and - result = "" - or - // the search stops past the last redos candidate state. - lengthFromStart(state) <= max(lengthFromStart(any(State s | isReDoSCandidate(s, _)))) and - exists(State prev | - // select a unique predecessor (by an arbitrary measure) - prev = - min(State s, Location loc | - lengthFromStart(s) = lengthFromStart(state) - 1 and - loc = s.getRepr().getLocation() and - delta(s, _, state) - | - s - order by - loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn(), - s.getRepr().toString() - ) - | - // greedy search for the shortest prefix - result = prefix(prev) and delta(prev, Epsilon(), state) - or - not delta(prev, Epsilon(), state) and - result = prefix(prev) + getCanonicalEdgeChar(prev, state) - ) - } - - /** - * Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA. - */ - private string getCanonicalEdgeChar(State prev, State next) { - result = - min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next)) - } - - /** Gets a state within a regular expression that has a pumpable state. */ - pragma[noinline] - State stateInPumpableRegexp() { - exists(State s | isReDoSCandidate(s, _) | getRoot(s.getRepr()) = getRoot(result.getRepr())) - } - } + import PrefixConstruction as Prefix /** * Predicates for testing the presence of a rejecting suffix. @@ -1018,8 +1025,6 @@ module ReDoSPruning { * using epsilon transitions. But any attempt at repeating `w` will end in a state that accepts all suffixes. */ private module SuffixConstruction { - import PrefixConstruction - /** * Holds if all states reachable from `fork` by repeating `w` * are likely rejectable by appending some suffix. @@ -1036,7 +1041,7 @@ module ReDoSPruning { */ pragma[noinline] private predicate isLikelyRejectable(State s) { - s = stateInPumpableRegexp() and + s = Prefix::stateInRelevantRegexp() and ( // exists a reject edge with some char. hasRejectEdge(s) @@ -1052,7 +1057,7 @@ module ReDoSPruning { * Holds if `s` is not an accept state, and there is no epsilon transition to an accept state. */ predicate isRejectState(State s) { - s = stateInPumpableRegexp() and not epsilonSucc*(s) = Accept(_) + s = Prefix::stateInRelevantRegexp() and not epsilonSucc*(s) = Accept(_) } /** @@ -1060,7 +1065,7 @@ module ReDoSPruning { */ pragma[noopt] predicate hasEdgeToLikelyRejectable(State s) { - s = stateInPumpableRegexp() and + s = Prefix::stateInRelevantRegexp() and // all edges (at least one) with some char leads to another state that is rejectable. // the `next` states might not share a common suffix, which can cause FPs. exists(string char | char = hasEdgeToLikelyRejectableHelper(s) | @@ -1076,7 +1081,7 @@ module ReDoSPruning { */ pragma[noinline] private string hasEdgeToLikelyRejectableHelper(State s) { - s = stateInPumpableRegexp() and + s = Prefix::stateInRelevantRegexp() and not hasRejectEdge(s) and not isRejectState(s) and deltaClosedChar(s, result, _) @@ -1088,8 +1093,8 @@ module ReDoSPruning { * `prev` to `next` that the character symbol `char`. */ predicate deltaClosedChar(State prev, string char, State next) { - prev = stateInPumpableRegexp() and - next = stateInPumpableRegexp() and + prev = Prefix::stateInRelevantRegexp() and + next = Prefix::stateInRelevantRegexp() and deltaClosed(prev, getAnInputSymbolMatchingRelevant(char), next) } @@ -1099,18 +1104,28 @@ module ReDoSPruning { result = getAnInputSymbolMatching(char) } + pragma[noinline] + RegExpRoot relevantRoot() { + exists(RegExpTerm term, State s | + s.getRepr() = term and isCandidateState(s) and result = term.getRootTerm() + ) + } + /** * Gets a char used for finding possible suffixes inside `root`. */ pragma[noinline] private string relevant(RegExpRoot root) { - exists(ascii(result)) and exists(root) - or - exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _)) - or - // The characters from `hasSimpleRejectEdge`. Only `\n` is really needed (as `\n` is not in the `ascii` relation). - // The three chars must be kept in sync with `hasSimpleRejectEdge`. - result = ["|", "\n", "Z"] and exists(root) + root = relevantRoot() and + ( + exists(ascii(result)) and exists(root) + or + exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _)) + or + // The characters from `hasSimpleRejectEdge`. Only `\n` is really needed (as `\n` is not in the `ascii` relation). + // The three chars must be kept in sync with `hasSimpleRejectEdge`. + result = ["|", "\n", "Z"] and exists(root) + ) } /** @@ -1208,12 +1223,12 @@ module ReDoSPruning { predicate hasReDoSResult(RegExpTerm t, string pump, State s, string prefixMsg) { isReDoSAttackable(t, pump, s) and ( - prefixMsg = "starting with '" + escape(PrefixConstruction::prefix(s)) + "' and " and - not PrefixConstruction::prefix(s) = "" + prefixMsg = "starting with '" + escape(Prefix::prefix(s)) + "' and " and + not Prefix::prefix(s) = "" or - PrefixConstruction::prefix(s) = "" and prefixMsg = "" + Prefix::prefix(s) = "" and prefixMsg = "" or - not exists(PrefixConstruction::prefix(s)) and prefixMsg = "" + not exists(Prefix::prefix(s)) and prefixMsg = "" ) } diff --git a/javascript/ql/lib/semmle/javascript/security/regexp/NfaUtils.qll b/javascript/ql/lib/semmle/javascript/security/regexp/NfaUtils.qll index fdcb10c2f85..033b8aa8cfd 100644 --- a/javascript/ql/lib/semmle/javascript/security/regexp/NfaUtils.qll +++ b/javascript/ql/lib/semmle/javascript/security/regexp/NfaUtils.qll @@ -93,8 +93,6 @@ class RegExpRoot extends RegExpTerm { * Holds if this root term is relevant to the ReDoS analysis. */ predicate isRelevant() { - // there is at least one repetition - getRoot(any(InfiniteRepetitionQuantifier q)) = this and // is actually used as a RegExp this.isUsedAsRegExp() and // not excluded for library specific reasons @@ -877,6 +875,101 @@ predicate isStartState(State state) { */ signature predicate isCandidateSig(State state, string pump); +/** + * Holds if `state` is a candidate for ReDoS. + */ +signature predicate isCandidateSig(State state); + +/** + * Predicates for constructing a prefix string that leads to a given state. + */ +module PrefixConstruction { + /** + * Holds if `state` is the textually last start state for the regular expression. + */ + private predicate lastStartState(State state) { + exists(RegExpRoot root | + state = + max(State s, Location l | + s = stateInRelevantRegexp() and + isStartState(s) and + getRoot(s.getRepr()) = root and + l = s.getRepr().getLocation() + | + s + order by + l.getStartLine(), l.getStartColumn(), s.getRepr().toString(), l.getEndColumn(), + l.getEndLine() + ) + ) + } + + /** + * Holds if there exists any transition (Epsilon() or other) from `a` to `b`. + */ + private predicate existsTransition(State a, State b) { delta(a, _, b) } + + /** + * Gets the minimum number of transitions it takes to reach `state` from the `start` state. + */ + int prefixLength(State start, State state) = + shortestDistances(lastStartState/1, existsTransition/2)(start, state, result) + + /** + * Gets the minimum number of transitions it takes to reach `state` from the start state. + */ + private int lengthFromStart(State state) { result = prefixLength(_, state) } + + /** + * Gets a string for which the regular expression will reach `state`. + * + * Has at most one result for any given `state`. + * This predicate will not always have a result even if there is a ReDoS issue in + * the regular expression. + */ + string prefix(State state) { + lastStartState(state) and + result = "" + or + // the search stops past the last redos candidate state. + lengthFromStart(state) <= max(lengthFromStart(any(State s | isCandidate(s)))) and + exists(State prev | + // select a unique predecessor (by an arbitrary measure) + prev = + min(State s, Location loc | + lengthFromStart(s) = lengthFromStart(state) - 1 and + loc = s.getRepr().getLocation() and + delta(s, _, state) + | + s + order by + loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn(), + s.getRepr().toString() + ) + | + // greedy search for the shortest prefix + result = prefix(prev) and delta(prev, Epsilon(), state) + or + not delta(prev, Epsilon(), state) and + result = prefix(prev) + getCanonicalEdgeChar(prev, state) + ) + } + + /** + * Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA. + */ + private string getCanonicalEdgeChar(State prev, State next) { + result = + min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next)) + } + + /** Gets a state within a regular expression that contains a candidate state. */ + pragma[noinline] + State stateInRelevantRegexp() { + exists(State s | isCandidate(s) | getRoot(s.getRepr()) = getRoot(result.getRepr())) + } +} + /** * A module for pruning candidate ReDoS states. * The candidates are specified by the `isCandidate` signature predicate. @@ -910,95 +1003,9 @@ module ReDoSPruning { /** Gets a state that can reach the `accept-any` state using only epsilon steps. */ private State acceptsAnySuffix() { epsilonSucc*(result) = AcceptAnySuffix(_) } - /** - * Predicates for constructing a prefix string that leads to a given state. - */ - private module PrefixConstruction { - /** - * Holds if `state` is the textually last start state for the regular expression. - */ - private predicate lastStartState(State state) { - exists(RegExpRoot root | - state = - max(State s, Location l | - s = stateInPumpableRegexp() and - isStartState(s) and - getRoot(s.getRepr()) = root and - l = s.getRepr().getLocation() - | - s - order by - l.getStartLine(), l.getStartColumn(), s.getRepr().toString(), l.getEndColumn(), - l.getEndLine() - ) - ) - } + predicate isCandidateState(State s) { isReDoSCandidate(s, _) } - /** - * Holds if there exists any transition (Epsilon() or other) from `a` to `b`. - */ - private predicate existsTransition(State a, State b) { delta(a, _, b) } - - /** - * Gets the minimum number of transitions it takes to reach `state` from the `start` state. - */ - int prefixLength(State start, State state) = - shortestDistances(lastStartState/1, existsTransition/2)(start, state, result) - - /** - * Gets the minimum number of transitions it takes to reach `state` from the start state. - */ - private int lengthFromStart(State state) { result = prefixLength(_, state) } - - /** - * Gets a string for which the regular expression will reach `state`. - * - * Has at most one result for any given `state`. - * This predicate will not always have a result even if there is a ReDoS issue in - * the regular expression. - */ - string prefix(State state) { - lastStartState(state) and - result = "" - or - // the search stops past the last redos candidate state. - lengthFromStart(state) <= max(lengthFromStart(any(State s | isReDoSCandidate(s, _)))) and - exists(State prev | - // select a unique predecessor (by an arbitrary measure) - prev = - min(State s, Location loc | - lengthFromStart(s) = lengthFromStart(state) - 1 and - loc = s.getRepr().getLocation() and - delta(s, _, state) - | - s - order by - loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn(), - s.getRepr().toString() - ) - | - // greedy search for the shortest prefix - result = prefix(prev) and delta(prev, Epsilon(), state) - or - not delta(prev, Epsilon(), state) and - result = prefix(prev) + getCanonicalEdgeChar(prev, state) - ) - } - - /** - * Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA. - */ - private string getCanonicalEdgeChar(State prev, State next) { - result = - min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next)) - } - - /** Gets a state within a regular expression that has a pumpable state. */ - pragma[noinline] - State stateInPumpableRegexp() { - exists(State s | isReDoSCandidate(s, _) | getRoot(s.getRepr()) = getRoot(result.getRepr())) - } - } + import PrefixConstruction as Prefix /** * Predicates for testing the presence of a rejecting suffix. @@ -1018,8 +1025,6 @@ module ReDoSPruning { * using epsilon transitions. But any attempt at repeating `w` will end in a state that accepts all suffixes. */ private module SuffixConstruction { - import PrefixConstruction - /** * Holds if all states reachable from `fork` by repeating `w` * are likely rejectable by appending some suffix. @@ -1036,7 +1041,7 @@ module ReDoSPruning { */ pragma[noinline] private predicate isLikelyRejectable(State s) { - s = stateInPumpableRegexp() and + s = Prefix::stateInRelevantRegexp() and ( // exists a reject edge with some char. hasRejectEdge(s) @@ -1052,7 +1057,7 @@ module ReDoSPruning { * Holds if `s` is not an accept state, and there is no epsilon transition to an accept state. */ predicate isRejectState(State s) { - s = stateInPumpableRegexp() and not epsilonSucc*(s) = Accept(_) + s = Prefix::stateInRelevantRegexp() and not epsilonSucc*(s) = Accept(_) } /** @@ -1060,7 +1065,7 @@ module ReDoSPruning { */ pragma[noopt] predicate hasEdgeToLikelyRejectable(State s) { - s = stateInPumpableRegexp() and + s = Prefix::stateInRelevantRegexp() and // all edges (at least one) with some char leads to another state that is rejectable. // the `next` states might not share a common suffix, which can cause FPs. exists(string char | char = hasEdgeToLikelyRejectableHelper(s) | @@ -1076,7 +1081,7 @@ module ReDoSPruning { */ pragma[noinline] private string hasEdgeToLikelyRejectableHelper(State s) { - s = stateInPumpableRegexp() and + s = Prefix::stateInRelevantRegexp() and not hasRejectEdge(s) and not isRejectState(s) and deltaClosedChar(s, result, _) @@ -1088,8 +1093,8 @@ module ReDoSPruning { * `prev` to `next` that the character symbol `char`. */ predicate deltaClosedChar(State prev, string char, State next) { - prev = stateInPumpableRegexp() and - next = stateInPumpableRegexp() and + prev = Prefix::stateInRelevantRegexp() and + next = Prefix::stateInRelevantRegexp() and deltaClosed(prev, getAnInputSymbolMatchingRelevant(char), next) } @@ -1099,18 +1104,28 @@ module ReDoSPruning { result = getAnInputSymbolMatching(char) } + pragma[noinline] + RegExpRoot relevantRoot() { + exists(RegExpTerm term, State s | + s.getRepr() = term and isCandidateState(s) and result = term.getRootTerm() + ) + } + /** * Gets a char used for finding possible suffixes inside `root`. */ pragma[noinline] private string relevant(RegExpRoot root) { - exists(ascii(result)) and exists(root) - or - exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _)) - or - // The characters from `hasSimpleRejectEdge`. Only `\n` is really needed (as `\n` is not in the `ascii` relation). - // The three chars must be kept in sync with `hasSimpleRejectEdge`. - result = ["|", "\n", "Z"] and exists(root) + root = relevantRoot() and + ( + exists(ascii(result)) and exists(root) + or + exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _)) + or + // The characters from `hasSimpleRejectEdge`. Only `\n` is really needed (as `\n` is not in the `ascii` relation). + // The three chars must be kept in sync with `hasSimpleRejectEdge`. + result = ["|", "\n", "Z"] and exists(root) + ) } /** @@ -1208,12 +1223,12 @@ module ReDoSPruning { predicate hasReDoSResult(RegExpTerm t, string pump, State s, string prefixMsg) { isReDoSAttackable(t, pump, s) and ( - prefixMsg = "starting with '" + escape(PrefixConstruction::prefix(s)) + "' and " and - not PrefixConstruction::prefix(s) = "" + prefixMsg = "starting with '" + escape(Prefix::prefix(s)) + "' and " and + not Prefix::prefix(s) = "" or - PrefixConstruction::prefix(s) = "" and prefixMsg = "" + Prefix::prefix(s) = "" and prefixMsg = "" or - not exists(PrefixConstruction::prefix(s)) and prefixMsg = "" + not exists(Prefix::prefix(s)) and prefixMsg = "" ) } diff --git a/javascript/ql/src/Security/CWE-178/CaseSensitiveMiddlewarePath.ql b/javascript/ql/src/Security/CWE-178/CaseSensitiveMiddlewarePath.ql index ccf659bf024..e8835436c72 100644 --- a/javascript/ql/src/Security/CWE-178/CaseSensitiveMiddlewarePath.ql +++ b/javascript/ql/src/Security/CWE-178/CaseSensitiveMiddlewarePath.ql @@ -20,55 +20,29 @@ string toOtherCase(string s) { if s.regexpMatch(".*[a-z].*") then result = s.toUpperCase() else result = s.toLowerCase() } -RegExpCharacterClass getEnclosingClass(RegExpTerm term) { - term = result.getAChild() - or - term = result.getAChild().(RegExpRange).getAChild() -} +import semmle.javascript.security.regexp.NfaUtils as NfaUtils -/** - * Holds if `term` seems to distinguish between upper and lower case letters, assuming the `i` flag is not present. - */ -pragma[inline] -predicate isLikelyCaseSensitiveRegExp(RegExpTerm term) { - exists(RegExpConstant const | - const = term.getAChild*() and - const.getValue().regexpMatch(".*[a-zA-Z].*") and - not getEnclosingClass(const).getAChild().(RegExpConstant).getValue() = - toOtherCase(const.getValue()) and - not const.getParent*() instanceof RegExpNegativeLookahead and - not const.getParent*() instanceof RegExpNegativeLookbehind +/** Holds if `s` is a relevant regexp term were we want to compute a string that matches the term (for `getCaseSensitiveBypassExample`). */ +predicate isCand(NfaUtils::State s) { + s.getRepr() instanceof NfaUtils::RegExpRoot and + exists(DataFlow::RegExpCreationNode creation | + isCaseSensitiveMiddleware(_, creation, _) and + s.getRepr().getRootTerm() = creation.getRoot() ) } -/** - * Gets a string matched by `term`, or part of such a string. - */ +import NfaUtils::PrefixConstruction as Prefix + +/** Gets a string matched by `term`. */ string getExampleString(RegExpTerm term) { - result = term.getAMatchedString() - or - // getAMatchedString does not recurse into sequences. Perform one step manually. - exists(RegExpSequence seq | seq = term | - result = - strictconcat(RegExpTerm child, int i, string text | - child = seq.getChild(i) and - ( - text = child.getAMatchedString() - or - not exists(child.getAMatchedString()) and - text = "" - ) - | - text order by i - ) - ) + result = Prefix::prefix(any(NfaUtils::State s | s.getRepr() = term)) } string getCaseSensitiveBypassExample(RegExpTerm term) { - exists(string example | - example = getExampleString(term) and - result = toOtherCase(example) and - result != example // getting an example string is approximate; ensure we got a proper case-change example + exists(string byPassExample | + byPassExample = getExampleString(term) and + result = toOtherCase(byPassExample) and + result != byPassExample // getting an byPassExample string is approximate; ensure we got a proper case-change byPassExample ) } @@ -89,7 +63,6 @@ predicate isCaseSensitiveMiddleware( ) and arg = call.getArgument(0) and regexp.getAReference().flowsTo(arg) and - isLikelyCaseSensitiveRegExp(regexp.getRoot()) and exists(string flags | flags = regexp.getFlags() and not RegExp::isIgnoreCase(flags) @@ -108,14 +81,64 @@ predicate isGuardedCaseInsensitiveEndpoint( ) } +/** + * Gets an example path that will hit `endpoint`. + * Query parameters (e.g. the ":param" in "/foo/:param") have been replaced with example values. + */ +string getAnEndpointExample(Routing::RouteSetup endpoint) { + exists(string raw | + raw = endpoint.getRelativePath() and + result = raw.regexpReplaceAll(":\\w+\\b|\\*", ["a", "1"]) + ) +} + +import semmle.javascript.security.regexp.RegexpMatching as RegexpMatching + +NfaUtils::RegExpRoot getARoot(DataFlow::RegExpCreationNode creator) { + result.getRootTerm() = creator.getRoot() +} + +/** + * Holds if the regexp matcher should test whether `root` matches `str`. + * The result is used to test whether a case-sensitive bypass exists. + */ +predicate isMatchingCandidate( + RegexpMatching::RootTerm root, string str, boolean ignorePrefix, boolean testWithGroups +) { + exists( + Routing::RouteSetup middleware, Routing::RouteSetup endPoint, + DataFlow::RegExpCreationNode regexp + | + isCaseSensitiveMiddleware(middleware, regexp, _) and + isGuardedCaseInsensitiveEndpoint(endPoint, middleware) + | + root = regexp.getRoot() and + exists(getCaseSensitiveBypassExample(getARoot(regexp))) and + ignorePrefix = true and + testWithGroups = false and + str = + [ + getCaseSensitiveBypassExample(getARoot(regexp)), getAnEndpointExample(endPoint), + toOtherCase(getAnEndpointExample(endPoint)) + ] + ) +} + +import RegexpMatching::RegexpMatching as Matcher + from DataFlow::RegExpCreationNode regexp, Routing::RouteSetup middleware, Routing::RouteSetup endpoint, - DataFlow::Node arg, string example + DataFlow::Node arg, string byPassExample, string endpointExample, string byPassEndPoint where isCaseSensitiveMiddleware(middleware, regexp, arg) and - example = getCaseSensitiveBypassExample(regexp.getRoot()) and + byPassExample = getCaseSensitiveBypassExample(getARoot(regexp)) and isGuardedCaseInsensitiveEndpoint(endpoint, middleware) and - exists(endpoint.getRelativePath().toLowerCase().indexOf(example.toLowerCase())) + // only report one example. + endpointExample = + min(string ex | ex = getAnEndpointExample(endpoint) and Matcher::matches(regexp.getRoot(), ex)) and + not Matcher::matches(regexp.getRoot(), byPassExample) and + byPassEndPoint = toOtherCase(endpointExample) and + not Matcher::matches(regexp.getRoot(), byPassEndPoint) select arg, "This route uses a case-sensitive path $@, but is guarding a case-insensitive path $@. A path such as '" - + example + "' will bypass the middleware.", regexp, "pattern", endpoint, "here" + + byPassEndPoint + "' will bypass the middleware.", regexp, "pattern", endpoint, "here" diff --git a/javascript/ql/test/query-tests/Security/CWE-178/CaseSensitiveMiddlewarePath.expected b/javascript/ql/test/query-tests/Security/CWE-178/CaseSensitiveMiddlewarePath.expected index a79b3100a80..2e8cf4ee415 100644 --- a/javascript/ql/test/query-tests/Security/CWE-178/CaseSensitiveMiddlewarePath.expected +++ b/javascript/ql/test/query-tests/Security/CWE-178/CaseSensitiveMiddlewarePath.expected @@ -1,3 +1,7 @@ -| tst.js:8:9:8:19 | /\\/foo\\/.*/ | This route uses a case-sensitive path $@, but is guarding a case-insensitive path $@. A path such as '/FOO/' will bypass the middleware. | tst.js:8:9:8:19 | /\\/foo\\/.*/ | pattern | tst.js:60:1:61:2 | app.get ... ware\\n}) | here | -| tst.js:14:5:14:28 | new Reg ... (.*)?') | This route uses a case-sensitive path $@, but is guarding a case-insensitive path $@. A path such as '/FOO' will bypass the middleware. | tst.js:14:5:14:28 | new Reg ... (.*)?') | pattern | tst.js:60:1:61:2 | app.get ... ware\\n}) | here | -| tst.js:41:9:41:25 | /\\/foo\\/([0-9]+)/ | This route uses a case-sensitive path $@, but is guarding a case-insensitive path $@. A path such as '/FOO/' will bypass the middleware. | tst.js:41:9:41:25 | /\\/foo\\/([0-9]+)/ | pattern | tst.js:60:1:61:2 | app.get ... ware\\n}) | here | +| tst.js:8:9:8:19 | /\\/foo\\/.*/ | This route uses a case-sensitive path $@, but is guarding a case-insensitive path $@. A path such as '/FOO/1' will bypass the middleware. | tst.js:8:9:8:19 | /\\/foo\\/.*/ | pattern | tst.js:60:1:61:2 | app.get ... ware\\n}) | here | +| tst.js:14:5:14:28 | new Reg ... (.*)?') | This route uses a case-sensitive path $@, but is guarding a case-insensitive path $@. A path such as '/FOO/1' will bypass the middleware. | tst.js:14:5:14:28 | new Reg ... (.*)?') | pattern | tst.js:60:1:61:2 | app.get ... ware\\n}) | here | +| tst.js:41:9:41:25 | /\\/foo\\/([0-9]+)/ | This route uses a case-sensitive path $@, but is guarding a case-insensitive path $@. A path such as '/FOO/1' will bypass the middleware. | tst.js:41:9:41:25 | /\\/foo\\/([0-9]+)/ | pattern | tst.js:60:1:61:2 | app.get ... ware\\n}) | here | +| tst.js:64:5:64:28 | new Reg ... (.*)?') | This route uses a case-sensitive path $@, but is guarding a case-insensitive path $@. A path such as '/BAR/1' will bypass the middleware. | tst.js:64:5:64:28 | new Reg ... (.*)?') | pattern | tst.js:73:1:74:2 | app.get ... ware\\n}) | here | +| tst.js:76:9:76:20 | /\\/baz\\/bla/ | This route uses a case-sensitive path $@, but is guarding a case-insensitive path $@. A path such as '/BAZ/BLA' will bypass the middleware. | tst.js:76:9:76:20 | /\\/baz\\/bla/ | pattern | tst.js:77:1:79:2 | app.get ... });\\n}) | here | +| tst.js:86:9:86:30 | /\\/[Bb] ... 3\\/[a]/ | This route uses a case-sensitive path $@, but is guarding a case-insensitive path $@. A path such as '/BAZ3/A' will bypass the middleware. | tst.js:86:9:86:30 | /\\/[Bb] ... 3\\/[a]/ | pattern | tst.js:87:1:89:2 | app.get ... });\\n}) | here | +| tst.js:91:9:91:40 | /\\/summ ... ntGame/ | This route uses a case-sensitive path $@, but is guarding a case-insensitive path $@. A path such as '/CURRENTGAME' will bypass the middleware. | tst.js:91:9:91:40 | /\\/summ ... ntGame/ | pattern | tst.js:93:1:95:2 | app.get ... O");\\n}) | here | diff --git a/javascript/ql/test/query-tests/Security/CWE-178/tst.js b/javascript/ql/test/query-tests/Security/CWE-178/tst.js index 1acb57b16ea..5fcc3cc94a0 100644 --- a/javascript/ql/test/query-tests/Security/CWE-178/tst.js +++ b/javascript/ql/test/query-tests/Security/CWE-178/tst.js @@ -59,3 +59,37 @@ app.use(/\/foo\/([0-9]+)/i, (req, res) => { // OK - not middleware (also case in app.get('/foo/:param', (req, res) => { // OK - not a middleware }); + +app.get( + new RegExp('^/bar(.*)?'), // NOT OK - case sensitive + unknown(), + function(req, res, next) { + if (req.params.blah) { + next(); + } + } +); + +app.get('/bar/*', (req, res) => { // OK - not a middleware +}); + +app.use(/\/baz\/bla/, unknown()); // NOT OK - case sensitive +app.get('/baz/bla', (req, resp) => { + resp.send({ test: 123 }); +}); + +app.use(/\/[Bb][Aa][Zz]2\/[aA]/, unknown()); // OK - not case sensitive +app.get('/baz2/a', (req, resp) => { + resp.send({ test: 123 }); +}); + +app.use(/\/[Bb][Aa][Zz]3\/[a]/, unknown()); // NOT OK - case sensitive +app.get('/baz3/a', (req, resp) => { + resp.send({ test: 123 }); +}); + +app.use(/\/summonerByName|\/currentGame/,apiLimit1, apiLimit2); + +app.get('/currentGame', function (req, res) { + res.send("FOO"); +}); diff --git a/python/ql/lib/semmle/python/security/regexp/NfaUtils.qll b/python/ql/lib/semmle/python/security/regexp/NfaUtils.qll index fdcb10c2f85..033b8aa8cfd 100644 --- a/python/ql/lib/semmle/python/security/regexp/NfaUtils.qll +++ b/python/ql/lib/semmle/python/security/regexp/NfaUtils.qll @@ -93,8 +93,6 @@ class RegExpRoot extends RegExpTerm { * Holds if this root term is relevant to the ReDoS analysis. */ predicate isRelevant() { - // there is at least one repetition - getRoot(any(InfiniteRepetitionQuantifier q)) = this and // is actually used as a RegExp this.isUsedAsRegExp() and // not excluded for library specific reasons @@ -877,6 +875,101 @@ predicate isStartState(State state) { */ signature predicate isCandidateSig(State state, string pump); +/** + * Holds if `state` is a candidate for ReDoS. + */ +signature predicate isCandidateSig(State state); + +/** + * Predicates for constructing a prefix string that leads to a given state. + */ +module PrefixConstruction { + /** + * Holds if `state` is the textually last start state for the regular expression. + */ + private predicate lastStartState(State state) { + exists(RegExpRoot root | + state = + max(State s, Location l | + s = stateInRelevantRegexp() and + isStartState(s) and + getRoot(s.getRepr()) = root and + l = s.getRepr().getLocation() + | + s + order by + l.getStartLine(), l.getStartColumn(), s.getRepr().toString(), l.getEndColumn(), + l.getEndLine() + ) + ) + } + + /** + * Holds if there exists any transition (Epsilon() or other) from `a` to `b`. + */ + private predicate existsTransition(State a, State b) { delta(a, _, b) } + + /** + * Gets the minimum number of transitions it takes to reach `state` from the `start` state. + */ + int prefixLength(State start, State state) = + shortestDistances(lastStartState/1, existsTransition/2)(start, state, result) + + /** + * Gets the minimum number of transitions it takes to reach `state` from the start state. + */ + private int lengthFromStart(State state) { result = prefixLength(_, state) } + + /** + * Gets a string for which the regular expression will reach `state`. + * + * Has at most one result for any given `state`. + * This predicate will not always have a result even if there is a ReDoS issue in + * the regular expression. + */ + string prefix(State state) { + lastStartState(state) and + result = "" + or + // the search stops past the last redos candidate state. + lengthFromStart(state) <= max(lengthFromStart(any(State s | isCandidate(s)))) and + exists(State prev | + // select a unique predecessor (by an arbitrary measure) + prev = + min(State s, Location loc | + lengthFromStart(s) = lengthFromStart(state) - 1 and + loc = s.getRepr().getLocation() and + delta(s, _, state) + | + s + order by + loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn(), + s.getRepr().toString() + ) + | + // greedy search for the shortest prefix + result = prefix(prev) and delta(prev, Epsilon(), state) + or + not delta(prev, Epsilon(), state) and + result = prefix(prev) + getCanonicalEdgeChar(prev, state) + ) + } + + /** + * Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA. + */ + private string getCanonicalEdgeChar(State prev, State next) { + result = + min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next)) + } + + /** Gets a state within a regular expression that contains a candidate state. */ + pragma[noinline] + State stateInRelevantRegexp() { + exists(State s | isCandidate(s) | getRoot(s.getRepr()) = getRoot(result.getRepr())) + } +} + /** * A module for pruning candidate ReDoS states. * The candidates are specified by the `isCandidate` signature predicate. @@ -910,95 +1003,9 @@ module ReDoSPruning { /** Gets a state that can reach the `accept-any` state using only epsilon steps. */ private State acceptsAnySuffix() { epsilonSucc*(result) = AcceptAnySuffix(_) } - /** - * Predicates for constructing a prefix string that leads to a given state. - */ - private module PrefixConstruction { - /** - * Holds if `state` is the textually last start state for the regular expression. - */ - private predicate lastStartState(State state) { - exists(RegExpRoot root | - state = - max(State s, Location l | - s = stateInPumpableRegexp() and - isStartState(s) and - getRoot(s.getRepr()) = root and - l = s.getRepr().getLocation() - | - s - order by - l.getStartLine(), l.getStartColumn(), s.getRepr().toString(), l.getEndColumn(), - l.getEndLine() - ) - ) - } + predicate isCandidateState(State s) { isReDoSCandidate(s, _) } - /** - * Holds if there exists any transition (Epsilon() or other) from `a` to `b`. - */ - private predicate existsTransition(State a, State b) { delta(a, _, b) } - - /** - * Gets the minimum number of transitions it takes to reach `state` from the `start` state. - */ - int prefixLength(State start, State state) = - shortestDistances(lastStartState/1, existsTransition/2)(start, state, result) - - /** - * Gets the minimum number of transitions it takes to reach `state` from the start state. - */ - private int lengthFromStart(State state) { result = prefixLength(_, state) } - - /** - * Gets a string for which the regular expression will reach `state`. - * - * Has at most one result for any given `state`. - * This predicate will not always have a result even if there is a ReDoS issue in - * the regular expression. - */ - string prefix(State state) { - lastStartState(state) and - result = "" - or - // the search stops past the last redos candidate state. - lengthFromStart(state) <= max(lengthFromStart(any(State s | isReDoSCandidate(s, _)))) and - exists(State prev | - // select a unique predecessor (by an arbitrary measure) - prev = - min(State s, Location loc | - lengthFromStart(s) = lengthFromStart(state) - 1 and - loc = s.getRepr().getLocation() and - delta(s, _, state) - | - s - order by - loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn(), - s.getRepr().toString() - ) - | - // greedy search for the shortest prefix - result = prefix(prev) and delta(prev, Epsilon(), state) - or - not delta(prev, Epsilon(), state) and - result = prefix(prev) + getCanonicalEdgeChar(prev, state) - ) - } - - /** - * Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA. - */ - private string getCanonicalEdgeChar(State prev, State next) { - result = - min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next)) - } - - /** Gets a state within a regular expression that has a pumpable state. */ - pragma[noinline] - State stateInPumpableRegexp() { - exists(State s | isReDoSCandidate(s, _) | getRoot(s.getRepr()) = getRoot(result.getRepr())) - } - } + import PrefixConstruction as Prefix /** * Predicates for testing the presence of a rejecting suffix. @@ -1018,8 +1025,6 @@ module ReDoSPruning { * using epsilon transitions. But any attempt at repeating `w` will end in a state that accepts all suffixes. */ private module SuffixConstruction { - import PrefixConstruction - /** * Holds if all states reachable from `fork` by repeating `w` * are likely rejectable by appending some suffix. @@ -1036,7 +1041,7 @@ module ReDoSPruning { */ pragma[noinline] private predicate isLikelyRejectable(State s) { - s = stateInPumpableRegexp() and + s = Prefix::stateInRelevantRegexp() and ( // exists a reject edge with some char. hasRejectEdge(s) @@ -1052,7 +1057,7 @@ module ReDoSPruning { * Holds if `s` is not an accept state, and there is no epsilon transition to an accept state. */ predicate isRejectState(State s) { - s = stateInPumpableRegexp() and not epsilonSucc*(s) = Accept(_) + s = Prefix::stateInRelevantRegexp() and not epsilonSucc*(s) = Accept(_) } /** @@ -1060,7 +1065,7 @@ module ReDoSPruning { */ pragma[noopt] predicate hasEdgeToLikelyRejectable(State s) { - s = stateInPumpableRegexp() and + s = Prefix::stateInRelevantRegexp() and // all edges (at least one) with some char leads to another state that is rejectable. // the `next` states might not share a common suffix, which can cause FPs. exists(string char | char = hasEdgeToLikelyRejectableHelper(s) | @@ -1076,7 +1081,7 @@ module ReDoSPruning { */ pragma[noinline] private string hasEdgeToLikelyRejectableHelper(State s) { - s = stateInPumpableRegexp() and + s = Prefix::stateInRelevantRegexp() and not hasRejectEdge(s) and not isRejectState(s) and deltaClosedChar(s, result, _) @@ -1088,8 +1093,8 @@ module ReDoSPruning { * `prev` to `next` that the character symbol `char`. */ predicate deltaClosedChar(State prev, string char, State next) { - prev = stateInPumpableRegexp() and - next = stateInPumpableRegexp() and + prev = Prefix::stateInRelevantRegexp() and + next = Prefix::stateInRelevantRegexp() and deltaClosed(prev, getAnInputSymbolMatchingRelevant(char), next) } @@ -1099,18 +1104,28 @@ module ReDoSPruning { result = getAnInputSymbolMatching(char) } + pragma[noinline] + RegExpRoot relevantRoot() { + exists(RegExpTerm term, State s | + s.getRepr() = term and isCandidateState(s) and result = term.getRootTerm() + ) + } + /** * Gets a char used for finding possible suffixes inside `root`. */ pragma[noinline] private string relevant(RegExpRoot root) { - exists(ascii(result)) and exists(root) - or - exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _)) - or - // The characters from `hasSimpleRejectEdge`. Only `\n` is really needed (as `\n` is not in the `ascii` relation). - // The three chars must be kept in sync with `hasSimpleRejectEdge`. - result = ["|", "\n", "Z"] and exists(root) + root = relevantRoot() and + ( + exists(ascii(result)) and exists(root) + or + exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _)) + or + // The characters from `hasSimpleRejectEdge`. Only `\n` is really needed (as `\n` is not in the `ascii` relation). + // The three chars must be kept in sync with `hasSimpleRejectEdge`. + result = ["|", "\n", "Z"] and exists(root) + ) } /** @@ -1208,12 +1223,12 @@ module ReDoSPruning { predicate hasReDoSResult(RegExpTerm t, string pump, State s, string prefixMsg) { isReDoSAttackable(t, pump, s) and ( - prefixMsg = "starting with '" + escape(PrefixConstruction::prefix(s)) + "' and " and - not PrefixConstruction::prefix(s) = "" + prefixMsg = "starting with '" + escape(Prefix::prefix(s)) + "' and " and + not Prefix::prefix(s) = "" or - PrefixConstruction::prefix(s) = "" and prefixMsg = "" + Prefix::prefix(s) = "" and prefixMsg = "" or - not exists(PrefixConstruction::prefix(s)) and prefixMsg = "" + not exists(Prefix::prefix(s)) and prefixMsg = "" ) } diff --git a/ruby/ql/lib/codeql/ruby/security/regexp/NfaUtils.qll b/ruby/ql/lib/codeql/ruby/security/regexp/NfaUtils.qll index fdcb10c2f85..033b8aa8cfd 100644 --- a/ruby/ql/lib/codeql/ruby/security/regexp/NfaUtils.qll +++ b/ruby/ql/lib/codeql/ruby/security/regexp/NfaUtils.qll @@ -93,8 +93,6 @@ class RegExpRoot extends RegExpTerm { * Holds if this root term is relevant to the ReDoS analysis. */ predicate isRelevant() { - // there is at least one repetition - getRoot(any(InfiniteRepetitionQuantifier q)) = this and // is actually used as a RegExp this.isUsedAsRegExp() and // not excluded for library specific reasons @@ -877,6 +875,101 @@ predicate isStartState(State state) { */ signature predicate isCandidateSig(State state, string pump); +/** + * Holds if `state` is a candidate for ReDoS. + */ +signature predicate isCandidateSig(State state); + +/** + * Predicates for constructing a prefix string that leads to a given state. + */ +module PrefixConstruction { + /** + * Holds if `state` is the textually last start state for the regular expression. + */ + private predicate lastStartState(State state) { + exists(RegExpRoot root | + state = + max(State s, Location l | + s = stateInRelevantRegexp() and + isStartState(s) and + getRoot(s.getRepr()) = root and + l = s.getRepr().getLocation() + | + s + order by + l.getStartLine(), l.getStartColumn(), s.getRepr().toString(), l.getEndColumn(), + l.getEndLine() + ) + ) + } + + /** + * Holds if there exists any transition (Epsilon() or other) from `a` to `b`. + */ + private predicate existsTransition(State a, State b) { delta(a, _, b) } + + /** + * Gets the minimum number of transitions it takes to reach `state` from the `start` state. + */ + int prefixLength(State start, State state) = + shortestDistances(lastStartState/1, existsTransition/2)(start, state, result) + + /** + * Gets the minimum number of transitions it takes to reach `state` from the start state. + */ + private int lengthFromStart(State state) { result = prefixLength(_, state) } + + /** + * Gets a string for which the regular expression will reach `state`. + * + * Has at most one result for any given `state`. + * This predicate will not always have a result even if there is a ReDoS issue in + * the regular expression. + */ + string prefix(State state) { + lastStartState(state) and + result = "" + or + // the search stops past the last redos candidate state. + lengthFromStart(state) <= max(lengthFromStart(any(State s | isCandidate(s)))) and + exists(State prev | + // select a unique predecessor (by an arbitrary measure) + prev = + min(State s, Location loc | + lengthFromStart(s) = lengthFromStart(state) - 1 and + loc = s.getRepr().getLocation() and + delta(s, _, state) + | + s + order by + loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn(), + s.getRepr().toString() + ) + | + // greedy search for the shortest prefix + result = prefix(prev) and delta(prev, Epsilon(), state) + or + not delta(prev, Epsilon(), state) and + result = prefix(prev) + getCanonicalEdgeChar(prev, state) + ) + } + + /** + * Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA. + */ + private string getCanonicalEdgeChar(State prev, State next) { + result = + min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next)) + } + + /** Gets a state within a regular expression that contains a candidate state. */ + pragma[noinline] + State stateInRelevantRegexp() { + exists(State s | isCandidate(s) | getRoot(s.getRepr()) = getRoot(result.getRepr())) + } +} + /** * A module for pruning candidate ReDoS states. * The candidates are specified by the `isCandidate` signature predicate. @@ -910,95 +1003,9 @@ module ReDoSPruning { /** Gets a state that can reach the `accept-any` state using only epsilon steps. */ private State acceptsAnySuffix() { epsilonSucc*(result) = AcceptAnySuffix(_) } - /** - * Predicates for constructing a prefix string that leads to a given state. - */ - private module PrefixConstruction { - /** - * Holds if `state` is the textually last start state for the regular expression. - */ - private predicate lastStartState(State state) { - exists(RegExpRoot root | - state = - max(State s, Location l | - s = stateInPumpableRegexp() and - isStartState(s) and - getRoot(s.getRepr()) = root and - l = s.getRepr().getLocation() - | - s - order by - l.getStartLine(), l.getStartColumn(), s.getRepr().toString(), l.getEndColumn(), - l.getEndLine() - ) - ) - } + predicate isCandidateState(State s) { isReDoSCandidate(s, _) } - /** - * Holds if there exists any transition (Epsilon() or other) from `a` to `b`. - */ - private predicate existsTransition(State a, State b) { delta(a, _, b) } - - /** - * Gets the minimum number of transitions it takes to reach `state` from the `start` state. - */ - int prefixLength(State start, State state) = - shortestDistances(lastStartState/1, existsTransition/2)(start, state, result) - - /** - * Gets the minimum number of transitions it takes to reach `state` from the start state. - */ - private int lengthFromStart(State state) { result = prefixLength(_, state) } - - /** - * Gets a string for which the regular expression will reach `state`. - * - * Has at most one result for any given `state`. - * This predicate will not always have a result even if there is a ReDoS issue in - * the regular expression. - */ - string prefix(State state) { - lastStartState(state) and - result = "" - or - // the search stops past the last redos candidate state. - lengthFromStart(state) <= max(lengthFromStart(any(State s | isReDoSCandidate(s, _)))) and - exists(State prev | - // select a unique predecessor (by an arbitrary measure) - prev = - min(State s, Location loc | - lengthFromStart(s) = lengthFromStart(state) - 1 and - loc = s.getRepr().getLocation() and - delta(s, _, state) - | - s - order by - loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn(), - s.getRepr().toString() - ) - | - // greedy search for the shortest prefix - result = prefix(prev) and delta(prev, Epsilon(), state) - or - not delta(prev, Epsilon(), state) and - result = prefix(prev) + getCanonicalEdgeChar(prev, state) - ) - } - - /** - * Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA. - */ - private string getCanonicalEdgeChar(State prev, State next) { - result = - min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next)) - } - - /** Gets a state within a regular expression that has a pumpable state. */ - pragma[noinline] - State stateInPumpableRegexp() { - exists(State s | isReDoSCandidate(s, _) | getRoot(s.getRepr()) = getRoot(result.getRepr())) - } - } + import PrefixConstruction as Prefix /** * Predicates for testing the presence of a rejecting suffix. @@ -1018,8 +1025,6 @@ module ReDoSPruning { * using epsilon transitions. But any attempt at repeating `w` will end in a state that accepts all suffixes. */ private module SuffixConstruction { - import PrefixConstruction - /** * Holds if all states reachable from `fork` by repeating `w` * are likely rejectable by appending some suffix. @@ -1036,7 +1041,7 @@ module ReDoSPruning { */ pragma[noinline] private predicate isLikelyRejectable(State s) { - s = stateInPumpableRegexp() and + s = Prefix::stateInRelevantRegexp() and ( // exists a reject edge with some char. hasRejectEdge(s) @@ -1052,7 +1057,7 @@ module ReDoSPruning { * Holds if `s` is not an accept state, and there is no epsilon transition to an accept state. */ predicate isRejectState(State s) { - s = stateInPumpableRegexp() and not epsilonSucc*(s) = Accept(_) + s = Prefix::stateInRelevantRegexp() and not epsilonSucc*(s) = Accept(_) } /** @@ -1060,7 +1065,7 @@ module ReDoSPruning { */ pragma[noopt] predicate hasEdgeToLikelyRejectable(State s) { - s = stateInPumpableRegexp() and + s = Prefix::stateInRelevantRegexp() and // all edges (at least one) with some char leads to another state that is rejectable. // the `next` states might not share a common suffix, which can cause FPs. exists(string char | char = hasEdgeToLikelyRejectableHelper(s) | @@ -1076,7 +1081,7 @@ module ReDoSPruning { */ pragma[noinline] private string hasEdgeToLikelyRejectableHelper(State s) { - s = stateInPumpableRegexp() and + s = Prefix::stateInRelevantRegexp() and not hasRejectEdge(s) and not isRejectState(s) and deltaClosedChar(s, result, _) @@ -1088,8 +1093,8 @@ module ReDoSPruning { * `prev` to `next` that the character symbol `char`. */ predicate deltaClosedChar(State prev, string char, State next) { - prev = stateInPumpableRegexp() and - next = stateInPumpableRegexp() and + prev = Prefix::stateInRelevantRegexp() and + next = Prefix::stateInRelevantRegexp() and deltaClosed(prev, getAnInputSymbolMatchingRelevant(char), next) } @@ -1099,18 +1104,28 @@ module ReDoSPruning { result = getAnInputSymbolMatching(char) } + pragma[noinline] + RegExpRoot relevantRoot() { + exists(RegExpTerm term, State s | + s.getRepr() = term and isCandidateState(s) and result = term.getRootTerm() + ) + } + /** * Gets a char used for finding possible suffixes inside `root`. */ pragma[noinline] private string relevant(RegExpRoot root) { - exists(ascii(result)) and exists(root) - or - exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _)) - or - // The characters from `hasSimpleRejectEdge`. Only `\n` is really needed (as `\n` is not in the `ascii` relation). - // The three chars must be kept in sync with `hasSimpleRejectEdge`. - result = ["|", "\n", "Z"] and exists(root) + root = relevantRoot() and + ( + exists(ascii(result)) and exists(root) + or + exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _)) + or + // The characters from `hasSimpleRejectEdge`. Only `\n` is really needed (as `\n` is not in the `ascii` relation). + // The three chars must be kept in sync with `hasSimpleRejectEdge`. + result = ["|", "\n", "Z"] and exists(root) + ) } /** @@ -1208,12 +1223,12 @@ module ReDoSPruning { predicate hasReDoSResult(RegExpTerm t, string pump, State s, string prefixMsg) { isReDoSAttackable(t, pump, s) and ( - prefixMsg = "starting with '" + escape(PrefixConstruction::prefix(s)) + "' and " and - not PrefixConstruction::prefix(s) = "" + prefixMsg = "starting with '" + escape(Prefix::prefix(s)) + "' and " and + not Prefix::prefix(s) = "" or - PrefixConstruction::prefix(s) = "" and prefixMsg = "" + Prefix::prefix(s) = "" and prefixMsg = "" or - not exists(PrefixConstruction::prefix(s)) and prefixMsg = "" + not exists(Prefix::prefix(s)) and prefixMsg = "" ) }