mirror of
https://github.com/github/codeql.git
synced 2026-04-25 08:45:14 +02:00
changes based on review
This commit is contained in:
@@ -17,7 +17,7 @@ import javascript
|
||||
/**
|
||||
* A configuration for which parts of a regular expression should be considered relevant for
|
||||
* the different predicates in `ReDoS.qll`.
|
||||
* Used to adjust the computations for either superliniear or exponential backtracking.
|
||||
* Used to adjust the computations for either superlinear or exponential backtracking.
|
||||
*/
|
||||
abstract class ReDoSConfiguration extends string {
|
||||
bindingset[this]
|
||||
@@ -124,12 +124,12 @@ private class RegexpCharacterConstant extends RegExpConstant {
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `term` is the chosen cannonical representative for all terms with string representation `str`.
|
||||
* Holds if `term` is the chosen canonical representative for all terms with string representation `str`.
|
||||
*
|
||||
* Using cannonical representatives gives a huge performance boost when working with tuples containing multiple `InputSymbol`s.
|
||||
* Using canonical representatives gives a huge performance boost when working with tuples containing multiple `InputSymbol`s.
|
||||
* The number of `InputSymbol`s is decreased by 3 orders of magnitude or more in some larger benchmarks.
|
||||
*/
|
||||
private predicate isCannonicalTerm(RegExpTerm term, string str) {
|
||||
private predicate isCanonicalTerm(RegExpTerm term, string str) {
|
||||
term =
|
||||
rank[1](RegExpTerm t, Location loc, File file |
|
||||
loc = t.getLocation() and
|
||||
@@ -154,7 +154,7 @@ private newtype TInputSymbol =
|
||||
*/
|
||||
CharClass(string charClassString) {
|
||||
exists(RegExpTerm term | term.getRawValue() = charClassString | getRoot(term).isRelevant()) and
|
||||
exists(RegExpTerm recc | isCannonicalTerm(recc, charClassString) |
|
||||
exists(RegExpTerm recc | isCanonicalTerm(recc, charClassString) |
|
||||
recc instanceof RegExpCharacterClass and
|
||||
not recc.(RegExpCharacterClass).isUniversalClass()
|
||||
or
|
||||
@@ -168,6 +168,13 @@ private newtype TInputSymbol =
|
||||
/** An epsilon transition in the automaton. */
|
||||
Epsilon()
|
||||
|
||||
/**
|
||||
* Gets the canonical CharClass for `term`.
|
||||
*/
|
||||
CharClass getCanonicalCharClass(RegExpTerm term) {
|
||||
exists(string str | isCanonicalTerm(term, str) | result = CharClass(str))
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `a` and `b` are input symbols from the same regexp.
|
||||
* (And not a `Dot()`, `Any()` or `Epsilon()`)
|
||||
@@ -251,10 +258,7 @@ private module CharacterClasses {
|
||||
*/
|
||||
pragma[noinline]
|
||||
predicate hasChildThatMatches(RegExpCharacterClass cc, string char) {
|
||||
exists(string str |
|
||||
isCannonicalTerm(cc, str) and
|
||||
exists(CharClass(str))
|
||||
) and
|
||||
exists(getCanonicalCharClass(cc)) and
|
||||
exists(RegExpTerm child | child = cc.getAChild() |
|
||||
char = child.(RegexpCharacterConstant).getValue()
|
||||
or
|
||||
@@ -350,9 +354,7 @@ private module CharacterClasses {
|
||||
private class PositiveCharacterClass extends CharacterClass {
|
||||
RegExpCharacterClass cc;
|
||||
|
||||
PositiveCharacterClass() {
|
||||
exists(string str | isCannonicalTerm(cc, str) | this = CharClass(str) and not cc.isInverted())
|
||||
}
|
||||
PositiveCharacterClass() { this = getCanonicalCharClass(cc) and not cc.isInverted() }
|
||||
|
||||
override string getARelevantChar() { result = getAMentionedChar(cc) }
|
||||
|
||||
@@ -365,9 +367,7 @@ private module CharacterClasses {
|
||||
private class InvertedCharacterClass extends CharacterClass {
|
||||
RegExpCharacterClass cc;
|
||||
|
||||
InvertedCharacterClass() {
|
||||
exists(string str | isCannonicalTerm(cc, str) | this = CharClass(str) and cc.isInverted())
|
||||
}
|
||||
InvertedCharacterClass() { this = getCanonicalCharClass(cc) and cc.isInverted() }
|
||||
|
||||
override string getARelevantChar() {
|
||||
result = nextChar(getAMentionedChar(cc)) or
|
||||
@@ -405,9 +405,7 @@ private module CharacterClasses {
|
||||
RegExpCharacterClassEscape cc;
|
||||
|
||||
PositiveCharacterClassEscape() {
|
||||
exists(string str | isCannonicalTerm(cc, str) |
|
||||
this = CharClass(str) and cc.getValue() = ["d", "s", "w"]
|
||||
)
|
||||
this = getCanonicalCharClass(cc) and cc.getValue() = ["d", "s", "w"]
|
||||
}
|
||||
|
||||
override string getARelevantChar() {
|
||||
@@ -442,9 +440,7 @@ private module CharacterClasses {
|
||||
RegExpCharacterClassEscape cc;
|
||||
|
||||
NegativeCharacterClassEscape() {
|
||||
exists(string str | isCannonicalTerm(cc, str) |
|
||||
this = CharClass(str) and cc.getValue() = ["D", "S", "W"]
|
||||
)
|
||||
this = getCanonicalCharClass(cc) and cc.getValue() = ["D", "S", "W"]
|
||||
}
|
||||
|
||||
override string getARelevantChar() {
|
||||
@@ -802,14 +798,14 @@ private module PrefixConstruction {
|
||||
result = prefix(prev) and delta(prev, Epsilon(), state)
|
||||
or
|
||||
not delta(prev, Epsilon(), state) and
|
||||
result = prefix(prev) + getMinimumEdgeChar(prev, state)
|
||||
result = prefix(prev) + getCanonicalEdgeChar(prev, state)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the minimum char for which there exists a transition from `prev` to `next` in the NFA.
|
||||
* Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA.
|
||||
*/
|
||||
private string getMinimumEdgeChar(State prev, State next) {
|
||||
private string getCanonicalEdgeChar(State prev, State next) {
|
||||
result =
|
||||
min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next))
|
||||
}
|
||||
@@ -882,6 +878,7 @@ private module SuffixConstruction {
|
||||
// all edges (at least one) with some char leads to another state that is rejectable.
|
||||
// the `next` states might not share a common suffix, which can cause FPs.
|
||||
exists(string char | char = hasEdgeToLikelyRejectableHelper(s) |
|
||||
// noopt to force `hasEdgeToLikelyRejectableHelper` to be first in the join-order.
|
||||
exists(State next | deltaClosedChar(s, char, next) | isLikelyRejectable(next)) and
|
||||
forall(State next | deltaClosedChar(s, char, next) | isLikelyRejectable(next))
|
||||
)
|
||||
@@ -935,7 +932,7 @@ private module SuffixConstruction {
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if there is not edge from `s` labeled with "|", "\n", or "Z" in our NFA.
|
||||
* Holds if there is no edge from `s` labeled with "|", "\n", or "Z" in our NFA.
|
||||
* This predicate is used as a cheap pre-processing to speed up `hasRejectEdge`.
|
||||
*/
|
||||
private predicate hasSimpleRejectEdge(State s) {
|
||||
@@ -953,6 +950,8 @@ private module SuffixConstruction {
|
||||
exists(string char, InputSymbol sym |
|
||||
char = w.charAt(i) and
|
||||
deltaClosed(prev, sym, result) and
|
||||
// noopt to prevent joining `prev` with all possible `chars` that could transition away from `prev`.
|
||||
// Instead only join with the set of `chars` where a relevant `InputSymbol` has already been found.
|
||||
sym = getAProcessInputSymbol(char)
|
||||
)
|
||||
)
|
||||
@@ -1015,8 +1014,8 @@ private string rotate(string str, int i) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `term` may cause superliniear backtracking on strings containing many repetitions of `pump`.
|
||||
* Gets the minimum possible string that causes superliniear backtracking.
|
||||
* Holds if `term` may cause superlinear backtracking on strings containing many repetitions of `pump`.
|
||||
* Gets the shortest string that causes superlinear backtracking.
|
||||
*/
|
||||
private predicate isReDoSAttackable(RegExpTerm term, string pump, State s) {
|
||||
exists(int i, string c | s = Match(term, i) |
|
||||
|
||||
@@ -44,10 +44,10 @@ import ReDoSUtil
|
||||
*/
|
||||
|
||||
/**
|
||||
* An instantiaion of `ReDoSConfiguration` for superliniear ReDoS.
|
||||
* An instantiaion of `ReDoSConfiguration` for superlinear ReDoS.
|
||||
*/
|
||||
class SuperLiniearReDoSConfiguration extends ReDoSConfiguration {
|
||||
SuperLiniearReDoSConfiguration() { this = "SuperLiniearReDoSConfiguration" }
|
||||
class SuperLinearReDoSConfiguration extends ReDoSConfiguration {
|
||||
SuperLinearReDoSConfiguration() { this = "SuperLinearReDoSConfiguration" }
|
||||
|
||||
override predicate isReDoSCandidate(State state, string pump) { isPumpable(_, state, pump) }
|
||||
}
|
||||
@@ -109,23 +109,23 @@ private module FeasibleTuple {
|
||||
pragma[inline]
|
||||
predicate isFeasibleTuple(State r1, State r2, State r3) {
|
||||
// The first element is either inside a repetition (or the start state itself)
|
||||
isRepeitionOrStart(r1) and
|
||||
isRepetitionOrStart(r1) and
|
||||
// The last element is inside a repetition
|
||||
stateInsideRepetition(r3) and
|
||||
// The states are reachable in the NFA in the order r1 -> r2 -> r3
|
||||
delta+(r1) = r2 and
|
||||
delta+(r2) = r3 and
|
||||
// The last element can reach a target (the "succ" state in a `(pivot, succ)` pair).
|
||||
canReachATarget(r3) and
|
||||
// The first element can reach a beginning (the "pivot" state in a `(pivot, succ)` pair).
|
||||
canReachABeginning(r1)
|
||||
canReachABeginning(r1) and
|
||||
// The last element can reach a target (the "succ" state in a `(pivot, succ)` pair).
|
||||
canReachATarget(r3)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `s` is either inside a repetition, or is the start state (which is a repetition).
|
||||
*/
|
||||
pragma[noinline]
|
||||
private predicate isRepeitionOrStart(State s) { stateInsideRepetition(s) or s = getRootState() }
|
||||
private predicate isRepetitionOrStart(State s) { stateInsideRepetition(s) or s = getRootState() }
|
||||
|
||||
/**
|
||||
* Holds if state `s` might be inside a backtracking repetition.
|
||||
@@ -155,7 +155,7 @@ private module FeasibleTuple {
|
||||
/**
|
||||
* Holds if `pivot` and `succ` are a pair of loops that could be the beginning of a quadratic blowup.
|
||||
*
|
||||
* There is a slight implementation difference compared to the paper: this predicate require that `pivot != succ`.
|
||||
* There is a slight implementation difference compared to the paper: this predicate requires that `pivot != succ`.
|
||||
* The case where `pivot = succ` causes exponential backtracking and is handled by the `js/redos` query.
|
||||
*/
|
||||
predicate isStartLoops(State pivot, State succ) {
|
||||
@@ -163,7 +163,7 @@ predicate isStartLoops(State pivot, State succ) {
|
||||
succ.getRepr() instanceof InfiniteRepetitionQuantifier and
|
||||
delta+(pivot) = succ and
|
||||
(
|
||||
pivot.getRepr() = any(InfiniteRepetitionQuantifier i)
|
||||
pivot.getRepr() instanceof InfiniteRepetitionQuantifier
|
||||
or
|
||||
pivot = mkMatch(any(RegExpRoot root))
|
||||
)
|
||||
@@ -333,7 +333,7 @@ StateTuple getAnEndTuple(State pivot, State succ) {
|
||||
* From theorem 3 in the paper linked in the top of this file we can therefore conclude that
|
||||
* the regular expression has polynomial backtracking - if a rejecting suffix exists.
|
||||
*
|
||||
* This predicate is used by `SuperLiniearReDoSConfiguration`, and the final results are
|
||||
* This predicate is used by `SuperLinearReDoSConfiguration`, and the final results are
|
||||
* available in the `hasReDoSResult` predicate.
|
||||
*/
|
||||
predicate isPumpable(State pivot, State succ, string pump) {
|
||||
|
||||
Reference in New Issue
Block a user