mirror of
https://github.com/github/codeql.git
synced 2026-04-29 18:55:14 +02:00
add a step over empty lookaheads/lookbehinds
This commit is contained in:
@@ -71,6 +71,49 @@ private int ascii(string char) {
|
||||
)
|
||||
}
|
||||
|
||||
/**
 * Holds if `t` can match the empty string (epsilon).
 *
 * Such a term places no restriction on the language accepted by the
 * regular expression that contains it.
 *
 * This is an under-approximation: only the term kinds handled below are
 * recognised, and in particular the predicate never holds for sub-patterns.
 */
predicate matchesEpsilon(RegExpTerm t) {
  // Terms that are optional by construction.
  t instanceof RegExpOpt
  or
  t instanceof RegExpStar
  or
  t.(RegExpRange).getLowerBound() = 0
  or
  // A back-reference whose referenced group matches epsilon.
  matchesEpsilon(t.(RegExpBackRef).getGroup())
  or
  // A sequence with at least one element, all of whose elements match epsilon.
  forex(RegExpTerm elem | elem = t.(RegExpSequence).getAChild() | matchesEpsilon(elem))
  or
  // An alternation, group, `+` or range term with some child that matches epsilon.
  (
    t instanceof RegExpRange
    or
    t instanceof RegExpPlus
    or
    t instanceof RegExpGroup
    or
    t instanceof RegExpAlt
  ) and
  matchesEpsilon(t.getAChild())
}
|
||||
|
||||
/**
 * A positive lookahead or positive lookbehind whose operand
 * can match the empty string according to `matchesEpsilon`.
 */
class EmptyPositiveSubPatttern extends RegExpSubPattern {
  EmptyPositiveSubPatttern() {
    // The operand must be able to match epsilon ...
    matchesEpsilon(this.getOperand()) and
    // ... and the assertion itself must be one of the two positive kinds.
    (
      this instanceof RegExpPositiveLookbehind
      or
      this instanceof RegExpPositiveLookahead
    )
  }
}
|
||||
|
||||
/**
|
||||
* A branch in a disjunction that is the root node in a literal, or a literal
|
||||
* whose root node is not a disjunction.
|
||||
@@ -550,6 +593,10 @@ predicate delta(State q1, EdgeLabel lbl, State q2) {
|
||||
exists(RegExpDollar dollar | q1 = before(dollar) |
|
||||
lbl = Epsilon() and q2 = Accept(getRoot(dollar))
|
||||
)
|
||||
or
|
||||
exists(EmptyPositiveSubPatttern empty | q1 = before(empty) |
|
||||
lbl = Epsilon() and q2 = after(empty)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -363,35 +363,6 @@ predicate polynimalReDoS(RegExpTerm t, string pump, string prefixMsg, RegExpTerm
|
||||
)
|
||||
}
|
||||
|
||||
/**
 * Holds if `t` can match the empty string (epsilon).
 *
 * Such a term places no restriction on the language accepted by the
 * regular expression that contains it.
 *
 * This is an under-approximation: only the term kinds handled below are
 * recognised, and in particular the predicate never holds for sub-patterns.
 */
private predicate matchesEpsilon(RegExpTerm t) {
  // Terms that are optional by construction.
  t instanceof RegExpOpt
  or
  t instanceof RegExpStar
  or
  t.(RegExpRange).getLowerBound() = 0
  or
  // A back-reference whose referenced group matches epsilon.
  matchesEpsilon(t.(RegExpBackRef).getGroup())
  or
  // A sequence with at least one element, all of whose elements match epsilon.
  forex(RegExpTerm elem | elem = t.(RegExpSequence).getAChild() | matchesEpsilon(elem))
  or
  // An alternation, group, `+` or range term with some child that matches epsilon.
  (
    t instanceof RegExpRange
    or
    t instanceof RegExpPlus
    or
    t instanceof RegExpGroup
    or
    t instanceof RegExpAlt
  ) and
  matchesEpsilon(t.getAChild())
}
|
||||
|
||||
/**
|
||||
* Gets a message for why `term` can cause polynomial backtracking.
|
||||
*/
|
||||
|
||||
@@ -493,3 +493,7 @@
|
||||
| tst.js:351:15:351:16 | a+ | Strings with many repetitions of 'a' can start matching anywhere after the start of the preceeding (a+)* |
|
||||
| tst.js:352:15:352:16 | a* | Strings with many repetitions of 'a' can start matching anywhere after the start of the preceeding (a*)+b |
|
||||
| tst.js:353:15:353:16 | a+ | Strings with many repetitions of 'a' can start matching anywhere after the start of the preceeding (a+)+ |
|
||||
| tst.js:372:16:372:21 | [^"]*? | Strings starting with '"' and with many repetitions of '""' can start matching anywhere after the start of the preceeding ("[^"]*?"\|[^"\\s]+)+(?=\\s*\|\\s*$)X |
|
||||
| tst.js:372:24:372:30 | [^"\\s]+ | Strings with many repetitions of '!' can start matching anywhere after the start of the preceeding ("[^"]*?"\|[^"\\s]+)+ |
|
||||
| tst.js:373:16:373:21 | [^"]*? | Strings starting with '"' and with many repetitions of '""' can start matching anywhere after the start of the preceeding ("[^"]*?"\|[^"\\s]+)+(?=X) |
|
||||
| tst.js:373:24:373:30 | [^"\\s]+ | Strings with many repetitions of '!' can start matching anywhere after the start of the preceeding ("[^"]*?"\|[^"\\s]+)+ |
|
||||
|
||||
@@ -172,3 +172,5 @@
|
||||
| tst.js:361:15:361:33 | ((?:a{0\|-)\|\\w\\{\\d)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a{0'. |
|
||||
| tst.js:362:15:362:35 | ((?:a{0,\|-)\|\\w\\{\\d,)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a{0,'. |
|
||||
| tst.js:363:15:363:38 | ((?:a{0,2\|-)\|\\w\\{\\d,\\d)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a{0,2'. |
|
||||
| tst.js:372:24:372:30 | [^"\\s]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '!'. |
|
||||
| tst.js:373:24:373:30 | [^"\\s]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '!'. |
|
||||
|
||||
@@ -363,4 +363,11 @@ var bad85 = /^((?:a{0,|-)|\w\{\d,)+X$/;
|
||||
var bad86 = /^((?:a{0,2|-)|\w\{\d,\d)+X$/;
|
||||
|
||||
// GOOD:
|
||||
var good42 = /^((?:a{0,2}|-)|\w\{\d,\d\})+X$/;
|
||||
var good42 = /^((?:a{0,2}|-)|\w\{\d,\d\})+X$/;
|
||||
|
||||
// GOOD: the trailing lookahead (?=\s*|\s*$) can match the empty string and nothing is required after it
|
||||
var good43 = /("[^"]*?"|[^"\s]+)+(?=\s*|\s*$)/g;
|
||||
|
||||
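// BAD: an 'X' must still follow the repetition (after the lookahead in bad87, inside the lookahead in bad88)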
|
||||
var bad87 = /("[^"]*?"|[^"\s]+)+(?=\s*|\s*$)X/g;
|
||||
var bad88 = /("[^"]*?"|[^"\s]+)+(?=X)/g;
|
||||
Reference in New Issue
Block a user