sync changes to other languages

This commit is contained in:
erik-krogh
2022-08-12 20:28:12 +02:00
parent 97681ea219
commit b9e96fb078
10 changed files with 45 additions and 87 deletions

View File

@@ -49,6 +49,9 @@ class HtmlMatchingRegExp extends RootTerm {
}
}
/** DEPRECATED: Alias for HtmlMatchingRegExp */
deprecated class HTMLMatchingRegExp = HtmlMatchingRegExp;
/**
* Holds if `regexp` matches some HTML tags, but misses some HTML tags that it should match.
*

View File

@@ -1027,7 +1027,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
predicate reachesOnlyRejectableSuffixes(State fork, string w) {
isReDoSCandidate(fork, w) and
forex(State next | next = process(fork, w, w.length() - 1) | isLikelyRejectable(next)) and
not epsilonSucc*(getProcessPrevious(fork, _, w)) = AcceptAnySuffix(_) // we stop `process(..)` early if we can, check here if it happened.
not getProcessPrevious(fork, _, w) = acceptsAnySuffix() // we stop `process(..)` early if we can, check here if it happened.
}
/**
@@ -1284,7 +1284,7 @@ module Concretizer<CharTree Impl> {
private predicate isRelevant(Node n) {
isARelevantEnd(n)
or
exists(Node prev | isRelevant(prev) | n = getPrev(prev))
exists(Node succ | isRelevant(succ) | n = getPrev(succ))
}
/** Holds if `n` is a root with no predecessors. */
@@ -1299,19 +1299,15 @@ module Concretizer<CharTree Impl> {
}
/** Gets an ancestor of `end`, where `end` is a node that should have a result in `concretize`. */
private Node getANodeInLongChain(Node end) {
isARelevantEnd(end) and result = end
or
exists(Node prev | prev = getANodeInLongChain(end) | result = getPrev(prev))
}
private Node getAnAncestor(Node end) { isARelevantEnd(end) and result = getPrev*(end) }
/** Gets the `i`th character on the path from the root to `n`. */
pragma[noinline]
private string getPrefixChar(Node n, int i) {
exists(Node prev |
result = getChar(prev) and
prev = getANodeInLongChain(n) and
i = nodeDepth(prev)
exists(Node ancestor |
result = getChar(ancestor) and
ancestor = getAnAncestor(n) and
i = nodeDepth(ancestor)
)
}

View File

@@ -28,18 +28,10 @@ signature predicate isRegexpMatchingCandidateSig(
* A module for determining if a regexp matches a given string,
* and reasoning about which capture groups are filled by a given string.
*
* The module parameter `isRegexpMatchingCandidateSig` determines which string should be tested,
* The module parameter `isCandidate` determines which strings should be tested,
* and the results can be read from the `matches` and `fillsCaptureGroup` predicates.
*/
module RegexpMatching<isRegexpMatchingCandidateSig/4 isCandidate> {
private predicate test(RootTerm reg, string str, boolean ignorePrefix) {
isCandidate(reg, str, ignorePrefix, false)
}
private predicate testWithGroups(RootTerm reg, string str, boolean ignorePrefix) {
isCandidate(reg, str, ignorePrefix, true)
}
/**
* Gets a state the regular expression `reg` can be in after matching the `i`th char in `str`.
* The regular expression is modeled as a non-determistic finite automaton,
@@ -50,11 +42,7 @@ module RegexpMatching<isRegexpMatchingCandidateSig/4 isCandidate> {
private State getAState(RootTerm reg, int i, string str, boolean ignorePrefix) {
// start state, the -1 position before any chars have been matched
i = -1 and
(
test(reg, str, ignorePrefix)
or
testWithGroups(reg, str, ignorePrefix)
) and
isCandidate(reg, str, ignorePrefix, _) and
result.getRepr().getRootTerm() = reg and
isStartState(result)
or
@@ -109,13 +97,7 @@ module RegexpMatching<isRegexpMatchingCandidateSig/4 isCandidate> {
// The `getAnInputSymbolMatching` relation specialized to the chars that exists in strings tested by a `MatchedRegExp`.
pragma[noinline]
private InputSymbol specializedGetAnInputSymbolMatching(string char) {
exists(string s, RootTerm r |
test(r, s, _)
or
testWithGroups(r, s, _)
|
char = s.charAt(_)
) and
exists(string s, RootTerm r | isCandidate(r, s, _, _) | char = s.charAt(_)) and
result = getAnInputSymbolMatching(char)
}
@@ -124,7 +106,7 @@ module RegexpMatching<isRegexpMatchingCandidateSig/4 isCandidate> {
* Starts with an accepting state as found by `getAState` and searches backwards
* to the start state through the reachable states (as found by `getAState`).
*
* This predicate holds the invariant that the result state can be reached with `i` steps from a start state,
* This predicate satisfies the invariant that the result state can be reached with `i` steps from a start state,
* and an accepting state can be found after (`str.length() - 1 - i`) steps from the result.
* The result state is therefore always on a valid path where `reg` accepts `str`.
*
@@ -133,7 +115,7 @@ module RegexpMatching<isRegexpMatchingCandidateSig/4 isCandidate> {
*/
private State getAStateThatReachesAccept(RootTerm reg, int i, string str, boolean ignorePrefix) {
// base case, reaches an accepting state from the last state in `getAState(..)`
testWithGroups(reg, str, ignorePrefix) and
isCandidate(reg, str, ignorePrefix, true) and
i = str.length() - 1 and
result = getAState(reg, i, str, ignorePrefix) and
epsilonSucc*(result) = Accept(_)
@@ -154,7 +136,7 @@ module RegexpMatching<isRegexpMatchingCandidateSig/4 isCandidate> {
}
/**
* Holds if `reg` matches `str`, where `str` is either in the `test` or `testWithGroups` predicate.
* Holds if `reg` matches `str`, where `str` is in the `isCandidate` predicate.
*/
predicate matches(RootTerm reg, string str) {
exists(State state | state = getAState(reg, str.length() - 1, str, _) |

View File

@@ -351,7 +351,7 @@ predicate isReDoSCandidate(State state, string pump) { isPumpable(_, state, pump
/**
* Holds if repetitions of `pump` at `t` will cause polynomial backtracking.
*/
predicate polynimalReDoS(RegExpTerm t, string pump, string prefixMsg, RegExpTerm prev) {
predicate polynomialReDoS(RegExpTerm t, string pump, string prefixMsg, RegExpTerm prev) {
exists(State s, State pivot |
ReDoSPruning<isReDoSCandidate/2>::hasReDoSResult(t, pump, s, prefixMsg) and
isPumpable(pivot, s, _) and
@@ -363,7 +363,7 @@ predicate polynimalReDoS(RegExpTerm t, string pump, string prefixMsg, RegExpTerm
* Gets a message for why `term` can cause polynomial backtracking.
*/
string getReasonString(RegExpTerm term, string pump, string prefixMsg, RegExpTerm prev) {
polynimalReDoS(term, pump, prefixMsg, prev) and
polynomialReDoS(term, pump, prefixMsg, prev) and
result =
"Strings " + prefixMsg + "with many repetitions of '" + pump +
"' can start matching anywhere after the start of the preceeding " + prev