mirror of
https://github.com/github/codeql.git
synced 2025-12-24 04:36:35 +01:00
sync changes to other languages
This commit is contained in:
@@ -49,6 +49,9 @@ class HtmlMatchingRegExp extends RootTerm {
|
||||
}
|
||||
}
|
||||
|
||||
/** DEPRECATED: Alias for HtmlMatchingRegExp */
|
||||
deprecated class HTMLMatchingRegExp = HtmlMatchingRegExp;
|
||||
|
||||
/**
|
||||
* Holds if `regexp` matches some HTML tags, but misses some HTML tags that it should match.
|
||||
*
|
||||
|
||||
@@ -1027,7 +1027,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
|
||||
predicate reachesOnlyRejectableSuffixes(State fork, string w) {
|
||||
isReDoSCandidate(fork, w) and
|
||||
forex(State next | next = process(fork, w, w.length() - 1) | isLikelyRejectable(next)) and
|
||||
not epsilonSucc*(getProcessPrevious(fork, _, w)) = AcceptAnySuffix(_) // we stop `process(..)` early if we can, check here if it happened.
|
||||
not getProcessPrevious(fork, _, w) = acceptsAnySuffix() // we stop `process(..)` early if we can, check here if it happened.
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1284,7 +1284,7 @@ module Concretizer<CharTree Impl> {
|
||||
private predicate isRelevant(Node n) {
|
||||
isARelevantEnd(n)
|
||||
or
|
||||
exists(Node prev | isRelevant(prev) | n = getPrev(prev))
|
||||
exists(Node succ | isRelevant(succ) | n = getPrev(succ))
|
||||
}
|
||||
|
||||
/** Holds if `n` is a root with no predecessors. */
|
||||
@@ -1299,19 +1299,15 @@ module Concretizer<CharTree Impl> {
|
||||
}
|
||||
|
||||
/** Gets an ancestor of `end`, where `end` is a node that should have a result in `concretize`. */
|
||||
private Node getANodeInLongChain(Node end) {
|
||||
isARelevantEnd(end) and result = end
|
||||
or
|
||||
exists(Node prev | prev = getANodeInLongChain(end) | result = getPrev(prev))
|
||||
}
|
||||
private Node getAnAncestor(Node end) { isARelevantEnd(end) and result = getPrev*(end) }
|
||||
|
||||
/** Gets the `i`th character on the path from the root to `n`. */
|
||||
pragma[noinline]
|
||||
private string getPrefixChar(Node n, int i) {
|
||||
exists(Node prev |
|
||||
result = getChar(prev) and
|
||||
prev = getANodeInLongChain(n) and
|
||||
i = nodeDepth(prev)
|
||||
exists(Node ancestor |
|
||||
result = getChar(ancestor) and
|
||||
ancestor = getAnAncestor(n) and
|
||||
i = nodeDepth(ancestor)
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
@@ -28,18 +28,10 @@ signature predicate isRegexpMatchingCandidateSig(
|
||||
* A module for determining if a regexp matches a given string,
|
||||
* and reasoning about which capture groups are filled by a given string.
|
||||
*
|
||||
* The module parameter `isRegexpMatchingCandidateSig` determines which string should be tested,
|
||||
* The module parameter `isCandidate` determines which strings should be tested,
|
||||
* and the results can be read from the `matches` and `fillsCaptureGroup` predicates.
|
||||
*/
|
||||
module RegexpMatching<isRegexpMatchingCandidateSig/4 isCandidate> {
|
||||
private predicate test(RootTerm reg, string str, boolean ignorePrefix) {
|
||||
isCandidate(reg, str, ignorePrefix, false)
|
||||
}
|
||||
|
||||
private predicate testWithGroups(RootTerm reg, string str, boolean ignorePrefix) {
|
||||
isCandidate(reg, str, ignorePrefix, true)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a state the regular expression `reg` can be in after matching the `i`th char in `str`.
|
||||
* The regular expression is modeled as a non-determistic finite automaton,
|
||||
@@ -50,11 +42,7 @@ module RegexpMatching<isRegexpMatchingCandidateSig/4 isCandidate> {
|
||||
private State getAState(RootTerm reg, int i, string str, boolean ignorePrefix) {
|
||||
// start state, the -1 position before any chars have been matched
|
||||
i = -1 and
|
||||
(
|
||||
test(reg, str, ignorePrefix)
|
||||
or
|
||||
testWithGroups(reg, str, ignorePrefix)
|
||||
) and
|
||||
isCandidate(reg, str, ignorePrefix, _) and
|
||||
result.getRepr().getRootTerm() = reg and
|
||||
isStartState(result)
|
||||
or
|
||||
@@ -109,13 +97,7 @@ module RegexpMatching<isRegexpMatchingCandidateSig/4 isCandidate> {
|
||||
// The `getAnInputSymbolMatching` relation specialized to the chars that exists in strings tested by a `MatchedRegExp`.
|
||||
pragma[noinline]
|
||||
private InputSymbol specializedGetAnInputSymbolMatching(string char) {
|
||||
exists(string s, RootTerm r |
|
||||
test(r, s, _)
|
||||
or
|
||||
testWithGroups(r, s, _)
|
||||
|
|
||||
char = s.charAt(_)
|
||||
) and
|
||||
exists(string s, RootTerm r | isCandidate(r, s, _, _) | char = s.charAt(_)) and
|
||||
result = getAnInputSymbolMatching(char)
|
||||
}
|
||||
|
||||
@@ -124,7 +106,7 @@ module RegexpMatching<isRegexpMatchingCandidateSig/4 isCandidate> {
|
||||
* Starts with an accepting state as found by `getAState` and searches backwards
|
||||
* to the start state through the reachable states (as found by `getAState`).
|
||||
*
|
||||
* This predicate holds the invariant that the result state can be reached with `i` steps from a start state,
|
||||
* This predicate satisfies the invariant that the result state can be reached with `i` steps from a start state,
|
||||
* and an accepting state can be found after (`str.length() - 1 - i`) steps from the result.
|
||||
* The result state is therefore always on a valid path where `reg` accepts `str`.
|
||||
*
|
||||
@@ -133,7 +115,7 @@ module RegexpMatching<isRegexpMatchingCandidateSig/4 isCandidate> {
|
||||
*/
|
||||
private State getAStateThatReachesAccept(RootTerm reg, int i, string str, boolean ignorePrefix) {
|
||||
// base case, reaches an accepting state from the last state in `getAState(..)`
|
||||
testWithGroups(reg, str, ignorePrefix) and
|
||||
isCandidate(reg, str, ignorePrefix, true) and
|
||||
i = str.length() - 1 and
|
||||
result = getAState(reg, i, str, ignorePrefix) and
|
||||
epsilonSucc*(result) = Accept(_)
|
||||
@@ -154,7 +136,7 @@ module RegexpMatching<isRegexpMatchingCandidateSig/4 isCandidate> {
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `reg` matches `str`, where `str` is either in the `test` or `testWithGroups` predicate.
|
||||
* Holds if `reg` matches `str`, where `str` is in the `isCandidate` predicate.
|
||||
*/
|
||||
predicate matches(RootTerm reg, string str) {
|
||||
exists(State state | state = getAState(reg, str.length() - 1, str, _) |
|
||||
|
||||
@@ -351,7 +351,7 @@ predicate isReDoSCandidate(State state, string pump) { isPumpable(_, state, pump
|
||||
/**
|
||||
* Holds if repetitions of `pump` at `t` will cause polynomial backtracking.
|
||||
*/
|
||||
predicate polynimalReDoS(RegExpTerm t, string pump, string prefixMsg, RegExpTerm prev) {
|
||||
predicate polynomialReDoS(RegExpTerm t, string pump, string prefixMsg, RegExpTerm prev) {
|
||||
exists(State s, State pivot |
|
||||
ReDoSPruning<isReDoSCandidate/2>::hasReDoSResult(t, pump, s, prefixMsg) and
|
||||
isPumpable(pivot, s, _) and
|
||||
@@ -363,7 +363,7 @@ predicate polynimalReDoS(RegExpTerm t, string pump, string prefixMsg, RegExpTerm
|
||||
* Gets a message for why `term` can cause polynomial backtracking.
|
||||
*/
|
||||
string getReasonString(RegExpTerm term, string pump, string prefixMsg, RegExpTerm prev) {
|
||||
polynimalReDoS(term, pump, prefixMsg, prev) and
|
||||
polynomialReDoS(term, pump, prefixMsg, prev) and
|
||||
result =
|
||||
"Strings " + prefixMsg + "with many repetitions of '" + pump +
|
||||
"' can start matching anywhere after the start of the preceeding " + prev
|
||||
|
||||
Reference in New Issue
Block a user