get ReDoSUtil in sync for ruby

This commit is contained in:
Erik Krogh Kristensen
2021-11-18 16:31:45 +01:00
parent 6c2713dd8b
commit ee858d840e
9 changed files with 138 additions and 187 deletions

View File

@@ -218,7 +218,7 @@ private newtype TInputSymbol =
recc instanceof RegExpCharacterClass and
not recc.(RegExpCharacterClass).isUniversalClass()
or
recc instanceof RegExpCharacterClassEscape
isEscapeClass(recc, _)
)
} or
/** An input symbol representing all characters matched by `.`. */
@@ -340,13 +340,13 @@ private module CharacterClasses {
char <= hi
)
or
exists(RegExpCharacterClassEscape escape | escape = child |
escape.getValue() = escape.getValue().toLowerCase() and
classEscapeMatches(escape.getValue(), char)
exists(string charClass | isEscapeClass(child, charClass) |
charClass.toLowerCase() = charClass and
classEscapeMatches(charClass, char)
or
char = getARelevantChar() and
escape.getValue() = escape.getValue().toUpperCase() and
not classEscapeMatches(escape.getValue().toLowerCase(), char)
charClass.toUpperCase() = charClass and
not classEscapeMatches(charClass, char)
)
)
}
@@ -409,10 +409,10 @@ private module CharacterClasses {
or
child.(RegExpCharacterRange).isRange(_, result)
or
exists(RegExpCharacterClassEscape escape | child = escape |
result = min(string s | classEscapeMatches(escape.getValue().toLowerCase(), s))
exists(string charClass | isEscapeClass(child, charClass) |
result = min(string s | classEscapeMatches(charClass.toLowerCase(), s))
or
result = max(string s | classEscapeMatches(escape.getValue().toLowerCase(), s))
result = max(string s | classEscapeMatches(charClass.toLowerCase(), s))
)
)
}
@@ -466,33 +466,36 @@ private module CharacterClasses {
* An implementation of `CharacterClass` for \d, \s, and \w.
*/
private class PositiveCharacterClassEscape extends CharacterClass {
RegExpCharacterClassEscape cc;
RegExpTerm cc;
string charClass;
PositiveCharacterClassEscape() {
this = getCanonicalCharClass(cc) and cc.getValue() = ["d", "s", "w"]
isEscapeClass(cc, charClass) and
this = getCanonicalCharClass(cc) and
charClass = ["d", "s", "w"]
}
override string getARelevantChar() {
cc.getValue() = "d" and
charClass = "d" and
result = ["0", "9"]
or
cc.getValue() = "s" and
charClass = "s" and
result = " "
or
cc.getValue() = "w" and
charClass = "w" and
result = ["a", "Z", "_", "0", "9"]
}
override predicate matches(string char) { classEscapeMatches(cc.getValue(), char) }
override predicate matches(string char) { classEscapeMatches(charClass, char) }
override string choose() {
cc.getValue() = "d" and
charClass = "d" and
result = "9"
or
cc.getValue() = "s" and
charClass = "s" and
result = " "
or
cc.getValue() = "w" and
charClass = "w" and
result = "a"
}
}
@@ -501,26 +504,29 @@ private module CharacterClasses {
* An implementation of `CharacterClass` for \D, \S, and \W.
*/
private class NegativeCharacterClassEscape extends CharacterClass {
RegExpCharacterClassEscape cc;
RegExpTerm cc;
string charClass;
NegativeCharacterClassEscape() {
this = getCanonicalCharClass(cc) and cc.getValue() = ["D", "S", "W"]
isEscapeClass(cc, charClass) and
this = getCanonicalCharClass(cc) and
charClass = ["D", "S", "W"]
}
override string getARelevantChar() {
cc.getValue() = "D" and
charClass = "D" and
result = ["a", "Z", "!"]
or
cc.getValue() = "S" and
charClass = "S" and
result = ["a", "9", "!"]
or
cc.getValue() = "W" and
charClass = "W" and
result = [" ", "!"]
}
bindingset[char]
override predicate matches(string char) {
not classEscapeMatches(cc.getValue().toLowerCase(), char)
not classEscapeMatches(charClass.toLowerCase(), char)
}
}
}
@@ -599,7 +605,7 @@ predicate delta(State q1, EdgeLabel lbl, State q2) {
q2 = after(cc)
)
or
exists(RegExpCharacterClassEscape cc |
exists(RegExpTerm cc | isEscapeClass(cc, _) |
q1 = before(cc) and
lbl = CharClass(cc.getRawValue() + "|" + getCanonicalizationFlags(cc.getRootTerm())) and
q2 = after(cc)

View File

@@ -5,6 +5,14 @@
import python
import semmle.python.RegexTreeView
/**
* Holds if `term` is an ecape class representing e.g. `\d`.
* `clazz` is which character class it represents, e.g. "d" for `\d`.
*/
predicate isEscapeClass(RegExpTerm term, string clazz) {
exists(RegExpCharacterClassEscape escape | term = escape | escape.getValue() = clazz)
}
/**
* Holds if the regular expression should not be considered.
*