get ReDoSUtil in sync for ruby

This commit is contained in:
Erik Krogh Kristensen
2021-11-18 16:31:45 +01:00
parent 6c2713dd8b
commit ee858d840e
9 changed files with 138 additions and 187 deletions

View File

@@ -460,9 +460,10 @@
"javascript/ql/lib/semmle/javascript/security/internal/SensitiveDataHeuristics.qll",
"python/ql/lib/semmle/python/security/internal/SensitiveDataHeuristics.qll"
],
"ReDoS Util Python/JS": [
"ReDoS Util Python/JS/Ruby": [
"javascript/ql/lib/semmle/javascript/security/performance/ReDoSUtil.qll",
"python/ql/lib/semmle/python/security/performance/ReDoSUtil.qll"
"python/ql/lib/semmle/python/security/performance/ReDoSUtil.qll",
"ruby/ql/lib/codeql/ruby/security/performance/ReDoSUtil.qll"
],
"ReDoS Exponential Python/JS": [
"javascript/ql/lib/semmle/javascript/security/performance/ExponentialBackTracking.qll",

View File

@@ -218,7 +218,7 @@ private newtype TInputSymbol =
recc instanceof RegExpCharacterClass and
not recc.(RegExpCharacterClass).isUniversalClass()
or
recc instanceof RegExpCharacterClassEscape
isEscapeClass(recc, _)
)
} or
/** An input symbol representing all characters matched by `.`. */
@@ -340,13 +340,13 @@ private module CharacterClasses {
char <= hi
)
or
exists(RegExpCharacterClassEscape escape | escape = child |
escape.getValue() = escape.getValue().toLowerCase() and
classEscapeMatches(escape.getValue(), char)
exists(string charClass | isEscapeClass(child, charClass) |
charClass.toLowerCase() = charClass and
classEscapeMatches(charClass, char)
or
char = getARelevantChar() and
escape.getValue() = escape.getValue().toUpperCase() and
not classEscapeMatches(escape.getValue().toLowerCase(), char)
charClass.toUpperCase() = charClass and
not classEscapeMatches(charClass, char)
)
)
}
@@ -409,10 +409,10 @@ private module CharacterClasses {
or
child.(RegExpCharacterRange).isRange(_, result)
or
exists(RegExpCharacterClassEscape escape | child = escape |
result = min(string s | classEscapeMatches(escape.getValue().toLowerCase(), s))
exists(string charClass | isEscapeClass(child, charClass) |
result = min(string s | classEscapeMatches(charClass.toLowerCase(), s))
or
result = max(string s | classEscapeMatches(escape.getValue().toLowerCase(), s))
result = max(string s | classEscapeMatches(charClass.toLowerCase(), s))
)
)
}
@@ -466,33 +466,36 @@ private module CharacterClasses {
* An implementation of `CharacterClass` for \d, \s, and \w.
*/
private class PositiveCharacterClassEscape extends CharacterClass {
RegExpCharacterClassEscape cc;
RegExpTerm cc;
string charClass;
PositiveCharacterClassEscape() {
this = getCanonicalCharClass(cc) and cc.getValue() = ["d", "s", "w"]
isEscapeClass(cc, charClass) and
this = getCanonicalCharClass(cc) and
charClass = ["d", "s", "w"]
}
override string getARelevantChar() {
cc.getValue() = "d" and
charClass = "d" and
result = ["0", "9"]
or
cc.getValue() = "s" and
charClass = "s" and
result = " "
or
cc.getValue() = "w" and
charClass = "w" and
result = ["a", "Z", "_", "0", "9"]
}
override predicate matches(string char) { classEscapeMatches(cc.getValue(), char) }
override predicate matches(string char) { classEscapeMatches(charClass, char) }
override string choose() {
cc.getValue() = "d" and
charClass = "d" and
result = "9"
or
cc.getValue() = "s" and
charClass = "s" and
result = " "
or
cc.getValue() = "w" and
charClass = "w" and
result = "a"
}
}
@@ -501,26 +504,29 @@ private module CharacterClasses {
* An implementation of `CharacterClass` for \D, \S, and \W.
*/
private class NegativeCharacterClassEscape extends CharacterClass {
RegExpCharacterClassEscape cc;
RegExpTerm cc;
string charClass;
NegativeCharacterClassEscape() {
this = getCanonicalCharClass(cc) and cc.getValue() = ["D", "S", "W"]
isEscapeClass(cc, charClass) and
this = getCanonicalCharClass(cc) and
charClass = ["D", "S", "W"]
}
override string getARelevantChar() {
cc.getValue() = "D" and
charClass = "D" and
result = ["a", "Z", "!"]
or
cc.getValue() = "S" and
charClass = "S" and
result = ["a", "9", "!"]
or
cc.getValue() = "W" and
charClass = "W" and
result = [" ", "!"]
}
bindingset[char]
override predicate matches(string char) {
not classEscapeMatches(cc.getValue().toLowerCase(), char)
not classEscapeMatches(charClass.toLowerCase(), char)
}
}
}
@@ -599,7 +605,7 @@ predicate delta(State q1, EdgeLabel lbl, State q2) {
q2 = after(cc)
)
or
exists(RegExpCharacterClassEscape cc |
exists(RegExpTerm cc | isEscapeClass(cc, _) |
q1 = before(cc) and
lbl = CharClass(cc.getRawValue() + "|" + getCanonicalizationFlags(cc.getRootTerm())) and
q2 = after(cc)

View File

@@ -6,6 +6,14 @@
import javascript
/**
* Holds if `term` is an escape class representing e.g. `\d`.
* `clazz` is which character class it represents, e.g. "d" for `\d`.
*
* `clazz` is bound to the `getValue()` of `term` when `term` is a
* `RegExpCharacterClassEscape`; the predicate does not hold for any other kind of term.
*/
predicate isEscapeClass(RegExpTerm term, string clazz) {
exists(RegExpCharacterClassEscape escape | term = escape | escape.getValue() = clazz)
}
/**
* Holds if the regular expression should not be considered.
*

View File

@@ -218,7 +218,7 @@ private newtype TInputSymbol =
recc instanceof RegExpCharacterClass and
not recc.(RegExpCharacterClass).isUniversalClass()
or
recc instanceof RegExpCharacterClassEscape
isEscapeClass(recc, _)
)
} or
/** An input symbol representing all characters matched by `.`. */
@@ -340,13 +340,13 @@ private module CharacterClasses {
char <= hi
)
or
exists(RegExpCharacterClassEscape escape | escape = child |
escape.getValue() = escape.getValue().toLowerCase() and
classEscapeMatches(escape.getValue(), char)
exists(string charClass | isEscapeClass(child, charClass) |
charClass.toLowerCase() = charClass and
classEscapeMatches(charClass, char)
or
char = getARelevantChar() and
escape.getValue() = escape.getValue().toUpperCase() and
not classEscapeMatches(escape.getValue().toLowerCase(), char)
charClass.toUpperCase() = charClass and
not classEscapeMatches(charClass, char)
)
)
}
@@ -409,10 +409,10 @@ private module CharacterClasses {
or
child.(RegExpCharacterRange).isRange(_, result)
or
exists(RegExpCharacterClassEscape escape | child = escape |
result = min(string s | classEscapeMatches(escape.getValue().toLowerCase(), s))
exists(string charClass | isEscapeClass(child, charClass) |
result = min(string s | classEscapeMatches(charClass.toLowerCase(), s))
or
result = max(string s | classEscapeMatches(escape.getValue().toLowerCase(), s))
result = max(string s | classEscapeMatches(charClass.toLowerCase(), s))
)
)
}
@@ -466,33 +466,36 @@ private module CharacterClasses {
* An implementation of `CharacterClass` for \d, \s, and \w.
*/
private class PositiveCharacterClassEscape extends CharacterClass {
RegExpCharacterClassEscape cc;
RegExpTerm cc;
string charClass;
PositiveCharacterClassEscape() {
this = getCanonicalCharClass(cc) and cc.getValue() = ["d", "s", "w"]
isEscapeClass(cc, charClass) and
this = getCanonicalCharClass(cc) and
charClass = ["d", "s", "w"]
}
override string getARelevantChar() {
cc.getValue() = "d" and
charClass = "d" and
result = ["0", "9"]
or
cc.getValue() = "s" and
charClass = "s" and
result = " "
or
cc.getValue() = "w" and
charClass = "w" and
result = ["a", "Z", "_", "0", "9"]
}
override predicate matches(string char) { classEscapeMatches(cc.getValue(), char) }
override predicate matches(string char) { classEscapeMatches(charClass, char) }
override string choose() {
cc.getValue() = "d" and
charClass = "d" and
result = "9"
or
cc.getValue() = "s" and
charClass = "s" and
result = " "
or
cc.getValue() = "w" and
charClass = "w" and
result = "a"
}
}
@@ -501,26 +504,29 @@ private module CharacterClasses {
* An implementation of `CharacterClass` for \D, \S, and \W.
*/
private class NegativeCharacterClassEscape extends CharacterClass {
RegExpCharacterClassEscape cc;
RegExpTerm cc;
string charClass;
NegativeCharacterClassEscape() {
this = getCanonicalCharClass(cc) and cc.getValue() = ["D", "S", "W"]
isEscapeClass(cc, charClass) and
this = getCanonicalCharClass(cc) and
charClass = ["D", "S", "W"]
}
override string getARelevantChar() {
cc.getValue() = "D" and
charClass = "D" and
result = ["a", "Z", "!"]
or
cc.getValue() = "S" and
charClass = "S" and
result = ["a", "9", "!"]
or
cc.getValue() = "W" and
charClass = "W" and
result = [" ", "!"]
}
bindingset[char]
override predicate matches(string char) {
not classEscapeMatches(cc.getValue().toLowerCase(), char)
not classEscapeMatches(charClass.toLowerCase(), char)
}
}
}
@@ -599,7 +605,7 @@ predicate delta(State q1, EdgeLabel lbl, State q2) {
q2 = after(cc)
)
or
exists(RegExpCharacterClassEscape cc |
exists(RegExpTerm cc | isEscapeClass(cc, _) |
q1 = before(cc) and
lbl = CharClass(cc.getRawValue() + "|" + getCanonicalizationFlags(cc.getRootTerm())) and
q2 = after(cc)

View File

@@ -5,6 +5,14 @@
import python
import semmle.python.RegexTreeView
/**
* Holds if `term` is an escape class representing e.g. `\d`.
* `clazz` is which character class it represents, e.g. "d" for `\d`.
*
* `clazz` is bound to the `getValue()` of `term` when `term` is a
* `RegExpCharacterClassEscape`; the predicate does not hold for any other kind of term.
*/
predicate isEscapeClass(RegExpTerm term, string clazz) {
exists(RegExpCharacterClassEscape escape | term = escape | escape.getValue() = clazz)
}
/**
* Holds if the regular expression should not be considered.
*

View File

@@ -13,7 +13,6 @@
*/
import RegExpTreeView
private import codeql.Locations
/**
* A configuration for which parts of a regular expression should be considered relevant for
@@ -219,9 +218,7 @@ private newtype TInputSymbol =
recc instanceof RegExpCharacterClass and
not recc.(RegExpCharacterClass).isUniversalClass()
or
recc instanceof RegExpCharacterClassEscape
or
recc instanceof RegExpNamedCharacterProperty
isEscapeClass(recc, _)
)
} or
/** An input symbol representing all characters matched by `.`. */
@@ -343,22 +340,13 @@ private module CharacterClasses {
char <= hi
)
or
exists(RegExpCharacterClassEscape escape | escape = child |
escape.getValue() = escape.getValue().toLowerCase() and
classEscapeMatches(escape.getValue(), char)
exists(string charClass | isEscapeClass(child, charClass) |
charClass.toLowerCase() = charClass and
classEscapeMatches(charClass, char)
or
char = getARelevantChar() and
escape.getValue() = escape.getValue().toUpperCase() and
not classEscapeMatches(escape.getValue().toLowerCase(), char)
)
or
exists(RegExpNamedCharacterProperty charProp | charProp = child |
not charProp.isInverted() and
namedCharacterPropertyMatches(charProp.getName(), char)
or
char = getARelevantChar() and
charProp.isInverted() and
not namedCharacterPropertyMatches(charProp.getName(), char)
charClass.toUpperCase() = charClass and
not classEscapeMatches(charClass, char)
)
)
}
@@ -421,16 +409,10 @@ private module CharacterClasses {
or
child.(RegExpCharacterRange).isRange(_, result)
or
exists(RegExpCharacterClassEscape escape | child = escape |
result = min(string s | classEscapeMatches(escape.getValue().toLowerCase(), s))
exists(string charClass | isEscapeClass(child, charClass) |
result = min(string s | classEscapeMatches(charClass.toLowerCase(), s))
or
result = max(string s | classEscapeMatches(escape.getValue().toLowerCase(), s))
)
or
exists(RegExpNamedCharacterProperty charProp | child = charProp |
result = min(string s | namedCharacterPropertyMatches(charProp.getName(), s))
or
result = max(string s | namedCharacterPropertyMatches(charProp.getName(), s))
result = max(string s | classEscapeMatches(charClass.toLowerCase(), s))
)
)
}
@@ -480,60 +462,40 @@ private module CharacterClasses {
char = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_".charAt(_)
}
/**
* Holds if the named character property (e.g. from a POSIX bracket
* expression) `propName` matches `char`. For example, it holds when `name` is
* `"word"` and `char` is `"a"`.
*
* TODO: expand to cover more properties.
*/
private predicate namedCharacterPropertyMatches(string propName, string char) {
propName = ["digit", "Digit"] and
char = "0123456789".charAt(_)
or
propName = ["space", "Space"] and
(
char = [" ", "\t", "\r", "\n"]
or
char = getARelevantChar() and
char.regexpMatch("\\u000b|\\u000c") // \v|\f (vertical tab | form feed)
)
or
propName = ["word", "Word"] and
char = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_".charAt(_)
}
/**
* An implementation of `CharacterClass` for \d, \s, and \w.
*/
private class PositiveCharacterClassEscape extends CharacterClass {
RegExpCharacterClassEscape cc;
RegExpTerm cc;
string charClass;
PositiveCharacterClassEscape() {
this = getCanonicalCharClass(cc) and cc.getValue() = ["d", "s", "w"]
isEscapeClass(cc, charClass) and
this = getCanonicalCharClass(cc) and
charClass = ["d", "s", "w"]
}
override string getARelevantChar() {
cc.getValue() = "d" and
charClass = "d" and
result = ["0", "9"]
or
cc.getValue() = "s" and
charClass = "s" and
result = " "
or
cc.getValue() = "w" and
charClass = "w" and
result = ["a", "Z", "_", "0", "9"]
}
override predicate matches(string char) { classEscapeMatches(cc.getValue(), char) }
override predicate matches(string char) { classEscapeMatches(charClass, char) }
override string choose() {
cc.getValue() = "d" and
charClass = "d" and
result = "9"
or
cc.getValue() = "s" and
charClass = "s" and
result = " "
or
cc.getValue() = "w" and
charClass = "w" and
result = "a"
}
}
@@ -542,88 +504,29 @@ private module CharacterClasses {
* An implementation of `CharacterClass` for \D, \S, and \W.
*/
private class NegativeCharacterClassEscape extends CharacterClass {
RegExpCharacterClassEscape cc;
RegExpTerm cc;
string charClass;
NegativeCharacterClassEscape() {
this = getCanonicalCharClass(cc) and cc.getValue() = ["D", "S", "W"]
isEscapeClass(cc, charClass) and
this = getCanonicalCharClass(cc) and
charClass = ["D", "S", "W"]
}
override string getARelevantChar() {
cc.getValue() = "D" and
charClass = "D" and
result = ["a", "Z", "!"]
or
cc.getValue() = "S" and
charClass = "S" and
result = ["a", "9", "!"]
or
cc.getValue() = "W" and
charClass = "W" and
result = [" ", "!"]
}
bindingset[char]
override predicate matches(string char) {
not classEscapeMatches(cc.getValue().toLowerCase(), char)
}
}
/**
* An implementation of `NamedCharacterProperty` for positive (non-inverted)
* character properties.
*/
private class PositiveNamedCharacterProperty extends CharacterClass {
RegExpNamedCharacterProperty cp;
PositiveNamedCharacterProperty() { this = getCanonicalCharClass(cp) and not cp.isInverted() }
override string getARelevantChar() {
exists(string lowerName | lowerName = cp.getName().toLowerCase() |
lowerName = "digit" and
result = ["0", "9"]
or
lowerName = "space" and
result = [" "]
or
lowerName = "word" and
result = ["a", "Z", "_", "0", "9"]
)
}
override predicate matches(string char) { namedCharacterPropertyMatches(cp.getName(), char) }
override string choose() {
exists(string lowerName | lowerName = cp.getName().toLowerCase() |
lowerName = "digit" and
result = "9"
or
lowerName = "space" and
result = " "
or
lowerName = "word" and
result = "a"
)
}
}
private class InvertedNamedCharacterProperty extends CharacterClass {
RegExpNamedCharacterProperty cp;
InvertedNamedCharacterProperty() { this = getCanonicalCharClass(cp) and cp.isInverted() }
override string getARelevantChar() {
exists(string lowerName | lowerName = cp.getName().toLowerCase() |
lowerName = "digit" and
result = ["a", "Z", "!"]
or
lowerName = "space" and
result = ["a", "9", "!"]
or
lowerName = "word" and
result = [" ", "!"]
)
}
bindingset[char]
override predicate matches(string char) {
not namedCharacterPropertyMatches(cp.getName(), char)
not classEscapeMatches(charClass.toLowerCase(), char)
}
}
}
@@ -702,18 +605,12 @@ predicate delta(State q1, EdgeLabel lbl, State q2) {
q2 = after(cc)
)
or
exists(RegExpCharacterClassEscape cc |
exists(RegExpTerm cc | isEscapeClass(cc, _) |
q1 = before(cc) and
lbl = CharClass(cc.getRawValue() + "|" + getCanonicalizationFlags(cc.getRootTerm())) and
q2 = after(cc)
)
or
exists(RegExpNamedCharacterProperty cp |
q1 = before(cp) and
lbl = CharClass(cp.getRawValue()) and
q2 = after(cp)
)
or
exists(RegExpAlt alt | lbl = Epsilon() | q1 = before(alt) and q2 = before(alt.getAChild()))
or
exists(RegExpSequence seq | lbl = Epsilon() | q1 = before(seq) and q2 = before(seq.getChild(0)))

View File

@@ -1,6 +1,27 @@
private import codeql.ruby.ast.Literal as AST
private import codeql.Locations
private import ParseRegExp
import codeql.Locations
/**
* Holds if `term` is an escape class representing e.g. `\d`.
* `clazz` is which character class it represents, e.g. "d" for `\d`.
*
* Two kinds of term are recognised:
* - a `RegExpCharacterClassEscape`, for which `clazz` is its `getValue()`;
* - a `RegExpNamedCharacterProperty` whose name, compared case-insensitively, is
*   `digit`, `space` or `word`. These are mapped to "d", "s" and "w" respectively,
*   or to the upper-case letter ("D", "S", "W") when the property is inverted.
*/
predicate isEscapeClass(RegExpTerm term, string clazz) {
// Plain escape classes: the class letter is taken directly from the term's value.
exists(RegExpCharacterClassEscape escape | term = escape | escape.getValue() = clazz)
or
// TODO: expand to cover more properties
// Named character properties are translated to the equivalent escape-class letter;
// an inverted property yields the upper-case (negated) letter.
exists(RegExpNamedCharacterProperty escape | term = escape |
escape.getName().toLowerCase() = "digit" and
if escape.isInverted() then clazz = "D" else clazz = "d"
or
escape.getName().toLowerCase() = "space" and
if escape.isInverted() then clazz = "S" else clazz = "s"
or
escape.getName().toLowerCase() = "word" and
if escape.isInverted() then clazz = "W" else clazz = "w"
)
}
/**
* Holds if the regular expression should not be considered.

View File

@@ -91,3 +91,4 @@
| tst.rb:362:11:362:31 | ((?:a{0,\|-)\|\\w\\{\\d,)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a{0,'. |
| tst.rb:363:11:363:34 | ((?:a{0,2\|-)\|\\w\\{\\d,\\d)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a{0,2'. |
| tst.rb:369:12:369:22 | (\\u0061\|a)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| tst.rb:375:11:375:27 | ([[:digit:]]\|\\d)+ | This part of the regular expression may cause exponential backtracking on strings starting with 'X' and containing many repetitions of '0'. |

View File

@@ -369,4 +369,7 @@ good42 = /^((?:a{0,2}|-)|\w\{\d,\d\})+X$/
bad87 = /^X(\u0061|a)*Y$/
# GOOD
good43 = /^X(\u0061|b)+Y$/
good43 = /^X(\u0061|b)+Y$/
# NOT GOOD
bad88 = /X([[:digit:]]|\d)+Y/