Add Java ReDoS libraries to identical-files.json

This commit is contained in:
Joe Farebrother
2022-02-02 13:53:41 +00:00
parent 11e465f2ac
commit f9f7a01f57
3 changed files with 120 additions and 55 deletions

View File

@@ -475,20 +475,23 @@
"python/ql/lib/semmle/python/security/internal/SensitiveDataHeuristics.qll",
"ruby/ql/lib/codeql/ruby/security/internal/SensitiveDataHeuristics.qll"
],
"ReDoS Util Python/JS/Ruby": [
"ReDoS Util Python/JS/Ruby/Java": [
"javascript/ql/lib/semmle/javascript/security/performance/ReDoSUtil.qll",
"python/ql/lib/semmle/python/security/performance/ReDoSUtil.qll",
"ruby/ql/lib/codeql/ruby/security/performance/ReDoSUtil.qll"
"ruby/ql/lib/codeql/ruby/security/performance/ReDoSUtil.qll",
"java/ql/lib/semmle/code/java/security/performance/ReDoSUtil.qll"
],
"ReDoS Exponential Python/JS/Ruby": [
"ReDoS Exponential Python/JS/Ruby/Java": [
"javascript/ql/lib/semmle/javascript/security/performance/ExponentialBackTracking.qll",
"python/ql/lib/semmle/python/security/performance/ExponentialBackTracking.qll",
"ruby/ql/lib/codeql/ruby/security/performance/ExponentialBackTracking.qll"
"ruby/ql/lib/codeql/ruby/security/performance/ExponentialBackTracking.qll",
"java/ql/lib/semmle/code/java/security/performance/ExponentialBackTracking.qll"
],
"ReDoS Polynomial Python/JS/Ruby": [
"ReDoS Polynomial Python/JS/Ruby/Java": [
"javascript/ql/lib/semmle/javascript/security/performance/SuperlinearBackTracking.qll",
"python/ql/lib/semmle/python/security/performance/SuperlinearBackTracking.qll",
"ruby/ql/lib/codeql/ruby/security/performance/SuperlinearBackTracking.qll"
"ruby/ql/lib/codeql/ruby/security/performance/SuperlinearBackTracking.qll",
"java/ql/lib/semmle/code/java/security/performance/SuperlinearBackTracking.qll"
],
"BadTagFilterQuery Python/JS/Ruby": [
"javascript/ql/lib/semmle/javascript/security/BadTagFilterQuery.qll",

View File

@@ -140,9 +140,9 @@ class RegExpRoot extends RegExpTerm {
// there is at least one repetition
getRoot(any(InfiniteRepetitionQuantifier q)) = this and
// is actually used as a RegExp
isUsedAsRegExp() and
this.isUsedAsRegExp() and
// not excluded for library specific reasons
not isExcluded(getRootTerm().getParent())
not isExcluded(this.getRootTerm().getParent())
}
}
@@ -218,7 +218,7 @@ private newtype TInputSymbol =
recc instanceof RegExpCharacterClass and
not recc.(RegExpCharacterClass).isUniversalClass()
or
recc instanceof RegExpCharacterClassEscape
isEscapeClass(recc, _)
)
} or
/** An input symbol representing all characters matched by `.`. */
@@ -302,7 +302,7 @@ abstract class CharacterClass extends InputSymbol {
/**
* Gets a character matched by this character class.
*/
string choose() { result = getARelevantChar() and matches(result) }
string choose() { result = this.getARelevantChar() and this.matches(result) }
}
/**
@@ -340,13 +340,13 @@ private module CharacterClasses {
char <= hi
)
or
exists(RegExpCharacterClassEscape escape | escape = child |
escape.getValue() = escape.getValue().toLowerCase() and
classEscapeMatches(escape.getValue(), char)
exists(string charClass | isEscapeClass(child, charClass) |
charClass.toLowerCase() = charClass and
classEscapeMatches(charClass, char)
or
char = getARelevantChar() and
escape.getValue() = escape.getValue().toUpperCase() and
not classEscapeMatches(escape.getValue().toLowerCase(), char)
charClass.toUpperCase() = charClass and
not classEscapeMatches(charClass, char)
)
)
}
@@ -409,10 +409,10 @@ private module CharacterClasses {
or
child.(RegExpCharacterRange).isRange(_, result)
or
exists(RegExpCharacterClassEscape escape | child = escape |
result = min(string s | classEscapeMatches(escape.getValue().toLowerCase(), s))
exists(string charClass | isEscapeClass(child, charClass) |
result = min(string s | classEscapeMatches(charClass.toLowerCase(), s))
or
result = max(string s | classEscapeMatches(escape.getValue().toLowerCase(), s))
result = max(string s | classEscapeMatches(charClass.toLowerCase(), s))
)
)
}
@@ -466,33 +466,36 @@ private module CharacterClasses {
* An implementation of `CharacterClass` for \d, \s, and \w.
*/
private class PositiveCharacterClassEscape extends CharacterClass {
RegExpCharacterClassEscape cc;
RegExpTerm cc;
string charClass;
PositiveCharacterClassEscape() {
this = getCanonicalCharClass(cc) and cc.getValue() = ["d", "s", "w"]
isEscapeClass(cc, charClass) and
this = getCanonicalCharClass(cc) and
charClass = ["d", "s", "w"]
}
override string getARelevantChar() {
cc.getValue() = "d" and
charClass = "d" and
result = ["0", "9"]
or
cc.getValue() = "s" and
charClass = "s" and
result = " "
or
cc.getValue() = "w" and
charClass = "w" and
result = ["a", "Z", "_", "0", "9"]
}
override predicate matches(string char) { classEscapeMatches(cc.getValue(), char) }
override predicate matches(string char) { classEscapeMatches(charClass, char) }
override string choose() {
cc.getValue() = "d" and
charClass = "d" and
result = "9"
or
cc.getValue() = "s" and
charClass = "s" and
result = " "
or
cc.getValue() = "w" and
charClass = "w" and
result = "a"
}
}
@@ -501,26 +504,29 @@ private module CharacterClasses {
* An implementation of `CharacterClass` for \D, \S, and \W.
*/
private class NegativeCharacterClassEscape extends CharacterClass {
RegExpCharacterClassEscape cc;
RegExpTerm cc;
string charClass;
NegativeCharacterClassEscape() {
this = getCanonicalCharClass(cc) and cc.getValue() = ["D", "S", "W"]
isEscapeClass(cc, charClass) and
this = getCanonicalCharClass(cc) and
charClass = ["D", "S", "W"]
}
override string getARelevantChar() {
cc.getValue() = "D" and
charClass = "D" and
result = ["a", "Z", "!"]
or
cc.getValue() = "S" and
charClass = "S" and
result = ["a", "9", "!"]
or
cc.getValue() = "W" and
charClass = "W" and
result = [" ", "!"]
}
bindingset[char]
override predicate matches(string char) {
not classEscapeMatches(cc.getValue().toLowerCase(), char)
not classEscapeMatches(charClass.toLowerCase(), char)
}
}
}
@@ -533,6 +539,55 @@ private class EdgeLabel extends TInputSymbol {
}
}
/**
* A RegExp term that acts like a plus.
* Either it's a RegExpPlus, or it is a range {1,X} where X is >= 30.
* 30 has been chosen as a threshold because for exponential blowup 2^30 is enough to get a decent DOS attack.
*/
private class EffectivelyPlus extends RegExpTerm {
EffectivelyPlus() {
this instanceof RegExpPlus
or
exists(RegExpRange range |
range.getLowerBound() = 1 and
(range.getUpperBound() >= 30 or not exists(range.getUpperBound()))
|
this = range
)
}
}
/**
* A RegExp term that acts like a star.
* Either it's a RegExpStar, or it is a range {0,X} where X is >= 30.
*/
private class EffectivelyStar extends RegExpTerm {
EffectivelyStar() {
this instanceof RegExpStar
or
exists(RegExpRange range |
range.getLowerBound() = 0 and
(range.getUpperBound() >= 30 or not exists(range.getUpperBound()))
|
this = range
)
}
}
/**
* A RegExp term that acts like a question mark.
* Either it's a RegExpQuestion, or it is a range {0,1}.
*/
private class EffectivelyQuestion extends RegExpTerm {
EffectivelyQuestion() {
this instanceof RegExpOpt
or
exists(RegExpRange range | range.getLowerBound() = 0 and range.getUpperBound() = 1 |
this = range
)
}
}
/**
* Gets the state before matching `t`.
*/
@@ -542,7 +597,7 @@ private State before(RegExpTerm t) { result = Match(t, 0) }
/**
* Gets a state the NFA may be in after matching `t`.
*/
private State after(RegExpTerm t) {
State after(RegExpTerm t) {
exists(RegExpAlt alt | t = alt.getAChild() | result = after(alt))
or
exists(RegExpSequence seq, int i | t = seq.getChild(i) |
@@ -553,14 +608,14 @@ private State after(RegExpTerm t) {
or
exists(RegExpGroup grp | t = grp.getAChild() | result = after(grp))
or
exists(RegExpStar star | t = star.getAChild() | result = before(star))
exists(EffectivelyStar star | t = star.getAChild() | result = before(star))
or
exists(RegExpPlus plus | t = plus.getAChild() |
exists(EffectivelyPlus plus | t = plus.getAChild() |
result = before(plus) or
result = after(plus)
)
or
exists(RegExpOpt opt | t = opt.getAChild() | result = after(opt))
exists(EffectivelyQuestion opt | t = opt.getAChild() | result = after(opt))
or
exists(RegExpRoot root | t = root | result = AcceptAnySuffix(root))
}
@@ -599,7 +654,7 @@ predicate delta(State q1, EdgeLabel lbl, State q2) {
q2 = after(cc)
)
or
exists(RegExpCharacterClassEscape cc |
exists(RegExpTerm cc | isEscapeClass(cc, _) |
q1 = before(cc) and
lbl = CharClass(cc.getRawValue() + "|" + getCanonicalizationFlags(cc.getRootTerm())) and
q2 = after(cc)
@@ -611,15 +666,17 @@ predicate delta(State q1, EdgeLabel lbl, State q2) {
or
exists(RegExpGroup grp | lbl = Epsilon() | q1 = before(grp) and q2 = before(grp.getChild(0)))
or
exists(RegExpStar star | lbl = Epsilon() |
exists(EffectivelyStar star | lbl = Epsilon() |
q1 = before(star) and q2 = before(star.getChild(0))
or
q1 = before(star) and q2 = after(star)
)
or
exists(RegExpPlus plus | lbl = Epsilon() | q1 = before(plus) and q2 = before(plus.getChild(0)))
exists(EffectivelyPlus plus | lbl = Epsilon() |
q1 = before(plus) and q2 = before(plus.getChild(0))
)
or
exists(RegExpOpt opt | lbl = Epsilon() |
exists(EffectivelyQuestion opt | lbl = Epsilon() |
q1 = before(opt) and q2 = before(opt.getChild(0))
or
q1 = before(opt) and q2 = after(opt)
@@ -671,7 +728,7 @@ RegExpRoot getRoot(RegExpTerm term) {
/**
* A state in the NFA.
*/
private newtype TState =
newtype TState =
/**
* A state representing that the NFA is about to match a term.
* `i` is used to index into multi-char literals.
@@ -801,29 +858,26 @@ InputSymbol getAnInputSymbolMatching(string char) {
result = Any()
}
/**
* Holds if `state` is a start state.
*/
predicate isStartState(State state) {
state = mkMatch(any(RegExpRoot r))
or
exists(RegExpCaret car | state = after(car))
}
/**
* Predicates for constructing a prefix string that leads to a given state.
*/
private module PrefixConstruction {
/**
* Holds if `state` starts the string matched by the regular expression.
*/
private predicate isStartState(State state) {
state instanceof StateInPumpableRegexp and
(
state = Match(any(RegExpRoot r), _)
or
exists(RegExpCaret car | state = after(car))
)
}
/**
* Holds if `state` is the textually last start state for the regular expression.
*/
private predicate lastStartState(State state) {
exists(RegExpRoot root |
state =
max(State s, Location l |
max(StateInPumpableRegexp s, Location l |
isStartState(s) and getRoot(s.getRepr()) = root and l = s.getRepr().getLocation()
|
s

View File

@@ -5,6 +5,14 @@
import java
import semmle.code.java.regex.RegexTreeView
/**
* Holds if `term` is an ecape class representing e.g. `\d`.
* `clazz` is which character class it represents, e.g. "d" for `\d`.
*/
predicate isEscapeClass(RegExpTerm term, string clazz) {
exists(RegExpCharacterClassEscape escape | term = escape | escape.getValue() = clazz)
}
/**
* Holds if the regular expression should not be considered.
*