Add support for character class escapes (\d, \s, \w and their negations) inside character classes

This commit is contained in:
Erik Krogh Kristensen
2020-11-02 10:54:02 +01:00
parent 0063cb140c
commit a5e75f53ff
3 changed files with 42 additions and 23 deletions

View File

@@ -234,7 +234,14 @@ private module CharacterClasses {
exists(string lo, string hi | child.(RegExpCharacterRange).isRange(lo, hi) | exists(string lo, string hi | child.(RegExpCharacterRange).isRange(lo, hi) |
lo <= char and char <= hi lo <= char and char <= hi
) )
// TODO: RegExpCharacterClassEscape. or
exists(RegExpCharacterClassEscape escape | escape = child |
escape.getValue() = escape.getValue().toLowerCase() and
classEscapeMatches(escape.getValue(), char)
or
escape.getValue() = escape.getValue().toUpperCase() and
not classEscapeMatches(escape.getValue().toLowerCase(), char)
)
) )
} }
@@ -248,6 +255,12 @@ private module CharacterClasses {
child.(RegExpCharacterRange).isRange(result, _) child.(RegExpCharacterRange).isRange(result, _)
or or
child.(RegExpCharacterRange).isRange(_, result) child.(RegExpCharacterRange).isRange(_, result)
or
exists(RegExpCharacterClassEscape escape | child = escape |
result = min(string s | classEscapeMatches(escape.getValue().toLowerCase(), s))
or
result = max(string s | classEscapeMatches(escape.getValue().toLowerCase(), s))
)
) )
} }
@@ -264,15 +277,7 @@ private module CharacterClasses {
bindingset[char] bindingset[char]
override predicate matches(string char) { hasChildThatMatches(cc, char) } override predicate matches(string char) { hasChildThatMatches(cc, char) }
override string choose() { override string choose() { result = min(string c | c = getAMentionedChar(cc)) }
result =
min(string c |
exists(RegExpTerm child | child = cc.getAChild() |
c = child.(RegExpConstant).getValue() or
child.(RegExpCharacterRange).isRange(c, _)
)
)
}
} }
/** /**
@@ -293,19 +298,14 @@ private module CharacterClasses {
override string choose() { override string choose() {
// The next char after the max of the inverted charclass. // The next char after the max of the inverted charclass.
result = result = nextChar(max(string c | c = getAMentionedChar(cc)))
nextChar(max(string c |
exists(RegExpTerm child | child = cc.getAChild() |
c = child.(RegExpConstant).getValue() or
child.(RegExpCharacterRange).isRange(_, c)
)
))
} }
} }
/** /**
* Holds if the character class escape `clazz` (\d, \s, or \w) matches `char`. * Holds if the character class escape `clazz` (\d, \s, or \w) matches `char`.
*/ */
pragma[noinline]
private predicate classEscapeMatches(string clazz, string char) { private predicate classEscapeMatches(string clazz, string char) {
clazz = "d" and clazz = "d" and
char = "0123456789".charAt(_) char = "0123456789".charAt(_)

View File

@@ -20,6 +20,7 @@
| regexplib/dates.js:66:201:66:208 | NOVEMBER | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'NOVEMBER'. | | regexplib/dates.js:66:201:66:208 | NOVEMBER | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'NOVEMBER'. |
| regexplib/dates.js:66:210:66:217 | DECEMBER | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'DECEMBER'. | | regexplib/dates.js:66:210:66:217 | DECEMBER | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'DECEMBER'. |
| regexplib/dates.js:66:234:66:240 | PRESENT | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'PRESENT'. | | regexplib/dates.js:66:234:66:240 | PRESENT | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'PRESENT'. |
| regexplib/email.js:1:16:1:22 | [-.\\w]* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
| regexplib/email.js:5:24:5:35 | [a-zA-Z0-9]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. | | regexplib/email.js:5:24:5:35 | [a-zA-Z0-9]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
| regexplib/email.js:5:63:5:74 | [a-zA-Z0-9]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. | | regexplib/email.js:5:63:5:74 | [a-zA-Z0-9]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
| regexplib/email.js:6:10:6:35 | (?:[a-zA-Z0-9][\\.\\-\\+_]?)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. | | regexplib/email.js:6:10:6:35 | (?:[a-zA-Z0-9][\\.\\-\\+_]?)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
@@ -29,16 +30,18 @@
| regexplib/email.js:25:212:25:223 | [a-zA-Z0-9]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. | | regexplib/email.js:25:212:25:223 | [a-zA-Z0-9]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
| regexplib/email.js:25:251:25:262 | [a-zA-Z0-9]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. | | regexplib/email.js:25:251:25:262 | [a-zA-Z0-9]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
| regexplib/email.js:32:10:32:25 | (?:\\w[\\.\\-\\+]?)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. | | regexplib/email.js:32:10:32:25 | (?:\\w[\\.\\-\\+]?)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
| regexplib/email.js:33:16:33:22 | [-.\\w]* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
| regexplib/email.js:33:38:33:51 | ([0-9a-zA-Z])+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '00.'. | | regexplib/email.js:33:38:33:51 | ([0-9a-zA-Z])+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '00.'. |
| regexplib/email.js:33:53:33:58 | [-\\w]* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
| regexplib/email.js:34:24:34:35 | [a-zA-Z0-9]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. | | regexplib/email.js:34:24:34:35 | [a-zA-Z0-9]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
| regexplib/email.js:34:63:34:74 | [a-zA-Z0-9]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. | | regexplib/email.js:34:63:34:74 | [a-zA-Z0-9]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
| regexplib/markup.js:3:451:3:453 | .+? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a '. | | regexplib/markup.js:3:451:3:453 | .+? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a '. |
| regexplib/markup.js:13:6:13:12 | [^"']+? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '('. | | regexplib/markup.js:13:6:13:12 | [^"']+? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '('. |
| regexplib/markup.js:13:14:13:16 | .+? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a"'. | | regexplib/markup.js:13:14:13:16 | .+? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a"'. |
| regexplib/markup.js:37:29:37:56 | [a-zA-Z0-9\|:\|\\/\|=\|-\|.\|\\?\|&]* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '='. | | regexplib/markup.js:37:29:37:56 | [a-zA-Z0-9\|:\|\\/\|=\|-\|.\|\\?\|&]* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '00='. |
| regexplib/markup.js:40:23:40:25 | \\w+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. | | regexplib/markup.js:40:23:40:25 | \\w+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
| regexplib/markup.js:40:132:40:134 | \\s* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ' @0<""'. | | regexplib/markup.js:40:132:40:134 | \\s* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ' @0<""'. |
| regexplib/markup.js:53:29:53:56 | [a-zA-Z0-9\|:\|\\/\|=\|-\|.\|\\?\|&]* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '='. | | regexplib/markup.js:53:29:53:56 | [a-zA-Z0-9\|:\|\\/\|=\|-\|.\|\\?\|&]* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '00='. |
| regexplib/markup.js:56:23:56:25 | \\w+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. | | regexplib/markup.js:56:23:56:25 | \\w+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
| regexplib/markup.js:56:132:56:134 | \\s* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ' @0<""'. | | regexplib/markup.js:56:132:56:134 | \\s* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ' @0<""'. |
| regexplib/misc.js:15:56:15:118 | (([^\\\\/:\\*\\?"\\\|<>\\. ])\|([^\\\\/:\\*\\?"\\\|<>]*[^\\\\/:\\*\\?"\\\|<>\\. ]))? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '!\\\\}'. | | regexplib/misc.js:15:56:15:118 | (([^\\\\/:\\*\\?"\\\|<>\\. ])\|([^\\\\/:\\*\\?"\\\|<>]*[^\\\\/:\\*\\?"\\\|<>\\. ]))? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '!\\\\}'. |
@@ -68,14 +71,14 @@
| tst.js:19:71:19:90 | (?:[^)\\\\]\|\\\\\\\\\|\\\\.)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\\\\\\\'. | | tst.js:19:71:19:90 | (?:[^)\\\\]\|\\\\\\\\\|\\\\.)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\\\\\\\'. |
| tst.js:31:54:31:55 | .* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\|a\|\\n'. | | tst.js:31:54:31:55 | .* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\|a\|\\n'. |
| tst.js:36:23:36:32 | (\\\\\\/\|.)*? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\\\/'. | | tst.js:36:23:36:32 | (\\\\\\/\|.)*? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\\\/'. |
| tst.js:41:27:41:28 | .* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '#'. | | tst.js:41:27:41:28 | .* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\t#'. |
| tst.js:47:25:47:27 | .*? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '""'. | | tst.js:47:25:47:27 | .*? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '""'. |
| tst.js:47:31:47:33 | .*? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ''''. | | tst.js:47:31:47:33 | .*? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ''''. |
| tst.js:52:37:52:39 | .*? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ']['. | | tst.js:52:37:52:39 | .*? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ']['. |
| tst.js:52:70:52:72 | .*? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ']['. | | tst.js:52:70:52:72 | .*? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ']['. |
| tst.js:58:15:58:20 | [a-z]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. | | tst.js:58:15:58:20 | [a-z]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| tst.js:60:43:60:54 | [a-zA-Z0-9]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. | | tst.js:60:43:60:54 | [a-zA-Z0-9]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
| tst.js:66:16:66:31 | [\\w#:.~>+()\\s-]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '#'. | | tst.js:66:16:66:31 | [\\w#:.~>+()\\s-]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\t'. |
| tst.js:66:38:66:40 | .*? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ']['. | | tst.js:66:38:66:40 | .*? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ']['. |
| tst.js:71:19:71:26 | (\\\\?.)*? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\\\a'. | | tst.js:71:19:71:26 | (\\\\?.)*? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\\\a'. |
| tst.js:74:14:74:21 | (b\|a?b)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'b'. | | tst.js:74:14:74:21 | (b\|a?b)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'b'. |
@@ -100,5 +103,9 @@
| tst.js:149:15:149:24 | (\\s\|[\\f])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\u000c'. | | tst.js:149:15:149:24 | (\\s\|[\\f])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\u000c'. |
| tst.js:152:15:152:28 | (\\s\|[\\v]\|\\\\v)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\u000b'. | | tst.js:152:15:152:28 | (\\s\|[\\v]\|\\\\v)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\u000b'. |
| tst.js:155:15:155:24 | (\\f\|[\\f])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\u000c'. | | tst.js:155:15:155:24 | (\\f\|[\\f])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\u000c'. |
| tst.js:158:15:158:22 | (\\W\|\\D)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '!'. | | tst.js:158:15:158:22 | (\\W\|\\D)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ' '. |
| tst.js:161:15:161:22 | (\\S\|\\w)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. | | tst.js:161:15:161:22 | (\\S\|\\w)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
| tst.js:164:15:164:24 | (\\S\|[\\w])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
| tst.js:167:15:167:27 | (1s\|[\\da-z])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '1s'. |
| tst.js:170:15:170:23 | (0\|[\\d])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
| tst.js:173:16:173:20 | [\\d]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |

View File

@@ -158,4 +158,16 @@ var bad35 = /((\f|[\f])*)"/;
var bad36 = /((\W|\D)*)"/; var bad36 = /((\W|\D)*)"/;
// NOT GOOD // NOT GOOD
var bad37 = /((\S|\w)*)"/; var bad37 = /((\S|\w)*)"/;
// NOT GOOD - `\S` and `[\w]` overlap: a word character such as '0' matches either alternative
var bad38 = /((\S|[\w])*)"/;
// NOT GOOD - '1s' matches the literal `1s` in one iteration, or `[\da-z]` twice ('1' via `\d`, 's' via `a-z`)
var bad39 = /((1s|[\da-z])*)"/;
// NOT GOOD - '0' matches both the literal `0` and the `\d` escape inside the character class
var bad40 = /((0|[\d])*)"/;
// NOT GOOD - nested quantifiers: a run of digits can be split between `[\d]+` and the outer `*` in many ways
var bad41 = /(([\d]+)*)"/;