mirror of
https://github.com/github/codeql.git
synced 2026-05-02 20:25:13 +02:00
Merge pull request #6462 from erik-krogh/repeat
JS: support more regular expressions in js/incomplete-multi-character-sanitization
This commit is contained in:
@@ -56,31 +56,69 @@ DangerousPrefix getADangerousMatchedPrefix(EmptyReplaceRegExpTerm t) {
|
||||
not exists(EmptyReplaceRegExpTerm pred | pred = t.getPredecessor+() and not pred.isNullable())
|
||||
}
|
||||
|
||||
private import semmle.javascript.security.performance.ReDoSUtil as ReDoSUtil
|
||||
|
||||
/**
|
||||
* Gets a char from a dangerous prefix that is matched by `t`.
|
||||
*/
|
||||
pragma[noinline]
|
||||
DangerousPrefixSubstring getADangerousMatchedChar(EmptyReplaceRegExpTerm t) {
|
||||
t.isNullable() and result = ""
|
||||
or
|
||||
t.getAMatchedString() = result
|
||||
or
|
||||
// A substring matched by some character class. This is only used to match the "word" part of a HTML tag (e.g. "iframe" in "<iframe").
|
||||
exists(ReDoSUtil::CharacterClass cc |
|
||||
cc = ReDoSUtil::getCanonicalCharClass(t) and
|
||||
cc.matches(result) and
|
||||
result.regexpMatch("\\w") and
|
||||
// excluding character classes that match ">" (e.g. /<[^<]*>/), as these might consume nested HTML tags, and thus prevent the dangerous pattern this query is looking for.
|
||||
not cc.matches(">")
|
||||
)
|
||||
or
|
||||
t instanceof RegExpDot and
|
||||
result.length() = 1
|
||||
or
|
||||
(
|
||||
t instanceof RegExpOpt or
|
||||
t instanceof RegExpStar or
|
||||
t instanceof RegExpPlus or
|
||||
t instanceof RegExpGroup or
|
||||
t instanceof RegExpAlt
|
||||
) and
|
||||
result = getADangerousMatchedChar(t.getAChild())
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a substring of a dangerous prefix that is in the language starting at `t` (ignoring lookarounds).
|
||||
*
|
||||
* Note that the language of `t` is slightly restricted as not all RegExpTerm types are supported.
|
||||
*/
|
||||
DangerousPrefixSubstring getADangerousMatchedPrefixSubstring(EmptyReplaceRegExpTerm t) {
|
||||
exists(string left |
|
||||
t.isNullable() and left = ""
|
||||
or
|
||||
t.getAMatchedString() = left
|
||||
or
|
||||
(
|
||||
t instanceof RegExpOpt or
|
||||
t instanceof RegExpStar or
|
||||
t instanceof RegExpPlus or
|
||||
t instanceof RegExpGroup or
|
||||
t instanceof RegExpAlt
|
||||
) and
|
||||
left = getADangerousMatchedPrefixSubstring(t.getAChild())
|
||||
|
|
||||
result = left + getADangerousMatchedPrefixSubstring(t.getSuccessor()) or
|
||||
result = left
|
||||
result = getADangerousMatchedChar(t) + getADangerousMatchedPrefixSubstring(t.getSuccessor())
|
||||
or
|
||||
result = getADangerousMatchedChar(t)
|
||||
or
|
||||
// loop around for repetitions (only considering alphanumeric characters in the repetition)
|
||||
exists(RepetitionMatcher repetition | t = repetition |
|
||||
result = getADangerousMatchedPrefixSubstring(repetition) + repetition.getAChar()
|
||||
)
|
||||
}
|
||||
|
||||
class RepetitionMatcher extends EmptyReplaceRegExpTerm {
|
||||
string char;
|
||||
|
||||
pragma[noinline]
|
||||
RepetitionMatcher() {
|
||||
(this instanceof RegExpPlus or this instanceof RegExpStar) and
|
||||
char = getADangerousMatchedChar(this.getAChild()) and
|
||||
char.regexpMatch("\\w")
|
||||
}
|
||||
|
||||
pragma[noinline]
|
||||
string getAChar() { result = char }
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `t` may match the dangerous `prefix` and some suffix, indicating intent to prevent a vulnerablity of kind `kind`.
|
||||
*/
|
||||
@@ -151,7 +189,7 @@ where
|
||||
// skip leading optional elements
|
||||
not dangerous.isNullable() and
|
||||
// only warn about the longest match (presumably the most descriptive)
|
||||
prefix = max(string m | matchesDangerousPrefix(dangerous, m, kind) | m order by m.length()) and
|
||||
prefix = max(string m | matchesDangerousPrefix(dangerous, m, kind) | m order by m.length(), m) and
|
||||
// only warn once per kind
|
||||
not exists(EmptyReplaceRegExpTerm other |
|
||||
other = dangerous.getAChild+() or other = dangerous.getPredecessor+()
|
||||
|
||||
@@ -155,15 +155,22 @@ private class RegexpCharacterConstant extends RegExpConstant {
|
||||
RegexpCharacterConstant() { this.isCharacter() }
|
||||
}
|
||||
|
||||
/**
|
||||
* A regexp term that is relevant for this ReDoS analysis.
|
||||
*/
|
||||
class RelevantRegExpTerm extends RegExpTerm {
|
||||
RelevantRegExpTerm() { getRoot(this).isRelevant() }
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `term` is the chosen canonical representative for all terms with string representation `str`.
|
||||
*
|
||||
* Using canonical representatives gives a huge performance boost when working with tuples containing multiple `InputSymbol`s.
|
||||
* The number of `InputSymbol`s is decreased by 3 orders of magnitude or more in some larger benchmarks.
|
||||
*/
|
||||
private predicate isCanonicalTerm(RegExpTerm term, string str) {
|
||||
private predicate isCanonicalTerm(RelevantRegExpTerm term, string str) {
|
||||
term =
|
||||
rank[1](RegExpTerm t, Location loc, File file |
|
||||
min(RelevantRegExpTerm t, Location loc, File file |
|
||||
loc = t.getLocation() and
|
||||
file = t.getFile() and
|
||||
str = t.getRawValue()
|
||||
@@ -178,15 +185,15 @@ private predicate isCanonicalTerm(RegExpTerm term, string str) {
|
||||
private newtype TInputSymbol =
|
||||
/** An input symbol corresponding to character `c`. */
|
||||
Char(string c) {
|
||||
c = any(RegexpCharacterConstant cc | getRoot(cc).isRelevant()).getValue().charAt(_)
|
||||
c = any(RegexpCharacterConstant cc | cc instanceof RelevantRegExpTerm).getValue().charAt(_)
|
||||
} or
|
||||
/**
|
||||
* An input symbol representing all characters matched by
|
||||
* a (non-universal) character class that has string representation `charClassString`.
|
||||
*/
|
||||
CharClass(string charClassString) {
|
||||
exists(RegExpTerm term | term.getRawValue() = charClassString | getRoot(term).isRelevant()) and
|
||||
exists(RegExpTerm recc | isCanonicalTerm(recc, charClassString) |
|
||||
exists(RelevantRegExpTerm term | term.getRawValue() = charClassString) and
|
||||
exists(RelevantRegExpTerm recc | isCanonicalTerm(recc, charClassString) |
|
||||
recc instanceof RegExpCharacterClass and
|
||||
not recc.(RegExpCharacterClass).isUniversalClass()
|
||||
or
|
||||
@@ -251,7 +258,7 @@ class InputSymbol extends TInputSymbol {
|
||||
/**
|
||||
* An abstract input symbol that represents a character class.
|
||||
*/
|
||||
abstract private class CharacterClass extends InputSymbol {
|
||||
abstract class CharacterClass extends InputSymbol {
|
||||
/**
|
||||
* Gets a character that is relevant for intersection-tests involving this
|
||||
* character class.
|
||||
@@ -626,13 +633,10 @@ RegExpRoot getRoot(RegExpTerm term) {
|
||||
}
|
||||
|
||||
private newtype TState =
|
||||
Match(RegExpTerm t, int i) {
|
||||
getRoot(t).isRelevant() and
|
||||
(
|
||||
i = 0
|
||||
or
|
||||
exists(t.(RegexpCharacterConstant).getValue().charAt(i))
|
||||
)
|
||||
Match(RelevantRegExpTerm t, int i) {
|
||||
i = 0
|
||||
or
|
||||
exists(t.(RegexpCharacterConstant).getValue().charAt(i))
|
||||
} or
|
||||
Accept(RegExpRoot l) { l.isRelevant() } or
|
||||
AcceptAnySuffix(RegExpRoot l) { l.isRelevant() }
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
| tst-multi-character-sanitization.js:3:13:3:57 | content ... gi, "") | This string may still contain $@, which may cause a HTML element injection vulnerability. | tst-multi-character-sanitization.js:3:30:3:30 | < | <cript |
|
||||
| tst-multi-character-sanitization.js:3:13:3:57 | content ... gi, "") | This string may still contain $@, which may cause a HTML element injection vulnerability. | tst-multi-character-sanitization.js:3:30:3:30 | < | <script |
|
||||
| tst-multi-character-sanitization.js:4:13:4:47 | content ... /g, "") | This string may still contain $@, which may cause a HTML attribute injection vulnerability. | tst-multi-character-sanitization.js:4:30:4:40 | on\\w+=".*" | on |
|
||||
| tst-multi-character-sanitization.js:5:13:5:49 | content ... /g, "") | This string may still contain $@, which may cause a HTML attribute injection vulnerability. | tst-multi-character-sanitization.js:5:30:5:42 | on\\w+=\\'.*\\' | on |
|
||||
| tst-multi-character-sanitization.js:9:13:9:47 | content ... gi, "") | This string may still contain $@, which may cause a HTML element injection vulnerability. | tst-multi-character-sanitization.js:9:30:9:30 | < | <cript |
|
||||
| tst-multi-character-sanitization.js:9:13:9:47 | content ... gi, "") | This string may still contain $@, which may cause a HTML element injection vulnerability. | tst-multi-character-sanitization.js:9:30:9:30 | < | <script |
|
||||
| tst-multi-character-sanitization.js:10:13:10:49 | content ... /g, "") | This string may still contain $@, which may cause a HTML attribute injection vulnerability. | tst-multi-character-sanitization.js:10:30:10:42 | .on\\w+=.*".*" | on |
|
||||
| tst-multi-character-sanitization.js:11:13:11:51 | content ... /g, "") | This string may still contain $@, which may cause a HTML attribute injection vulnerability. | tst-multi-character-sanitization.js:11:30:11:44 | .on\\w+=.*\\'.*\\' | on |
|
||||
| tst-multi-character-sanitization.js:19:3:19:35 | respons ... pt, "") | This string may still contain $@, which may cause a HTML element injection vulnerability. | tst-multi-character-sanitization.js:18:18:18:24 | <script | <script |
|
||||
@@ -31,4 +31,9 @@
|
||||
| tst-multi-character-sanitization.js:126:7:129:34 | x\\n . ... //, "") | This string may still contain $@, which may cause a path injection vulnerability. | tst-multi-character-sanitization.js:129:21:129:22 | \\/ | /.. |
|
||||
| tst-multi-character-sanitization.js:135:2:135:44 | content ... gi, "") | This string may still contain $@, which may cause a HTML element injection vulnerability. | tst-multi-character-sanitization.js:135:19:135:25 | <script | <script |
|
||||
| tst-multi-character-sanitization.js:136:2:136:46 | content ... gi, "") | This string may still contain $@, which may cause a HTML element injection vulnerability. | tst-multi-character-sanitization.js:136:19:136:19 | < | <script |
|
||||
| tst-multi-character-sanitization.js:137:2:137:48 | content ... gi, "") | This string may still contain $@, which may cause a HTML element injection vulnerability. | tst-multi-character-sanitization.js:137:19:137:20 | .+ | <script |
|
||||
| tst-multi-character-sanitization.js:138:2:138:48 | content ... gi, "") | This string may still contain $@, which may cause a HTML element injection vulnerability. | tst-multi-character-sanitization.js:138:21:138:21 | < | <script |
|
||||
| tst-multi-character-sanitization.js:142:13:142:62 | content ... gi, "") | This string may still contain $@, which may cause a HTML element injection vulnerability. | tst-multi-character-sanitization.js:142:30:142:36 | <script | <script |
|
||||
| tst-multi-character-sanitization.js:143:13:143:56 | content ... /g, '') | This string may still contain $@, which may cause a HTML element injection vulnerability. | tst-multi-character-sanitization.js:143:30:143:30 | < | <script |
|
||||
| tst-multi-character-sanitization.js:144:13:144:91 | content ... /g, '') | This string may still contain $@, which may cause a HTML element injection vulnerability. | tst-multi-character-sanitization.js:144:30:144:30 | < | <script |
|
||||
| tst-multi-character-sanitization.js:145:13:145:90 | content ... /g, '') | This string may still contain $@, which may cause a HTML element injection vulnerability. | tst-multi-character-sanitization.js:145:30:145:30 | < | <script |
|
||||
|
||||
@@ -134,6 +134,14 @@
|
||||
(function (content) {
|
||||
content.replace(/<script.*\/script>/gi, ""); // NOT OK
|
||||
content.replace(/<(script).*\/script>/gi, ""); // NOT OK
|
||||
content.replace(/.+<(script).*\/script>/gi, ""); // OK
|
||||
content.replace(/.+<(script).*\/script>/gi, ""); // NOT OK
|
||||
content.replace(/.*<(script).*\/script>/gi, ""); // NOT OK
|
||||
});
|
||||
|
||||
(function (content) {
|
||||
content = content.replace(/<script[\s\S]*?<\/script>/gi, ""); // NOT OK
|
||||
content = content.replace(/<[a-zA-Z\/](.|\n)*?>/g, '') || ' '; // NOT OK
|
||||
content = content.replace(/<(script|iframe|video)[\s\S]*?<\/(script|iframe|video)>/g, '') // NOT OK
|
||||
content = content.replace(/<(script|iframe|video)(.|\s)*?\/(script|iframe|video)>/g, '') // NOT OK
|
||||
content = content.replace(/<[^<]*>/g, ""); // OK
|
||||
});
|
||||
Reference in New Issue
Block a user