JS: rewrite js/incomplete-multi-character-sanitization

This commit is contained in:
Esben Sparre Andreasen
2020-06-19 11:50:39 +02:00
parent 9d4cd0aa85
commit 580a24e982
4 changed files with 204 additions and 95 deletions

View File

@@ -12,69 +12,153 @@
*/
import javascript
import semmle.javascript.security.IncompleteBlacklistSanitizer
predicate isDangerous(RegExpTerm t) {
// path traversals
t.getAMatchedString() = ["..", "/..", "../"]
or
exists(RegExpTerm start |
start = t.(RegExpSequence).getAChild() and
start.getConstantValue() = "." and
start.getSuccessor().getConstantValue() = "." and
not [start.getPredecessor(), start.getSuccessor().getSuccessor()].getConstantValue() = "."
)
or
// HTML comments
t.getAMatchedString() = "<!--"
or
// HTML scripts
t.getAMatchedString().regexpMatch("(?i)<script.*")
or
exists(RegExpSequence seq | seq = t |
t.getChild(0).getConstantValue() = "<" and
// the `cript|scrip` case has been observed in the wild, not sure what the goal of that pattern is...
t.getChild(0)
.getSuccessor+()
.getAMatchedString()
.regexpMatch("(?i)iframe|script|cript|scrip|style")
)
or
// HTML attributes
exists(string dangerousPrefix | dangerousPrefix = ["ng-", "on"] |
t.getAMatchedString().regexpMatch("(i?)" + dangerousPrefix + "[a-z]+")
/**
* A regexp term that matches substrings that should be replaced with the empty string.
*/
class EmptyReplaceRegExpTerm extends RegExpTerm {
EmptyReplaceRegExpTerm() {
exists(StringReplaceCall replace |
[replace.getRawReplacement(), replace.getCallback(1).getAReturn()].mayHaveStringValue("") and
this = replace.getRegExp().getRoot().getAChild*()
)
}
}
/**
* A prefix that may be dangerous to sanitize explicitly.
*
* Note that this class exists solely as a (necessary) optimization for this query.
*/
class DangerousPrefix extends string {
DangerousPrefix() {
this = ["/..", "../"] or
this = "<!--" or
this = "<" + ["iframe", "script", "cript", "scrip", "style"]
}
}
/**
* A substring of a prefix that may be dangerous to sanitize explicitly.
*/
class DangerousPrefixSubstring extends string {
DangerousPrefixSubstring() {
exists(DangerousPrefix s | this = s.substring([0 .. s.length()], [0 .. s.length()]))
}
}
/**
* Gets a dangerous prefix that is in the prefix language of `t`.
*/
DangerousPrefix getADangerousMatchedPrefix(EmptyReplaceRegExpTerm t) {
result = getADangerousMatchedPrefixSubstring(t) and
not exists(EmptyReplaceRegExpTerm pred | pred = t.getPredecessor+() and not pred.isNullable())
}
/**
* Gets a substring of a dangerous prefix that is in the language starting at `t` (ignoring lookarounds).
*
* Note that the language of `t` is slightly restricted as not all RegExpTerm types are supported.
*/
DangerousPrefixSubstring getADangerousMatchedPrefixSubstring(EmptyReplaceRegExpTerm t) {
exists(string left |
t.isNullable() and left = ""
or
exists(RegExpTerm start, RegExpTerm event | start = t.getAChild() |
start.getConstantValue().regexpMatch("(?i)[^a-z]*" + dangerousPrefix) and
event = start.getSuccessor() and
exists(RegExpTerm quantified | quantified = event.(RegExpQuantifier).getChild(0) |
quantified
.(RegExpCharacterClass)
.getAChild()
.(RegExpCharacterRange)
.isRange(["a", "A"], ["z", "Z"]) or
[quantified, quantified.(RegExpRange).getAChild()].(RegExpCharacterClassEscape).getValue() =
"w"
)
t.getAMatchedString() = left
or
(
t instanceof RegExpOpt or
t instanceof RegExpStar or
t instanceof RegExpPlus or
t instanceof RegExpGroup or
t instanceof RegExpAlt
) and
left = getADangerousMatchedPrefixSubstring(t.getAChild())
|
result = left + getADangerousMatchedPrefixSubstring(t.getSuccessor()) or
result = left
)
}
/**
* Holds if `t` may match the dangerous `prefix` and some suffix, indicating intent to prevent a vulnerablity of kind `kind`.
*/
predicate matchesDangerousPrefix(EmptyReplaceRegExpTerm t, string prefix, string kind) {
prefix = getADangerousMatchedPrefix(t) and
(
kind = "path injection" and
// upwards navigation
prefix = ["/..", "../"] and
not t.getSuccessor*().getAMatchedString().regexpMatch("(?i).*[a-z0-9_-]+.*") // explicit path name mentions make this an unlikely sanitizer
or
kind = "HTML element injection" and
(
// comments
prefix = "<!--" and
not t.getSuccessor*().getAMatchedString().regexpMatch("(?i).*[a-z0-9_]+.*") // explicit comment content mentions make this an unlikely sanitizer
or
// specific tags
prefix = "<" + ["iframe", "script", "cript", "scrip", "style"] // the `cript|scrip` case has been observed in the wild several times
)
)
or
kind = "HTML attribute injection" and
prefix =
[
// ordinary event handler prefix
"on",
// angular prefixes
"ng-", "ng:", "data-ng-", "x-ng-"
] and
(
// explicit matching: `onclick` and `ng-bind`
t.getAMatchedString().regexpMatch("(?i)" + prefix + "[a-z]+")
or
// regexp-based matching: `on[a-z]+`
exists(EmptyReplaceRegExpTerm start | start = t.getAChild() |
start.getConstantValue().regexpMatch("(?i)[^a-z]*" + prefix) and
isCommonWordMatcher(start.getSuccessor())
)
)
}
from StringReplaceCall replace, RegExpTerm regexp, RegExpTerm dangerous
/**
* Holds if `t` is a common pattern for matching words
*/
predicate isCommonWordMatcher(RegExpTerm t) {
exists(RegExpTerm quantified | quantified = t.(RegExpQuantifier).getChild(0) |
// [a-z]+ and similar
quantified
.(RegExpCharacterClass)
.getAChild()
.(RegExpCharacterRange)
.isRange(["a", "A"], ["z", "Z"])
or
// \w+ or [\w]+
[quantified, quantified.(RegExpCharacterClass).getAChild()]
.(RegExpCharacterClassEscape)
.getValue() = "w"
)
}
from
StringReplaceCall replace, EmptyReplaceRegExpTerm regexp, EmptyReplaceRegExpTerm dangerous,
string prefix, string kind
where
[replace.getRawReplacement(), replace.getCallback(1).getAReturn()].mayHaveStringValue("") and
replace.isGlobal() and
regexp = replace.getRegExp().getRoot() and
dangerous.getRootTerm() = regexp and
isDangerous(dangerous) and
// avoid anchored terms
not exists(RegExpAnchor a | a.getRootTerm() = regexp) and
// avoid flagging wrappers
not (
dangerous instanceof RegExpAlt or
dangerous instanceof RegExpGroup
// only warn about the longest match (presumably the most descriptive)
prefix = max(string m | matchesDangerousPrefix(dangerous, m, kind) | m order by m.length()) and
// only warn once per kind
not exists(EmptyReplaceRegExpTerm other |
other = dangerous.getAChild+() or other = dangerous.getPredecessor+()
|
matchesDangerousPrefix(other, _, kind)
) and
// don't flag replace operations in a loop
not replace.getReceiver().getALocalSource() = replace
select replace, "The replaced string may still contain a substring that starts matching at $@.",
dangerous, dangerous.toString()
not replace.getAMethodCall*().flowsTo(replace.getReceiver()) and
// avoid anchored terms
not exists(RegExpAnchor a | regexp = a.getRootTerm())
select replace,
"This string may still contain a substring that starts matching at $@, which may cause a " + kind +
" vulnerability.", dangerous, prefix