JS: Make XSS MetacharEscapeSanitizer more precise

This commit is contained in:
Asger Feldthaus
2021-03-15 23:39:52 +00:00
parent effa52f9e1
commit 198bdcab26
2 changed files with 63 additions and 9 deletions

View File

@@ -1114,4 +1114,64 @@ module RegExp {
or
result = node.asExpr().(StringLiteral).asRegExp()
}
/**
* A character that will be analyzed by `RegExp::alwaysMatchesMetaCharacter`.
*
* Currently only `<`, `'`, and `"` are considered to be meta-characters, but new meta-characters
* can be added by subclassing this class.
*/
abstract class MetaCharacter extends string {
bindingset[this]
MetaCharacter() { any() }
/**
* Holds if the given atomic term matches this meta-character.
*
* Does not hold for derived terms like alternatives and groups.
*
* By default, `.`, `\W`, `\S`, and `\D` are considered to match any meta-character,
* but the predicate can be overridden for meta-characters where this is not the case.
*/
predicate matchedByAtom(RegExpTerm term) {
term.(RegExpConstant).getConstantValue() = this
or
term instanceof RegExpDot
or
term.(RegExpCharacterClassEscape).getValue() = ["\\W", "\\S", "\\D"]
or
exists(string lo, string hi |
term.(RegExpCharacterRange).isRange(lo, hi) and
lo <= this and
this <= hi
)
}
}
private class DefaultMetaCharacter extends MetaCharacter {
DefaultMetaCharacter() { this = ["<", "'", "\""] }
}
/**
* Holds if `term` can match any occurence of `char` within a string (not taking into account
* the context in which `term` appears).
*
* This predicate is under-approximate and never considers sequences to guarantee a match.
*/
predicate alwaysMatchesMetaCharacter(RegExpTerm term, MetaCharacter char) {
not term.getParent() instanceof RegExpSequence and // restrict size of predicate
char.matchedByAtom(term)
or
alwaysMatchesMetaCharacter(term.(RegExpGroup).getAChild(), char)
or
alwaysMatchesMetaCharacter(term.(RegExpAlt).getAlternative(), char)
or
exists(RegExpCharacterClass class_ | term = class_ |
not class_.isInverted() and
char.matchedByAtom(class_.getAChild())
or
class_.isInverted() and
not char.matchedByAtom(class_.getAChild())
)
}
}

View File

@@ -28,19 +28,13 @@ module Shared {
abstract class SanitizerGuard extends TaintTracking::SanitizerGuardNode { }
/**
* A global regexp replacement involving an HTML meta-character, viewed as a sanitizer for
* A global regexp replacement involving the `<`, `'`, or `"` meta-character, viewed as a sanitizer for
* XSS vulnerabilities.
*
* The XSS queries do not attempt to reason about correctness or completeness of sanitizers,
* so any such replacement stops taint propagation.
*/
class MetacharEscapeSanitizer extends Sanitizer, StringReplaceCall {
MetacharEscapeSanitizer() {
this.isGlobal() and
exists(RegExpConstant c |
c.getLiteral() = getRegExp().asExpr() and
c.getValue().regexpMatch("['\"&<>]")
)
isGlobal() and
RegExp::alwaysMatchesMetaCharacter(getRegExp().getRoot(), ["<", "'", "\""])
}
}