Merge pull request #8142 from github/hmac/incomplete-multi-char-sanitization

This commit is contained in:
Harry Maclean
2022-08-18 10:02:39 +12:00
committed by GitHub
15 changed files with 724 additions and 198 deletions

View File

@@ -20,6 +20,8 @@ class StringSubstitutionCall extends DataFlow::CallNode {
this.getMethodName() = ["sub", "sub!", "gsub", "gsub!"] and
exists(this.getReceiver()) and
this.getNumberOfArguments() = 2
or
this.getNumberOfArguments() = 1 and exists(this.getBlock())
}
/**
@@ -45,9 +47,10 @@ class StringSubstitutionCall extends DataFlow::CallNode {
* call, if any.
*/
RE::RegExpPatternSource getPatternRegExp() {
// TODO: using local flow means we miss regexps defined as constants outside
// of the function scope.
result.(DataFlow::LocalSourceNode).flowsTo(this.getPatternArgument())
or
result.asExpr().getExpr() =
this.getPatternArgument().asExpr().getExpr().(ConstantReadAccess).getValue()
}
/**
@@ -59,11 +62,19 @@ class StringSubstitutionCall extends DataFlow::CallNode {
}
/**
* Gets the string value passed as the second (replacement) argument in this
* call, if any.
* Gets the string value used to replace instances of the pattern, if any.
* This includes values passed explicitly as the second argument and values
* returned from the block, if one is given.
*/
string getReplacementString() {
result = this.getReplacementArgument().asExpr().getConstantValue().getString()
or
exists(DataFlow::Node blockReturnNode, DataFlow::LocalSourceNode stringNode |
exprNodeReturnedFrom(blockReturnNode, this.getBlock().asExpr().getExpr())
|
stringNode.flowsTo(blockReturnNode) and
result = stringNode.asExpr().getConstantValue().getString()
)
}
/** Gets a string that is being replaced by this call. */
@@ -77,7 +88,6 @@ class StringSubstitutionCall extends DataFlow::CallNode {
predicate replaces(string old, string new) {
old = this.getAReplacedString() and
new = this.getReplacementString()
// TODO: handle block-variant of the call
}
}

View File

@@ -268,6 +268,9 @@ class RegExpTerm extends RegExpParent {
/** Gets the primary QL class for this term. */
override string getAPrimaryQlClass() { result = "RegExpTerm" }
/** Holds if this regular expression term can match the empty string. */
predicate isNullable() { none() }
}
/**
@@ -326,6 +329,8 @@ class RegExpStar extends InfiniteRepetitionQuantifier {
RegExpStar() { this.getQualifier().charAt(0) = "*" }
override string getAPrimaryQlClass() { result = "RegExpStar" }
override predicate isNullable() { any() }
}
/**
@@ -341,6 +346,8 @@ class RegExpPlus extends InfiniteRepetitionQuantifier {
RegExpPlus() { this.getQualifier().charAt(0) = "+" }
override string getAPrimaryQlClass() { result = "RegExpPlus" }
override predicate isNullable() { this.getAChild().isNullable() }
}
/**
@@ -356,6 +363,8 @@ class RegExpOpt extends RegExpQuantifier {
RegExpOpt() { this.getQualifier().charAt(0) = "?" }
override string getAPrimaryQlClass() { result = "RegExpOpt" }
override predicate isNullable() { any() }
}
/**
@@ -375,6 +384,8 @@ class RegExpRange extends RegExpQuantifier {
RegExpRange() { re.multiples(part_end, end, lower, upper) }
override string getAPrimaryQlClass() { result = "RegExpRange" }
/** Gets the string defining the upper bound of this range, if any. */
string getUpper() { result = upper }
@@ -393,7 +404,7 @@ class RegExpRange extends RegExpQuantifier {
/** Gets the lower bound of the range. */
int getLowerBound() { result = this.getLower().toInt() }
override string getAPrimaryQlClass() { result = "RegExpRange" }
override predicate isNullable() { this.getAChild().isNullable() or this.getLowerBound() = 0 }
}
/**
@@ -440,6 +451,10 @@ class RegExpSequence extends RegExpTerm, TRegExpSequence {
}
override string getAPrimaryQlClass() { result = "RegExpSequence" }
override predicate isNullable() {
forall(RegExpTerm child | child = this.getAChild() | child.isNullable())
}
}
pragma[nomagic]
@@ -505,6 +520,8 @@ class RegExpAlt extends RegExpTerm, TRegExpAlt {
override string getAMatchedString() { result = this.getAlternative().getAMatchedString() }
override string getAPrimaryQlClass() { result = "RegExpAlt" }
override predicate isNullable() { this.getAChild().isNullable() }
}
class RegExpCharEscape = RegExpEscape;
@@ -579,6 +596,8 @@ class RegExpEscape extends RegExpNormalChar {
*/
class RegExpWordBoundary extends RegExpSpecialChar {
RegExpWordBoundary() { this.getChar() = "\\b" }
override predicate isNullable() { none() }
}
/**
@@ -607,6 +626,8 @@ class RegExpCharacterClassEscape extends RegExpEscape {
override RegExpTerm getChild(int i) { none() }
override string getAPrimaryQlClass() { result = "RegExpCharacterClassEscape" }
override predicate isNullable() { none() }
}
/**
@@ -663,6 +684,8 @@ class RegExpCharacterClass extends RegExpTerm, TRegExpCharacterClass {
}
override string getAPrimaryQlClass() { result = "RegExpCharacterClass" }
override predicate isNullable() { none() }
}
/**
@@ -702,6 +725,8 @@ class RegExpCharacterRange extends RegExpTerm, TRegExpCharacterRange {
}
override string getAPrimaryQlClass() { result = "RegExpCharacterRange" }
override predicate isNullable() { none() }
}
/**
@@ -773,6 +798,8 @@ class RegExpConstant extends RegExpTerm {
override string getConstantValue() { result = this.getValue() }
override string getAPrimaryQlClass() { result = "RegExpConstant" }
override predicate isNullable() { none() }
}
/**
@@ -820,6 +847,8 @@ class RegExpGroup extends RegExpTerm, TRegExpGroup {
override string getAMatchedString() { result = this.getAChild().getAMatchedString() }
override string getAPrimaryQlClass() { result = "RegExpGroup" }
override predicate isNullable() { this.getAChild().isNullable() }
}
/**
@@ -867,6 +896,8 @@ class RegExpDot extends RegExpSpecialChar {
RegExpDot() { this.getChar() = "." }
override string getAPrimaryQlClass() { result = "RegExpDot" }
override predicate isNullable() { none() }
}
/**
@@ -897,6 +928,8 @@ class RegExpDollar extends RegExpAnchor {
RegExpDollar() { this.getChar() = ["$", "\\Z", "\\z"] }
override string getAPrimaryQlClass() { result = "RegExpDollar" }
override predicate isNullable() { any() }
}
/**
@@ -912,6 +945,8 @@ class RegExpCaret extends RegExpAnchor {
RegExpCaret() { this.getChar() = ["^", "\\A"] }
override string getAPrimaryQlClass() { result = "RegExpCaret" }
override predicate isNullable() { any() }
}
/**
@@ -929,6 +964,8 @@ class RegExpZeroWidthMatch extends RegExpGroup {
override RegExpTerm getChild(int i) { none() }
override string getAPrimaryQlClass() { result = "RegExpZeroWidthMatch" }
override predicate isNullable() { any() }
}
/**
@@ -954,6 +991,8 @@ class RegExpSubPattern extends RegExpZeroWidthMatch {
result.getEnd() = in_end
)
}
override predicate isNullable() { any() }
}
/**
@@ -981,6 +1020,8 @@ class RegExpPositiveLookahead extends RegExpLookahead {
RegExpPositiveLookahead() { re.positiveLookaheadAssertionGroup(start, end) }
override string getAPrimaryQlClass() { result = "RegExpPositiveLookahead" }
override predicate isNullable() { any() }
}
/**
@@ -1076,6 +1117,8 @@ class RegExpBackRef extends RegExpTerm, TRegExpBackRef {
override RegExpTerm getChild(int i) { none() }
override string getAPrimaryQlClass() { result = "RegExpBackRef" }
override predicate isNullable() { this.getGroup().isNullable() }
}
/**

View File

@@ -0,0 +1,202 @@
/**
* Provides shared predicates for reasoning about improper multi-character sanitization.
*/
import IncompleteMultiCharacterSanitizationSpecific
/**
* A prefix that may be dangerous to sanitize explicitly.
*
* Note that this class exists solely as a (necessary) optimization for this query.
*/
private class DangerousPrefix extends string {
DangerousPrefix() {
this = ["/..", "../"] or
this = "<!--" or
this = "<" + ["iframe", "script", "cript", "scrip", "style"]
}
}
/**
* A substring of a prefix that may be dangerous to sanitize explicitly.
*/
private class DangerousPrefixSubstring extends string {
DangerousPrefixSubstring() {
exists(DangerousPrefix s | this = s.substring([0 .. s.length()], [0 .. s.length()]))
}
}
/**
* Gets a char from a dangerous prefix that is matched by `t`.
*/
pragma[noinline]
private DangerousPrefixSubstring getADangerousMatchedChar(EmptyReplaceRegExpTerm t) {
t.isNullable() and result = ""
or
result = t.getAMatchedString()
or
// A substring matched by some character class. This is only used to match the "word" part of a HTML tag (e.g. "iframe" in "<iframe").
exists(NfaUtils::CharacterClass cc |
cc = NfaUtils::getCanonicalCharClass(t) and
cc.matches(result) and
result.regexpMatch("\\w") and
// excluding character classes that match ">" (e.g. /<[^<]*>/), as these might consume nested HTML tags, and thus prevent the dangerous pattern this query is looking for.
not cc.matches(">")
)
or
t instanceof RegExpDot and
result.length() = 1
or
(
t instanceof RegExpOpt or
t instanceof RegExpStar or
t instanceof RegExpPlus or
t instanceof RegExpGroup or
t instanceof RegExpAlt
) and
result = getADangerousMatchedChar(t.getAChild())
}
/**
* Gets a dangerous prefix that is in the prefix language of `t`.
*/
private DangerousPrefix getADangerousMatchedPrefix(EmptyReplaceRegExpTerm t) {
result = getADangerousMatchedPrefixSubstring(t) and
not exists(EmptyReplaceRegExpTerm pred | pred = t.getPredecessor+() and not pred.isNullable())
}
/**
* Gets a substring of a dangerous prefix that is in the language starting at `t` (ignoring lookarounds).
*
* Note that the language of `t` is slightly restricted as not all RegExpTerm types are supported.
*/
private DangerousPrefixSubstring getADangerousMatchedPrefixSubstring(EmptyReplaceRegExpTerm t) {
result = getADangerousMatchedChar(t) + getADangerousMatchedPrefixSubstring(t.getSuccessor())
or
result = getADangerousMatchedChar(t)
or
// loop around for repetitions (only considering alphanumeric characters in the repetition)
exists(RepetitionMatcher repetition | t = repetition |
result = getADangerousMatchedPrefixSubstring(repetition) + repetition.getAChar()
)
}
private class RepetitionMatcher extends EmptyReplaceRegExpTerm {
string char;
pragma[noinline]
RepetitionMatcher() {
(this instanceof RegExpPlus or this instanceof RegExpStar) and
char = getADangerousMatchedChar(this.getAChild()) and
char.regexpMatch("\\w")
}
pragma[noinline]
string getAChar() { result = char }
}
/**
* Holds if `t` may match the dangerous `prefix` and some suffix, indicating intent to prevent a vulnerability of kind `kind`.
*/
predicate matchesDangerousPrefix(EmptyReplaceRegExpTerm t, string prefix, string kind) {
prefix = getADangerousMatchedPrefix(t) and
(
kind = "path injection" and
prefix = ["/..", "../"] and
// If the regex is matching explicit path components, it is unlikely that it's being used as a sanitizer.
not t.getSuccessor*().getAMatchedString().regexpMatch("(?is).*[a-z0-9_-].*")
or
kind = "HTML element injection" and
(
// comments
prefix = "<!--" and
// If the regex is matching explicit textual content of an HTML comment, it is unlikely that it's being used as a sanitizer.
not t.getSuccessor*().getAMatchedString().regexpMatch("(?is).*[a-z0-9_].*")
or
// specific tags
// the `cript|scrip` case has been observed in the wild several times
prefix = "<" + ["iframe", "script", "cript", "scrip", "style"]
)
)
or
kind = "HTML attribute injection" and
prefix =
[
// ordinary event handler prefix
"on",
// angular prefixes
"ng-", "ng:", "data-ng-", "x-ng-"
] and
(
// explicit matching: `onclick` and `ng-bind`
t.getAMatchedString().regexpMatch("(?i)" + prefix + "[a-z]+")
or
// regexp-based matching: `on[a-z]+`
exists(EmptyReplaceRegExpTerm start | start = t.getAChild() |
start.getAMatchedString().regexpMatch("(?i)[^a-z]*" + prefix) and
isCommonWordMatcher(start.getSuccessor())
)
)
}
/**
* Holds if `t` is a common pattern for matching words
*/
private predicate isCommonWordMatcher(RegExpTerm t) {
exists(RegExpTerm quantified | quantified = t.(RegExpQuantifier).getChild(0) |
// [a-z]+ and similar
quantified
.(RegExpCharacterClass)
.getAChild()
.(RegExpCharacterRange)
.isRange(["a", "A"], ["z", "Z"])
or
// \w+ or [\w]+
[quantified, quantified.(RegExpCharacterClass).getAChild()]
.(RegExpCharacterClassEscape)
.getValue() = "w"
)
}
/**
* Holds if `replace` has a pattern argument containing a regular expression
* `dangerous` which matches a dangerous string beginning with `prefix`, in an
* attempt to avoid a vulnerability of kind `kind`.
*/
predicate isResult(
StringSubstitutionCall replace, EmptyReplaceRegExpTerm dangerous, string prefix, string kind
) {
exists(EmptyReplaceRegExpTerm regexp |
replace = regexp.getCall() and
dangerous.getRootTerm() = regexp and
// skip leading optional elements
not dangerous.isNullable() and
// only warn about the longest match
prefix = max(string m | matchesDangerousPrefix(dangerous, m, kind) | m order by m.length(), m) and
// only warn once per kind
not exists(EmptyReplaceRegExpTerm other |
other = dangerous.getAChild+() or other = dangerous.getPredecessor+()
|
matchesDangerousPrefix(other, _, kind) and
not other.isNullable()
) and
// avoid anchored terms
not exists(RegExpAnchor a | regexp = a.getRootTerm()) and
// Don't flag replace operations that are called repeatedly in a loop, as they can actually work correctly.
not replace.flowsTo(replace.getReceiver+())
)
}
/**
* Holds if `replace` has a pattern argument containing a regular expression
* `dangerous` which matches a dangerous string beginning with `prefix`. `msg`
* is the alert we report.
*/
query predicate problems(
StringSubstitutionCall replace, string msg, EmptyReplaceRegExpTerm dangerous, string prefix
) {
exists(string kind |
isResult(replace, dangerous, prefix, kind) and
msg = "This string may still contain $@, which may cause a " + kind + " vulnerability."
)
}

View File

@@ -0,0 +1,24 @@
/**
* Provides language-specific predicates for reasoning about improper multi-character sanitization.
*/
import codeql.ruby.frameworks.core.String
import codeql.ruby.regexp.RegExpTreeView
import codeql.ruby.security.regexp.NfaUtils as NfaUtils
/**
* A regexp term that matches substrings that should be replaced with the empty string.
*/
class EmptyReplaceRegExpTerm extends RegExpTerm {
private StringSubstitutionCall call;
EmptyReplaceRegExpTerm() {
call.getReplacementString() = "" and
this = call.getPatternRegExp().getRegExpTerm().getAChild*()
}
/**
* Get the substitution call that uses this regexp term.
*/
StringSubstitutionCall getCall() { result = call }
}