mirror of
https://github.com/github/codeql.git
synced 2025-12-17 01:03:14 +01:00
Share IncompleteMultiCharacterSanitization JS/Ruby
Most of the classes and predicates in this query can be shared between the two languages. There are just a few language-specific things that we place in IncompleteMultiCharacterSanitizationSpecific.
This commit is contained in:
@@ -597,5 +597,9 @@
|
||||
"Swift patterns test file": [
|
||||
"swift/ql/test/extractor-tests/patterns/patterns.swift",
|
||||
"swift/ql/test/library-tests/parent/patterns.swift"
|
||||
],
|
||||
"IncompleteMultiCharacterSanitization JS/Ruby": [
|
||||
"javascript/ql/lib/semmle/javascript/security/IncompleteMultiCharacterSanitization.qll",
|
||||
"ruby/ql/lib/codeql/ruby/security/IncompleteMultiCharacterSanitization.qll"
|
||||
]
|
||||
}
|
||||
|
||||
@@ -0,0 +1,159 @@
|
||||
/**
|
||||
* Provides shared predicates for reasoning about improper multi-character sanitization.
|
||||
*/
|
||||
|
||||
import IncompleteMultiCharacterSanitizationSpecific
|
||||
|
||||
/**
 * A string prefix that may be dangerous to sanitize explicitly.
 *
 * Note that this class exists solely as a (necessary) optimization for this query.
 */
private class DangerousPrefix extends string {
  DangerousPrefix() {
    // upwards path navigation and the HTML comment opener
    this = ["/..", "../", "<!--"]
    or
    // the opening of selected HTML tags
    this = "<" + ["iframe", "script", "cript", "scrip", "style"]
  }
}
|
||||
|
||||
/**
 * Any substring of a `DangerousPrefix`.
 */
private class DangerousPrefixSubstring extends string {
  DangerousPrefixSubstring() {
    exists(DangerousPrefix prefix, int lo, int hi |
      lo = [0 .. prefix.length()] and
      hi = [0 .. prefix.length()] and
      this = prefix.substring(lo, hi)
    )
  }
}
|
||||
|
||||
/**
 * Gets a piece of a dangerous prefix that the term `t` matches on its own.
 */
pragma[noinline]
private DangerousPrefixSubstring getADangerousMatchedChar(EmptyReplaceRegExpTerm t) {
  // a nullable term may match the empty string
  result = "" and
  t.isNullable()
  or
  t.getAMatchedString() = result
  or
  // `.` stands for any single character
  result.length() = 1 and
  t instanceof RegExpDot
  or
  // A substring matched by some character class. This is only used to match the "word" part of a HTML tag (e.g. "iframe" in "<iframe").
  exists(ReDoSUtil::CharacterClass charClass | charClass = ReDoSUtil::getCanonicalCharClass(t) |
    result.regexpMatch("\\w") and
    charClass.matches(result) and
    // excluding character classes that match ">" (e.g. /<[^<]*>/), as these might consume nested HTML tags, and thus prevent the dangerous pattern this query is looking for.
    not charClass.matches(">")
  )
  or
  // optional, repeated, grouped and alternative terms match whatever one of their children matches
  result = getADangerousMatchedChar(t.getAChild()) and
  (
    t instanceof RegExpAlt or
    t instanceof RegExpGroup or
    t instanceof RegExpPlus or
    t instanceof RegExpStar or
    t instanceof RegExpOpt
  )
}
|
||||
|
||||
/**
 * Gets a dangerous prefix that is in the prefix language of `t`,
 * where every term preceding `t` is nullable.
 */
private DangerousPrefix getADangerousMatchedPrefix(EmptyReplaceRegExpTerm t) {
  forall(EmptyReplaceRegExpTerm earlier | earlier = t.getPredecessor+() | earlier.isNullable()) and
  result = getADangerousMatchedPrefixSubstring(t)
}
|
||||
|
||||
/**
 * Gets a substring of a dangerous prefix that is matched by the sequence of terms
 * beginning at `t` (lookarounds are ignored).
 *
 * Not every kind of `RegExpTerm` is supported, so the language considered for `t` is slightly restricted.
 */
private DangerousPrefixSubstring getADangerousMatchedPrefixSubstring(EmptyReplaceRegExpTerm t) {
  // `t` alone
  result = getADangerousMatchedChar(t)
  or
  // `t` followed by what its successor matches
  result = getADangerousMatchedChar(t) + getADangerousMatchedPrefixSubstring(t.getSuccessor())
  or
  // loop around for repetitions (only considering alphanumeric characters in the repetition)
  result = getADangerousMatchedPrefixSubstring(t) + t.(RepetitionMatcher).getAChar()
}
|
||||
|
||||
/**
 * A `*` or `+` repetition with a child that matches a string satisfying `\w`
 * taken from a dangerous prefix.
 */
private class RepetitionMatcher extends EmptyReplaceRegExpTerm {
  string char;

  pragma[noinline]
  RepetitionMatcher() {
    char.regexpMatch("\\w") and
    char = getADangerousMatchedChar(this.getAChild()) and
    (this instanceof RegExpStar or this instanceof RegExpPlus)
  }

  /** Gets a string matching `\w` that a child of this repetition matches. */
  pragma[noinline]
  string getAChar() { result = char }
}
|
||||
|
||||
/**
 * Holds if `t` may match the dangerous `prefix` and some suffix, which indicates an
 * intent to prevent a vulnerability of kind `kind`.
 *
 * `kind` is one of "path injection", "HTML element injection" or "HTML attribute injection".
 */
predicate matchesDangerousPrefix(EmptyReplaceRegExpTerm t, string prefix, string kind) {
  prefix = getADangerousMatchedPrefix(t) and
  (
    kind = "HTML element injection" and
    (
      // specific tags
      // the `cript|scrip` case has been observed in the wild several times
      prefix = "<" + ["iframe", "script", "cript", "scrip", "style"]
      or
      // comments
      prefix = "<!--" and
      // A regex that spells out textual content of an HTML comment is unlikely to be used as a sanitizer.
      not t.getSuccessor*().getAMatchedString().regexpMatch("(?is).*[a-z0-9_].*")
    )
    or
    kind = "path injection" and
    prefix = ["/..", "../"] and
    // A regex that spells out explicit path components is unlikely to be used as a sanitizer.
    not t.getSuccessor*().getAMatchedString().regexpMatch("(?is).*[a-z0-9_-].*")
  )
  or
  kind = "HTML attribute injection" and
  prefix =
    [
      // ordinary event handler prefix
      "on",
      // angular prefixes
      "ng-", "ng:", "data-ng-", "x-ng-"
    ] and
  (
    // regexp-based matching: `on[a-z]+`
    exists(EmptyReplaceRegExpTerm first | first = t.getAChild() |
      isCommonWordMatcher(first.getSuccessor()) and
      first.getAMatchedString().regexpMatch("(?i)[^a-z]*" + prefix)
    )
    or
    // explicit matching: `onclick` and `ng-bind`
    t.getAMatchedString().regexpMatch("(?i)" + prefix + "[a-z]+")
  )
}
|
||||
|
||||
/**
 * Holds if `t` is a common pattern for matching words: a quantified term whose
 * first child is a letter range such as `[a-z]`, or `\w`, or a class containing `\w`.
 */
private predicate isCommonWordMatcher(RegExpTerm t) {
  exists(RegExpTerm body | body = t.(RegExpQuantifier).getChild(0) |
    // \w+ or [\w]+
    exists(RegExpCharacterClassEscape esc |
      esc = [body, body.(RegExpCharacterClass).getAChild()] and
      esc.getValue() = "w"
    )
    or
    // [a-z]+ and similar
    exists(RegExpCharacterRange letters |
      letters = body.(RegExpCharacterClass).getAChild() and
      letters.isRange(["a", "A"], ["z", "Z"])
    )
  )
}
|
||||
@@ -0,0 +1,18 @@
|
||||
/**
|
||||
* Provides language-specific predicates for reasoning about improper multi-character sanitization.
|
||||
*/
|
||||
|
||||
import javascript
|
||||
import semmle.javascript.security.performance.ReDoSUtil as ReDoSUtil
|
||||
|
||||
/**
 * A regexp term that is part of the pattern of a string replacement whose
 * replacement value (or callback return value) may be the empty string.
 */
class EmptyReplaceRegExpTerm extends RegExpTerm {
  EmptyReplaceRegExpTerm() {
    exists(StringReplaceCall call |
      // the term is the root of the pattern or one of its descendants
      this = call.getRegExp().getRoot().getAChild*() and
      [call.getCallback(1).getAReturn(), call.getRawReplacement()].mayHaveStringValue("")
    )
  }
}
|
||||
@@ -14,172 +14,7 @@
|
||||
*/
|
||||
|
||||
import javascript
|
||||
|
||||
/**
 * A regexp term that is part of the pattern of a string replacement whose
 * replacement value (or callback return value) may be the empty string.
 */
class EmptyReplaceRegExpTerm extends RegExpTerm {
  EmptyReplaceRegExpTerm() {
    exists(StringReplaceCall call |
      // the term is the root of the pattern or one of its descendants
      this = call.getRegExp().getRoot().getAChild*() and
      [call.getCallback(1).getAReturn(), call.getRawReplacement()].mayHaveStringValue("")
    )
  }
}
|
||||
|
||||
/**
 * A string prefix that may be dangerous to sanitize explicitly.
 *
 * Note that this class exists solely as a (necessary) optimization for this query.
 */
class DangerousPrefix extends string {
  DangerousPrefix() {
    // upwards path navigation and the HTML comment opener
    this = ["/..", "../", "<!--"]
    or
    // the opening of selected HTML tags
    this = "<" + ["iframe", "script", "cript", "scrip", "style"]
  }
}
|
||||
|
||||
/**
 * Any substring of a `DangerousPrefix`.
 */
class DangerousPrefixSubstring extends string {
  DangerousPrefixSubstring() {
    exists(DangerousPrefix prefix, int lo, int hi |
      lo = [0 .. prefix.length()] and
      hi = [0 .. prefix.length()] and
      this = prefix.substring(lo, hi)
    )
  }
}
|
||||
|
||||
/**
 * Gets a dangerous prefix that is in the prefix language of `t`,
 * where every term preceding `t` is nullable.
 */
DangerousPrefix getADangerousMatchedPrefix(EmptyReplaceRegExpTerm t) {
  forall(EmptyReplaceRegExpTerm earlier | earlier = t.getPredecessor+() | earlier.isNullable()) and
  result = getADangerousMatchedPrefixSubstring(t)
}
|
||||
|
||||
private import semmle.javascript.security.regexp.NfaUtils as NfaUtils
|
||||
|
||||
/**
 * Gets a piece of a dangerous prefix that the term `t` matches on its own.
 */
pragma[noinline]
DangerousPrefixSubstring getADangerousMatchedChar(EmptyReplaceRegExpTerm t) {
  // a nullable term may match the empty string
  result = "" and
  t.isNullable()
  or
  result = t.getAMatchedString()
  or
  // `.` stands for any single character
  result.length() = 1 and
  t instanceof RegExpDot
  or
  // A substring matched by some character class. This is only used to match the "word" part of a HTML tag (e.g. "iframe" in "<iframe").
  exists(NfaUtils::CharacterClass charClass | charClass = NfaUtils::getCanonicalCharClass(t) |
    result.regexpMatch("\\w") and
    charClass.matches(result) and
    // excluding character classes that match ">" (e.g. /<[^<]*>/), as these might consume nested HTML tags, and thus prevent the dangerous pattern this query is looking for.
    not charClass.matches(">")
  )
  or
  // optional, repeated, grouped and alternative terms match whatever one of their children matches
  result = getADangerousMatchedChar(t.getAChild()) and
  (
    t instanceof RegExpAlt or
    t instanceof RegExpGroup or
    t instanceof RegExpPlus or
    t instanceof RegExpStar or
    t instanceof RegExpOpt
  )
}
|
||||
|
||||
/**
 * Gets a substring of a dangerous prefix that is matched by the sequence of terms
 * beginning at `t` (lookarounds are ignored).
 *
 * Not every kind of `RegExpTerm` is supported, so the language considered for `t` is slightly restricted.
 */
DangerousPrefixSubstring getADangerousMatchedPrefixSubstring(EmptyReplaceRegExpTerm t) {
  // `t` alone
  result = getADangerousMatchedChar(t)
  or
  // `t` followed by what its successor matches
  result = getADangerousMatchedChar(t) + getADangerousMatchedPrefixSubstring(t.getSuccessor())
  or
  // loop around for repetitions (only considering alphanumeric characters in the repetition)
  result = getADangerousMatchedPrefixSubstring(t) + t.(RepetitionMatcher).getAChar()
}
|
||||
|
||||
/**
 * A `*` or `+` repetition with a child that matches a string satisfying `\w`
 * taken from a dangerous prefix.
 */
class RepetitionMatcher extends EmptyReplaceRegExpTerm {
  string char;

  pragma[noinline]
  RepetitionMatcher() {
    char.regexpMatch("\\w") and
    char = getADangerousMatchedChar(this.getAChild()) and
    (this instanceof RegExpStar or this instanceof RegExpPlus)
  }

  /** Gets a string matching `\w` that a child of this repetition matches. */
  pragma[noinline]
  string getAChar() { result = char }
}
|
||||
|
||||
/**
 * Holds if `t` may match the dangerous `prefix` and some suffix, which indicates an
 * intent to prevent a vulnerability of kind `kind`.
 *
 * `kind` is one of "path injection", "HTML element injection" or "HTML attribute injection".
 */
predicate matchesDangerousPrefix(EmptyReplaceRegExpTerm t, string prefix, string kind) {
  prefix = getADangerousMatchedPrefix(t) and
  (
    kind = "HTML element injection" and
    (
      // specific tags
      // the `cript|scrip` case has been observed in the wild several times
      prefix = "<" + ["iframe", "script", "cript", "scrip", "style"]
      or
      // comments
      prefix = "<!--" and
      // explicit comment content mentions make this an unlikely sanitizer
      not t.getSuccessor*().getAMatchedString().regexpMatch("(?is).*[a-z0-9_].*")
    )
    or
    kind = "path injection" and
    // upwards navigation
    prefix = ["/..", "../"] and
    // explicit path name mentions make this an unlikely sanitizer
    not t.getSuccessor*().getAMatchedString().regexpMatch("(?is).*[a-z0-9_-].*")
  )
  or
  kind = "HTML attribute injection" and
  prefix =
    [
      // ordinary event handler prefix
      "on",
      // angular prefixes
      "ng-", "ng:", "data-ng-", "x-ng-"
    ] and
  (
    // regexp-based matching: `on[a-z]+`
    exists(EmptyReplaceRegExpTerm first | first = t.getAChild() |
      isCommonWordMatcher(first.getSuccessor()) and
      first.getConstantValue().regexpMatch("(?i)[^a-z]*" + prefix)
    )
    or
    // explicit matching: `onclick` and `ng-bind`
    t.getAMatchedString().regexpMatch("(?i)" + prefix + "[a-z]+")
  )
}
|
||||
|
||||
/**
 * Holds if `t` is a common pattern for matching words: a quantified term whose
 * first child is a letter range such as `[a-z]`, or `\w`, or a class containing `\w`.
 */
predicate isCommonWordMatcher(RegExpTerm t) {
  exists(RegExpTerm body | body = t.(RegExpQuantifier).getChild(0) |
    // \w+ or [\w]+
    exists(RegExpCharacterClassEscape esc |
      esc = [body, body.(RegExpCharacterClass).getAChild()] and
      esc.getValue() = "w"
    )
    or
    // [a-z]+ and similar
    exists(RegExpCharacterRange letters |
      letters = body.(RegExpCharacterClass).getAChild() and
      letters.isRange(["a", "A"], ["z", "Z"])
    )
  )
}
|
||||
private import semmle.javascript.security.IncompleteMultiCharacterSanitization
|
||||
|
||||
from
|
||||
StringReplaceCall replace, EmptyReplaceRegExpTerm regexp, EmptyReplaceRegExpTerm dangerous,
|
||||
|
||||
@@ -0,0 +1,159 @@
|
||||
/**
|
||||
* Provides shared predicates for reasoning about improper multi-character sanitization.
|
||||
*/
|
||||
|
||||
import IncompleteMultiCharacterSanitizationSpecific
|
||||
|
||||
/**
 * A string prefix that may be dangerous to sanitize explicitly.
 *
 * Note that this class exists solely as a (necessary) optimization for this query.
 */
private class DangerousPrefix extends string {
  DangerousPrefix() {
    // upwards path navigation and the HTML comment opener
    this = ["/..", "../", "<!--"]
    or
    // the opening of selected HTML tags
    this = "<" + ["iframe", "script", "cript", "scrip", "style"]
  }
}
|
||||
|
||||
/**
 * Any substring of a `DangerousPrefix`.
 */
private class DangerousPrefixSubstring extends string {
  DangerousPrefixSubstring() {
    exists(DangerousPrefix prefix, int lo, int hi |
      lo = [0 .. prefix.length()] and
      hi = [0 .. prefix.length()] and
      this = prefix.substring(lo, hi)
    )
  }
}
|
||||
|
||||
/**
 * Gets a piece of a dangerous prefix that the term `t` matches on its own.
 */
pragma[noinline]
private DangerousPrefixSubstring getADangerousMatchedChar(EmptyReplaceRegExpTerm t) {
  // a nullable term may match the empty string
  result = "" and
  t.isNullable()
  or
  t.getAMatchedString() = result
  or
  // `.` stands for any single character
  result.length() = 1 and
  t instanceof RegExpDot
  or
  // A substring matched by some character class. This is only used to match the "word" part of a HTML tag (e.g. "iframe" in "<iframe").
  exists(ReDoSUtil::CharacterClass charClass | charClass = ReDoSUtil::getCanonicalCharClass(t) |
    result.regexpMatch("\\w") and
    charClass.matches(result) and
    // excluding character classes that match ">" (e.g. /<[^<]*>/), as these might consume nested HTML tags, and thus prevent the dangerous pattern this query is looking for.
    not charClass.matches(">")
  )
  or
  // optional, repeated, grouped and alternative terms match whatever one of their children matches
  result = getADangerousMatchedChar(t.getAChild()) and
  (
    t instanceof RegExpAlt or
    t instanceof RegExpGroup or
    t instanceof RegExpPlus or
    t instanceof RegExpStar or
    t instanceof RegExpOpt
  )
}
|
||||
|
||||
/**
 * Gets a dangerous prefix that is in the prefix language of `t`,
 * where every term preceding `t` is nullable.
 */
private DangerousPrefix getADangerousMatchedPrefix(EmptyReplaceRegExpTerm t) {
  forall(EmptyReplaceRegExpTerm earlier | earlier = t.getPredecessor+() | earlier.isNullable()) and
  result = getADangerousMatchedPrefixSubstring(t)
}
|
||||
|
||||
/**
 * Gets a substring of a dangerous prefix that is matched by the sequence of terms
 * beginning at `t` (lookarounds are ignored).
 *
 * Not every kind of `RegExpTerm` is supported, so the language considered for `t` is slightly restricted.
 */
private DangerousPrefixSubstring getADangerousMatchedPrefixSubstring(EmptyReplaceRegExpTerm t) {
  // `t` alone
  result = getADangerousMatchedChar(t)
  or
  // `t` followed by what its successor matches
  result = getADangerousMatchedChar(t) + getADangerousMatchedPrefixSubstring(t.getSuccessor())
  or
  // loop around for repetitions (only considering alphanumeric characters in the repetition)
  result = getADangerousMatchedPrefixSubstring(t) + t.(RepetitionMatcher).getAChar()
}
|
||||
|
||||
/**
 * A `*` or `+` repetition with a child that matches a string satisfying `\w`
 * taken from a dangerous prefix.
 */
private class RepetitionMatcher extends EmptyReplaceRegExpTerm {
  string char;

  pragma[noinline]
  RepetitionMatcher() {
    char.regexpMatch("\\w") and
    char = getADangerousMatchedChar(this.getAChild()) and
    (this instanceof RegExpStar or this instanceof RegExpPlus)
  }

  /** Gets a string matching `\w` that a child of this repetition matches. */
  pragma[noinline]
  string getAChar() { result = char }
}
|
||||
|
||||
/**
 * Holds if `t` may match the dangerous `prefix` and some suffix, which indicates an
 * intent to prevent a vulnerability of kind `kind`.
 *
 * `kind` is one of "path injection", "HTML element injection" or "HTML attribute injection".
 */
predicate matchesDangerousPrefix(EmptyReplaceRegExpTerm t, string prefix, string kind) {
  prefix = getADangerousMatchedPrefix(t) and
  (
    kind = "HTML element injection" and
    (
      // specific tags
      // the `cript|scrip` case has been observed in the wild several times
      prefix = "<" + ["iframe", "script", "cript", "scrip", "style"]
      or
      // comments
      prefix = "<!--" and
      // A regex that spells out textual content of an HTML comment is unlikely to be used as a sanitizer.
      not t.getSuccessor*().getAMatchedString().regexpMatch("(?is).*[a-z0-9_].*")
    )
    or
    kind = "path injection" and
    prefix = ["/..", "../"] and
    // A regex that spells out explicit path components is unlikely to be used as a sanitizer.
    not t.getSuccessor*().getAMatchedString().regexpMatch("(?is).*[a-z0-9_-].*")
  )
  or
  kind = "HTML attribute injection" and
  prefix =
    [
      // ordinary event handler prefix
      "on",
      // angular prefixes
      "ng-", "ng:", "data-ng-", "x-ng-"
    ] and
  (
    // regexp-based matching: `on[a-z]+`
    exists(EmptyReplaceRegExpTerm first | first = t.getAChild() |
      isCommonWordMatcher(first.getSuccessor()) and
      first.getAMatchedString().regexpMatch("(?i)[^a-z]*" + prefix)
    )
    or
    // explicit matching: `onclick` and `ng-bind`
    t.getAMatchedString().regexpMatch("(?i)" + prefix + "[a-z]+")
  )
}
|
||||
|
||||
/**
 * Holds if `t` is a common pattern for matching words: a quantified term whose
 * first child is a letter range such as `[a-z]`, or `\w`, or a class containing `\w`.
 */
private predicate isCommonWordMatcher(RegExpTerm t) {
  exists(RegExpTerm body | body = t.(RegExpQuantifier).getChild(0) |
    // \w+ or [\w]+
    exists(RegExpCharacterClassEscape esc |
      esc = [body, body.(RegExpCharacterClass).getAChild()] and
      esc.getValue() = "w"
    )
    or
    // [a-z]+ and similar
    exists(RegExpCharacterRange letters |
      letters = body.(RegExpCharacterClass).getAChild() and
      letters.isRange(["a", "A"], ["z", "Z"])
    )
  )
}
|
||||
@@ -2,180 +2,10 @@
|
||||
* Provides predicates for reasoning about improper multi-character sanitization.
|
||||
*/
|
||||
|
||||
private import ruby
|
||||
private import codeql.ruby.regexp.RegExpTreeView as RETV
|
||||
private import codeql.ruby.security.performance.ReDoSUtil as ReDoSUtil
|
||||
private import codeql.ruby.DataFlow
|
||||
private import codeql.ruby.frameworks.core.String
|
||||
private import codeql.ruby.dataflow.internal.DataFlowDispatch
|
||||
|
||||
/**
 * A regexp term that is part of the pattern of a string substitution call
 * whose replacement string is the empty string.
 */
class EmptyReplaceRegExpTerm extends RETV::RegExpTerm {
  private StringSubstitutionCall call;

  EmptyReplaceRegExpTerm() {
    // the term is the pattern's top-level term or one of its descendants
    this = call.getPatternRegExp().getRegExpTerm().getAChild*() and
    call.getReplacementString() = ""
  }

  /**
   * Gets the substitution call whose pattern contains this regexp term.
   */
  StringSubstitutionCall getCall() { result = call }
}
|
||||
|
||||
/**
 * A string prefix that may be dangerous to sanitize explicitly.
 *
 * Note that this class exists solely as a (necessary) optimization for this query.
 */
private class DangerousPrefix extends string {
  DangerousPrefix() {
    // upwards path navigation and the HTML comment opener
    this = ["/..", "../", "<!--"]
    or
    // the opening of selected HTML tags
    this = "<" + ["iframe", "script", "cript", "scrip", "style"]
  }
}
|
||||
|
||||
/**
 * Any substring of a `DangerousPrefix`.
 */
private class DangerousPrefixSubstring extends string {
  DangerousPrefixSubstring() {
    exists(DangerousPrefix prefix, int lo, int hi |
      lo = [0 .. prefix.length()] and
      hi = [0 .. prefix.length()] and
      this = prefix.substring(lo, hi)
    )
  }
}
|
||||
|
||||
/**
 * Gets a dangerous prefix that is in the prefix language of `t`,
 * where every term preceding `t` is nullable.
 */
private DangerousPrefix getADangerousMatchedPrefix(EmptyReplaceRegExpTerm t) {
  forall(EmptyReplaceRegExpTerm earlier | earlier = t.getPredecessor+() | earlier.isNullable()) and
  result = getADangerousMatchedPrefixSubstring(t)
}
|
||||
|
||||
/**
 * Gets a piece of a dangerous prefix that the term `t` matches on its own.
 */
pragma[noinline]
private DangerousPrefixSubstring getADangerousMatchedChar(EmptyReplaceRegExpTerm t) {
  // a nullable term may match the empty string
  result = "" and
  t.isNullable()
  or
  t.getAMatchedString() = result
  or
  // `.` stands for any single character
  result.length() = 1 and
  t instanceof RETV::RegExpDot
  or
  // A substring matched by some character class. This is only used to match the "word" part of a HTML tag (e.g. "iframe" in "<iframe").
  exists(ReDoSUtil::CharacterClass charClass | charClass = ReDoSUtil::getCanonicalCharClass(t) |
    result.regexpMatch("\\w") and
    charClass.matches(result) and
    // excluding character classes that match ">" (e.g. /<[^<]*>/), as these might consume nested HTML tags, and thus prevent the dangerous pattern this query is looking for.
    not charClass.matches(">")
  )
  or
  // optional, repeated, grouped and alternative terms match whatever one of their children matches
  result = getADangerousMatchedChar(t.getAChild()) and
  (
    t instanceof RETV::RegExpAlt or
    t instanceof RETV::RegExpGroup or
    t instanceof RETV::RegExpPlus or
    t instanceof RETV::RegExpStar or
    t instanceof RETV::RegExpOpt
  )
}
|
||||
|
||||
/**
 * Gets a substring of a dangerous prefix that is matched by the sequence of terms
 * beginning at `t` (lookarounds are ignored).
 *
 * Not every kind of `RegExpTerm` is supported, so the language considered for `t` is slightly restricted.
 */
private DangerousPrefixSubstring getADangerousMatchedPrefixSubstring(EmptyReplaceRegExpTerm t) {
  // `t` alone
  result = getADangerousMatchedChar(t)
  or
  // `t` followed by what its successor matches
  result = getADangerousMatchedChar(t) + getADangerousMatchedPrefixSubstring(t.getSuccessor())
  or
  // loop around for repetitions (only considering alphanumeric characters in the repetition)
  result = getADangerousMatchedPrefixSubstring(t) + t.(RepetitionMatcher).getAChar()
}
|
||||
|
||||
/**
 * A `*` or `+` repetition with a child that matches a string satisfying `\w`
 * taken from a dangerous prefix.
 */
private class RepetitionMatcher extends EmptyReplaceRegExpTerm {
  string char;

  pragma[noinline]
  RepetitionMatcher() {
    char.regexpMatch("\\w") and
    char = getADangerousMatchedChar(this.getAChild()) and
    (this instanceof RETV::RegExpStar or this instanceof RETV::RegExpPlus)
  }

  /** Gets a string matching `\w` that a child of this repetition matches. */
  pragma[noinline]
  string getAChar() { result = char }
}
|
||||
|
||||
/**
 * Holds if `t` may match the dangerous `prefix` and some suffix, which indicates an
 * intent to prevent a vulnerability of kind `kind`.
 *
 * `kind` is one of "path injection", "HTML element injection" or "HTML attribute injection".
 */
private predicate matchesDangerousPrefix(EmptyReplaceRegExpTerm t, string prefix, string kind) {
  prefix = getADangerousMatchedPrefix(t) and
  (
    kind = "HTML element injection" and
    (
      // specific tags
      // the `cript|scrip` case has been observed in the wild several times
      prefix = "<" + ["iframe", "script", "cript", "scrip", "style"]
      or
      // comments
      prefix = "<!--" and
      // A regex that spells out textual content of an HTML comment is unlikely to be used as a sanitizer.
      not t.getSuccessor*().getAMatchedString().regexpMatch("(?is).*[a-z0-9_].*")
    )
    or
    kind = "path injection" and
    prefix = ["/..", "../"] and
    // A regex that spells out explicit path components is unlikely to be used as a sanitizer.
    not t.getSuccessor*().getAMatchedString().regexpMatch("(?is).*[a-z0-9_-].*")
  )
  or
  kind = "HTML attribute injection" and
  prefix =
    [
      // ordinary event handler prefix
      "on",
      // angular prefixes
      "ng-", "ng:", "data-ng-", "x-ng-"
    ] and
  (
    // regexp-based matching: `on[a-z]+`
    exists(EmptyReplaceRegExpTerm first | first = t.getAChild() |
      isCommonWordMatcher(first.getSuccessor()) and
      first.getAMatchedString().regexpMatch("(?i)[^a-z]*" + prefix)
    )
    or
    // explicit matching: `onclick` and `ng-bind`
    t.getAMatchedString().regexpMatch("(?i)" + prefix + "[a-z]+")
  )
}
|
||||
|
||||
/**
 * Holds if `t` is a common pattern for matching words: a quantified term whose
 * first child is a letter range such as `[a-z]`, or `\w`, or a class containing `\w`.
 */
private predicate isCommonWordMatcher(RETV::RegExpTerm t) {
  exists(RETV::RegExpTerm body | body = t.(RETV::RegExpQuantifier).getChild(0) |
    // \w+ or [\w]+
    exists(RETV::RegExpCharacterClassEscape esc |
      esc = [body, body.(RETV::RegExpCharacterClass).getAChild()] and
      esc.getValue() = "w"
    )
    or
    // [a-z]+ and similar
    exists(RETV::RegExpCharacterRange letters |
      letters = body.(RETV::RegExpCharacterClass).getAChild() and
      letters.isRange(["a", "A"], ["z", "Z"])
    )
  )
}
|
||||
private import codeql.ruby.security.IncompleteMultiCharacterSanitization
|
||||
|
||||
/**
|
||||
* Holds if `replace` has a pattern argument containing a regular expression
|
||||
@@ -199,8 +29,8 @@ predicate hasResult(
|
||||
matchesDangerousPrefix(other, _, kind) and
|
||||
not other.isNullable()
|
||||
) and
|
||||
not exists(RETV::RegExpCaret c | regexp = c.getRootTerm()) and
|
||||
not exists(RETV::RegExpDollar d | regexp = d.getRootTerm()) and
|
||||
not exists(RegExpCaret c | regexp = c.getRootTerm()) and
|
||||
not exists(RegExpDollar d | regexp = d.getRootTerm()) and
|
||||
// Don't flag replace operations that are called repeatedly in a loop, as they can actually work correctly.
|
||||
not replace.flowsTo(replace.getReceiver+())
|
||||
)
|
||||
|
||||
@@ -0,0 +1,24 @@
|
||||
/**
|
||||
* Provides language-specific predicates for reasoning about improper multi-character sanitization.
|
||||
*/
|
||||
|
||||
private import codeql.ruby.frameworks.core.String
|
||||
import codeql.ruby.regexp.RegExpTreeView
|
||||
import codeql.ruby.security.performance.ReDoSUtil as ReDoSUtil
|
||||
|
||||
/**
 * A regexp term that is part of the pattern of a string substitution call
 * whose replacement string is the empty string.
 */
class EmptyReplaceRegExpTerm extends RegExpTerm {
  private StringSubstitutionCall call;

  EmptyReplaceRegExpTerm() {
    // the term is the pattern's top-level term or one of its descendants
    this = call.getPatternRegExp().getRegExpTerm().getAChild*() and
    call.getReplacementString() = ""
  }

  /**
   * Gets the substitution call whose pattern contains this regexp term.
   */
  StringSubstitutionCall getCall() { result = call }
}
|
||||
@@ -4,6 +4,7 @@
|
||||
|
||||
import ruby
|
||||
import codeql.ruby.security.IncompleteMultiCharacterSanitizationQuery as Query
|
||||
import codeql.ruby.security.IncompleteMultiCharacterSanitizationSpecific as Specific
|
||||
import codeql.ruby.frameworks.core.String
|
||||
import TestUtilities.InlineExpectationsTest
|
||||
|
||||
@@ -20,7 +21,7 @@ class Test extends InlineExpectationsTest {
|
||||
|
||||
predicate hasResult(Location location, string element, string value) {
|
||||
exists(
|
||||
StringSubstitutionCall replace, Query::EmptyReplaceRegExpTerm dangerous, string prefix,
|
||||
StringSubstitutionCall replace, Specific::EmptyReplaceRegExpTerm dangerous, string prefix,
|
||||
string kind
|
||||
|
|
||||
replace.getLocation() = location and
|
||||
|
||||
Reference in New Issue
Block a user