JS/Ruby: share implementation of IncompleteUrlSubstringSanitization query

This commit is contained in:
Arthur Baars
2022-03-09 12:11:14 +01:00
parent 49b4fe77ad
commit 747c7f6b5e
7 changed files with 143 additions and 115 deletions

View File

@@ -508,5 +508,9 @@
"java/ql/lib/semmle/code/java/dataflow/internal/AccessPathSyntax.qll",
"javascript/ql/lib/semmle/javascript/frameworks/data/internal/AccessPathSyntax.qll",
"ruby/ql/lib/codeql/ruby/dataflow/internal/AccessPathSyntax.qll"
],
"IncompleteUrlSubstringSanitization": [
"javascript/ql/src/Security/CWE-020/IncompleteUrlSubstringSanitization.qll",
"ruby/ql/src/queries/security/cwe-020/IncompleteUrlSubstringSanitization.qll"
]
}

View File

@@ -11,60 +11,4 @@
* external/cwe/cwe-020
*/
import javascript
private import semmle.javascript.dataflow.InferredTypes
/**
* A check on a string for whether it contains a given substring, possibly with restrictions on the location of the substring.
*/
class SomeSubstringCheck extends DataFlow::Node {
DataFlow::Node substring;
SomeSubstringCheck() {
this.(StringOps::StartsWith).getSubstring() = substring or
this.(StringOps::Includes).getSubstring() = substring or
this.(StringOps::EndsWith).getSubstring() = substring
}
/**
* Gets the substring.
*/
DataFlow::Node getSubstring() { result = substring }
}
from SomeSubstringCheck check, DataFlow::Node substring, string target, string msg
where
substring = check.getSubstring() and
substring.mayHaveStringValue(target) and
(
// target contains a domain on a common TLD, and perhaps some other URL components
target
.regexpMatch("(?i)([a-z]*:?//)?\\.?([a-z0-9-]+\\.)+" + RegExpPatterns::commonTLD() +
"(:[0-9]+)?/?")
or
// target is a HTTP URL to a domain on any TLD
target.regexpMatch("(?i)https?://([a-z0-9-]+\\.)+([a-z]+)(:[0-9]+)?/?")
or
// target is a HTTP URL to a domain on any TLD with path elements, and the check is an includes check
check instanceof StringOps::Includes and
target.regexpMatch("(?i)https?://([a-z0-9-]+\\.)+([a-z]+)(:[0-9]+)?/[a-z0-9/_-]+")
) and
(
if check instanceof StringOps::StartsWith
then msg = "may be followed by an arbitrary host name"
else
if check instanceof StringOps::EndsWith
then msg = "may be preceded by an arbitrary host name"
else msg = "can be anywhere in the URL, and arbitrary hosts may come before or after it"
) and
// whitelist
not (
// the leading dot in a subdomain sequence makes the suffix-check safe (if it is performed on the host of the url)
check instanceof StringOps::EndsWith and
target.regexpMatch("(?i)\\.([a-z0-9-]+)(\\.[a-z0-9-]+)+")
or
// the trailing port or slash makes the prefix-check safe
check instanceof StringOps::StartsWith and
target.regexpMatch(".*(:[0-9]+|/)")
)
select check, "'$@' " + msg + ".", substring, target
import IncompleteUrlSubstringSanitization

View File

@@ -0,0 +1,62 @@
/**
* Incomplete URL substring sanitization
*/
private import IncompleteUrlSubstringSanitizationSpecific
/**
* A check on a string for whether it contains a given substring, possibly with restrictions on the location of the substring.
*/
class SomeSubstringCheck extends DataFlow::Node {
DataFlow::Node substring;
SomeSubstringCheck() {
this.(StringOps::StartsWith).getSubstring() = substring or
this.(StringOps::Includes).getSubstring() = substring or
this.(StringOps::EndsWith).getSubstring() = substring
}
/**
* Gets the substring.
*/
DataFlow::Node getSubstring() { result = substring }
}
/** Holds if there is an incomplete URL substring sanitization problem */
query predicate problems(
SomeSubstringCheck check, string msg, DataFlow::Node substring, string target
) {
substring = check.getSubstring() and
mayHaveStringValue(substring, target) and
(
// target contains a domain on a common TLD, and perhaps some other URL components
target
.regexpMatch("(?i)([a-z]*:?//)?\\.?([a-z0-9-]+\\.)+" + RegExpPatterns::commonTLD() +
"(:[0-9]+)?/?")
or
// target is a HTTP URL to a domain on any TLD
target.regexpMatch("(?i)https?://([a-z0-9-]+\\.)+([a-z]+)(:[0-9]+)?/?")
or
// target is a HTTP URL to a domain on any TLD with path elements, and the check is an includes check
check instanceof StringOps::Includes and
target.regexpMatch("(?i)https?://([a-z0-9-]+\\.)+([a-z]+)(:[0-9]+)?/[a-z0-9/_-]+")
) and
(
if check instanceof StringOps::StartsWith
then msg = "'$@' may be followed by an arbitrary host name."
else
if check instanceof StringOps::EndsWith
then msg = "'$@' may be preceded by an arbitrary host name."
else msg = "'$@' can be anywhere in the URL, and arbitrary hosts may come before or after it."
) and
// whitelist
not (
// the leading dot in a subdomain sequence makes the suffix-check safe (if it is performed on the host of the url)
check instanceof StringOps::EndsWith and
target.regexpMatch("(?i)\\.([a-z0-9-]+)(\\.[a-z0-9-]+)+")
or
// the trailing port or slash makes the prefix-check safe
check instanceof StringOps::StartsWith and
target.regexpMatch(".*(:[0-9]+|/)")
)
}

View File

@@ -0,0 +1,5 @@
import javascript
import semmle.javascript.dataflow.InferredTypes
/** Holds if `node` may evaluate to `value` */
predicate mayHaveStringValue(DataFlow::Node node, string value) { node.mayHaveStringValue(value) }

View File

@@ -11,61 +11,4 @@
* external/cwe/cwe-020
*/
import codeql.ruby.DataFlow
import codeql.ruby.StringOps
import codeql.ruby.security.performance.RegExpTreeView::RegExpPatterns as RegExpPatterns
/**
* A check on a string for whether it contains a given substring, possibly with restrictions on the location of the substring.
*/
class SomeSubstringCheck extends DataFlow::Node {
DataFlow::Node substring;
SomeSubstringCheck() {
this.(StringOps::StartsWith).getSubstring() = substring or
this.(StringOps::Includes).getSubstring() = substring or
this.(StringOps::EndsWith).getSubstring() = substring
}
/**
* Gets the substring.
*/
DataFlow::Node getSubstring() { result = substring }
}
from SomeSubstringCheck check, DataFlow::Node substring, string target, string msg
where
substring = check.getSubstring() and
substring.asExpr().getExpr().getConstantValue().getString() = target and
(
// target contains a domain on a common TLD, and perhaps some other URL components
target
.regexpMatch("(?i)([a-z]*:?//)?\\.?([a-z0-9-]+\\.)+" + RegExpPatterns::commonTLD() +
"(:[0-9]+)?/?")
or
// target is a HTTP URL to a domain on any TLD
target.regexpMatch("(?i)https?://([a-z0-9-]+\\.)+([a-z]+)(:[0-9]+)?/?")
or
// target is a HTTP URL to a domain on any TLD with path elements, and the check is an includes check
check instanceof StringOps::Includes and
target.regexpMatch("(?i)https?://([a-z0-9-]+\\.)+([a-z]+)(:[0-9]+)?/[a-z0-9/_-]+")
) and
(
if check instanceof StringOps::StartsWith
then msg = "may be followed by an arbitrary host name"
else
if check instanceof StringOps::EndsWith
then msg = "may be preceded by an arbitrary host name"
else msg = "can be anywhere in the URL, and arbitrary hosts may come before or after it"
) and
// whitelist
not (
// the leading dot in a subdomain sequence makes the suffix-check safe (if it is performed on the host of the url)
check instanceof StringOps::EndsWith and
target.regexpMatch("(?i)\\.([a-z0-9-]+)(\\.[a-z0-9-]+)+")
or
// the trailing port or slash makes the prefix-check safe
check instanceof StringOps::StartsWith and
target.regexpMatch(".*(:[0-9]+|/)")
)
select check, "'$@' " + msg + ".", substring, target
import IncompleteUrlSubstringSanitization

View File

@@ -0,0 +1,62 @@
/**
* Incomplete URL substring sanitization
*/
private import IncompleteUrlSubstringSanitizationSpecific
/**
* A check on a string for whether it contains a given substring, possibly with restrictions on the location of the substring.
*/
class SomeSubstringCheck extends DataFlow::Node {
DataFlow::Node substring;
SomeSubstringCheck() {
this.(StringOps::StartsWith).getSubstring() = substring or
this.(StringOps::Includes).getSubstring() = substring or
this.(StringOps::EndsWith).getSubstring() = substring
}
/**
* Gets the substring.
*/
DataFlow::Node getSubstring() { result = substring }
}
/** Holds if there is an incomplete URL substring sanitization problem */
query predicate problems(
SomeSubstringCheck check, string msg, DataFlow::Node substring, string target
) {
substring = check.getSubstring() and
mayHaveStringValue(substring, target) and
(
// target contains a domain on a common TLD, and perhaps some other URL components
target
.regexpMatch("(?i)([a-z]*:?//)?\\.?([a-z0-9-]+\\.)+" + RegExpPatterns::commonTLD() +
"(:[0-9]+)?/?")
or
// target is a HTTP URL to a domain on any TLD
target.regexpMatch("(?i)https?://([a-z0-9-]+\\.)+([a-z]+)(:[0-9]+)?/?")
or
// target is a HTTP URL to a domain on any TLD with path elements, and the check is an includes check
check instanceof StringOps::Includes and
target.regexpMatch("(?i)https?://([a-z0-9-]+\\.)+([a-z]+)(:[0-9]+)?/[a-z0-9/_-]+")
) and
(
if check instanceof StringOps::StartsWith
then msg = "'$@' may be followed by an arbitrary host name."
else
if check instanceof StringOps::EndsWith
then msg = "'$@' may be preceded by an arbitrary host name."
else msg = "'$@' can be anywhere in the URL, and arbitrary hosts may come before or after it."
) and
// whitelist
not (
// the leading dot in a subdomain sequence makes the suffix-check safe (if it is performed on the host of the url)
check instanceof StringOps::EndsWith and
target.regexpMatch("(?i)\\.([a-z0-9-]+)(\\.[a-z0-9-]+)+")
or
// the trailing port or slash makes the prefix-check safe
check instanceof StringOps::StartsWith and
target.regexpMatch(".*(:[0-9]+|/)")
)
}

View File

@@ -0,0 +1,8 @@
import codeql.ruby.DataFlow
import codeql.ruby.StringOps
import codeql.ruby.security.performance.RegExpTreeView::RegExpPatterns as RegExpPatterns
/** Holds if `node` may evaluate to `value` */
predicate mayHaveStringValue(DataFlow::Node node, string value) {
node.asExpr().getExpr().getConstantValue().getString() = value
}