move getACommonTld into a utility module without parameters

This commit is contained in:
erik-krogh
2022-12-17 22:55:49 +01:00
parent ba7321ac5c
commit 35e8d6afd4
5 changed files with 43 additions and 38 deletions

View File

@@ -3,6 +3,7 @@
*/
private import IncompleteUrlSubstringSanitizationSpecific
private import codeql.regex.HostnameRegexp::Utils
/**
* A check on a string for whether it contains a given substring, possibly with restrictions on the location of the substring.
@@ -30,9 +31,7 @@ query predicate problems(
mayHaveStringValue(substring, target) and
(
// target contains a domain on a common TLD, and perhaps some other URL components
target
.regexpMatch("(?i)([a-z]*:?//)?\\.?([a-z0-9-]+\\.)+" + HostnameRegexp::getACommonTld() +
"(:[0-9]+)?/?")
target.regexpMatch("(?i)([a-z]*:?//)?\\.?([a-z0-9-]+\\.)+" + getACommonTld() + "(:[0-9]+)?/?")
or
// target is a HTTP URL to a domain on any TLD
target.regexpMatch("(?i)https?://([a-z0-9-]+\\.)+([a-z]+)(:[0-9]+)?/?")

View File

@@ -4,4 +4,4 @@ import semmle.javascript.dataflow.InferredTypes
/** Holds if `node` may evaluate to `value` */
predicate mayHaveStringValue(DataFlow::Node node, string value) { node.mayHaveStringValue(value) }
import semmle.javascript.security.regexp.HostnameRegexp as HostnameRegexp
import codeql.regex.HostnameRegexp::Utils

View File

@@ -3,6 +3,7 @@
*/
private import IncompleteUrlSubstringSanitizationSpecific
private import codeql.regex.HostnameRegexp::Utils
/**
* A check on a string for whether it contains a given substring, possibly with restrictions on the location of the substring.
@@ -30,9 +31,7 @@ query predicate problems(
mayHaveStringValue(substring, target) and
(
// target contains a domain on a common TLD, and perhaps some other URL components
target
.regexpMatch("(?i)([a-z]*:?//)?\\.?([a-z0-9-]+\\.)+" + HostnameRegexp::getACommonTld() +
"(:[0-9]+)?/?")
target.regexpMatch("(?i)([a-z]*:?//)?\\.?([a-z0-9-]+\\.)+" + getACommonTld() + "(:[0-9]+)?/?")
or
// target is a HTTP URL to a domain on any TLD
target.regexpMatch("(?i)https?://([a-z0-9-]+\\.)+([a-z]+)(:[0-9]+)?/?")

View File

@@ -5,5 +5,3 @@ import codeql.ruby.StringOps
predicate mayHaveStringValue(DataFlow::Node node, string value) {
node.asExpr().getConstantValue().getString() = value
}
import codeql.ruby.security.regexp.HostnameRegexp as HostnameRegexp

View File

@@ -11,7 +11,10 @@ private import RegexTreeView
*/
signature module HostnameRegexpSig<RegexTreeViewSig TreeImpl> {
/** A node in the data-flow graph. */
class DataFlowNode;
class DataFlowNode {
/** Gets a string representation of this node. */
string toString();
}
/** A node in the data-flow graph that represents a regular expression pattern. */
class RegExpPatternSource extends DataFlowNode {
@@ -28,12 +31,26 @@ signature module HostnameRegexpSig<RegexTreeViewSig TreeImpl> {
}
}
/**
* Utility predicates and classes that doesn't depend on any signature.
*/
module Utils {
/**
* Gets a pattern that matches common top-level domain names in lower case.
*/
string getACommonTld() {
// according to ranking by http://google.com/search?q=site:.<<TLD>>
result = "(?:com|org|edu|gov|uk|net|io)(?![a-z0-9])"
}
}
/**
* Classes and predicates implementing an analysis on regular expressions
* that match URLs and hostname patterns.
*/
module Make<RegexTreeViewSig TreeImpl, HostnameRegexpSig<TreeImpl> Specific> {
private import TreeImpl
import Utils
/**
* Holds if the given constant is unlikely to occur in the origin part of a URL.
@@ -213,30 +230,6 @@ module Make<RegexTreeViewSig TreeImpl, HostnameRegexpSig<TreeImpl> Specific> {
)
}
/**
* Holds if `regexp` is a regular expression that is likely to match a hostname,
* but the pattern is incomplete and may match more hosts than intended.
*/
predicate incompleteHostnameRegExp(
RegExpSequence hostSequence, string message, Specific::DataFlowNode aux, string label
) {
exists(Specific::RegExpPatternSource re, RegExpTerm regexp, string msg, string kind |
regexp = re.getRegExpTerm() and
isIncompleteHostNameRegExpPattern(regexp, hostSequence, msg) and
(
if re.getAParse() != re
then (
kind = "string, which is used as a regular expression $@," and
aux = re.getAParse()
) else (
kind = "regular expression" and aux = re
)
)
|
message = "This " + kind + " " + msg and label = "here"
)
}
/** Holds if `term` is one of the transitive left children of a regexp. */
predicate isLeftArmTerm(RegExpTerm term) {
term.isRootTerm()
@@ -262,10 +255,26 @@ module Make<RegexTreeViewSig TreeImpl, HostnameRegexpSig<TreeImpl> Specific> {
}
/**
* Gets a pattern that matches common top-level domain names in lower case.
* Holds if `regexp` is a regular expression that is likely to match a hostname,
* but the pattern is incomplete and may match more hosts than intended.
*/
string getACommonTld() {
// according to ranking by http://google.com/search?q=site:.<<TLD>>
result = "(?:com|org|edu|gov|uk|net|io)(?![a-z0-9])"
predicate incompleteHostnameRegExp(
RegExpSequence hostSequence, string message, Specific::DataFlowNode aux, string label
) {
exists(Specific::RegExpPatternSource re, RegExpTerm regexp, string msg, string kind |
regexp = re.getRegExpTerm() and
isIncompleteHostNameRegExpPattern(regexp, hostSequence, msg) and
(
if re.getAParse() != re
then (
kind = "string, which is used as a regular expression $@," and
aux = re.getAParse()
) else (
kind = "regular expression" and aux = re
)
)
|
message = "This " + kind + " " + msg and label = "here"
)
}
}