mirror of
https://github.com/github/codeql.git
synced 2026-05-03 12:45:27 +02:00
move getACommonTld into a utility module without parameters
This commit is contained in:
@@ -3,6 +3,7 @@
|
||||
*/
|
||||
|
||||
private import IncompleteUrlSubstringSanitizationSpecific
|
||||
private import codeql.regex.HostnameRegexp::Utils
|
||||
|
||||
/**
|
||||
* A check on a string for whether it contains a given substring, possibly with restrictions on the location of the substring.
|
||||
@@ -30,9 +31,7 @@ query predicate problems(
|
||||
mayHaveStringValue(substring, target) and
|
||||
(
|
||||
// target contains a domain on a common TLD, and perhaps some other URL components
|
||||
target
|
||||
.regexpMatch("(?i)([a-z]*:?//)?\\.?([a-z0-9-]+\\.)+" + HostnameRegexp::getACommonTld() +
|
||||
"(:[0-9]+)?/?")
|
||||
target.regexpMatch("(?i)([a-z]*:?//)?\\.?([a-z0-9-]+\\.)+" + getACommonTld() + "(:[0-9]+)?/?")
|
||||
or
|
||||
// target is a HTTP URL to a domain on any TLD
|
||||
target.regexpMatch("(?i)https?://([a-z0-9-]+\\.)+([a-z]+)(:[0-9]+)?/?")
|
||||
|
||||
@@ -4,4 +4,4 @@ import semmle.javascript.dataflow.InferredTypes
|
||||
/** Holds if `node` may evaluate to `value` */
|
||||
predicate mayHaveStringValue(DataFlow::Node node, string value) { node.mayHaveStringValue(value) }
|
||||
|
||||
import semmle.javascript.security.regexp.HostnameRegexp as HostnameRegexp
|
||||
import codeql.regex.HostnameRegexp::Utils
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
*/
|
||||
|
||||
private import IncompleteUrlSubstringSanitizationSpecific
|
||||
private import codeql.regex.HostnameRegexp::Utils
|
||||
|
||||
/**
|
||||
* A check on a string for whether it contains a given substring, possibly with restrictions on the location of the substring.
|
||||
@@ -30,9 +31,7 @@ query predicate problems(
|
||||
mayHaveStringValue(substring, target) and
|
||||
(
|
||||
// target contains a domain on a common TLD, and perhaps some other URL components
|
||||
target
|
||||
.regexpMatch("(?i)([a-z]*:?//)?\\.?([a-z0-9-]+\\.)+" + HostnameRegexp::getACommonTld() +
|
||||
"(:[0-9]+)?/?")
|
||||
target.regexpMatch("(?i)([a-z]*:?//)?\\.?([a-z0-9-]+\\.)+" + getACommonTld() + "(:[0-9]+)?/?")
|
||||
or
|
||||
// target is a HTTP URL to a domain on any TLD
|
||||
target.regexpMatch("(?i)https?://([a-z0-9-]+\\.)+([a-z]+)(:[0-9]+)?/?")
|
||||
|
||||
@@ -5,5 +5,3 @@ import codeql.ruby.StringOps
|
||||
predicate mayHaveStringValue(DataFlow::Node node, string value) {
|
||||
node.asExpr().getConstantValue().getString() = value
|
||||
}
|
||||
|
||||
import codeql.ruby.security.regexp.HostnameRegexp as HostnameRegexp
|
||||
|
||||
@@ -11,7 +11,10 @@ private import RegexTreeView
|
||||
*/
|
||||
signature module HostnameRegexpSig<RegexTreeViewSig TreeImpl> {
|
||||
/** A node in the data-flow graph. */
|
||||
class DataFlowNode;
|
||||
class DataFlowNode {
|
||||
/** Gets a string representation of this node. */
|
||||
string toString();
|
||||
}
|
||||
|
||||
/** A node in the data-flow graph that represents a regular expression pattern. */
|
||||
class RegExpPatternSource extends DataFlowNode {
|
||||
@@ -28,12 +31,26 @@ signature module HostnameRegexpSig<RegexTreeViewSig TreeImpl> {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Utility predicates and classes that doesn't depend on any signature.
|
||||
*/
|
||||
module Utils {
|
||||
/**
|
||||
* Gets a pattern that matches common top-level domain names in lower case.
|
||||
*/
|
||||
string getACommonTld() {
|
||||
// according to ranking by http://google.com/search?q=site:.<<TLD>>
|
||||
result = "(?:com|org|edu|gov|uk|net|io)(?![a-z0-9])"
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Classes and predicates implementing an analysis on regular expressions
|
||||
* that match URLs and hostname patterns.
|
||||
*/
|
||||
module Make<RegexTreeViewSig TreeImpl, HostnameRegexpSig<TreeImpl> Specific> {
|
||||
private import TreeImpl
|
||||
import Utils
|
||||
|
||||
/**
|
||||
* Holds if the given constant is unlikely to occur in the origin part of a URL.
|
||||
@@ -213,30 +230,6 @@ module Make<RegexTreeViewSig TreeImpl, HostnameRegexpSig<TreeImpl> Specific> {
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `regexp` is a regular expression that is likely to match a hostname,
|
||||
* but the pattern is incomplete and may match more hosts than intended.
|
||||
*/
|
||||
predicate incompleteHostnameRegExp(
|
||||
RegExpSequence hostSequence, string message, Specific::DataFlowNode aux, string label
|
||||
) {
|
||||
exists(Specific::RegExpPatternSource re, RegExpTerm regexp, string msg, string kind |
|
||||
regexp = re.getRegExpTerm() and
|
||||
isIncompleteHostNameRegExpPattern(regexp, hostSequence, msg) and
|
||||
(
|
||||
if re.getAParse() != re
|
||||
then (
|
||||
kind = "string, which is used as a regular expression $@," and
|
||||
aux = re.getAParse()
|
||||
) else (
|
||||
kind = "regular expression" and aux = re
|
||||
)
|
||||
)
|
||||
|
|
||||
message = "This " + kind + " " + msg and label = "here"
|
||||
)
|
||||
}
|
||||
|
||||
/** Holds if `term` is one of the transitive left children of a regexp. */
|
||||
predicate isLeftArmTerm(RegExpTerm term) {
|
||||
term.isRootTerm()
|
||||
@@ -262,10 +255,26 @@ module Make<RegexTreeViewSig TreeImpl, HostnameRegexpSig<TreeImpl> Specific> {
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a pattern that matches common top-level domain names in lower case.
|
||||
* Holds if `regexp` is a regular expression that is likely to match a hostname,
|
||||
* but the pattern is incomplete and may match more hosts than intended.
|
||||
*/
|
||||
string getACommonTld() {
|
||||
// according to ranking by http://google.com/search?q=site:.<<TLD>>
|
||||
result = "(?:com|org|edu|gov|uk|net|io)(?![a-z0-9])"
|
||||
predicate incompleteHostnameRegExp(
|
||||
RegExpSequence hostSequence, string message, Specific::DataFlowNode aux, string label
|
||||
) {
|
||||
exists(Specific::RegExpPatternSource re, RegExpTerm regexp, string msg, string kind |
|
||||
regexp = re.getRegExpTerm() and
|
||||
isIncompleteHostNameRegExpPattern(regexp, hostSequence, msg) and
|
||||
(
|
||||
if re.getAParse() != re
|
||||
then (
|
||||
kind = "string, which is used as a regular expression $@," and
|
||||
aux = re.getAParse()
|
||||
) else (
|
||||
kind = "regular expression" and aux = re
|
||||
)
|
||||
)
|
||||
|
|
||||
message = "This " + kind + " " + msg and label = "here"
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user