move getACommonTld to the shared pack

This commit is contained in:
erik-krogh
2022-12-15 14:53:46 +01:00
parent f67d0bc8c0
commit 355499ea52
11 changed files with 24 additions and 21 deletions

View File

@@ -999,11 +999,12 @@ predicate isInterpretedAsRegExp(DataFlow::Node source) {
/**
* Provides utility predicates related to regular expressions.
*/
module RegExpPatterns {
deprecated module RegExpPatterns {
/**
* Gets a pattern that matches common top-level domain names in lower case.
* DEPRECATED: use the predicate of the same name from the `HostnameRegexp` module in the shared `regex` pack instead.
*/
string getACommonTld() {
deprecated string getACommonTld() {
// according to ranking by http://google.com/search?q=site:.<<TLD>>
result = "(?:com|org|edu|gov|uk|net|io)(?![a-z0-9])"
}

View File

@@ -12,8 +12,6 @@ private module Impl implements Shared::HostnameRegexpSig<TreeImpl> {
class DataFlowNode = JS::DataFlow::Node;
class RegExpPatternSource = RegExp::RegExpPatternSource;
string getACommonTld() { result = RegExp::RegExpPatterns::getACommonTld() }
}
import Shared::Make<TreeImpl, Impl>

View File

@@ -31,7 +31,7 @@ query predicate problems(
(
// target contains a domain on a common TLD, and perhaps some other URL components
target
.regexpMatch("(?i)([a-z]*:?//)?\\.?([a-z0-9-]+\\.)+" + RegExpPatterns::getACommonTld() +
.regexpMatch("(?i)([a-z]*:?//)?\\.?([a-z0-9-]+\\.)+" + HostnameRegexp::getACommonTld() +
"(:[0-9]+)?/?")
or
// target is a HTTP URL to a domain on any TLD

View File

@@ -3,3 +3,5 @@ import semmle.javascript.dataflow.InferredTypes
/**
 * Holds if the data-flow node `node` may evaluate to the string `value`.
 *
 * Thin top-level wrapper that delegates to the member predicate of the
 * same name on `DataFlow::Node`.
 */
predicate mayHaveStringValue(DataFlow::Node node, string value) {
  node.mayHaveStringValue(value)
}
import semmle.javascript.security.regexp.HostnameRegexp as HostnameRegexp

View File

@@ -9,11 +9,12 @@ private import semmle.python.dataflow.new.DataFlow
/**
* Provides utility predicates related to regular expressions.
*/
module RegExpPatterns {
deprecated module RegExpPatterns {
/**
* Gets a pattern that matches common top-level domain names in lower case.
* DEPRECATED: use the predicate of the same name from the `HostnameRegexp` module in the shared `regex` pack instead.
*/
string getACommonTld() {
deprecated string getACommonTld() {
// according to ranking by http://google.com/search?q=site:.<<TLD>>
result = "(?:com|org|edu|gov|uk|net|io)(?![a-z0-9])"
}

View File

@@ -13,8 +13,6 @@ private module Impl implements Shared::HostnameRegexpSig<TreeImpl> {
class DataFlowNode = DataFlow::Node;
class RegExpPatternSource = Regexp::RegExpPatternSource;
string getACommonTld() { result = Regexp::RegExpPatterns::getACommonTld() }
}
import Shared::Make<TreeImpl, Impl>

View File

@@ -15,11 +15,12 @@ private import codeql.ruby.ApiGraphs
/**
* Provides utility predicates related to regular expressions.
*/
module RegExpPatterns {
deprecated module RegExpPatterns {
/**
* Gets a pattern that matches common top-level domain names in lower case.
* DEPRECATED: use the predicate of the same name from the `HostnameRegexp` module in the shared `regex` pack instead.
*/
string getACommonTld() {
deprecated string getACommonTld() {
// according to ranking by http://google.com/search?q=site:.<<TLD>>
result = "(?:com|org|edu|gov|uk|net|io)(?![a-z0-9])"
}

View File

@@ -12,8 +12,6 @@ private module Impl implements Shared::HostnameRegexpSig<TreeImpl> {
class DataFlowNode = DataFlow::Node;
class RegExpPatternSource = Regexp::RegExpPatternSource;
string getACommonTld() { result = Regexp::RegExpPatterns::getACommonTld() }
}
import Shared::Make<TreeImpl, Impl>

View File

@@ -31,7 +31,7 @@ query predicate problems(
(
// target contains a domain on a common TLD, and perhaps some other URL components
target
.regexpMatch("(?i)([a-z]*:?//)?\\.?([a-z0-9-]+\\.)+" + RegExpPatterns::getACommonTld() +
.regexpMatch("(?i)([a-z]*:?//)?\\.?([a-z0-9-]+\\.)+" + HostnameRegexp::getACommonTld() +
"(:[0-9]+)?/?")
or
// target is a HTTP URL to a domain on any TLD

View File

@@ -1,8 +1,9 @@
import codeql.ruby.DataFlow
import codeql.ruby.StringOps
import codeql.ruby.Regexp::RegExpPatterns as RegExpPatterns
/**
 * Holds if the data-flow node `node` may evaluate to the string `value`.
 *
 * The string is taken from the constant value of the expression
 * underlying `node`.
 */
predicate mayHaveStringValue(DataFlow::Node node, string value) {
  value = node.asExpr().getConstantValue().getString()
}
import codeql.ruby.security.regexp.HostnameRegexp as HostnameRegexp

View File

@@ -10,11 +10,6 @@ private import RegexTreeView
* analysis on regular expressions matching hostnames.
*/
signature module HostnameRegexpSig<RegexTreeViewSig TreeImpl> {
/**
* Gets a pattern that matches common top-level domain names in lower case.
*/
string getACommonTld();
/** A node in the data-flow graph. */
class DataFlowNode;
@@ -97,7 +92,7 @@ module Make<RegexTreeViewSig TreeImpl, HostnameRegexpSig<TreeImpl> Specific> {
seq.getChild(i)
.(RegExpConstant)
.getValue()
.regexpMatch("(?i)" + Specific::getACommonTld() + "(:\\d+)?([/?#].*)?") and
.regexpMatch("(?i)" + getACommonTld() + "(:\\d+)?([/?#].*)?") and
isDotLike(seq.getChild(i - 1)) and
not (i = 1 and matchesBeginningOfString(seq))
}
@@ -265,4 +260,12 @@ module Make<RegexTreeViewSig TreeImpl, HostnameRegexpSig<TreeImpl> Specific> {
private RegExpTerm getLastChild(RegExpTerm parent) {
result = max(RegExpTerm child, int i | child = parent.getChild(i) | child order by i)
}
/**
* Gets a pattern that matches common top-level domain names in lower case.
*
* The result is a regular-expression fragment, not a complete pattern:
* a non-capturing alternation of the TLDs `com`, `org`, `edu`, `gov`,
* `uk`, `net` and `io`, followed by a negative lookahead that rejects a
* directly following lower-case letter or digit (so a longer label that
* merely starts with one of these TLDs is not matched).
* The fragment itself only lists lower-case names; a caller that wants
* case-insensitive matching has to add a flag such as `(?i)` itself.
*/
string getACommonTld() {
// The set of TLDs follows the ranking by http://google.com/search?q=site:.<<TLD>>
result = "(?:com|org|edu|gov|uk|net|io)(?![a-z0-9])"
}
}