mirror of
https://github.com/github/codeql.git
synced 2026-04-30 11:15:13 +02:00
Ruby: improve tracking of regular expressions
There are two flavours of `match?`. If the receiver of `match?` has type String, then the argument to `match?` is a regular expression. However, if the receiver of `match?` has type Regexp, then the argument is the text. The roles of receiver and argument flip depending on the type of the receiver; this caused a lot of false positives when looking for string-like literals that are used as regular expressions. This commit attempts to improve things by trying to determine whether the receiver is known to be of type Regexp. In such cases we know that the argument is unlikely to be a regular expression.
This commit is contained in:
@@ -114,7 +114,7 @@ class StdLibRegExpInterpretation extends RegExpInterpretation::Range {
|
||||
mce.getMethodName() = ["match", "match?"] and
|
||||
this = mce.getArgument(0) and
|
||||
// exclude https://ruby-doc.org/core-2.4.0/Regexp.html#method-i-match
|
||||
not mce.getReceiver().asExpr().getExpr() instanceof Ast::RegExpLiteral
|
||||
not mce.getReceiver() = trackRegexpType()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,6 +3,8 @@ private import codeql.ruby.ast.Literal as Ast
|
||||
private import codeql.ruby.DataFlow
|
||||
private import codeql.ruby.controlflow.CfgNodes
|
||||
private import codeql.ruby.dataflow.internal.tainttrackingforlibraries.TaintTrackingImpl
|
||||
private import codeql.ruby.typetracking.TypeTracker
|
||||
private import codeql.ruby.ApiGraphs
|
||||
|
||||
class RegExpConfiguration extends Configuration {
|
||||
RegExpConfiguration() { this = "RegExpConfiguration" }
|
||||
@@ -19,12 +21,26 @@ class RegExpConfiguration extends Configuration {
|
||||
override predicate isSink(DataFlow::Node sink) { sink instanceof RegExpInterpretation::Range }
|
||||
|
||||
override predicate isSanitizer(DataFlow::Node node) {
|
||||
// stop flow if `node` is receiver of
|
||||
// https://ruby-doc.org/core-2.4.0/String.html#method-i-match
|
||||
exists(DataFlow::CallNode mce |
|
||||
mce.getMethodName() = ["match", "match?"] and
|
||||
exists(DataFlow::CallNode mce | mce.getMethodName() = ["match", "match?"] |
|
||||
// receiver of https://ruby-doc.org/core-2.4.0/String.html#method-i-match
|
||||
node = mce.getReceiver() and
|
||||
mce.getArgument(0).asExpr().getExpr() instanceof Ast::RegExpLiteral
|
||||
mce.getArgument(0) = trackRegexpType()
|
||||
or
|
||||
// first argument of https://ruby-doc.org/core-2.4.0/Regexp.html#method-i-match
|
||||
node = mce.getArgument(0) and
|
||||
mce.getReceiver() = trackRegexpType()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Gets a local source node reached, under type tracker `t`, from a value
 * that is known to be a regular expression: either a regular expression
 * literal or a call to `compile`/`new` on the top-level `Regexp` member.
 */
private DataFlow::LocalSourceNode trackRegexpType(TypeTracker t) {
|
||||
// Base case: tracking starts at a node that is itself a regexp-producing expression.
t.start() and
|
||||
(
|
||||
result.asExpr().getExpr() instanceof Ast::RegExpLiteral or
|
||||
result = API::getTopLevelMember("Regexp").getAMethodCall(["compile", "new"])
|
||||
)
|
||||
or
|
||||
// Recursive case: extend a previously tracked node (state `t2`) by one tracking step to state `t`.
exists(TypeTracker t2 | result = trackRegexpType(t2).track(t2, t))
|
||||
}
|
||||
|
||||
/**
 * Gets a data-flow node that a fully tracked regexp source
 * (`trackRegexpType(TypeTracker::end())`) flows to.
 */
DataFlow::Node trackRegexpType() { trackRegexpType(TypeTracker::end()).flowsTo(result) }
|
||||
|
||||
@@ -16,3 +16,5 @@
|
||||
| missing_regexp_anchor.rb:50:1:50:30 | /^good\\\\\\\\.com\|better\\\\\\\\.com/ | Misleading operator precedence. The subexpression '^good\\\\\\\\.com' is anchored at the beginning, but the other parts of this regular expression are not |
|
||||
| missing_regexp_anchor.rb:52:1:52:15 | /^foo\|bar\|baz$/ | Misleading operator precedence. The subexpression '^foo' is anchored at the beginning, but the other parts of this regular expression are not |
|
||||
| missing_regexp_anchor.rb:52:1:52:15 | /^foo\|bar\|baz$/ | Misleading operator precedence. The subexpression 'baz$' is anchored at the end, but the other parts of this regular expression are not |
|
||||
| missing_regexp_anchor.rb:60:20:60:39 | "http://example.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
|
||||
| missing_regexp_anchor.rb:61:19:61:38 | "http://example.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
|
||||
|
||||
@@ -50,4 +50,12 @@ foo.sub!(/www\.example\.com/, "bar") # GOOD
|
||||
/^good\\\\.com|better\\\\.com/ # BAD
|
||||
|
||||
/^foo|bar|baz$/ # BAD
|
||||
/^foo|%/ # OK
|
||||
/^foo|%/ # OK
|
||||
|
||||
REGEXP = /foo/
|
||||
REGEXP.match? "http://example.com" # GOOD: the url is the text not the regexp
|
||||
REGEXP.match "http://example.com" # GOOD: the url is the text not the regexp
|
||||
"http://example.com".match? REGEXP # GOOD: the url is the text not the regexp
|
||||
"http://example.com".match REGEXP # GOOD: the url is the text not the regexp
|
||||
"some text".match? "http://example.com" # BAD
|
||||
"some text".match "http://example.com" # BAD
|
||||
|
||||
Reference in New Issue
Block a user