Ruby: parse some string literals as regex

In addition to regex literals, also parse normal string literals
as regular expressions if they somehow "flow" into a method call
that is known to interpret string values as regular expressions.
This commit is contained in:
Arthur Baars
2022-02-28 17:20:53 +01:00
parent 5ce6b847d1
commit 1240c11c4b
2 changed files with 89 additions and 6 deletions

View File

@@ -7,8 +7,32 @@
private import codeql.ruby.ast.Literal as AST
private import codeql.Locations
private import codeql.ruby.DataFlow
private import codeql.ruby.TaintTracking
private import codeql.ruby.typetracking.TypeTracker
private import codeql.ruby.ApiGraphs
/**
* A `StringlikeLiteral` containing a regular expression term, that is, either
* a regular expression literal or a string literal used in a context where
* it is parsed as a regular expression.
*/
abstract class RegExp extends AST::StringlikeLiteral {
/**
* Holds if this `RegExp` has the `m` (multi-line) flag, which in Ruby makes `.` also match newlines.
*/
predicate isDotAll() { none() }
/**
* Holds if this `RegExp` has the `i` flag for case-insensitive matching.
*/
predicate isIgnoreCase() { none() }
/**
* Gets the flags for this `RegExp`, or the empty string if it has no flags.
*/
string getFlags() { result = "" }
class RegExp extends AST::RegExpLiteral {
/**
* Helper predicate for `charSetStart(int start, int end)`.
*
@@ -933,3 +957,63 @@ class RegExp extends AST::RegExpLiteral {
this.lastPart(start, end)
}
}
/**
 * A regular expression literal viewed as a `RegExp`.
 *
 * Every flag query is answered from the flags on the literal itself.
 */
private class RegExpLiteralRegExp extends RegExp, AST::RegExpLiteral {
  /** Gets the flag string of this literal, as reported by `getFlagString()`. */
  override string getFlags() { this.getFlagString() = result }

  /** Holds if this literal has the case-insensitive flag. */
  override predicate isIgnoreCase() { this.hasCaseInsensitiveFlag() }

  /** Holds if this literal has the multi-line flag. */
  override predicate isDotAll() { this.hasMultilineFlag() }
}
/**
 * A string-like literal whose value may flow to a node where it is
 * interpreted as a regular expression (see `regExpSource`).
 *
 * Such a literal carries no regex flags of its own, so the flag predicates
 * hold for nothing and `getFlags()` yields the empty string.
 */
private class ParsedStringRegExp extends RegExp {
  private DataFlow::Node parse;

  ParsedStringRegExp() { this = regExpSource(parse).asExpr().getExpr() }

  /** Gets a node at which this literal is interpreted as a regular expression. */
  DataFlow::Node getAParse() { result = parse }

  override predicate isDotAll() { none() }

  override predicate isIgnoreCase() { none() }

  /**
   * Gets the empty string: a parsed string has no flags.
   *
   * A regular expression literal can also reach a parse site (for example as
   * the argument of `match`) and is then a member of this class too. Its flags
   * come from the literal, so it is excluded here to avoid reporting an extra
   * empty flag string next to the real one.
   */
  override string getFlags() {
    not this instanceof AST::RegExpLiteral and
    result = ""
  }
}
/**
 * Holds if `source` is an argument that the receiving call may interpret as a
 * regular expression.
 *
 * NOTE(review): the `match`/`match?` case does not constrain the receiver, so
 * it also covers calls where the receiver is itself a regular expression and
 * the argument is the subject string. Confirm that this over-approximation is
 * intended.
 */
cached
private predicate isInterpretedAsRegExp(DataFlow::Node source) {
  // Argument 0 of any call to a method named `match` or `match?`.
  source =
    any(DataFlow::CallNode call | call.getMethodName() = ["match", "match?"]).getArgument(0)
  or
  // Argument 0 of `Regexp.compile(...)` or `Regexp.new(...)`.
  source = API::getTopLevelMember("Regexp").getAMethodCall(["compile", "new"]).getArgument(0)
}
/**
 * Gets a node whose value may flow (inter-procedurally) to `re`, where it is interpreted
 * as a part of a regular expression.
 *
 * `t` is the `TypeBackTracker` state for the flow from `result` to `re`. The
 * search starts at `re` and walks backwards: either by one type-tracking
 * `smallstep`, or by one local taint step that leaves the tracker state
 * unchanged.
 */
private DataFlow::Node regExpSource(DataFlow::Node re, TypeBackTracker t) {
// Base case: `re` is itself a node that is interpreted as a regular expression.
t.start() and
re = result and
isInterpretedAsRegExp(result)
or
// Recursive case: `succ` already reaches `re` in state `t2`; extend the path
// one step backwards from `succ` to `result`.
exists(TypeBackTracker t2, DataFlow::Node succ | succ = regExpSource(re, t2) |
// A type-tracking step, which updates the tracker state from `t2` to `t`.
t2 = t.smallstep(result, succ)
or
// A local taint step, which keeps the tracker state as it was.
TaintTracking::localTaintStep(result, succ) and
t = t2
)
}
/**
 * Gets a node whose value may reach `re` (possibly across calls), where `re`
 * is a node that is interpreted as a regular expression.
 *
 * This is the tracker-based `regExpSource(re, t)` taken at its end state.
 */
DataFlow::Node regExpSource(DataFlow::Node re) {
  regExpSource(re, TypeBackTracker::end()) = result
}

View File

@@ -1,7 +1,6 @@
private import codeql.ruby.ast.Literal as AST
private import codeql.Locations
private import ParseRegExp
import codeql.Locations
/**
* Holds if `term` is an escape class representing e.g. `\d`.
@@ -27,7 +26,7 @@ predicate isEscapeClass(RegExpTerm term, string clazz) {
* Holds if the regular expression should not be considered.
*/
predicate isExcluded(RegExpParent parent) {
  // Exclude free-spacing mode regexes. The cast restricts the check to regular
  // expression literals, the only form on which `hasFreeSpacingFlag` is called
  // here. A `RegExp` that is not a literal fails the cast and is therefore
  // never excluded.
  parent.(RegExpTerm).getRegExp().(AST::RegExpLiteral).hasFreeSpacingFlag()
}
/**
@@ -93,11 +92,11 @@ class RegExpLiteral extends TRegExpLiteral, RegExpParent {
/** Gets the only child (index 0): the root term of the underlying regular expression `re`. */
override RegExpTerm getChild(int i) {
  result.isRootTerm() and
  result.getRegExp() = re and
  i = 0
}
/**
 * Holds if the underlying regular expression `re` is in dot-all mode.
 *
 * Delegates to `RegExp.isDotAll()` instead of reading a literal-only flag
 * predicate. NOTE(review): assumes `re` is typed as `RegExp`; confirm against
 * the field declaration.
 */
predicate isDotAll() { re.isDotAll() }
/**
 * Holds if the underlying regular expression `re` matches case-insensitively.
 *
 * Delegates to `RegExp.isIgnoreCase()` instead of reading a literal-only flag
 * predicate. NOTE(review): assumes `re` is typed as `RegExp`; confirm against
 * the field declaration.
 */
predicate isIgnoreCase() { re.isIgnoreCase() }
/**
 * Gets the flags of the underlying regular expression `re`.
 *
 * Delegates to `RegExp.getFlags()` instead of reading the literal-only flag
 * string. NOTE(review): assumes `re` is typed as `RegExp`; confirm against
 * the field declaration.
 */
string getFlags() { result = re.getFlags() }
/** Gets the name of the primary QL class of this element, `"RegExpLiteral"`. */
override string getAPrimaryQlClass() {
  result = "RegExpLiteral"
}
}