mirror of
https://github.com/github/codeql.git
synced 2026-05-03 04:39:29 +02:00
Ruby: parse some string literals as regex
In addition to regex literals, also parse normal string literals as regular expressions if they somehow "flow" into a method call that is known to interpret string values as regular expressions.
This commit is contained in:
@@ -7,8 +7,32 @@
|
||||
|
||||
private import codeql.ruby.ast.Literal as AST
|
||||
private import codeql.Locations
|
||||
private import codeql.ruby.DataFlow
|
||||
private import codeql.ruby.TaintTracking
|
||||
private import codeql.ruby.typetracking.TypeTracker
|
||||
private import codeql.ruby.ApiGraphs
|
||||
|
||||
/**
|
||||
* A `StringlikeLiteral` containing a regular expression term, that is, either
|
||||
* a regular expression literal, a string literal used in a context where
|
||||
* it is parsed as regular expression.
|
||||
*/
|
||||
abstract class RegExp extends AST::StringlikeLiteral {
|
||||
/**
|
||||
* Holds if this `RegExp` has the `s` flag for multi-line matching.
|
||||
*/
|
||||
predicate isDotAll() { none() }
|
||||
|
||||
/**
|
||||
* Holds if this `RegExp` has the `i` flag for case-insensitive matching.
|
||||
*/
|
||||
predicate isIgnoreCase() { none() }
|
||||
|
||||
/**
|
||||
* Gets the flags for this `RegExp`, or the empty string if it has no flags.
|
||||
*/
|
||||
string getFlags() { result = "" }
|
||||
|
||||
class RegExp extends AST::RegExpLiteral {
|
||||
/**
|
||||
* Helper predicate for `charSetStart(int start, int end)`.
|
||||
*
|
||||
@@ -933,3 +957,63 @@ class RegExp extends AST::RegExpLiteral {
|
||||
this.lastPart(start, end)
|
||||
}
|
||||
}
|
||||
|
||||
private class RegExpLiteralRegExp extends RegExp, AST::RegExpLiteral {
|
||||
override predicate isDotAll() { this.hasMultilineFlag() }
|
||||
|
||||
override predicate isIgnoreCase() { this.hasCaseInsensitiveFlag() }
|
||||
|
||||
override string getFlags() { result = this.getFlagString() }
|
||||
}
|
||||
|
||||
private class ParsedStringRegExp extends RegExp {
|
||||
private DataFlow::Node parse;
|
||||
|
||||
ParsedStringRegExp() { this = regExpSource(parse).asExpr().getExpr() }
|
||||
|
||||
DataFlow::Node getAParse() { result = parse }
|
||||
|
||||
override predicate isDotAll() { none() }
|
||||
|
||||
override predicate isIgnoreCase() { none() }
|
||||
|
||||
override string getFlags() { none() }
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `source` may be interpreted as a regular expression.
|
||||
*/
|
||||
cached
|
||||
private predicate isInterpretedAsRegExp(DataFlow::Node source) {
|
||||
// The first argument to an invocation of `Regexp.new` or `Regexp.compile`.
|
||||
source = API::getTopLevelMember("Regexp").getAMethodCall(["compile", "new"]).getArgument(0)
|
||||
or
|
||||
// The argument of a call that coerces the argument to a regular expression.
|
||||
exists(DataFlow::CallNode mce |
|
||||
mce.getMethodName() = ["match", "match?"] and
|
||||
source = mce.getArgument(0)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a node whose value may flow (inter-procedurally) to `re`, where it is interpreted
|
||||
* as a part of a regular expression.
|
||||
*/
|
||||
private DataFlow::Node regExpSource(DataFlow::Node re, TypeBackTracker t) {
|
||||
t.start() and
|
||||
re = result and
|
||||
isInterpretedAsRegExp(result)
|
||||
or
|
||||
exists(TypeBackTracker t2, DataFlow::Node succ | succ = regExpSource(re, t2) |
|
||||
t2 = t.smallstep(result, succ)
|
||||
or
|
||||
TaintTracking::localTaintStep(result, succ) and
|
||||
t = t2
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a node whose value may flow (inter-procedurally) to `re`, where it is interpreted
|
||||
* as a part of a regular expression.
|
||||
*/
|
||||
DataFlow::Node regExpSource(DataFlow::Node re) { result = regExpSource(re, TypeBackTracker::end()) }
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
private import codeql.ruby.ast.Literal as AST
|
||||
private import codeql.Locations
|
||||
private import ParseRegExp
|
||||
import codeql.Locations
|
||||
|
||||
/**
|
||||
* Holds if `term` is an ecape class representing e.g. `\d`.
|
||||
@@ -27,7 +26,7 @@ predicate isEscapeClass(RegExpTerm term, string clazz) {
|
||||
* Holds if the regular expression should not be considered.
|
||||
*/
|
||||
predicate isExcluded(RegExpParent parent) {
|
||||
parent.(RegExpTerm).getRegExp().hasFreeSpacingFlag() // exclude free-spacing mode regexes
|
||||
parent.(RegExpTerm).getRegExp().(AST::RegExpLiteral).hasFreeSpacingFlag() // exclude free-spacing mode regexes
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -93,11 +92,11 @@ class RegExpLiteral extends TRegExpLiteral, RegExpParent {
|
||||
|
||||
override RegExpTerm getChild(int i) { i = 0 and result.getRegExp() = re and result.isRootTerm() }
|
||||
|
||||
predicate isDotAll() { re.hasMultilineFlag() }
|
||||
predicate isDotAll() { re.isDotAll() }
|
||||
|
||||
predicate isIgnoreCase() { re.hasCaseInsensitiveFlag() }
|
||||
predicate isIgnoreCase() { re.isIgnoreCase() }
|
||||
|
||||
string getFlags() { result = re.getFlagString() }
|
||||
string getFlags() { result = re.getFlags() }
|
||||
|
||||
override string getAPrimaryQlClass() { result = "RegExpLiteral" }
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user