mirror of
https://github.com/github/codeql.git
synced 2026-05-01 03:35:13 +02:00
Merge pull request #8293 from aibaars/regex-pattern-source
Ruby: parse more string literals as regular expressions
This commit is contained in:
@@ -0,0 +1,4 @@
|
||||
---
|
||||
category: minorAnalysis
|
||||
---
|
||||
* The `RegExp` class is now an abstract class that extends `StringlikeLiteral`, with implementations for `RegExpLiteral` and for string literals that 'flow' into functions that are known to interpret string arguments as regular expressions, such as `Regexp.new` and `String.match`.
|
||||
@@ -7,8 +7,32 @@
|
||||
|
||||
private import codeql.ruby.ast.Literal as AST
|
||||
private import codeql.Locations
|
||||
private import codeql.ruby.DataFlow
|
||||
private import codeql.ruby.TaintTracking
|
||||
private import codeql.ruby.typetracking.TypeTracker
|
||||
private import codeql.ruby.ApiGraphs
|
||||
|
||||
/**
|
||||
* A `StringlikeLiteral` containing a regular expression term, that is, either
|
||||
* a regular expression literal, or a string literal used in a context where
|
||||
* it is parsed as regular expression.
|
||||
*/
|
||||
abstract class RegExp extends AST::StringlikeLiteral {
|
||||
/**
|
||||
* Holds if this `RegExp` has the `m` (multi-line) flag, which in Ruby makes `.` also match newlines.
|
||||
*/
|
||||
predicate isDotAll() { none() }
|
||||
|
||||
/**
|
||||
* Holds if this `RegExp` has the `i` flag for case-insensitive matching.
|
||||
*/
|
||||
predicate isIgnoreCase() { none() }
|
||||
|
||||
/**
|
||||
* Gets the flags for this `RegExp`, or the empty string if it has no flags.
|
||||
*/
|
||||
string getFlags() { result = "" }
|
||||
|
||||
class RegExp extends AST::RegExpLiteral {
|
||||
/**
|
||||
* Helper predicate for `charSetStart(int start, int end)`.
|
||||
*
|
||||
@@ -933,3 +957,63 @@ class RegExp extends AST::RegExpLiteral {
|
||||
this.lastPart(start, end)
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * A regular expression literal, viewed as a `RegExp`.
 *
 * Each flag predicate delegates to the corresponding flag accessor of the
 * underlying `AST::RegExpLiteral`.
 */
private class RegExpLiteralRegExp extends RegExp, AST::RegExpLiteral {
|
||||
// "Dot-all" is expressed through the literal's multiline flag.
override predicate isDotAll() { this.hasMultilineFlag() }
|
||||
|
||||
override predicate isIgnoreCase() { this.hasCaseInsensitiveFlag() }
|
||||
|
||||
override string getFlags() { result = this.getFlagString() }
|
||||
}
|
||||
|
||||
/**
 * A string-like literal that `regExpSource` relates to some node `parse`,
 * i.e. a literal whose value may flow to a place where it is interpreted
 * as a regular expression.
 */
private class ParsedStringRegExp extends RegExp {
|
||||
// A node that this literal may flow to according to `regExpSource`.
private DataFlow::Node parse;
|
||||
|
||||
ParsedStringRegExp() { this = regExpSource(parse).asExpr().getExpr() }
|
||||
|
||||
/** Gets a node that `regExpSource` relates this literal to. */
DataFlow::Node getAParse() { result = parse }
|
||||
|
||||
// This class contributes no flag information of its own.
override predicate isDotAll() { none() }
|
||||
|
||||
override predicate isIgnoreCase() { none() }
|
||||
|
||||
// NOTE(review): `none()` yields no result at all, whereas the base
// `RegExp.getFlags()` is documented to yield the empty string when there
// are no flags - confirm this difference is intended.
override string getFlags() { none() }
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `source` may be interpreted as a regular expression.
|
||||
*/
|
||||
cached
|
||||
private predicate isInterpretedAsRegExp(DataFlow::Node source) {
|
||||
// The first argument to an invocation of `Regexp.new` or `Regexp.compile`.
|
||||
source = API::getTopLevelMember("Regexp").getAMethodCall(["compile", "new"]).getArgument(0)
|
||||
or
|
||||
// The argument of a call that coerces the argument to a regular expression.
// NOTE(review): only the method name is checked here, so any call named
// `match` or `match?` qualifies regardless of its receiver - presumably a
// deliberate over-approximation; confirm.
|
||||
exists(DataFlow::CallNode mce |
|
||||
mce.getMethodName() = ["match", "match?"] and
|
||||
source = mce.getArgument(0)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a node whose value may flow (inter-procedurally) to `re`, where it is interpreted
|
||||
* as a part of a regular expression.
|
||||
*/
|
||||
private DataFlow::Node regExpSource(DataFlow::Node re, TypeBackTracker t) {
|
||||
// Base case: tracking starts at `re` itself, which must be a node that is
// interpreted as a regular expression.
t.start() and
|
||||
re = result and
|
||||
isInterpretedAsRegExp(result)
|
||||
or
|
||||
// Recursive case: `succ` is already known to reach `re` with tracker state
// `t2`, and `result` is related to `succ` by one of the steps below.
exists(TypeBackTracker t2, DataFlow::Node succ | succ = regExpSource(re, t2) |
|
||||
// A type back-tracker `smallstep` between `result` and `succ`, relating
// the states `t` and `t2`.
t2 = t.smallstep(result, succ)
|
||||
or
|
||||
// A local taint step from `result` to `succ`; the tracker state is
// carried over unchanged.
TaintTracking::localTaintStep(result, succ) and
|
||||
t = t2
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a node whose value may flow (inter-procedurally) to `re`, where it is interpreted
|
||||
* as a part of a regular expression.
 *
 * This is the public entry point: it evaluates the tracker-carrying overload
 * with `TypeBackTracker::end()`.
|
||||
*/
|
||||
DataFlow::Node regExpSource(DataFlow::Node re) { result = regExpSource(re, TypeBackTracker::end()) }
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
private import codeql.ruby.ast.Literal as AST
|
||||
private import codeql.Locations
|
||||
private import ParseRegExp
|
||||
import codeql.Locations
|
||||
private import codeql.ruby.DataFlow
|
||||
|
||||
/**
|
||||
* Holds if `term` is an escape class representing e.g. `\d`.
|
||||
@@ -27,7 +27,7 @@ predicate isEscapeClass(RegExpTerm term, string clazz) {
|
||||
* Holds if the regular expression should not be considered.
|
||||
*/
|
||||
predicate isExcluded(RegExpParent parent) {
|
||||
parent.(RegExpTerm).getRegExp().hasFreeSpacingFlag() // exclude free-spacing mode regexes
|
||||
parent.(RegExpTerm).getRegExp().(AST::RegExpLiteral).hasFreeSpacingFlag() // exclude free-spacing mode regexes
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -93,11 +93,11 @@ class RegExpLiteral extends TRegExpLiteral, RegExpParent {
|
||||
|
||||
override RegExpTerm getChild(int i) { i = 0 and result.getRegExp() = re and result.isRootTerm() }
|
||||
|
||||
predicate isDotAll() { re.hasMultilineFlag() }
|
||||
predicate isDotAll() { re.isDotAll() }
|
||||
|
||||
predicate isIgnoreCase() { re.hasCaseInsensitiveFlag() }
|
||||
predicate isIgnoreCase() { re.isIgnoreCase() }
|
||||
|
||||
string getFlags() { result = re.getFlagString() }
|
||||
string getFlags() { result = re.getFlags() }
|
||||
|
||||
override string getAPrimaryQlClass() { result = "RegExpLiteral" }
|
||||
}
|
||||
@@ -795,3 +795,47 @@ class RegExpNamedCharacterProperty extends RegExpTerm, TRegExpNamedCharacterProp
|
||||
RegExpTerm getParsedRegExp(AST::RegExpLiteral re) {
|
||||
result.getRegExp() = re and result.isRootTerm()
|
||||
}
|
||||
|
||||
/**
|
||||
* A node whose value may flow to a position where it is interpreted
|
||||
* as a part of a regular expression.
 *
 * This class is abstract: each concrete subclass supplies both the parse
 * node(s) and the parsed term via the two abstract members below.
|
||||
*/
|
||||
abstract class RegExpPatternSource extends DataFlow::Node {
|
||||
/**
|
||||
* Gets a node where the pattern of this node is parsed as a part of
|
||||
* a regular expression.
|
||||
*/
|
||||
abstract DataFlow::Node getAParse();
|
||||
|
||||
/**
|
||||
* Gets the root term of the regular expression parsed from this pattern.
|
||||
*/
|
||||
abstract RegExpTerm getRegExpTerm();
|
||||
}
|
||||
|
||||
/**
|
||||
* A regular expression literal, viewed as the pattern source for itself.
|
||||
*/
|
||||
private class RegExpLiteralPatternSource extends RegExpPatternSource {
|
||||
// The regular expression literal that this data-flow node corresponds to.
private AST::RegExpLiteral astNode;
|
||||
|
||||
RegExpLiteralPatternSource() { astNode = this.asExpr().getExpr() }
|
||||
|
||||
// A literal is its own parse location.
override DataFlow::Node getAParse() { result = this }
|
||||
|
||||
override RegExpTerm getRegExpTerm() { result = astNode.getParsed() }
|
||||
}
|
||||
|
||||
/**
|
||||
* A node whose string value may flow to a position where it is interpreted
|
||||
* as a part of a regular expression.
|
||||
*/
|
||||
private class StringRegExpPatternSource extends RegExpPatternSource {
|
||||
// A node that this source may flow to according to `regExpSource`.
private DataFlow::Node parse;
|
||||
|
||||
StringRegExpPatternSource() { this = regExpSource(parse) }
|
||||
|
||||
override DataFlow::Node getAParse() { result = parse }
|
||||
|
||||
// NOTE(review): this relates the node to every term whose `getRegExp()` is
// the node's expression; unlike `getParsedRegExp`, there is no
// `isRootTerm()` constraint, although the overridden member is documented
// as returning the root term - confirm.
override RegExpTerm getRegExpTerm() { result.getRegExp() = this.asExpr().getExpr() }
|
||||
}
|
||||
|
||||
@@ -20,6 +20,7 @@
|
||||
| tst.rb:74:10:74:17 | (b\|a?b)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'b'. |
|
||||
| tst.rb:77:10:77:17 | (a\|aa?)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
|
||||
| tst.rb:83:10:83:16 | (.\|\\n)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\n'. |
|
||||
| tst.rb:89:21:89:28 | (a\|aa?)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
|
||||
| tst.rb:95:11:95:24 | ([\\S\\s]\|[^a])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '`'. |
|
||||
| tst.rb:101:11:101:19 | (.\|[^a])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '`'. |
|
||||
| tst.rb:107:11:107:19 | (b\|[^a])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'b'. |
|
||||
|
||||
@@ -85,7 +85,7 @@ bad16 = /(.|\n)*!/m
|
||||
# GOOD
|
||||
good8 = /([\w.]+)*/
|
||||
|
||||
# BAD - we don't yet parse regexps constructed from strings
|
||||
# NOT GOOD
|
||||
bad17 = Regexp.new '(a|aa?)*b'
|
||||
|
||||
# GOOD - not used as regexp
|
||||
|
||||
Reference in New Issue
Block a user