mirror of
https://github.com/github/codeql.git
synced 2026-04-25 08:45:14 +02:00
add a RegexExecution, and use it to track regular expressions to their uses in a nice way in rb/polynomial-redos
This commit is contained in:
@@ -10,6 +10,7 @@ private import codeql.ruby.DataFlow
|
||||
private import codeql.ruby.Frameworks
|
||||
private import codeql.ruby.dataflow.RemoteFlowSources
|
||||
private import codeql.ruby.ApiGraphs
|
||||
private import codeql.ruby.Regexp as RE
|
||||
|
||||
/**
|
||||
* A data-flow node that constructs a SQL statement.
|
||||
@@ -77,6 +78,55 @@ module SqlExecution {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A data-flow node that executes a regular expression.
|
||||
*
|
||||
* Extend this class to refine existing API models. If you want to model new APIs,
|
||||
* extend `RegexExecution::Range` instead.
|
||||
*/
|
||||
class RegexExecution extends DataFlow::Node instanceof RegexExecution::Range {
// Every member below simply delegates to the `RegexExecution::Range` implementation via `super`.
|
||||
/** Gets the data flow node for the regex being executed by this node. */
|
||||
DataFlow::Node getRegex() { result = super.getRegex() }
|
||||
|
||||
/** Gets a data flow node for the string to be searched or matched against. */
|
||||
DataFlow::Node getString() { result = super.getString() }
|
||||
|
||||
/** Gets a parsed regular expression term that is executed at this node. */
|
||||
RE::RegExpTerm getTerm() { result = super.getTerm() }
|
||||
|
||||
/**
|
||||
* Gets the name of this regex execution, typically the name of an executing method.
|
||||
* This is used for nice alert messages and should include the module if possible.
|
||||
*/
|
||||
string getName() { result = super.getName() }
|
||||
}
|
||||
|
||||
/** Provides classes for modeling new regular-expression execution APIs. */
|
||||
module RegexExecution {
|
||||
/**
|
||||
* A data-flow node that executes a regular expression.
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `RegexExecution` instead.
|
||||
*/
|
||||
abstract class Range extends DataFlow::Node {
// All four member predicates are abstract: a concrete subclass must implement each of them.
|
||||
/** Gets the data flow node for the regex being executed by this node. */
|
||||
abstract DataFlow::Node getRegex();
|
||||
|
||||
/** Gets a data flow node for the string to be searched or matched against. */
|
||||
abstract DataFlow::Node getString();
|
||||
|
||||
/** Gets the parsed regular expression term that is executed by this node. */
|
||||
abstract RE::RegExpTerm getTerm();
|
||||
|
||||
/**
|
||||
* Gets the name of this regex execution, typically the name of an executing method.
|
||||
* This is used for nice alert messages and should include the module if possible.
|
||||
*/
|
||||
abstract string getName();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A data flow node that performs a file system access, including reading and writing data,
|
||||
* creating and deleting files and folders, checking and updating permissions, and so on.
|
||||
|
||||
@@ -8,9 +8,11 @@
|
||||
import regexp.RegExpTreeView // re-export
|
||||
private import regexp.internal.ParseRegExp
|
||||
private import regexp.internal.RegExpConfiguration
|
||||
private import codeql.ruby.ast.Literal as Ast
|
||||
private import codeql.ruby.AST as Ast
|
||||
private import codeql.ruby.CFG
|
||||
private import codeql.ruby.DataFlow
|
||||
private import codeql.ruby.ApiGraphs
|
||||
private import codeql.ruby.Concepts
|
||||
|
||||
/**
|
||||
* Provides utility predicates related to regular expressions.
|
||||
@@ -63,7 +65,11 @@ private class RegExpLiteralPatternSource extends RegExpPatternSource {
|
||||
private class StringRegExpPatternSource extends RegExpPatternSource {
|
||||
private DataFlow::Node parse;
|
||||
|
||||
StringRegExpPatternSource() { this = regExpSource(parse) }
|
||||
StringRegExpPatternSource() {
|
||||
this = regExpSource(parse) and
|
||||
// `regExpSource()` tracks both strings and regex literals, narrow it down to strings.
|
||||
this.asExpr().getConstantValue().isString(_)
|
||||
}
|
||||
|
||||
override DataFlow::Node getAParse() { result = parse }
|
||||
|
||||
@@ -104,6 +110,7 @@ module RegExpInterpretation {
|
||||
|
||||
/**
|
||||
* A node interpreted as a regular expression.
|
||||
* Specifically, nodes where string values are interpreted as regular expressions.
|
||||
*/
|
||||
class StdLibRegExpInterpretation extends RegExpInterpretation::Range {
|
||||
StdLibRegExpInterpretation() {
|
||||
@@ -128,3 +135,87 @@ cached
|
||||
DataFlow::Node regExpSource(DataFlow::Node re) {
|
||||
exists(RegExpConfiguration c | c.hasFlow(result, re))
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `exec` is a node where `regexp` is interpreted as a regular expression and
|
||||
* tested against the string value of `input`.
|
||||
* `name` describes the regexp execution, typically the name of the method being called.
|
||||
*/
|
||||
private predicate regexExecution(
|
||||
DataFlow::Node exec, DataFlow::Node input, DataFlow::Node regexp, string name
|
||||
) {
|
||||
// `=~` or `!~`
// `name` is the operator itself; either operand may be the regexp, so both orders are accepted.
|
||||
exists(CfgNodes::ExprNodes::BinaryOperationCfgNode op |
|
||||
name = op.getOperator() and
|
||||
exec.asExpr() = op and
|
||||
(
|
||||
op.getExpr() instanceof Ast::RegExpMatchExpr or
|
||||
op.getExpr() instanceof Ast::NoRegExpMatchExpr
|
||||
) and
|
||||
(
|
||||
input.asExpr() = op.getLeftOperand() and regexp.asExpr() = op.getRightOperand()
|
||||
or
|
||||
input.asExpr() = op.getRightOperand() and regexp.asExpr() = op.getLeftOperand()
|
||||
)
|
||||
)
|
||||
or
|
||||
// Any of the methods on `String` that take a regexp.
// The receiver is the searched string and the first argument is the regexp; `name` is "String#<method>".
|
||||
exists(DataFlow::CallNode call | exec = call |
|
||||
name = "String#" + call.getMethodName() and
|
||||
call.getMethodName() =
|
||||
[
|
||||
"[]", "gsub", "gsub!", "index", "match", "match?", "partition", "rindex", "rpartition",
|
||||
"scan", "slice!", "split", "sub", "sub!"
|
||||
] and
|
||||
input = call.getReceiver() and
|
||||
regexp = call.getArgument(0) and
|
||||
// Exclude `match`/`match?` calls whose receiver is tracked by `trackRegexpType()`, i.e. calls to
// https://ruby-doc.org/core-2.4.0/Regexp.html#method-i-match; those are handled by the next case of this disjunction.
|
||||
// also see `StdLibRegExpInterpretation`
|
||||
not (
|
||||
call.getMethodName() = ["match", "match?"] and
|
||||
call.getReceiver() = trackRegexpType()
|
||||
)
|
||||
)
|
||||
or
|
||||
// A call to `match` or `match?` where the regexp is the receiver.
// Here the roles are swapped: the receiver is the regexp and the first argument is the searched string.
|
||||
exists(DataFlow::CallNode call | exec = call |
|
||||
name = "Regexp#" + call.getMethodName() and
|
||||
call.getMethodName() = ["match", "match?"] and
|
||||
regexp = call.getReceiver() and
|
||||
input = call.getArgument(0)
|
||||
)
|
||||
or
|
||||
// a case-when statement
// The value of the `case` is the input; any pattern of a `when` or `in` branch is a candidate regexp.
|
||||
exists(CfgNodes::ExprNodes::CaseExprCfgNode caseWhen |
|
||||
name = "case-when" and
|
||||
exec.asExpr() = caseWhen and
|
||||
input.asExpr() = caseWhen.getValue()
|
||||
|
|
|
||||
regexp.asExpr() = caseWhen.getBranch(_).(CfgNodes::ExprNodes::WhenClauseCfgNode).getPattern(_)
|
||||
or
|
||||
regexp.asExpr() = caseWhen.getBranch(_).(CfgNodes::ExprNodes::InClauseCfgNode).getPattern()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* An execution of a regular expression by the standard library.
|
||||
*/
|
||||
private class StdRegexpExecution extends RegexExecution::Range {
|
||||
// The node interpreted as the regular expression, as bound by `regexExecution`.
DataFlow::Node regexp;
|
||||
// The node whose string value is tested against `regexp`, as bound by `regexExecution`.
DataFlow::Node input;
|
||||
// The description of this execution (an operator, a qualified method name, or "case-when").
string name;
|
||||
|
||||
// All three fields are bound together by a single `regexExecution` tuple for this node.
StdRegexpExecution() { regexExecution(this, input, regexp, name) }
|
||||
|
||||
override DataFlow::Node getRegex() { result = regexp }
|
||||
|
||||
override DataFlow::Node getString() { result = input }
|
||||
|
||||
// The parsed term is looked up from the pattern sources that `regExpSource` relates to `regexp`.
override RegExpTerm getTerm() { result = getTermForNode(regexp) }
|
||||
|
||||
override string getName() { result = name }
|
||||
}
|
||||
|
||||
/**
 * Gets the regular expression term of a `RegExpPatternSource` that `regExpSource`
 * relates to `node`.
 */
private RegExpTerm getTermForNode(DataFlow::Node node) {
|
||||
exists(RegExpPatternSource source | source = regExpSource(node) | result = source.getRegExpTerm())
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@ private import codeql.ruby.controlflow.CfgNodes
|
||||
private import codeql.ruby.dataflow.internal.DataFlowImplForRegExp
|
||||
private import codeql.ruby.typetracking.TypeTracker
|
||||
private import codeql.ruby.ApiGraphs
|
||||
private import codeql.ruby.Concepts
|
||||
private import codeql.ruby.dataflow.internal.DataFlowPrivate as DataFlowPrivate
|
||||
private import codeql.ruby.TaintTracking
|
||||
private import codeql.ruby.frameworks.core.String
|
||||
@@ -13,18 +14,30 @@ private import codeql.ruby.frameworks.core.String
|
||||
class RegExpConfiguration extends Configuration {
|
||||
RegExpConfiguration() { this = "RegExpConfiguration" }
|
||||
|
||||
override predicate isSource(DataFlow::Node source) {
|
||||
override predicate isSource(DataFlow::Node source, DataFlow::FlowState state) {
|
||||
// track both string literals and regexp literals - the latter for finding executions of regular expressions that are used elsewhere.
|
||||
state = "string" and
|
||||
source.asExpr() =
|
||||
any(ExprCfgNode e |
|
||||
e.getConstantValue().isString(_) and
|
||||
not e instanceof ExprNodes::VariableReadAccessCfgNode and
|
||||
not e instanceof ExprNodes::ConstantReadAccessCfgNode
|
||||
)
|
||||
or
|
||||
state = "reg" and
|
||||
source.asExpr().getExpr() instanceof Ast::RegExpLiteral
|
||||
}
|
||||
|
||||
override predicate isSink(DataFlow::Node sink) { sink instanceof RegExpInterpretation::Range }
|
||||
override predicate isSink(DataFlow::Node sink, DataFlow::FlowState state) {
|
||||
state = "string" and
|
||||
sink instanceof RegExpInterpretation::Range
|
||||
or
|
||||
state = "reg" and
|
||||
sink = any(RegexExecution exec).getRegex()
|
||||
}
|
||||
|
||||
override predicate isBarrier(DataFlow::Node node) {
|
||||
override predicate isBarrier(DataFlow::Node node, DataFlow::FlowState state) {
|
||||
state = "string" and
|
||||
exists(DataFlow::CallNode mce | mce.getMethodName() = ["match", "match?"] |
|
||||
// receiver of https://ruby-doc.org/core-2.4.0/String.html#method-i-match
|
||||
node = mce.getReceiver() and
|
||||
@@ -36,22 +49,29 @@ class RegExpConfiguration extends Configuration {
|
||||
)
|
||||
}
|
||||
|
||||
override predicate isAdditionalFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
|
||||
// include taint flow through `String` summaries
|
||||
TaintTracking::localTaintStep(nodeFrom, nodeTo) and
|
||||
nodeFrom.(DataFlowPrivate::SummaryNode).getSummarizedCallable() instanceof
|
||||
String::SummarizedCallable
|
||||
or
|
||||
// string concatenations, and
|
||||
exists(CfgNodes::ExprNodes::OperationCfgNode op |
|
||||
op = nodeTo.asExpr() and
|
||||
op.getAnOperand() = nodeFrom.asExpr() and
|
||||
op.getExpr().(Ast::BinaryOperation).getOperator() = "+"
|
||||
override predicate isAdditionalFlowStep(
|
||||
DataFlow::Node nodeFrom, DataFlow::FlowState stateFrom, DataFlow::Node nodeTo,
|
||||
DataFlow::FlowState stateTo
|
||||
) {
|
||||
stateFrom = stateTo and
|
||||
stateFrom = "string" and
|
||||
(
|
||||
// include taint flow through `String` summaries
|
||||
TaintTracking::localTaintStep(nodeFrom, nodeTo) and
|
||||
nodeFrom.(DataFlowPrivate::SummaryNode).getSummarizedCallable() instanceof
|
||||
String::SummarizedCallable
|
||||
or
|
||||
// string concatenations, and
|
||||
exists(CfgNodes::ExprNodes::OperationCfgNode op |
|
||||
op = nodeTo.asExpr() and
|
||||
op.getAnOperand() = nodeFrom.asExpr() and
|
||||
op.getExpr().(Ast::BinaryOperation).getOperator() = "+"
|
||||
)
|
||||
or
|
||||
// string interpolations
|
||||
nodeFrom.asExpr() =
|
||||
nodeTo.asExpr().(CfgNodes::ExprNodes::StringlikeLiteralCfgNode).getAComponent()
|
||||
)
|
||||
or
|
||||
// string interpolations
|
||||
nodeFrom.asExpr() =
|
||||
nodeTo.asExpr().(CfgNodes::ExprNodes::StringlikeLiteralCfgNode).getAComponent()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -9,6 +9,8 @@ private import codeql.ruby.CFG
|
||||
private import codeql.ruby.DataFlow
|
||||
private import codeql.ruby.dataflow.RemoteFlowSources
|
||||
private import codeql.ruby.regexp.RegExpTreeView::RegexTreeView as TreeView
|
||||
private import codeql.ruby.Regexp as RE
|
||||
private import codeql.ruby.Concepts
|
||||
|
||||
/**
|
||||
* Provides default sources, sinks and sanitizers for reasoning about
|
||||
@@ -53,80 +55,22 @@ module PolynomialReDoS {
|
||||
*/
|
||||
class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
|
||||
|
||||
/**
|
||||
* Gets the AST of a regular expression object that can flow to `node`.
|
||||
*/
|
||||
RegExpTerm getRegExpObjectFromNode(DataFlow::Node node) {
|
||||
exists(DataFlow::LocalSourceNode regexp |
|
||||
regexp.flowsTo(node) and
|
||||
result = regexp.asExpr().(CfgNodes::ExprNodes::RegExpLiteralCfgNode).getExpr().getParsed()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* A regexp match against a superlinear backtracking term, seen as a sink for
|
||||
* polynomial regular expression denial-of-service vulnerabilities.
|
||||
*/
|
||||
class PolynomialBackTrackingTermMatch extends Sink {
|
||||
PolynomialBackTrackingTerm term;
|
||||
DataFlow::ExprNode matchNode;
|
||||
RegexExecution exec;
|
||||
|
||||
PolynomialBackTrackingTermMatch() {
|
||||
exists(DataFlow::Node regexp |
|
||||
term.getRootTerm() = getRegExpObjectFromNode(regexp) and
|
||||
(
|
||||
// `=~` or `!~`
|
||||
exists(CfgNodes::ExprNodes::BinaryOperationCfgNode op |
|
||||
matchNode.asExpr() = op and
|
||||
(
|
||||
op.getExpr() instanceof Ast::RegExpMatchExpr or
|
||||
op.getExpr() instanceof Ast::NoRegExpMatchExpr
|
||||
) and
|
||||
(
|
||||
this.asExpr() = op.getLeftOperand() and regexp.asExpr() = op.getRightOperand()
|
||||
or
|
||||
this.asExpr() = op.getRightOperand() and regexp.asExpr() = op.getLeftOperand()
|
||||
)
|
||||
)
|
||||
or
|
||||
// Any of the methods on `String` that take a regexp.
|
||||
exists(CfgNodes::ExprNodes::MethodCallCfgNode call |
|
||||
matchNode.asExpr() = call and
|
||||
call.getExpr().getMethodName() =
|
||||
[
|
||||
"[]", "gsub", "gsub!", "index", "match", "match?", "partition", "rindex",
|
||||
"rpartition", "scan", "slice!", "split", "sub", "sub!"
|
||||
] and
|
||||
this.asExpr() = call.getReceiver() and
|
||||
regexp.asExpr() = call.getArgument(0)
|
||||
)
|
||||
or
|
||||
// A call to `match` or `match?` where the regexp is the receiver.
|
||||
exists(CfgNodes::ExprNodes::MethodCallCfgNode call |
|
||||
matchNode.asExpr() = call and
|
||||
call.getExpr().getMethodName() = ["match", "match?"] and
|
||||
regexp.asExpr() = call.getReceiver() and
|
||||
this.asExpr() = call.getArgument(0)
|
||||
)
|
||||
or
|
||||
// a case-when statement
|
||||
exists(CfgNodes::ExprNodes::CaseExprCfgNode caseWhen |
|
||||
matchNode.asExpr() = caseWhen and
|
||||
this.asExpr() = caseWhen.getValue()
|
||||
|
|
||||
regexp.asExpr() =
|
||||
caseWhen.getBranch(_).(CfgNodes::ExprNodes::WhenClauseCfgNode).getPattern(_)
|
||||
or
|
||||
regexp.asExpr() =
|
||||
caseWhen.getBranch(_).(CfgNodes::ExprNodes::InClauseCfgNode).getPattern()
|
||||
)
|
||||
)
|
||||
)
|
||||
term.getRootTerm() = exec.getTerm() and
|
||||
this = exec.getString()
|
||||
}
|
||||
|
||||
override RegExpTerm getRegExp() { result = term }
|
||||
|
||||
override DataFlow::Node getHighlight() { result = matchNode }
|
||||
override DataFlow::Node getHighlight() { result = exec }
|
||||
}
|
||||
|
||||
private predicate lengthGuard(CfgNodes::AstCfgNode g, CfgNode node, boolean branch) {
|
||||
|
||||
@@ -23,6 +23,13 @@ edges
|
||||
| PolynomialReDoS.rb:29:9:29:18 | ...[...] : | PolynomialReDoS.rb:30:5:30:5 | b |
|
||||
| PolynomialReDoS.rb:31:9:31:14 | call to params : | PolynomialReDoS.rb:31:9:31:18 | ...[...] : |
|
||||
| PolynomialReDoS.rb:31:9:31:18 | ...[...] : | PolynomialReDoS.rb:32:5:32:5 | c |
|
||||
| PolynomialReDoS.rb:54:12:54:17 | call to params : | PolynomialReDoS.rb:54:12:54:24 | ...[...] : |
|
||||
| PolynomialReDoS.rb:54:12:54:24 | ...[...] : | PolynomialReDoS.rb:56:38:56:41 | name : |
|
||||
| PolynomialReDoS.rb:54:12:54:24 | ...[...] : | PolynomialReDoS.rb:58:37:58:40 | name : |
|
||||
| PolynomialReDoS.rb:56:38:56:41 | name : | PolynomialReDoS.rb:61:33:61:37 | input : |
|
||||
| PolynomialReDoS.rb:58:37:58:40 | name : | PolynomialReDoS.rb:65:42:65:46 | input : |
|
||||
| PolynomialReDoS.rb:61:33:61:37 | input : | PolynomialReDoS.rb:62:5:62:9 | input |
|
||||
| PolynomialReDoS.rb:65:42:65:46 | input : | PolynomialReDoS.rb:66:5:66:9 | input |
|
||||
nodes
|
||||
| PolynomialReDoS.rb:4:12:4:17 | call to params : | semmle.label | call to params : |
|
||||
| PolynomialReDoS.rb:4:12:4:24 | ...[...] : | semmle.label | ...[...] : |
|
||||
@@ -52,6 +59,14 @@ nodes
|
||||
| PolynomialReDoS.rb:32:5:32:5 | c | semmle.label | c |
|
||||
| PolynomialReDoS.rb:42:10:42:13 | name | semmle.label | name |
|
||||
| PolynomialReDoS.rb:47:10:47:13 | name | semmle.label | name |
|
||||
| PolynomialReDoS.rb:54:12:54:17 | call to params : | semmle.label | call to params : |
|
||||
| PolynomialReDoS.rb:54:12:54:24 | ...[...] : | semmle.label | ...[...] : |
|
||||
| PolynomialReDoS.rb:56:38:56:41 | name : | semmle.label | name : |
|
||||
| PolynomialReDoS.rb:58:37:58:40 | name : | semmle.label | name : |
|
||||
| PolynomialReDoS.rb:61:33:61:37 | input : | semmle.label | input : |
|
||||
| PolynomialReDoS.rb:62:5:62:9 | input | semmle.label | input |
|
||||
| PolynomialReDoS.rb:65:42:65:46 | input : | semmle.label | input : |
|
||||
| PolynomialReDoS.rb:66:5:66:9 | input | semmle.label | input |
|
||||
subpaths
|
||||
#select
|
||||
| PolynomialReDoS.rb:10:5:10:17 | ... =~ ... | PolynomialReDoS.rb:4:12:4:17 | call to params : | PolynomialReDoS.rb:10:5:10:8 | name | This $@ that depends on a $@ may run slow on strings with many repetitions of ' '. | PolynomialReDoS.rb:7:19:7:21 | \\s+ | regular expression | PolynomialReDoS.rb:4:12:4:17 | call to params | user-provided value |
|
||||
@@ -74,3 +89,5 @@ subpaths
|
||||
| PolynomialReDoS.rb:32:5:32:20 | call to sub! | PolynomialReDoS.rb:31:9:31:14 | call to params : | PolynomialReDoS.rb:32:5:32:5 | c | This $@ that depends on a $@ may run slow on strings with many repetitions of ' '. | PolynomialReDoS.rb:7:19:7:21 | \\s+ | regular expression | PolynomialReDoS.rb:31:9:31:14 | call to params | user-provided value |
|
||||
| PolynomialReDoS.rb:42:5:45:7 | case ... | PolynomialReDoS.rb:4:12:4:17 | call to params : | PolynomialReDoS.rb:42:10:42:13 | name | This $@ that depends on a $@ may run slow on strings with many repetitions of ' '. | PolynomialReDoS.rb:7:19:7:21 | \\s+ | regular expression | PolynomialReDoS.rb:4:12:4:17 | call to params | user-provided value |
|
||||
| PolynomialReDoS.rb:47:5:50:7 | case ... | PolynomialReDoS.rb:4:12:4:17 | call to params : | PolynomialReDoS.rb:47:10:47:13 | name | This $@ that depends on a $@ may run slow on strings with many repetitions of ' '. | PolynomialReDoS.rb:48:14:48:16 | \\s+ | regular expression | PolynomialReDoS.rb:4:12:4:17 | call to params | user-provided value |
|
||||
| PolynomialReDoS.rb:62:5:62:22 | call to gsub | PolynomialReDoS.rb:54:12:54:17 | call to params : | PolynomialReDoS.rb:62:5:62:9 | input | This $@ that depends on a $@ may run slow on strings with many repetitions of ' '. | PolynomialReDoS.rb:56:31:56:33 | \\s+ | regular expression | PolynomialReDoS.rb:54:12:54:17 | call to params | user-provided value |
|
||||
| PolynomialReDoS.rb:66:5:66:34 | call to match? | PolynomialReDoS.rb:54:12:54:17 | call to params : | PolynomialReDoS.rb:66:5:66:9 | input | This $@ that depends on a $@ may run slow on strings with many repetitions of ' '. | PolynomialReDoS.rb:58:30:58:32 | \\s+ | regular expression | PolynomialReDoS.rb:54:12:54:17 | call to params | user-provided value |
|
||||
|
||||
Reference in New Issue
Block a user