Simplify dataflow logic by using only one configuration, and expessing more sinks with models-as-data

This commit is contained in:
Joe Farebrother
2022-04-05 15:39:46 +01:00
parent 2a80540157
commit eec57d4f25
2 changed files with 93 additions and 146 deletions

View File

@@ -13,87 +13,53 @@ private class ExploitableStringLiteral extends StringLiteral {
ExploitableStringLiteral() { this.getValue().matches(["%+%", "%*%", "%{%}%"]) }
}
private class RegexCompileFlowConf extends DataFlow2::Configuration {
RegexCompileFlowConf() { this = "RegexCompileFlowConfig" }
override predicate isSource(DataFlow::Node node) {
node.asExpr() instanceof ExploitableStringLiteral
}
override predicate isSink(DataFlow::Node node) {
sinkNode(node, ["regex-compile", "regex-compile-match", "regex-compile-find"])
}
override predicate isBarrier(DataFlow::Node node) {
node.getEnclosingCallable().getDeclaringType() instanceof NonSecurityTestClass
}
}
/**
* Holds if `s` is used as a regex, with the mode `mode` (if known).
* If regex mode is not known, `mode` will be `"None"`.
*
* As an optimisation, only regexes containing an infinite repitition quatifier (`+`, `*`, or `{x,}`)
* and therefore may be relevant for ReDoS queries are considered.
* Holds if `kind` is an external sink kind that is relevant for regex flow.
* `full` is true if sinks with this kind match against the full string of its input.
* `strArg` is the index of the argument to methods with this sink kind that contan the string to be matched against,
* where -1 is the qualifier; or -2 if no such argument exists.
*/
predicate usedAsRegex(StringLiteral s, string mode, boolean match_full_string) {
exists(DataFlow::Node sink |
any(RegexCompileFlowConf c).hasFlow(DataFlow2::exprNode(s), sink) and
mode = "None" and // TODO: proper mode detection
(if matchesFullString(sink) then match_full_string = true else match_full_string = false)
private predicate regexSinkKindInfo(string kind, boolean full, int strArg) {
sinkModel(_, _, _, _, _, _, _, kind) and
exists(string fullStr, string strArgStr |
(
full = true and fullStr = "f"
or
full = false and fullStr = ""
) and
(
strArgStr.toInt() = strArg
or
strArg = -2 and
strArgStr = ""
)
|
kind = "regex-use[" + fullStr + strArgStr + "]"
)
}
/**
* Holds if the regex that flows to `sink` is used to match against a full string,
* as though it was implicitly surrounded by ^ and $.
*/
private predicate matchesFullString(DataFlow::Node sink) {
sinkNode(sink, "regex-compile-match")
or
exists(DataFlow::Node matchSource, RegexCompileToMatchConf conf |
matchSource.asExpr().(MethodAccess).getAnArgument() = sink.asExpr() and
conf.hasFlow(matchSource, _)
)
}
/** A sink that is relevant for regex flow. */
private class RegexFlowSink extends DataFlow::Node {
boolean full;
int strArg;
private class RegexCompileToMatchConf extends DataFlow2::Configuration {
RegexCompileToMatchConf() { this = "RegexCompileToMatchConfig" }
override predicate isSource(DataFlow::Node node) { sourceNode(node, "regex-compile") }
override predicate isSink(DataFlow::Node node) { sinkNode(node, "regex-match") }
override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
exists(MethodAccess ma | node2.asExpr() = ma and node1.asExpr() = ma.getQualifier() |
ma.getMethod().hasQualifiedName("java.util.regex", "Pattern", "matcher")
RegexFlowSink() {
exists(string kind |
regexSinkKindInfo(kind, full, strArg) and
sinkNode(this, kind)
)
}
}
/**
* A method access that can match a regex against a string
*/
abstract class RegexMatchMethodAccess extends MethodAccess {
string package;
string type;
string name;
int regexArg;
int stringArg;
Method m;
/** Holds if a regex that flows here is matched against a full string (rather than a substring). */
predicate matchesFullString() { full = true }
RegexMatchMethodAccess() {
this.getMethod().getSourceDeclaration().overrides*(m) and
m.hasQualifiedName(package, type, name) and
regexArg in [-1 .. m.getNumberOfParameters() - 1] and
stringArg in [-1 .. m.getNumberOfParameters() - 1]
/** Gets the string expression that a regex that flows here is matched against, if any. */
Expr getStringArgument() {
exists(MethodAccess ma |
this.asExpr() = argOf(ma, _) and
result = argOf(ma, strArg)
)
}
/** Gets the argument of this call that the regex to be matched against flows into. */
Expr getRegexArg() { result = argOf(this, regexArg) }
/** Gets the argument of this call that the string being matched flows into. */
Expr getStringArg() { result = argOf(this, stringArg) }
}
private Expr argOf(MethodAccess ma, int arg) {
@@ -115,35 +81,7 @@ class RegexAdditionalFlowStep extends Unit {
abstract predicate step(DataFlow::Node node1, DataFlow::Node node2);
}
// TODO: can this be done with the models-as-data framework?
private class JdkRegexMatchMethodAccess extends RegexMatchMethodAccess {
JdkRegexMatchMethodAccess() {
package = "java.util.regex" and
type = "Pattern" and
(
name = "matcher" and regexArg = -1 and stringArg = 0
or
name = "matches" and regexArg = 0 and stringArg = 1
or
name = "split" and regexArg = -1 and stringArg = 0
or
name = "splitAsStream" and regexArg = -1 and stringArg = 0
)
or
package = "java.lang" and
type = "String" and
name = ["matches", "split", "replaceAll", "replaceFirst"] and
regexArg = 0 and
stringArg = -1
or
package = "java.util.function" and
type = "Predicate" and
name = "test" and
regexArg = -1 and
stringArg = 0
}
}
// TODO: This may be able to be done with models-as-data if query-specific flow steps beome supported.
private class JdkRegexFlowStep extends RegexAdditionalFlowStep {
override predicate step(DataFlow::Node node1, DataFlow::Node node2) {
exists(MethodAccess ma, Method m, string package, string type, string name, int arg |
@@ -155,7 +93,7 @@ private class JdkRegexFlowStep extends RegexAdditionalFlowStep {
package = "java.util.regex" and
type = "Pattern" and
(
name = ["asMatchPredicate", "asPredicate"] and
name = ["asMatchPredicate", "asPredicate", "matcher"] and
arg = -1
or
name = "compile" and
@@ -170,16 +108,6 @@ private class JdkRegexFlowStep extends RegexAdditionalFlowStep {
}
}
private class GuavaRegexMatchMethodAccess extends RegexMatchMethodAccess {
GuavaRegexMatchMethodAccess() {
package = "com.google.common.base" and
regexArg = -1 and
stringArg = 0 and
type = ["Splitter", "Splitter$MapSplitter"] and
name = ["split", "splitToList"]
}
}
private class GuavaRegexFlowStep extends RegexAdditionalFlowStep {
override predicate step(DataFlow::Node node1, DataFlow::Node node2) {
exists(MethodAccess ma, Method m, string package, string type, string name, int arg |
@@ -209,20 +137,46 @@ private class GuavaRegexFlowStep extends RegexAdditionalFlowStep {
}
}
private class RegexMatchFlowConf extends DataFlow2::Configuration {
RegexMatchFlowConf() { this = "RegexMatchFlowConf" }
private class RegexFlowConf extends DataFlow2::Configuration {
RegexFlowConf() { this = "RegexFlowConfig" }
override predicate isSource(DataFlow::Node src) {
src.asExpr() instanceof ExploitableStringLiteral
override predicate isSource(DataFlow::Node node) {
node.asExpr() instanceof ExploitableStringLiteral
}
override predicate isSink(DataFlow::Node sink) {
exists(RegexMatchMethodAccess ma | sink.asExpr() = ma.getRegexArg())
}
override predicate isSink(DataFlow::Node node) { node instanceof RegexFlowSink }
override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
any(RegexAdditionalFlowStep s).step(node1, node2)
}
override predicate isBarrier(DataFlow::Node node) {
node.getEnclosingCallable().getDeclaringType() instanceof NonSecurityTestClass
}
}
/**
* Holds if `regex` is used as a regex, with the mode `mode` (if known).
* If regex mode is not known, `mode` will be `"None"`.
*
* As an optimisation, only regexes containing an infinite repitition quatifier (`+`, `*`, or `{x,}`)
* and therefore may be relevant for ReDoS queries are considered.
*/
predicate usedAsRegex(StringLiteral regex, string mode, boolean match_full_string) {
any(RegexFlowConf c).hasFlow(DataFlow2::exprNode(regex), _) and
mode = "None" and // TODO: proper mode detection
(if matchesFullString(regex) then match_full_string = true else match_full_string = false)
}
/**
* Holds if `regex` is used as a regular expression that is matched against a full string,
* as though it was implicitly surrounded by ^ and $.
*/
private predicate matchesFullString(StringLiteral regex) {
exists(RegexFlowConf c, RegexFlowSink sink |
sink.matchesFullString() and
c.hasFlow(DataFlow2::exprNode(regex), sink)
)
}
/**
@@ -232,12 +186,8 @@ private class RegexMatchFlowConf extends DataFlow2::Configuration {
* and therefore may be relevant for ReDoS queries are considered.
*/
predicate regexMatchedAgainst(StringLiteral regex, Expr str) {
exists(
DataFlow::Node src, DataFlow::Node sink, RegexMatchMethodAccess ma, RegexMatchFlowConf conf
|
src.asExpr() = regex and
sink.asExpr() = ma.getRegexArg() and
conf.hasFlow(src, sink) and
str = ma.getStringArg()
exists(RegexFlowConf c, RegexFlowSink sink |
str = sink.getStringArgument() and
c.hasFlow(DataFlow2::exprNode(regex), sink)
)
}

View File

@@ -3,33 +3,30 @@
import java
import semmle.code.java.dataflow.ExternalFlow
private class RegexSourceCsv extends SourceModelCsv {
override predicate row(string row) {
row =
[
//"namespace;type;subtypes;name;signature;ext;output;kind"
"java.util.regex;Pattern;false;compile;(String);;ReturnValue;regex-compile",
]
}
}
private class RegexSinkCsv extends SinkModelCsv {
override predicate row(string row) {
row =
[
//"namespace;type;subtypes;name;signature;ext;input;kind"
"java.util.regex;Pattern;false;compile;(String);;Argument[0];regex-compile",
"java.util.regex;Pattern;false;compile;(String,int);;Argument[0];regex-compile",
"java.util.regex;Pattern;false;matches;(String,CharSequence);;Argument[0];regex-compile-match",
"java.lang;String;false;matches;(String);;Argument[0];regex-compile-match",
"java.lang;String;false;split;(String);;Argument[0];regex-compile-find",
"java.lang;String;false;split;(String,int);;Argument[0];regex-compile-find",
"java.lang;String;false;replaceAll;(String,String);;Argument[0];regex-compile-find",
"java.lang;String;false;replaceFirst;(String,String);;Argument[0];regex-compile-find",
"com.google.common.base;Splitter;false;onPattern;(String);;Argument[0];regex-compile-find",
// regex-match sinks
"java.util.regex;Pattern;false;asMatchPredicate;();;Argument[-1];regex-match",
"java.util.regex;Matcher;false;matches;();;Argument[-1];regex-match",
"java.util.regex;Matcher;false;matches;();;Argument[-1];regex-use[f]",
"java.util.regex;Pattern;false;asMatchPredicate;();;Argument[-1];regex-use[f]",
"java.util.regex;Pattern;false;compile;(String);;Argument[0];regex-use[]",
"java.util.regex;Pattern;false;compile;(String,int);;Argument[0];regex-use[]",
"java.util.regex;Pattern;false;matcher;(CharSequence);;Argument[-1];regex-use[0]",
"java.util.regex;Pattern;false;matches;(String,CharSequence);;Argument[0];regex-use[f1]",
"java.util.regex;Pattern;false;split;(CharSequence);;Argument[-1];regex-use[0]",
"java.util.regex;Pattern;false;split;(CharSequence,int);;Argument[-1];regex-use[0]",
"java.util.regex;Pattern;false;splitAsStream;(CharSequence);;Argument[-1];regex-use[0]",
"java.util.function;Predicate;false;test;(Object);;Argument[-1];regex-use[0]",
"java.lang;String;false;matches;(String);;Argument[0];regex-use[f-1]",
"java.lang;String;false;split;(String);;Argument[0];regex-use[-1]",
"java.lang;String;false;split;(String,int);;Argument[0];regex-use[-1]",
"java.lang;String;false;replaceAll;(String,String);;Argument[0];regex-use[-1]",
"java.lang;String;false;replaceFirst;(String,String);;Argument[0];regex-use[-1]",
"com.google.common.base;Splitter;false;onPattern;(String);;Argument[0];regex-use[]",
"com.google.common.base;Splitter;false;split;(CharSequence);;Argument[-1];regex-use[0]",
"com.google.common.base;Splitter;false;splitToList;(CharSequence);;Argument[-1];regex-use[0]",
"com.google.common.base;Splitter$MapSplitter;false;split;(CharSequence);;Argument[-1];regex-use[0]",
]
}
}