From eec57d4f25bfca3eb6e5b4339169df532df2ff44 Mon Sep 17 00:00:00 2001 From: Joe Farebrother Date: Tue, 5 Apr 2022 15:39:46 +0100 Subject: [PATCH] Simplify dataflow logic by using only one configuration, and expessing more sinks with models-as-data --- .../code/java/regex/RegexFlowConfigs.qll | 198 +++++++----------- .../code/java/regex/RegexFlowModels.qll | 41 ++-- 2 files changed, 93 insertions(+), 146 deletions(-) diff --git a/java/ql/lib/semmle/code/java/regex/RegexFlowConfigs.qll b/java/ql/lib/semmle/code/java/regex/RegexFlowConfigs.qll index 6339ff238f3..c400b521f80 100644 --- a/java/ql/lib/semmle/code/java/regex/RegexFlowConfigs.qll +++ b/java/ql/lib/semmle/code/java/regex/RegexFlowConfigs.qll @@ -13,87 +13,53 @@ private class ExploitableStringLiteral extends StringLiteral { ExploitableStringLiteral() { this.getValue().matches(["%+%", "%*%", "%{%}%"]) } } -private class RegexCompileFlowConf extends DataFlow2::Configuration { - RegexCompileFlowConf() { this = "RegexCompileFlowConfig" } - - override predicate isSource(DataFlow::Node node) { - node.asExpr() instanceof ExploitableStringLiteral - } - - override predicate isSink(DataFlow::Node node) { - sinkNode(node, ["regex-compile", "regex-compile-match", "regex-compile-find"]) - } - - override predicate isBarrier(DataFlow::Node node) { - node.getEnclosingCallable().getDeclaringType() instanceof NonSecurityTestClass - } -} - /** - * Holds if `s` is used as a regex, with the mode `mode` (if known). - * If regex mode is not known, `mode` will be `"None"`. - * - * As an optimisation, only regexes containing an infinite repitition quatifier (`+`, `*`, or `{x,}`) - * and therefore may be relevant for ReDoS queries are considered. + * Holds if `kind` is an external sink kind that is relevant for regex flow. + * `full` is true if sinks with this kind match against the full string of its input. + * `strArg` is the index of the argument to methods with this sink kind that contan the string to be matched against, + * where -1 is the qualifier; or -2 if no such argument exists. */ -predicate usedAsRegex(StringLiteral s, string mode, boolean match_full_string) { - exists(DataFlow::Node sink | - any(RegexCompileFlowConf c).hasFlow(DataFlow2::exprNode(s), sink) and - mode = "None" and // TODO: proper mode detection - (if matchesFullString(sink) then match_full_string = true else match_full_string = false) +private predicate regexSinkKindInfo(string kind, boolean full, int strArg) { + sinkModel(_, _, _, _, _, _, _, kind) and + exists(string fullStr, string strArgStr | + ( + full = true and fullStr = "f" + or + full = false and fullStr = "" + ) and + ( + strArgStr.toInt() = strArg + or + strArg = -2 and + strArgStr = "" + ) + | + kind = "regex-use[" + fullStr + strArgStr + "]" ) } -/** - * Holds if the regex that flows to `sink` is used to match against a full string, - * as though it was implicitly surrounded by ^ and $. - */ -private predicate matchesFullString(DataFlow::Node sink) { - sinkNode(sink, "regex-compile-match") - or - exists(DataFlow::Node matchSource, RegexCompileToMatchConf conf | - matchSource.asExpr().(MethodAccess).getAnArgument() = sink.asExpr() and - conf.hasFlow(matchSource, _) - ) -} +/** A sink that is relevant for regex flow. */ +private class RegexFlowSink extends DataFlow::Node { + boolean full; + int strArg; -private class RegexCompileToMatchConf extends DataFlow2::Configuration { - RegexCompileToMatchConf() { this = "RegexCompileToMatchConfig" } - - override predicate isSource(DataFlow::Node node) { sourceNode(node, "regex-compile") } - - override predicate isSink(DataFlow::Node node) { sinkNode(node, "regex-match") } - - override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) { - exists(MethodAccess ma | node2.asExpr() = ma and node1.asExpr() = ma.getQualifier() | - ma.getMethod().hasQualifiedName("java.util.regex", "Pattern", "matcher") + RegexFlowSink() { + exists(string kind | + regexSinkKindInfo(kind, full, strArg) and + sinkNode(this, kind) ) } -} -/** - * A method access that can match a regex against a string - */ -abstract class RegexMatchMethodAccess extends MethodAccess { - string package; - string type; - string name; - int regexArg; - int stringArg; - Method m; + /** Holds if a regex that flows here is matched against a full string (rather than a substring). */ + predicate matchesFullString() { full = true } - RegexMatchMethodAccess() { - this.getMethod().getSourceDeclaration().overrides*(m) and - m.hasQualifiedName(package, type, name) and - regexArg in [-1 .. m.getNumberOfParameters() - 1] and - stringArg in [-1 .. m.getNumberOfParameters() - 1] + /** Gets the string expression that a regex that flows here is matched against, if any. */ + Expr getStringArgument() { + exists(MethodAccess ma | + this.asExpr() = argOf(ma, _) and + result = argOf(ma, strArg) + ) } - - /** Gets the argument of this call that the regex to be matched against flows into. */ - Expr getRegexArg() { result = argOf(this, regexArg) } - - /** Gets the argument of this call that the string being matched flows into. */ - Expr getStringArg() { result = argOf(this, stringArg) } } private Expr argOf(MethodAccess ma, int arg) { @@ -115,35 +81,7 @@ class RegexAdditionalFlowStep extends Unit { abstract predicate step(DataFlow::Node node1, DataFlow::Node node2); } -// TODO: can this be done with the models-as-data framework? -private class JdkRegexMatchMethodAccess extends RegexMatchMethodAccess { - JdkRegexMatchMethodAccess() { - package = "java.util.regex" and - type = "Pattern" and - ( - name = "matcher" and regexArg = -1 and stringArg = 0 - or - name = "matches" and regexArg = 0 and stringArg = 1 - or - name = "split" and regexArg = -1 and stringArg = 0 - or - name = "splitAsStream" and regexArg = -1 and stringArg = 0 - ) - or - package = "java.lang" and - type = "String" and - name = ["matches", "split", "replaceAll", "replaceFirst"] and - regexArg = 0 and - stringArg = -1 - or - package = "java.util.function" and - type = "Predicate" and - name = "test" and - regexArg = -1 and - stringArg = 0 - } -} - +// TODO: This may be able to be done with models-as-data if query-specific flow steps beome supported. private class JdkRegexFlowStep extends RegexAdditionalFlowStep { override predicate step(DataFlow::Node node1, DataFlow::Node node2) { exists(MethodAccess ma, Method m, string package, string type, string name, int arg | @@ -155,7 +93,7 @@ private class JdkRegexFlowStep extends RegexAdditionalFlowStep { package = "java.util.regex" and type = "Pattern" and ( - name = ["asMatchPredicate", "asPredicate"] and + name = ["asMatchPredicate", "asPredicate", "matcher"] and arg = -1 or name = "compile" and @@ -170,16 +108,6 @@ private class JdkRegexFlowStep extends RegexAdditionalFlowStep { } } -private class GuavaRegexMatchMethodAccess extends RegexMatchMethodAccess { - GuavaRegexMatchMethodAccess() { - package = "com.google.common.base" and - regexArg = -1 and - stringArg = 0 and - type = ["Splitter", "Splitter$MapSplitter"] and - name = ["split", "splitToList"] - } -} - private class GuavaRegexFlowStep extends RegexAdditionalFlowStep { override predicate step(DataFlow::Node node1, DataFlow::Node node2) { exists(MethodAccess ma, Method m, string package, string type, string name, int arg | @@ -209,20 +137,46 @@ private class GuavaRegexFlowStep extends RegexAdditionalFlowStep { } } -private class RegexMatchFlowConf extends DataFlow2::Configuration { - RegexMatchFlowConf() { this = "RegexMatchFlowConf" } +private class RegexFlowConf extends DataFlow2::Configuration { + RegexFlowConf() { this = "RegexFlowConfig" } - override predicate isSource(DataFlow::Node src) { - src.asExpr() instanceof ExploitableStringLiteral + override predicate isSource(DataFlow::Node node) { + node.asExpr() instanceof ExploitableStringLiteral } - override predicate isSink(DataFlow::Node sink) { - exists(RegexMatchMethodAccess ma | sink.asExpr() = ma.getRegexArg()) - } + override predicate isSink(DataFlow::Node node) { node instanceof RegexFlowSink } override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) { any(RegexAdditionalFlowStep s).step(node1, node2) } + + override predicate isBarrier(DataFlow::Node node) { + node.getEnclosingCallable().getDeclaringType() instanceof NonSecurityTestClass + } +} + +/** + * Holds if `regex` is used as a regex, with the mode `mode` (if known). + * If regex mode is not known, `mode` will be `"None"`. + * + * As an optimisation, only regexes containing an infinite repitition quatifier (`+`, `*`, or `{x,}`) + * and therefore may be relevant for ReDoS queries are considered. + */ +predicate usedAsRegex(StringLiteral regex, string mode, boolean match_full_string) { + any(RegexFlowConf c).hasFlow(DataFlow2::exprNode(regex), _) and + mode = "None" and // TODO: proper mode detection + (if matchesFullString(regex) then match_full_string = true else match_full_string = false) +} + +/** + * Holds if `regex` is used as a regular expression that is matched against a full string, + * as though it was implicitly surrounded by ^ and $. + */ +private predicate matchesFullString(StringLiteral regex) { + exists(RegexFlowConf c, RegexFlowSink sink | + sink.matchesFullString() and + c.hasFlow(DataFlow2::exprNode(regex), sink) + ) } /** @@ -232,12 +186,8 @@ private class RegexMatchFlowConf extends DataFlow2::Configuration { * and therefore may be relevant for ReDoS queries are considered. */ predicate regexMatchedAgainst(StringLiteral regex, Expr str) { - exists( - DataFlow::Node src, DataFlow::Node sink, RegexMatchMethodAccess ma, RegexMatchFlowConf conf - | - src.asExpr() = regex and - sink.asExpr() = ma.getRegexArg() and - conf.hasFlow(src, sink) and - str = ma.getStringArg() + exists(RegexFlowConf c, RegexFlowSink sink | + str = sink.getStringArgument() and + c.hasFlow(DataFlow2::exprNode(regex), sink) ) } diff --git a/java/ql/lib/semmle/code/java/regex/RegexFlowModels.qll b/java/ql/lib/semmle/code/java/regex/RegexFlowModels.qll index fd0858639c4..6934540116f 100644 --- a/java/ql/lib/semmle/code/java/regex/RegexFlowModels.qll +++ b/java/ql/lib/semmle/code/java/regex/RegexFlowModels.qll @@ -3,33 +3,30 @@ import java import semmle.code.java.dataflow.ExternalFlow -private class RegexSourceCsv extends SourceModelCsv { - override predicate row(string row) { - row = - [ - //"namespace;type;subtypes;name;signature;ext;output;kind" - "java.util.regex;Pattern;false;compile;(String);;ReturnValue;regex-compile", - ] - } -} - private class RegexSinkCsv extends SinkModelCsv { override predicate row(string row) { row = [ //"namespace;type;subtypes;name;signature;ext;input;kind" - "java.util.regex;Pattern;false;compile;(String);;Argument[0];regex-compile", - "java.util.regex;Pattern;false;compile;(String,int);;Argument[0];regex-compile", - "java.util.regex;Pattern;false;matches;(String,CharSequence);;Argument[0];regex-compile-match", - "java.lang;String;false;matches;(String);;Argument[0];regex-compile-match", - "java.lang;String;false;split;(String);;Argument[0];regex-compile-find", - "java.lang;String;false;split;(String,int);;Argument[0];regex-compile-find", - "java.lang;String;false;replaceAll;(String,String);;Argument[0];regex-compile-find", - "java.lang;String;false;replaceFirst;(String,String);;Argument[0];regex-compile-find", - "com.google.common.base;Splitter;false;onPattern;(String);;Argument[0];regex-compile-find", - // regex-match sinks - "java.util.regex;Pattern;false;asMatchPredicate;();;Argument[-1];regex-match", - "java.util.regex;Matcher;false;matches;();;Argument[-1];regex-match", + "java.util.regex;Matcher;false;matches;();;Argument[-1];regex-use[f]", + "java.util.regex;Pattern;false;asMatchPredicate;();;Argument[-1];regex-use[f]", + "java.util.regex;Pattern;false;compile;(String);;Argument[0];regex-use[]", + "java.util.regex;Pattern;false;compile;(String,int);;Argument[0];regex-use[]", + "java.util.regex;Pattern;false;matcher;(CharSequence);;Argument[-1];regex-use[0]", + "java.util.regex;Pattern;false;matches;(String,CharSequence);;Argument[0];regex-use[f1]", + "java.util.regex;Pattern;false;split;(CharSequence);;Argument[-1];regex-use[0]", + "java.util.regex;Pattern;false;split;(CharSequence,int);;Argument[-1];regex-use[0]", + "java.util.regex;Pattern;false;splitAsStream;(CharSequence);;Argument[-1];regex-use[0]", + "java.util.function;Predicate;false;test;(Object);;Argument[-1];regex-use[0]", + "java.lang;String;false;matches;(String);;Argument[0];regex-use[f-1]", + "java.lang;String;false;split;(String);;Argument[0];regex-use[-1]", + "java.lang;String;false;split;(String,int);;Argument[0];regex-use[-1]", + "java.lang;String;false;replaceAll;(String,String);;Argument[0];regex-use[-1]", + "java.lang;String;false;replaceFirst;(String,String);;Argument[0];regex-use[-1]", + "com.google.common.base;Splitter;false;onPattern;(String);;Argument[0];regex-use[]", + "com.google.common.base;Splitter;false;split;(CharSequence);;Argument[-1];regex-use[0]", + "com.google.common.base;Splitter;false;splitToList;(CharSequence);;Argument[-1];regex-use[0]", + "com.google.common.base;Splitter$MapSplitter;false;split;(CharSequence);;Argument[-1];regex-use[0]", ] } }