From fd4f5096158734b6f5458f9b1a37b6874acd2cb6 Mon Sep 17 00:00:00 2001 From: Stephan Brandauer Date: Tue, 10 May 2022 15:19:54 +0200 Subject: [PATCH] add stringConcatenatedWith feature to help the model learn that string concatenation leaves are usually not sinks --- .../EndpointFeatures.qll | 65 ++++++++++++++++++- .../FeatureValue.expected | 12 ++++ .../test/generic_feature_testing/test.js | 6 ++ 3 files changed, 82 insertions(+), 1 deletion(-) diff --git a/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointFeatures.qll b/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointFeatures.qll index e9411d367a2..b637e119e0f 100644 --- a/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointFeatures.qll +++ b/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointFeatures.qll @@ -237,7 +237,8 @@ private newtype TEndpointFeature = TInputArgumentIndex() or TContextFunctionInterfaces() or TContextSurroundingFunctionParameters() or - TAssignedToPropName() + TAssignedToPropName() or + TStringConcatenatedWith() /** * An implementation of an endpoint feature: produces feature names and values for used in ML. 
@@ -492,6 +493,51 @@ class AssignedToPropName extends EndpointFeature, TAssignedToPropName { } } +/** + * The feature that shows the text an endpoint is being concatenated with. + * + * ### Example + * + * ```javascript + * const x = 'foo' + endpoint + 'bar'; // feature value is `'foo' -endpoint- 'bar'` + */ +class StringConcatenatedWith extends EndpointFeature, TStringConcatenatedWith { + override string getName() { result = "stringConcatenatedWith" } + + override string getValue(DataFlow::Node endpoint) { + exists(StringOps::ConcatenationRoot root | + root.getALeaf() = endpoint and + result = + concat(StringOps::ConcatenationLeaf p | + p.getRoot() = root and + ( + p.getStartLine() < endpoint.getStartLine() + or + p.getStartLine() = endpoint.getStartLine() and + p.getStartColumn() < endpoint.getStartColumn() + ) + | + SyntacticUtilities::renderStringConcatOperand(p), " + " + order by + p.getStartLine(), p.getStartColumn() + ) + " -endpoint- " + + concat(StringOps::ConcatenationLeaf p | + p.getRoot() = root and + ( + p.getStartLine() > endpoint.getStartLine() + or + p.getStartLine() = endpoint.getStartLine() and + p.getStartColumn() > endpoint.getStartColumn() + ) + | + SyntacticUtilities::renderStringConcatOperand(p), " + " + order by + p.getStartLine(), p.getStartColumn() + ) + ) + } +} + /** * The feature for the imports used in the callee of an invocation. * @@ -555,6 +601,23 @@ class ContextFunctionInterfaces extends EndpointFeature, TContextFunctionInterfa * Syntactic utilities for feature value computation. */ private module SyntacticUtilities { + bindingset[start, end] + string renderStringConcatOperands(DataFlow::Node root, int start, int end) { + result = + concat(int i, string operand | + i = [start .. 
end] and + operand = renderStringConcatOperand(StringConcatenation::getOperand(root, i)) + | + operand, " + " order by i + ) + } + + string renderStringConcatOperand(DataFlow::Node operand) { + if exists(unique(string v | operand.mayHaveStringValue(v))) + then result = "'" + any(string v | operand.mayHaveStringValue(v)) + "'" + else result = getSimpleAccessPath(operand) + } + /** Gets all the imports defined in the file containing the endpoint. */ string getImportPathsForFile(File file) { result = diff --git a/javascript/ql/experimental/adaptivethreatmodeling/test/generic_feature_testing/FeatureValue.expected b/javascript/ql/experimental/adaptivethreatmodeling/test/generic_feature_testing/FeatureValue.expected index e13c3567826..7eca43ac2ce 100644 --- a/javascript/ql/experimental/adaptivethreatmodeling/test/generic_feature_testing/FeatureValue.expected +++ b/javascript/ql/experimental/adaptivethreatmodeling/test/generic_feature_testing/FeatureValue.expected @@ -188,3 +188,15 @@ | test.js:22:21:22:28 | endpoint | enclosingFunctionBody | f endpoint 12 f p endpoint f p q endpoint o m endpoint o m p endpoint o m p q endpoint F endpoint o m m m endpoint f endpoint o x m endpoint o m x p m endpoint p endpoint foo bar baz endpoint foo bar endpoint f f o m endpoint | | test.js:22:21:22:28 | endpoint | enclosingFunctionName | | | test.js:22:21:22:28 | endpoint | fileImports | foo lib1 lib2 lib3 | +| test.js:33:50:33:57 | endpoint | calleeAccessPath | | +| test.js:33:50:33:57 | endpoint | calleeAccessPathWithStructuralInfo | | +| test.js:33:50:33:57 | endpoint | contextFunctionInterfaces | f(?)\nfoo()\ng()\nm() | +| test.js:33:50:33:57 | endpoint | contextSurroundingFunctionParameters | | +| test.js:33:50:33:57 | endpoint | fileImports | foo lib1 lib2 lib3 | +| test.js:33:50:33:57 | endpoint | stringConcatenatedWith | f() + '' | +| test.js:35:18:35:25 | endpoint | calleeAccessPath | | +| test.js:35:18:35:25 | endpoint | calleeAccessPathWithStructuralInfo | | +| 
test.js:35:18:35:25 | endpoint | contextFunctionInterfaces | f(?)\nfoo()\ng()\nm() | +| test.js:35:18:35:25 | endpoint | contextSurroundingFunctionParameters | | +| test.js:35:18:35:25 | endpoint | fileImports | foo lib1 lib2 lib3 | +| test.js:35:18:35:25 | endpoint | stringConcatenatedWith | 'foo' -endpoint- 'bar' | diff --git a/javascript/ql/experimental/adaptivethreatmodeling/test/generic_feature_testing/test.js b/javascript/ql/experimental/adaptivethreatmodeling/test/generic_feature_testing/test.js index c9ac63b906f..05ef54cc993 100644 --- a/javascript/ql/experimental/adaptivethreatmodeling/test/generic_feature_testing/test.js +++ b/javascript/ql/experimental/adaptivethreatmodeling/test/generic_feature_testing/test.js @@ -27,3 +27,9 @@ function f({ endpoint }) {} const g = async () => undefined; const o = { m: () => undefined } + +const url = f(); + +const x = f() + ""; + +const y = "foo"+ endpoint + "bar";