add stringConcatenatedWith feature to help the model learn that string concatenation leaves are usually not sinks

This commit is contained in:
Stephan Brandauer
2022-05-10 15:19:54 +02:00
parent 4ba7243b1f
commit fd4f509615
3 changed files with 82 additions and 1 deletions

View File

@@ -237,7 +237,8 @@ private newtype TEndpointFeature =
TInputArgumentIndex() or
TContextFunctionInterfaces() or
TContextSurroundingFunctionParameters() or
TAssignedToPropName()
TAssignedToPropName() or
TStringConcatenatedWith()
/**
* An implementation of an endpoint feature: produces feature names and values for use in ML.
@@ -492,6 +493,51 @@ class AssignedToPropName extends EndpointFeature, TAssignedToPropName {
}
}
/**
 * The feature that shows the text an endpoint is being concatenated with.
 *
 * The value lists the rendered leaves of the enclosing string concatenation that start
 * before the endpoint, then the marker ` -endpoint- `, then the rendered leaves that start
 * after it. Leaves on each side are joined with ` + ` in source order.
 *
 * ### Example
 *
 * ```javascript
 * const x = 'foo' + endpoint + 'bar'; // feature value is `'foo' -endpoint- 'bar'`
 * ```
 */
class StringConcatenatedWith extends EndpointFeature, TStringConcatenatedWith {
  override string getName() { result = "stringConcatenatedWith" }

  override string getValue(DataFlow::Node endpoint) {
    exists(StringOps::ConcatenationRoot root, string prefix, string suffix |
      root.getALeaf() = endpoint and
      // Leaves of the same concatenation whose start position is before the endpoint's.
      prefix =
        concat(StringOps::ConcatenationLeaf leaf |
          leaf.getRoot() = root and
          (
            leaf.getStartLine() < endpoint.getStartLine()
            or
            leaf.getStartLine() = endpoint.getStartLine() and
            leaf.getStartColumn() < endpoint.getStartColumn()
          )
        |
          SyntacticUtilities::renderStringConcatOperand(leaf), " + "
          order by
            leaf.getStartLine(), leaf.getStartColumn()
        ) and
      // Leaves of the same concatenation whose start position is after the endpoint's.
      suffix =
        concat(StringOps::ConcatenationLeaf leaf |
          leaf.getRoot() = root and
          (
            leaf.getStartLine() > endpoint.getStartLine()
            or
            leaf.getStartLine() = endpoint.getStartLine() and
            leaf.getStartColumn() > endpoint.getStartColumn()
          )
        |
          SyntacticUtilities::renderStringConcatOperand(leaf), " + "
          order by
            leaf.getStartLine(), leaf.getStartColumn()
        ) and
      result = prefix + " -endpoint- " + suffix
    )
  }
}
/**
* The feature for the imports used in the callee of an invocation.
*
@@ -555,6 +601,23 @@ class ContextFunctionInterfaces extends EndpointFeature, TContextFunctionInterfa
* Syntactic utilities for feature value computation.
*/
private module SyntacticUtilities {
/**
 * Renders the operands of the string concatenation `root` at indices `start` to `end`
 * (both inclusive), joined with ` + ` in index order.
 *
 * Each operand is rendered with `renderStringConcatOperand`.
 */
bindingset[start, end]
string renderStringConcatOperands(DataFlow::Node root, int start, int end) {
  result =
    concat(int index, string rendered |
      index in [start .. end] and
      rendered = renderStringConcatOperand(StringConcatenation::getOperand(root, index))
    |
      rendered, " + " order by index
    )
}
/**
 * Renders a single string concatenation operand.
 *
 * If `operand` has exactly one string value according to `mayHaveStringValue`, the result
 * is that value wrapped in single quotes. Otherwise the result is whatever
 * `getSimpleAccessPath` yields for `operand`.
 */
string renderStringConcatOperand(DataFlow::Node operand) {
  // Exactly one known string value: render it as a quoted literal.
  exists(string value | value = unique(string v | operand.mayHaveStringValue(v)) |
    result = "'" + value + "'"
  )
  or
  // Zero or several candidate values: fall back to the access path.
  not exists(unique(string v | operand.mayHaveStringValue(v))) and
  result = getSimpleAccessPath(operand)
}
/** Gets all the imports defined in the file containing the endpoint. */
string getImportPathsForFile(File file) {
result =

View File

@@ -188,3 +188,15 @@
| test.js:22:21:22:28 | endpoint | enclosingFunctionBody | f endpoint 12 f p endpoint f p q endpoint o m endpoint o m p endpoint o m p q endpoint F endpoint o m m m endpoint f endpoint o x m endpoint o m x p m endpoint p endpoint foo bar baz endpoint foo bar endpoint f f o m endpoint |
| test.js:22:21:22:28 | endpoint | enclosingFunctionName | |
| test.js:22:21:22:28 | endpoint | fileImports | foo lib1 lib2 lib3 |
| test.js:33:50:33:57 | endpoint | calleeAccessPath | |
| test.js:33:50:33:57 | endpoint | calleeAccessPathWithStructuralInfo | |
| test.js:33:50:33:57 | endpoint | contextFunctionInterfaces | f(?)\nfoo()\ng()\nm() |
| test.js:33:50:33:57 | endpoint | contextSurroundingFunctionParameters | |
| test.js:33:50:33:57 | endpoint | fileImports | foo lib1 lib2 lib3 |
| test.js:33:50:33:57 | endpoint | stringConcatenatedWith | f() + '<a target="_blank" href="' -endpoint- '"></a>' |
| test.js:35:18:35:25 | endpoint | calleeAccessPath | |
| test.js:35:18:35:25 | endpoint | calleeAccessPathWithStructuralInfo | |
| test.js:35:18:35:25 | endpoint | contextFunctionInterfaces | f(?)\nfoo()\ng()\nm() |
| test.js:35:18:35:25 | endpoint | contextSurroundingFunctionParameters | |
| test.js:35:18:35:25 | endpoint | fileImports | foo lib1 lib2 lib3 |
| test.js:35:18:35:25 | endpoint | stringConcatenatedWith | 'foo' -endpoint- 'bar' |

View File

@@ -27,3 +27,9 @@ function f({ endpoint }) {}
const g = async () => undefined;
const o = { m: () => undefined }
const url = f();
const x = f() + "<a target=\"_blank\" href=\"" + endpoint + "\"></a>";
const y = "foo"+ endpoint + "bar";