diff --git a/docs/codeql/codeql-language-guides/customizing-library-models-for-javascript.rst b/docs/codeql/codeql-language-guides/customizing-library-models-for-javascript.rst index 2d58a4ba821..99f6edd055c 100644 --- a/docs/codeql/codeql-language-guides/customizing-library-models-for-javascript.rst +++ b/docs/codeql/codeql-language-guides/customizing-library-models-for-javascript.rst @@ -220,6 +220,59 @@ For example, the **mysql** model that is included with the CodeQL JS analysis in - ["mysql.Connection", "mysql", "Member[createConnection].ReturnValue"] +Example: Using fuzzy models to simplify modeling +------------------------------------------------ + +In this example, we'll show how to add the following SQL injection sink using a "fuzzy" model: + +.. code-block:: ts + + import * as mysql from 'mysql'; + const pool = mysql.createPool({...}); + pool.getConnection((err, conn) => { + conn.query(q, (err, rows) => {...}); // <-- add 'q' as a SQL injection sink + }); + +We can recognize this using a fuzzy model, as shown in the following extension: + +.. code-block:: yaml + + extensions: + - addsTo: + pack: codeql/javascript-all + extensible: sinkModel + data: + - ["mysql", "Fuzzy.Member[query].Argument[0]", "sql-injection"] + +- The first column, **"mysql"**, begins the search at places where the `mysql` package is imported. +- **Fuzzy** selects all objects that appear to originate from the `mysql` package, such as the `pool`, `conn`, `err`, and `rows` objects. +- **Member[query]** selects the **query** member from any of those objects. In this case, the only such member is `conn.query`. + In principle, this would also find expressions such as `pool.query` and `err.query`, but in practice such expressions + are not likely to occur, because the `pool` and `err` objects do not have a member named `query`. +- **Argument[0]** selects the first argument of a call to the selected member, that is, the `q` argument to `conn.query`. +- **sql-injection** indicates that this is considered as a sink for the SQL injection query. + +For reference, a more detailed model might look like this, as described in the preceding examples: + +.. code-block:: yaml + + extensions: + - addsTo: + pack: codeql/javascript-all + extensible: sinkModel + data: + - ["mysql.Connection", "Member[query].Argument[0]", "sql-injection"] + + - addsTo: + pack: codeql/javascript-all + extensible: typeModel + data: + - ["mysql.Pool", "mysql", "Member[createPool].ReturnValue"] + - ["mysql.Connection", "mysql.Pool", "Member[getConnection].Argument[0].Parameter[1]"] + +The model using the **Fuzzy** component is simpler, at the cost of being approximate. +This technique is useful when modeling a large or complex library, where it is difficult to write a detailed model. + Example: Adding flow through 'decodeURIComponent' ------------------------------------------------- @@ -431,6 +484,9 @@ The following components are supported: - **MapValue** selects a value of a map object. - **Awaited** selects the value of a promise. - **Instance** selects instances of a class. +- **Fuzzy** selects all values that are derived from the current value through a combination of the other operations described in this list. + For example, this can be used to find all values that appear to originate from a particular package. This can be useful for finding method calls + from a known package, but where the receiver type is not known or is difficult to model. The following components are called "call site filters". They select a subset of the previously-selected calls, if the call fits certain criteria: diff --git a/javascript/ql/lib/semmle/javascript/frameworks/Vue.qll b/javascript/ql/lib/semmle/javascript/frameworks/Vue.qll index e4929e4eb39..096877900c9 100644 --- a/javascript/ql/lib/semmle/javascript/frameworks/Vue.qll +++ b/javascript/ql/lib/semmle/javascript/frameworks/Vue.qll @@ -20,9 +20,7 @@ module Vue { private class VueExportEntryPoint extends API::EntryPoint { VueExportEntryPoint() { this = "VueExportEntryPoint" } - override DataFlow::Node getASink() { - result = any(SingleFileComponent c).getModule().getDefaultOrBulkExport() - } + override DataFlow::Node getASink() { result = getModuleFromVueFile(_).getDefaultOrBulkExport() } } /** @@ -455,6 +453,13 @@ module Vue { } } + private Module getModuleFromVueFile(VueFile file) { + exists(HTML::ScriptElement elem | + xmlElements(elem, _, _, _, file) and // Avoid materializing all of Locatable.getFile() + result.getTopLevel() = elem.getScript() + ) + } + /** * A single file Vue component in a `.vue` file. */ @@ -482,12 +487,7 @@ module Vue { } /** Gets the module defined by the `script` tag in this .vue file, if any. */ - Module getModule() { - exists(HTML::ScriptElement elem | - xmlElements(elem, _, _, _, file) and // Avoid materializing all of Locatable.getFile() - result.getTopLevel() = elem.getScript() - ) - } + Module getModule() { result = getModuleFromVueFile(file) } override API::Node getComponentRef() { // There is no explicit `new Vue()` call in .vue files, so instead get all the imports diff --git a/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModels.qll b/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModels.qll index 2e598711fcc..1cb4e189339 100644 --- a/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModels.qll +++ b/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModels.qll @@ -454,6 +454,14 @@ private API::Node getNodeFromPath(string type, AccessPath path, int n) { or // Apply a type step typeStep(getNodeFromPath(type, path, n), result) + or + // Apply a fuzzy step (without advancing 'n') + path.getToken(n).getName() = "Fuzzy" and + result = Specific::getAFuzzySuccessor(getNodeFromPath(type, path, n)) + or + // Skip a fuzzy step (advance 'n' without changing the current node) + path.getToken(n - 1).getName() = "Fuzzy" and + result = getNodeFromPath(type, path, n - 1) } /** @@ -500,6 +508,14 @@ private API::Node getNodeFromSubPath(API::Node base, AccessPath subPath, int n) // will themselves find by following type-steps. n > 0 and n < subPath.getNumToken() + or + // Apply a fuzzy step (without advancing 'n') + subPath.getToken(n).getName() = "Fuzzy" and + result = Specific::getAFuzzySuccessor(getNodeFromSubPath(base, subPath, n)) + or + // Skip a fuzzy step (advance 'n' without changing the current node) + subPath.getToken(n - 1).getName() = "Fuzzy" and + result = getNodeFromSubPath(base, subPath, n - 1) } /** @@ -561,7 +577,7 @@ private Specific::InvokeNode getInvocationFromPath(string type, AccessPath path) */ bindingset[name] private predicate isValidTokenNameInIdentifyingAccessPath(string name) { - name = ["Argument", "Parameter", "ReturnValue", "WithArity", "TypeVar"] + name = ["Argument", "Parameter", "ReturnValue", "WithArity", "TypeVar", "Fuzzy"] or Specific::isExtraValidTokenNameInIdentifyingAccessPath(name) } @@ -572,7 +588,7 @@ private predicate isValidTokenNameInIdentifyingAccessPath(string name) { */ bindingset[name] private predicate isValidNoArgumentTokenInIdentifyingAccessPath(string name) { - name = "ReturnValue" + name = ["ReturnValue", "Fuzzy"] or Specific::isExtraValidNoArgumentTokenInIdentifyingAccessPath(name) } diff --git a/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModelsSpecific.qll b/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModelsSpecific.qll index ee2fd74b5de..4c9c8e147eb 100644 --- a/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModelsSpecific.qll +++ b/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModelsSpecific.qll @@ -192,6 +192,43 @@ API::Node getExtraSuccessorFromInvoke(API::InvokeNode node, AccessPathToken toke result.asSink() = node.(DataFlow::CallNode).getReceiver() } +/** + * Holds if `name` is the name of a built-in method on Object, Array, or String. + */ +private predicate isCommonBuiltinMethodName(string name) { + exists(JS::ExternalInstanceMemberDecl member | + member.getBaseName() in ["Object", "Array", "String"] and + name = member.getName() + ) +} + +/** + * Holds if fuzzy evaluation should not traverse through `call`. + */ +private predicate blockFuzzyCall(DataFlow::CallNode call) { + isCommonBuiltinMethodName(call.getCalleeName()) +} + +pragma[inline] +API::Node getAFuzzySuccessor(API::Node node) { + result = node.getAMember() and + // Block traversal into calls to built-ins like .toString() and .substring() + // Since there is no API node representing the call itself, block flow into the callee node. + not exists(DataFlow::CallNode call | + node.asSource() = call.getCalleeNode() and + blockFuzzyCall(call) + ) + or + result = node.getAParameter() + or + result = node.getReturn() + or + result = node.getPromised() + or + // include 'this' parameters but not 'this' arguments + result = node.getReceiver() and result.asSource() instanceof DataFlow::ThisNode +} + /** * Holds if `invoke` matches the JS-specific call site filter in `token`. */ diff --git a/javascript/ql/test/library-tests/frameworks/data/test.expected b/javascript/ql/test/library-tests/frameworks/data/test.expected index 39439f64629..28d7229789d 100644 --- a/javascript/ql/test/library-tests/frameworks/data/test.expected +++ b/javascript/ql/test/library-tests/frameworks/data/test.expected @@ -66,6 +66,14 @@ taintFlow | test.js:231:59:231:66 | source() | test.js:231:59:231:66 | source() | | test.js:232:59:232:66 | source() | test.js:232:59:232:66 | source() | | test.js:233:59:233:66 | source() | test.js:233:59:233:66 | source() | +| test.js:237:21:237:28 | source() | test.js:237:21:237:28 | source() | +| test.js:238:25:238:32 | source() | test.js:238:25:238:32 | source() | +| test.js:239:27:239:34 | source() | test.js:239:27:239:34 | source() | +| test.js:241:17:241:24 | source() | test.js:241:17:241:24 | source() | +| test.js:244:33:244:40 | source() | test.js:244:33:244:40 | source() | +| test.js:249:28:249:35 | source() | test.js:249:28:249:35 | source() | +| test.js:252:15:252:22 | source() | test.js:252:15:252:22 | source() | +| test.js:254:32:254:39 | source() | test.js:254:32:254:39 | source() | isSink | test.js:54:18:54:25 | source() | test-sink | | test.js:55:22:55:29 | source() | test-sink | @@ -136,6 +144,14 @@ isSink | test.js:231:59:231:66 | source() | test-sink | | test.js:232:59:232:66 | source() | test-sink | | test.js:233:59:233:66 | source() | test-sink | +| test.js:237:21:237:28 | source() | test-sink | +| test.js:238:25:238:32 | source() | test-sink | +| test.js:239:27:239:34 | source() | test-sink | +| test.js:241:17:241:24 | source() | test-sink | +| test.js:244:33:244:40 | source() | test-sink | +| test.js:249:28:249:35 | source() | test-sink | +| test.js:252:15:252:22 | source() | test-sink | +| test.js:254:32:254:39 | source() | test-sink | syntaxErrors | Member[foo | | Member[foo] .Member[bar] | diff --git a/javascript/ql/test/library-tests/frameworks/data/test.js b/javascript/ql/test/library-tests/frameworks/data/test.js index d34940bd065..ac702b82a8c 100644 --- a/javascript/ql/test/library-tests/frameworks/data/test.js +++ b/javascript/ql/test/library-tests/frameworks/data/test.js @@ -232,3 +232,27 @@ function typeVars() { testlib.typevar.left.x.getThis().getThis().right.mySink(source()); // NOT OK testlib.typevar.left.x.right.getThis().getThis().mySink(source()); // NOT OK } + +function fuzzy() { + testlib.fuzzyCall(source()); // NOT OK + testlib.foo.fuzzyCall(source()); // NOT OK + testlib.foo().fuzzyCall(source()); // NOT OK + new testlib.Blah().foo.bar(async p => { + p.fuzzyCall(source()); // NOT OK + p.otherCall(source()); // OK + p.fuzzyCall().laterMethod(source()); // OK + (await p.promise).fuzzyCall(source()); // NOT OK + }); + + const wrapped = _.partial(testlib.foo, [123]); + wrapped().fuzzyCall(source()); // NOT OK [INCONSISTENCY] - API graphs do not currently propagate return values through partial invocation + wrapped(p => p.fuzzyCall(source())); // NOT OK + + const wrappedSink = _.partial(testlib.fuzzyCall); + wrappedSink(source()); // NOT OK + + _.partial(testlib.fuzzyCall, source()); // NOT OK + + fuzzyCall(source()); // OK - does not come from 'testlib' + require('blah').fuzzyCall(source()); // OK - does not come from 'testlib' +} diff --git a/javascript/ql/test/library-tests/frameworks/data/test.ql b/javascript/ql/test/library-tests/frameworks/data/test.ql index c5e3ce9ea8c..5ee8d0e3f9c 100644 --- a/javascript/ql/test/library-tests/frameworks/data/test.ql +++ b/javascript/ql/test/library-tests/frameworks/data/test.ql @@ -54,6 +54,7 @@ class Sinks extends ModelInput::SinkModelCsv { "testlib;Member[typevar].TypeVar[ABC].Member[mySink].Argument[0];test-sink", "testlib;Member[typevar].TypeVar[ABC].TypeVar[ABC].Member[mySink].Argument[1];test-sink", "testlib;Member[typevar].TypeVar[LeftRight].Member[mySink].Argument[0];test-sink", + "testlib;Fuzzy.Member[fuzzyCall].Argument[0];test-sink" ] } } diff --git a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll index 2e598711fcc..1cb4e189339 100644 --- a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll +++ b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll @@ -454,6 +454,14 @@ private API::Node getNodeFromPath(string type, AccessPath path, int n) { or // Apply a type step typeStep(getNodeFromPath(type, path, n), result) + or + // Apply a fuzzy step (without advancing 'n') + path.getToken(n).getName() = "Fuzzy" and + result = Specific::getAFuzzySuccessor(getNodeFromPath(type, path, n)) + or + // Skip a fuzzy step (advance 'n' without changing the current node) + path.getToken(n - 1).getName() = "Fuzzy" and + result = getNodeFromPath(type, path, n - 1) } /** @@ -500,6 +508,14 @@ private API::Node getNodeFromSubPath(API::Node base, AccessPath subPath, int n) // will themselves find by following type-steps. n > 0 and n < subPath.getNumToken() + or + // Apply a fuzzy step (without advancing 'n') + subPath.getToken(n).getName() = "Fuzzy" and + result = Specific::getAFuzzySuccessor(getNodeFromSubPath(base, subPath, n)) + or + // Skip a fuzzy step (advance 'n' without changing the current node) + subPath.getToken(n - 1).getName() = "Fuzzy" and + result = getNodeFromSubPath(base, subPath, n - 1) } /** @@ -561,7 +577,7 @@ private Specific::InvokeNode getInvocationFromPath(string type, AccessPath path) */ bindingset[name] private predicate isValidTokenNameInIdentifyingAccessPath(string name) { - name = ["Argument", "Parameter", "ReturnValue", "WithArity", "TypeVar"] + name = ["Argument", "Parameter", "ReturnValue", "WithArity", "TypeVar", "Fuzzy"] or Specific::isExtraValidTokenNameInIdentifyingAccessPath(name) } @@ -572,7 +588,7 @@ private predicate isValidTokenNameInIdentifyingAccessPath(string name) { */ bindingset[name] private predicate isValidNoArgumentTokenInIdentifyingAccessPath(string name) { - name = "ReturnValue" + name = ["ReturnValue", "Fuzzy"] or Specific::isExtraValidNoArgumentTokenInIdentifyingAccessPath(name) } diff --git a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll index 38a1198193e..d0a5d1b9da5 100644 --- a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll +++ b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll @@ -108,6 +108,23 @@ API::Node getExtraSuccessorFromInvoke(API::CallNode node, AccessPathToken token) ) } +pragma[inline] +API::Node getAFuzzySuccessor(API::Node node) { + result = node.getAMember() + or + result = node.getParameter(_) + or + result = node.getKeywordParameter(_) + or + result = node.getReturn() + or + result = node.getASubscript() + or + result = node.getAwaited() + or + result = node.getASubclass() +} + /** * Holds if `invoke` matches the PY-specific call site filter in `token`. */ diff --git a/python/ql/test/library-tests/frameworks/data/test.expected b/python/ql/test/library-tests/frameworks/data/test.expected index 68de6ecd878..dffe93790d0 100644 --- a/python/ql/test/library-tests/frameworks/data/test.expected +++ b/python/ql/test/library-tests/frameworks/data/test.expected @@ -11,6 +11,11 @@ taintFlow | test.py:83:50:83:60 | ControlFlowNode for getSource() | test.py:83:8:83:61 | ControlFlowNode for Attribute() | | test.py:86:49:86:59 | ControlFlowNode for getSource() | test.py:86:8:86:60 | ControlFlowNode for Attribute() | | test.py:87:56:87:66 | ControlFlowNode for getSource() | test.py:87:8:87:67 | ControlFlowNode for Attribute() | +| test.py:114:19:114:29 | ControlFlowNode for getSource() | test.py:114:19:114:29 | ControlFlowNode for getSource() | +| test.py:115:20:115:30 | ControlFlowNode for getSource() | test.py:115:20:115:30 | ControlFlowNode for getSource() | +| test.py:116:31:116:41 | ControlFlowNode for getSource() | test.py:116:31:116:41 | ControlFlowNode for getSource() | +| test.py:117:31:117:41 | ControlFlowNode for getSource() | test.py:117:31:117:41 | ControlFlowNode for getSource() | +| test.py:118:35:118:45 | ControlFlowNode for getSource() | test.py:118:35:118:45 | ControlFlowNode for getSource() | isSink | test.py:4:8:4:8 | ControlFlowNode for x | test-sink | | test.py:7:17:7:17 | ControlFlowNode for x | test-sink | @@ -50,6 +55,11 @@ isSink | test.py:91:21:91:23 | ControlFlowNode for one | test-sink | | test.py:91:30:91:32 | ControlFlowNode for two | test-sink | | test.py:98:6:98:9 | ControlFlowNode for baz2 | test-sink | +| test.py:114:19:114:29 | ControlFlowNode for getSource() | test-sink | +| test.py:115:20:115:30 | ControlFlowNode for getSource() | test-sink | +| test.py:116:31:116:41 | ControlFlowNode for getSource() | test-sink | +| test.py:117:31:117:41 | ControlFlowNode for getSource() | test-sink | +| test.py:118:35:118:45 | ControlFlowNode for getSource() | test-sink | isSource | test.py:3:5:3:15 | ControlFlowNode for getSource() | test-source | | test.py:9:8:9:14 | ControlFlowNode for alias() | test-source | @@ -89,6 +99,12 @@ isSource | test.py:104:32:104:37 | ControlFlowNode for param2 | test-source | | test.py:107:24:107:28 | ControlFlowNode for name1 | test-source | | test.py:107:31:107:35 | ControlFlowNode for name2 | test-source | +| test.py:114:19:114:29 | ControlFlowNode for getSource() | test-source | +| test.py:115:20:115:30 | ControlFlowNode for getSource() | test-source | +| test.py:116:31:116:41 | ControlFlowNode for getSource() | test-source | +| test.py:117:31:117:41 | ControlFlowNode for getSource() | test-source | +| test.py:118:35:118:45 | ControlFlowNode for getSource() | test-source | +| test.py:119:20:119:30 | ControlFlowNode for getSource() | test-source | syntaxErrors | Member[foo | | Member[foo] .Member[bar] | diff --git a/python/ql/test/library-tests/frameworks/data/test.py b/python/ql/test/library-tests/frameworks/data/test.py index ea1a6e0d4d4..ba08c0d6fb1 100644 --- a/python/ql/test/library-tests/frameworks/data/test.py +++ b/python/ql/test/library-tests/frameworks/data/test.py @@ -60,7 +60,7 @@ class SubClass (ArgPos.MyClass): def foo(self, arg, named=2, otherName=3): pass - def secondAndAfter(self, arg1, arg2, arg3, arg4, arg5): + def secondAndAfter(self, arg1, arg2, arg3, arg4, arg5): pass ArgPos.anyParam(arg1, arg2, name=namedThing) @@ -72,7 +72,7 @@ mySink(Steps.preserveTaint(getSource())) # FLOW mySink(Steps.preserveTaint("safe", getSource())) # NO FLOW Steps.taintIntoCallback( - getSource(), + getSource(), lambda x: mySink(x), # FLOW lambda y: mySink(y), # FLOW lambda z: mySink(z) # NO FLOW @@ -106,3 +106,14 @@ class OtherSubClass (ArgPos.MyClass): def anyNamed(self, name1, name2=2): # Parameter[any-named] matches all non-self named parameters pass + +import testlib as testlib +import testlib.nestedlib as testlib2 +import otherlib as otherlib + +testlib.fuzzyCall(getSource()) # NOT OK +testlib2.fuzzyCall(getSource()) # NOT OK +testlib.foo.bar.baz.fuzzyCall(getSource()) # NOT OK +testlib.foo().bar().fuzzyCall(getSource()) # NOT OK +testlib.foo(lambda x: x.fuzzyCall(getSource())) # NOT OK +otherlib.fuzzyCall(getSource()) # OK diff --git a/python/ql/test/library-tests/frameworks/data/test.ql b/python/ql/test/library-tests/frameworks/data/test.ql index 6c7639dc0c3..d9f270e6caf 100644 --- a/python/ql/test/library-tests/frameworks/data/test.ql +++ b/python/ql/test/library-tests/frameworks/data/test.ql @@ -51,6 +51,8 @@ class Sinks extends ModelInput::SinkModelCsv { // testing package syntax "foo1.bar;Member[baz1].Argument[any];test-sink", // "foo2;Member[bar].Member[baz2].Argument[any];test-sink", // + // testing fuzzy + "testlib;Fuzzy.Member[fuzzyCall].Argument[0];test-sink", // ] } } diff --git a/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModels.qll b/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModels.qll index 2e598711fcc..1cb4e189339 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModels.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModels.qll @@ -454,6 +454,14 @@ private API::Node getNodeFromPath(string type, AccessPath path, int n) { or // Apply a type step typeStep(getNodeFromPath(type, path, n), result) + or + // Apply a fuzzy step (without advancing 'n') + path.getToken(n).getName() = "Fuzzy" and + result = Specific::getAFuzzySuccessor(getNodeFromPath(type, path, n)) + or + // Skip a fuzzy step (advance 'n' without changing the current node) + path.getToken(n - 1).getName() = "Fuzzy" and + result = getNodeFromPath(type, path, n - 1) } /** @@ -500,6 +508,14 @@ private API::Node getNodeFromSubPath(API::Node base, AccessPath subPath, int n) // will themselves find by following type-steps. n > 0 and n < subPath.getNumToken() + or + // Apply a fuzzy step (without advancing 'n') + subPath.getToken(n).getName() = "Fuzzy" and + result = Specific::getAFuzzySuccessor(getNodeFromSubPath(base, subPath, n)) + or + // Skip a fuzzy step (advance 'n' without changing the current node) + subPath.getToken(n - 1).getName() = "Fuzzy" and + result = getNodeFromSubPath(base, subPath, n - 1) } /** @@ -561,7 +577,7 @@ private Specific::InvokeNode getInvocationFromPath(string type, AccessPath path) */ bindingset[name] private predicate isValidTokenNameInIdentifyingAccessPath(string name) { - name = ["Argument", "Parameter", "ReturnValue", "WithArity", "TypeVar"] + name = ["Argument", "Parameter", "ReturnValue", "WithArity", "TypeVar", "Fuzzy"] or Specific::isExtraValidTokenNameInIdentifyingAccessPath(name) } @@ -572,7 +588,7 @@ private predicate isValidTokenNameInIdentifyingAccessPath(string name) { */ bindingset[name] private predicate isValidNoArgumentTokenInIdentifyingAccessPath(string name) { - name = "ReturnValue" + name = ["ReturnValue", "Fuzzy"] or Specific::isExtraValidNoArgumentTokenInIdentifyingAccessPath(name) } diff --git a/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModelsSpecific.qll b/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModelsSpecific.qll index ed7a331c452..7247409612d 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModelsSpecific.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModelsSpecific.qll @@ -176,6 +176,25 @@ API::Node getExtraSuccessorFromInvoke(InvokeNode node, AccessPathToken token) { ) } +pragma[inline] +API::Node getAFuzzySuccessor(API::Node node) { + result = node.getAMember() + or + result = node.getMethod(_) + or + result = + node.getArgumentAtPosition(any(DataFlowDispatch::ArgumentPosition apos | not apos.isSelf())) + or + result = + node.getParameterAtPosition(any(DataFlowDispatch::ParameterPosition ppos | not ppos.isSelf())) + or + result = node.getReturn() + or + result = node.getAnElement() + or + result = node.getInstance() +} + /** * Holds if `invoke` matches the Ruby-specific call site filter in `token`. */ diff --git a/ruby/ql/test/library-tests/dataflow/summaries/Summaries.expected b/ruby/ql/test/library-tests/dataflow/summaries/Summaries.expected index 0597947595a..0c02c5b2004 100644 --- a/ruby/ql/test/library-tests/dataflow/summaries/Summaries.expected +++ b/ruby/ql/test/library-tests/dataflow/summaries/Summaries.expected @@ -45,6 +45,14 @@ edges | summaries.rb:1:1:1:7 | tainted | summaries.rb:147:16:147:22 | tainted | | summaries.rb:1:1:1:7 | tainted | summaries.rb:150:39:150:45 | tainted | | summaries.rb:1:1:1:7 | tainted | summaries.rb:150:39:150:45 | tainted | +| summaries.rb:1:1:1:7 | tainted | summaries.rb:154:20:154:26 | tainted | +| summaries.rb:1:1:1:7 | tainted | summaries.rb:154:20:154:26 | tainted | +| summaries.rb:1:1:1:7 | tainted | summaries.rb:155:28:155:34 | tainted | +| summaries.rb:1:1:1:7 | tainted | summaries.rb:155:28:155:34 | tainted | +| summaries.rb:1:1:1:7 | tainted | summaries.rb:156:27:156:33 | tainted | +| summaries.rb:1:1:1:7 | tainted | summaries.rb:156:27:156:33 | tainted | +| summaries.rb:1:1:1:7 | tainted | summaries.rb:158:15:158:21 | tainted | +| summaries.rb:1:1:1:7 | tainted | summaries.rb:158:15:158:21 | tainted | | summaries.rb:1:11:1:36 | call to identity | summaries.rb:1:1:1:7 | tainted | | summaries.rb:1:11:1:36 | call to identity | summaries.rb:1:1:1:7 | tainted | | summaries.rb:1:20:1:36 | call to source | summaries.rb:1:11:1:36 | call to identity | @@ -232,6 +240,9 @@ edges | summaries.rb:122:16:122:22 | [post] tainted | summaries.rb:145:26:145:32 | tainted | | summaries.rb:122:16:122:22 | [post] tainted | summaries.rb:147:16:147:22 | tainted | | summaries.rb:122:16:122:22 | [post] tainted | summaries.rb:150:39:150:45 | tainted | +| summaries.rb:122:16:122:22 | [post] tainted | summaries.rb:154:20:154:26 | tainted | +| summaries.rb:122:16:122:22 | [post] tainted | summaries.rb:155:28:155:34 | tainted | +| summaries.rb:122:16:122:22 | [post] tainted | summaries.rb:156:27:156:33 | tainted | | summaries.rb:122:16:122:22 | tainted | summaries.rb:122:16:122:22 | [post] tainted | | summaries.rb:122:16:122:22 | tainted | summaries.rb:122:25:122:25 | [post] y | | summaries.rb:122:16:122:22 | tainted | summaries.rb:122:33:122:33 | [post] z | @@ -475,6 +486,18 @@ nodes | summaries.rb:147:16:147:22 | tainted | semmle.label | tainted | | summaries.rb:150:39:150:45 | tainted | semmle.label | tainted | | summaries.rb:150:39:150:45 | tainted | semmle.label | tainted | +| summaries.rb:154:20:154:26 | tainted | semmle.label | tainted | +| summaries.rb:154:20:154:26 | tainted | semmle.label | tainted | +| summaries.rb:155:28:155:34 | tainted | semmle.label | tainted | +| summaries.rb:155:28:155:34 | tainted | semmle.label | tainted | +| summaries.rb:156:27:156:33 | tainted | semmle.label | tainted | +| summaries.rb:156:27:156:33 | tainted | semmle.label | tainted | +| summaries.rb:158:15:158:21 | tainted | semmle.label | tainted | +| summaries.rb:158:15:158:21 | tainted | semmle.label | tainted | +| summaries.rb:163:20:163:36 | call to source | semmle.label | call to source | +| summaries.rb:163:20:163:36 | call to source | semmle.label | call to source | +| summaries.rb:166:20:166:36 | call to source | semmle.label | call to source | +| summaries.rb:166:20:166:36 | call to source | semmle.label | call to source | subpaths invalidSpecComponent #select @@ -574,6 +597,18 @@ invalidSpecComponent | summaries.rb:147:16:147:22 | tainted | summaries.rb:1:20:1:36 | call to source | summaries.rb:147:16:147:22 | tainted | $@ | summaries.rb:1:20:1:36 | call to source | call to source | | summaries.rb:150:39:150:45 | tainted | summaries.rb:1:20:1:36 | call to source | summaries.rb:150:39:150:45 | tainted | $@ | summaries.rb:1:20:1:36 | call to source | call to source | | summaries.rb:150:39:150:45 | tainted | summaries.rb:1:20:1:36 | call to source | summaries.rb:150:39:150:45 | tainted | $@ | summaries.rb:1:20:1:36 | call to source | call to source | +| summaries.rb:154:20:154:26 | tainted | summaries.rb:1:20:1:36 | call to source | summaries.rb:154:20:154:26 | tainted | $@ | summaries.rb:1:20:1:36 | call to source | call to source | +| summaries.rb:154:20:154:26 | tainted | summaries.rb:1:20:1:36 | call to source | summaries.rb:154:20:154:26 | tainted | $@ | summaries.rb:1:20:1:36 | call to source | call to source | +| summaries.rb:155:28:155:34 | tainted | summaries.rb:1:20:1:36 | call to source | summaries.rb:155:28:155:34 | tainted | $@ | summaries.rb:1:20:1:36 | call to source | call to source | +| summaries.rb:155:28:155:34 | tainted | summaries.rb:1:20:1:36 | call to source | summaries.rb:155:28:155:34 | tainted | $@ | summaries.rb:1:20:1:36 | call to source | call to source | +| summaries.rb:156:27:156:33 | tainted | summaries.rb:1:20:1:36 | call to source | summaries.rb:156:27:156:33 | tainted | $@ | summaries.rb:1:20:1:36 | call to source | call to source | +| summaries.rb:156:27:156:33 | tainted | summaries.rb:1:20:1:36 | call to source | summaries.rb:156:27:156:33 | tainted | $@ | summaries.rb:1:20:1:36 | call to source | call to source | +| summaries.rb:158:15:158:21 | tainted | summaries.rb:1:20:1:36 | call to source | summaries.rb:158:15:158:21 | tainted | $@ | summaries.rb:1:20:1:36 | call to source | call to source | +| summaries.rb:158:15:158:21 | tainted | summaries.rb:1:20:1:36 | call to source | summaries.rb:158:15:158:21 | tainted | $@ | summaries.rb:1:20:1:36 | call to source | call to source | +| summaries.rb:163:20:163:36 | call to source | summaries.rb:163:20:163:36 | call to source | summaries.rb:163:20:163:36 | call to source | $@ | summaries.rb:163:20:163:36 | call to source | call to source | +| summaries.rb:163:20:163:36 | call to source | summaries.rb:163:20:163:36 | call to source | summaries.rb:163:20:163:36 | call to source | $@ | summaries.rb:163:20:163:36 | call to source | call to source | +| summaries.rb:166:20:166:36 | call to source | summaries.rb:166:20:166:36 | call to source | summaries.rb:166:20:166:36 | call to source | $@ | summaries.rb:166:20:166:36 | call to source | call to source | +| summaries.rb:166:20:166:36 | call to source | summaries.rb:166:20:166:36 | call to source | summaries.rb:166:20:166:36 | call to source | $@ | summaries.rb:166:20:166:36 | call to source | call to source | warning | CSV type row should have 3 columns but has 1: TooFewColumns | | CSV type row should have 3 columns but has 6: TooManyColumns;;Member[Foo].Instance;too;many;columns | diff --git a/ruby/ql/test/library-tests/dataflow/summaries/Summaries.ql b/ruby/ql/test/library-tests/dataflow/summaries/Summaries.ql index 11145ed991c..89dce373b32 100644 --- a/ruby/ql/test/library-tests/dataflow/summaries/Summaries.ql +++ b/ruby/ql/test/library-tests/dataflow/summaries/Summaries.ql @@ -145,6 +145,7 @@ private class SinkFromModel extends ModelInput::SinkModelCsv { "Foo!;Method[getSinks].ReturnValue.Element[any].Method[mySink].Argument[0];test-sink", // "Foo!;Method[arraySink].Argument[0].Element[any];test-sink", // "Foo!;Method[secondArrayElementIsSink].Argument[0].Element[1];test-sink", // + "FuzzyLib!;Fuzzy.Method[fuzzyCall].Argument[0];test-sink" ] } } diff --git a/ruby/ql/test/library-tests/dataflow/summaries/summaries.rb b/ruby/ql/test/library-tests/dataflow/summaries/summaries.rb index cff6f00884e..12907abb0a3 100644 --- a/ruby/ql/test/library-tests/dataflow/summaries/summaries.rb +++ b/ruby/ql/test/library-tests/dataflow/summaries/summaries.rb @@ -150,3 +150,19 @@ Foo.secondArrayElementIsSink([tainted, "safe", "safe"]) Foo.secondArrayElementIsSink(["safe", tainted, "safe"]) # $ hasValueFlow=tainted Foo.secondArrayElementIsSink(["safe", "safe", tainted]) Foo.secondArrayElementIsSink([tainted] * 10) # $ MISSING: hasValueFlow=tainted + +FuzzyLib.fuzzyCall(tainted) # $ hasValueFlow=tainted +FuzzyLib.foo.bar.fuzzyCall(tainted) # $ hasValueFlow=tainted +FuzzyLib.foo[0].fuzzyCall(tainted) # $ hasValueFlow=tainted +FuzzyLib.foo do |x| + x.fuzzyCall(tainted) # $ hasValueFlow=tainted + x.otherCall(tainted) +end +class FuzzySub < FuzzyLib::Foo + def blah + self.fuzzyCall(source("tainted")) # $ hasValueFlow=tainted + end + def self.blah + self.fuzzyCall(source("tainted")) # $ hasValueFlow=tainted + end +end