Merge pull request #18467 from github/js/shared-dataflow-branch

JS: Migrate to shared data flow library (targeting main!) 🚀
This commit is contained in:
Asger F
2025-01-16 11:28:57 +01:00
committed by GitHub
531 changed files with 28648 additions and 35414 deletions

View File

@@ -204,58 +204,45 @@ data flow solver that can check whether there is (global) data flow from a sourc
Optionally, configurations may specify extra data flow edges to be added to the data flow graph, and may also specify `barriers`. Barriers are data flow nodes or edges through
which data should not be tracked for the purposes of this analysis.
To define a configuration, extend the class ``DataFlow::Configuration`` as follows:
To define a configuration, add a module that implements the signature ``DataFlow::ConfigSig`` and pass it to ``DataFlow::Global`` as follows:
.. code-block:: ql
class MyDataFlowConfiguration extends DataFlow::Configuration {
MyDataFlowConfiguration() { this = "MyDataFlowConfiguration" }
module MyAnalysisConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) { /* ... */ }
override predicate isSource(DataFlow::Node source) { /* ... */ }
predicate isSink(DataFlow::Node sink) { /* ... */ }
override predicate isSink(DataFlow::Node sink) { /* ... */ }
// optional overrides:
override predicate isBarrier(DataFlow::Node nd) { /* ... */ }
override predicate isBarrierEdge(DataFlow::Node pred, DataFlow::Node succ) { /* ... */ }
override predicate isAdditionalFlowStep(DataFlow::Node pred, DataFlow::Node succ) { /* ... */ }
// optional predicates:
predicate isBarrier(DataFlow::Node nd) { /* ... */ }
predicate isAdditionalFlowStep(DataFlow::Node pred, DataFlow::Node succ) { /* ... */ }
}
The characteristic predicate ``MyDataFlowConfiguration()`` defines the name of the configuration, so ``"MyDataFlowConfiguration"`` should be replaced by a suitable
name describing your particular analysis configuration.
module MyAnalysisFlow = DataFlow::Global<MyAnalysisConfig>;
The data flow analysis is performed using the predicate ``hasFlow(source, sink)``:
The data flow analysis is performed using the predicate ``MyAnalysisFlow::flow(source, sink)``:
.. code-block:: ql
from MyDataFlowConfiguration dataflow, DataFlow::Node source, DataFlow::Node sink
where dataflow.hasFlow(source, sink)
from DataFlow::Node source, DataFlow::Node sink
where MyAnalysisFlow::flow(source, sink)
select source, "Data flow from $@ to $@.", source, source.toString(), sink, sink.toString()
Using global taint tracking
~~~~~~~~~~~~~~~~~~~~~~~~~~~
Global taint tracking extends global data flow with additional non-value-preserving steps, such as flow through string-manipulating operations. To use it, simply extend
``TaintTracking::Configuration`` instead of ``DataFlow::Configuration``:
Global taint tracking extends global data flow with additional non-value-preserving steps, such as flow through string-manipulating operations. To use it, simply
use ``TaintTracking::Global<...>`` instead of ``DataFlow::Global<...>``:
.. code-block:: ql
class MyTaintTrackingConfiguration extends TaintTracking::Configuration {
MyTaintTrackingConfiguration() { this = "MyTaintTrackingConfiguration" }
override predicate isSource(DataFlow::Node source) { /* ... */ }
override predicate isSink(DataFlow::Node sink) { /* ... */ }
module MyAnalysisConfig implements DataFlow::ConfigSig {
/* ... */
}
Analogous to ``isAdditionalFlowStep``, there is a predicate ``isAdditionalTaintStep`` that you can override to specify custom flow steps to consider in the analysis.
Instead of the ``isBarrier`` and ``isBarrierEdge`` predicates, the taint tracking configuration includes ``isSanitizer`` and ``isSanitizerEdge`` predicates that specify
data flow nodes or edges that act as taint sanitizers and hence stop flow from a source to a sink.
module MyAnalysisFlow = TaintTracking::Global<MyAnalysisConfig>;
Similar to global data flow, the characteristic predicate ``MyTaintTrackingConfiguration()`` defines the unique name of the configuration, so ``"MyTaintTrackingConfiguration"``
should be replaced by an appropriate descriptive name.
The taint tracking analysis is again performed using the predicate ``hasFlow(source, sink)``.
The taint tracking analysis is again performed using the predicate ``MyAnalysisFlow::flow(source, sink)``.
Examples
~~~~~~~~
@@ -267,20 +254,20 @@ time using global taint tracking.
import javascript
class CommandLineFileNameConfiguration extends TaintTracking::Configuration {
CommandLineFileNameConfiguration() { this = "CommandLineFileNameConfiguration" }
override predicate isSource(DataFlow::Node source) {
module CommandLineFileNameConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) {
DataFlow::globalVarRef("process").getAPropertyRead("argv").getAPropertyRead() = source
}
override predicate isSink(DataFlow::Node sink) {
predicate isSink(DataFlow::Node sink) {
DataFlow::moduleMember("fs", "readFile").getACall().getArgument(0) = sink
}
}
from CommandLineFileNameConfiguration cfg, DataFlow::Node source, DataFlow::Node sink
where cfg.hasFlow(source, sink)
module CommandLineFileNameFlow = TaintTracking::Global<CommandLineFileNameConfig>;
from DataFlow::Node source, DataFlow::Node sink
where CommandLineFileNameFlow::flow(source, sink)
select source, sink
This query will now find flows that involve inter-procedural steps, like in the following example (where the individual steps have been marked with comments
@@ -325,15 +312,15 @@ with an error if it does not. We could then use that function in ``readFileHelpe
}
For the purposes of our above analysis, ``checkPath`` is a `sanitizer`: its output is always untainted, even if its input is tainted. To model this
we can add an override of ``isSanitizer`` to our taint-tracking configuration like this:
we can add an ``isBarrier`` predicate to our taint-tracking configuration like this:
.. code-block:: ql
class CommandLineFileNameConfiguration extends TaintTracking::Configuration {
module CommandLineFileNameConfig implements DataFlow::ConfigSig {
// ...
override predicate isSanitizer(DataFlow::Node nd) {
predicate isBarrier(DataFlow::Node nd) {
nd.(DataFlow::CallNode).getCalleeName() = "checkPath"
}
}
@@ -359,36 +346,36 @@ Note that ``checkPath`` is now no longer a sanitizer in the sense described abov
through ``checkPath`` any more. The flow is, however, `guarded` by ``checkPath`` in the sense that the expression ``checkPath(p)`` has to evaluate
to ``true`` (or, more precisely, to a truthy value) in order for the flow to happen.
Such sanitizer guards can be supported by defining a new subclass of ``TaintTracking::SanitizerGuardNode`` and overriding the predicate
``isSanitizerGuard`` in the taint-tracking configuration class to add all instances of this class as sanitizer guards to the configuration.
Such sanitizer guards can be supported by defining a class with a ``blocksExpr`` predicate and using the ``DataFlow::MakeBarrierGuard`` module
to implement the ``isBarrier`` predicate.
For our above example, we would begin by defining a subclass of ``SanitizerGuardNode`` that identifies guards of the form ``checkPath(...)``:
For our above example, we would begin by defining a subclass of ``DataFlow::CallNode`` that identifies guards of the form ``checkPath(...)``:
.. code-block:: ql
class CheckPathSanitizerGuard extends TaintTracking::SanitizerGuardNode, DataFlow::CallNode {
class CheckPathSanitizerGuard extends DataFlow::CallNode {
CheckPathSanitizerGuard() { this.getCalleeName() = "checkPath" }
override predicate sanitizes(boolean outcome, Expr e) {
predicate blocksExpr(boolean outcome, Expr e) {
outcome = true and
e = getArgument(0).asExpr()
e = this.getArgument(0).asExpr()
}
}
The characteristic predicate of this class checks that the sanitizer guard is a call to a function named ``checkPath``. The overriding definition
of ``sanitizes`` says such a call sanitizes its first argument (that is, ``getArgument(0)``) if it evaluates to ``true`` (or rather, a truthy
The characteristic predicate of this class checks that the sanitizer guard is a call to a function named ``checkPath``. The definition
of ``blocksExpr`` says such a call sanitizes its first argument (that is, ``getArgument(0)``) if it evaluates to ``true`` (or rather, a truthy
value).
Now we can override ``isSanitizerGuard`` to add these sanitizer guards to our configuration:
Now we can implement ``isBarrier`` to add this sanitizer guard to our configuration:
.. code-block:: ql
class CommandLineFileNameConfiguration extends TaintTracking::Configuration {
module CommandLineFileNameConfig implements DataFlow::ConfigSig {
// ...
override predicate isSanitizerGuard(TaintTracking::SanitizerGuardNode nd) {
nd instanceof CheckPathSanitizerGuard
predicate isBarrier(DataFlow::Node node) {
node = DataFlow::MakeBarrierGuard<CheckPathSanitizerGuard>::getABarrierNode()
}
}
@@ -399,7 +386,7 @@ reach there if ``checkPath(p)`` evaluates to a truthy value. Consequently, there
Additional taint steps
~~~~~~~~~~~~~~~~~~~~~~
Sometimes the default data flow and taint steps provided by ``DataFlow::Configuration`` and ``TaintTracking::Configuration`` are not sufficient
Sometimes the default data flow and taint steps provided by the data flow library are not sufficient
and we need to add additional flow or taint steps to our configuration to make it find the expected flow. For example, this can happen because
the analyzed program uses a function from an external library whose source code is not available to the analysis, or because it uses a function
that is too difficult to analyze.
@@ -420,20 +407,20 @@ to resolve any symlinks in the path ``p`` before passing it to ``readFile``:
Resolving symlinks does not make an unsafe path any safer, so we would still like our query to flag this, but since the standard library does
not have a model of ``resolve-symlinks`` it will no longer return any results.
We can fix this quite easily by adding an overriding definition of the ``isAdditionalTaintStep`` predicate to our configuration, introducing an
We can fix this quite easily by adding a definition of the ``isAdditionalFlowStep`` predicate to our configuration, introducing an
additional taint step from the first argument of ``resolveSymlinks`` to its result:
.. code-block:: ql
class CommandLineFileNameConfiguration extends TaintTracking::Configuration {
module CommandLineFileNameConfig implements DataFlow::ConfigSig {
// ...
override predicate isAdditionalTaintStep(DataFlow::Node pred, DataFlow::Node succ) {
predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
exists(DataFlow::CallNode c |
c = DataFlow::moduleImport("resolve-symlinks").getACall() and
pred = c.getArgument(0) and
succ = c
node1 = c.getArgument(0) and
node2 = c
)
}
}
@@ -444,11 +431,11 @@ to wrap it in a new subclass of ``TaintTracking::SharedTaintStep`` like this:
.. code-block:: ql
class StepThroughResolveSymlinks extends TaintTracking::SharedTaintStep {
override predicate step(DataFlow::Node pred, DataFlow::Node succ) {
override predicate step(DataFlow::Node node1, DataFlow::Node node2) {
exists(DataFlow::CallNode c |
c = DataFlow::moduleImport("resolve-symlinks").getACall() and
pred = c.getArgument(0) and
succ = c
node1 = c.getArgument(0) and
node2 = c
)
}
}
@@ -494,18 +481,18 @@ Exercise 2
import javascript
class HardCodedTagNameConfiguration extends DataFlow::Configuration {
HardCodedTagNameConfiguration() { this = "HardCodedTagNameConfiguration" }
module HardCodedTagNameConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) { source.asExpr() instanceof ConstantString }
override predicate isSource(DataFlow::Node source) { source.asExpr() instanceof ConstantString }
override predicate isSink(DataFlow::Node sink) {
predicate isSink(DataFlow::Node sink) {
sink = DataFlow::globalVarRef("document").getAMethodCall("createElement").getArgument(0)
}
}
from HardCodedTagNameConfiguration cfg, DataFlow::Node source, DataFlow::Node sink
where cfg.hasFlow(source, sink)
module HardCodedTagNameFlow = DataFlow::Global<HardCodedTagNameConfig>;
from DataFlow::Node source, DataFlow::Node sink
where HardCodedTagNameFlow::flow(source, sink)
select source, sink
Exercise 3
@@ -540,18 +527,18 @@ Exercise 4
}
}
class HardCodedTagNameConfiguration extends DataFlow::Configuration {
HardCodedTagNameConfiguration() { this = "HardCodedTagNameConfiguration" }
module HardCodedTagNameConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) { source instanceof ArrayEntryCallResult }
override predicate isSource(DataFlow::Node source) { source instanceof ArrayEntryCallResult }
override predicate isSink(DataFlow::Node sink) {
predicate isSink(DataFlow::Node sink) {
sink = DataFlow::globalVarRef("document").getAMethodCall("createElement").getArgument(0)
}
}
from HardCodedTagNameConfiguration cfg, DataFlow::Node source, DataFlow::Node sink
where cfg.hasFlow(source, sink)
module HardCodedTagNameFlow = DataFlow::Global<HardCodedTagNameConfig>;
from DataFlow::Node source, DataFlow::Node sink
where HardCodedTagNameFlow::flow(source, sink)
select source, sink
Further reading

View File

@@ -18,6 +18,7 @@ Experiment and learn how to write effective and efficient queries for CodeQL dat
abstract-syntax-tree-classes-for-working-with-javascript-and-typescript-programs
data-flow-cheat-sheet-for-javascript
customizing-library-models-for-javascript
migrating-javascript-dataflow-queries
- :doc:`Basic query for JavaScript and TypeScript code <basic-query-for-javascript-code>`: Learn to write and run a simple CodeQL query.
@@ -37,4 +38,6 @@ Experiment and learn how to write effective and efficient queries for CodeQL dat
- :doc:`Data flow cheat sheet for JavaScript <data-flow-cheat-sheet-for-javascript>`: This article describes parts of the JavaScript libraries commonly used for variant analysis and in data flow queries.
- :doc:`Customizing library models for JavaScript <customizing-library-models-for-javascript>`: You can model frameworks and libraries that your codebase depends on using data extensions and publish them as CodeQL model packs.
- :doc:`Customizing library models for JavaScript <customizing-library-models-for-javascript>`: You can model frameworks and libraries that your codebase depends on using data extensions and publish them as CodeQL model packs.
- :doc:`Migrating JavaScript dataflow queries <migrating-javascript-dataflow-queries>`: Guide on migrating data flow queries to the new data flow library.

View File

@@ -700,19 +700,16 @@ The data flow graph-based analyses described so far are all intraprocedural: the
We distinguish here between data flow proper, and *taint tracking*: the latter not only considers value-preserving flow (such as from variable definitions to uses), but also cases where one value influences ("taints") another without determining it entirely. For example, in the assignment ``s2 = s1.substring(i)``, the value of ``s1`` influences the value of ``s2``, because ``s2`` is assigned a substring of ``s1``. In general, ``s2`` will not be assigned ``s1`` itself, so there is no data flow from ``s1`` to ``s2``, but ``s1`` still taints ``s2``.
It is a common pattern that we wish to specify data flow or taint analysis in terms of its *sources* (where flow starts), *sinks* (where it should be tracked), and *barriers* or *sanitizers* (where flow is interrupted). Sanitizers they are very common in security analyses: for example, an analysis that tracks the flow of untrusted user input into, say, a SQL query has to keep track of code that validates the input, thereby making it safe to use. Such a validation step is an example of a sanitizer.
It is a common pattern that we wish to specify data flow or taint analysis in terms of its *sources* (where flow starts), *sinks* (where it should be tracked), and *barriers* (also called *sanitizers*) where flow is interrupted. Sanitizers are very common in security analyses: for example, an analysis that tracks the flow of untrusted user input into, say, a SQL query has to keep track of code that validates the input, thereby making it safe to use. Such a validation step is an example of a sanitizer.
The classes ``DataFlow::Configuration`` and ``TaintTracking::Configuration`` allow specifying a data flow or taint analysis, respectively, by overriding the following predicates:
A module implementing the signature ``DataFlow::ConfigSig`` may specify a data flow or taint analysis by implementing the following predicates:
- ``isSource(DataFlow::Node nd)`` selects all nodes ``nd`` from where flow tracking starts.
- ``isSink(DataFlow::Node nd)`` selects all nodes ``nd`` to which the flow is tracked.
- ``isBarrier(DataFlow::Node nd)`` selects all nodes ``nd`` that act as a barrier for data flow; ``isSanitizer`` is the corresponding predicate for taint tracking configurations.
- ``isBarrierEdge(DataFlow::Node src, DataFlow::Node trg)`` is a variant of ``isBarrier(nd)`` that allows specifying barrier *edges* in addition to barrier nodes; again, ``isSanitizerEdge`` is the corresponding predicate for taint tracking;
- ``isAdditionalFlowStep(DataFlow::Node src, DataFlow::Node trg)`` allows specifying custom additional flow steps for this analysis; ``isAdditionalTaintStep`` is the corresponding predicate for taint tracking configurations.
- ``isBarrier(DataFlow::Node nd)`` selects all nodes ``nd`` that act as a barrier/sanitizer for data flow.
- ``isAdditionalFlowStep(DataFlow::Node src, DataFlow::Node trg)`` allows specifying custom additional flow steps for this analysis.
Since for technical reasons both ``Configuration`` classes are subtypes of ``string``, you have to choose a unique name for each flow configuration and equate ``this`` with it in the characteristic predicate (as in the example below).
The predicate ``Configuration.hasFlow`` performs the actual flow tracking, starting at a source and looking for flow to a sink that does not pass through a barrier node or edge.
Such a module can be passed to ``DataFlow::Global<...>`` (or to ``TaintTracking::Global<...>`` for a taint analysis). This will produce a module with a ``flow`` predicate that performs the actual flow tracking, starting at a source and looking for flow to a sink that does not pass through a barrier node.
For example, suppose that we are developing an analysis to find hard-coded passwords. We might write a simple query that looks for string constants flowing into variables named ``"password"``.
@@ -720,35 +717,27 @@ For example, suppose that we are developing an analysis to find hard-coded passw
import javascript
class PasswordTracker extends DataFlow::Configuration {
PasswordTracker() {
// unique identifier for this configuration
this = "PasswordTracker"
}
module PasswordConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node nd) { nd.asExpr() instanceof StringLiteral }
override predicate isSource(DataFlow::Node nd) {
nd.asExpr() instanceof StringLiteral
}
override predicate isSink(DataFlow::Node nd) {
passwordVarAssign(_, nd)
}
predicate passwordVarAssign(Variable v, DataFlow::Node nd) {
v.getAnAssignedExpr() = nd.asExpr() and
v.getName().toLowerCase() = "password"
}
predicate isSink(DataFlow::Node nd) { passwordVarAssign(_, nd) }
}
Now we can rephrase our query to use ``Configuration.hasFlow``:
predicate passwordVarAssign(Variable v, DataFlow::Node nd) {
v.getAnAssignedExpr() = nd.asExpr() and
v.getName().toLowerCase() = "password"
}
module PasswordFlow = DataFlow::Global<PasswordConfig>;
Now we can rephrase our query to use ``PasswordFlow::flow``:
.. code-block:: ql
from PasswordTracker pt, DataFlow::Node source, DataFlow::Node sink, Variable v
where pt.hasFlow(source, sink) and pt.passwordVarAssign(v, sink)
from DataFlow::Node source, DataFlow::Node sink, Variable v
where PasswordFlow::flow(source, sink) and passwordVarAssign(v, sink)
select sink, "Password variable " + v + " is assigned a constant string."
Syntax errors
~~~~~~~~~~~~~

View File

@@ -16,18 +16,17 @@ Use the following template to create a taint tracking path query:
* @kind path-problem
*/
import javascript
import DataFlow
import DataFlow::PathGraph
class MyConfig extends TaintTracking::Configuration {
MyConfig() { this = "MyConfig" }
override predicate isSource(Node node) { ... }
override predicate isSink(Node node) { ... }
override predicate isAdditionalTaintStep(Node pred, Node succ) { ... }
module MyConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node node) { ... }
predicate isSink(DataFlow::Node node) { ... }
predicate isAdditionalFlowStep(DataFlow::Node pred, DataFlow::Node succ) { ... }
}
from MyConfig cfg, PathNode source, PathNode sink
where cfg.hasFlowPath(source, sink)
module MyFlow = TaintTracking::Global<MyConfig>;
import MyFlow::PathGraph
from MyFlow::PathNode source, MyFlow::PathNode sink
where MyFlow::flowPath(source, sink)
select sink.getNode(), source, sink, "taint from $@.", source.getNode(), "here"
This query reports flow paths which:

View File

@@ -0,0 +1,301 @@
.. _migrating-javascript-dataflow-queries:
Migrating JavaScript Dataflow Queries
=====================================
The JavaScript analysis used to have its own data flow library, which differed from the shared data flow
library used by other languages. This library has now been deprecated in favor of the shared library.
This article explains how to migrate JavaScript data flow queries to use the shared data flow library,
and some important differences to be aware of. Note that the article on :ref:`analyzing data flow in JavaScript and TypeScript <analyzing-data-flow-in-javascript-and-typescript>`
provides a general guide to the new data flow library, whereas this article aims to help with migrating existing queries from the old data flow library.
Note that the ``DataFlow::Configuration`` class is still backed by the original data flow library, but has been marked as deprecated.
This means data flow queries using this class will continue to work, albeit with deprecation warnings, until the 1-year deprecation period expires in early 2026.
It is recommended that all custom queries are migrated before this time, to ensure they continue to work in the future.
Data flow queries should be migrated to use ``DataFlow::ConfigSig``-style modules instead of the ``DataFlow::Configuration`` class.
This is identical to the interface found in other languages.
When making this switch, the query will become backed by the shared data flow library instead. That is, data flow queries will only work
with the shared data flow library when they have been migrated to ``ConfigSig``-style, as shown in the following table:
.. list-table:: Data flow libraries
:widths: 20 80
:header-rows: 1
* - API
- Implementation
* - ``DataFlow::Configuration``
- Old library (deprecated, to be removed in early 2026)
* - ``DataFlow::ConfigSig``
- Shared library
A straightforward translation to ``DataFlow::ConfigSig``-style is usually possible, although there are some complications
that may cause the query to behave differently.
We'll first cover some straightforward migration examples, and then go over some of the complications that may arise.
Simple migration example
------------------------
A simple example of a query using the old data flow library is shown below:
.. code-block:: ql
/** @kind path-problem */
import javascript
import DataFlow::PathGraph
class MyConfig extends DataFlow::Configuration {
MyConfig() { this = "MyConfig" }
override predicate isSource(DataFlow::Node node) { ... }
override predicate isSink(DataFlow::Node node) { ... }
}
from MyConfig cfg, DataFlow::PathNode source, DataFlow::PathNode sink
where cfg.hasFlowPath(source, sink)
select sink, source, sink, "Flow found"
With the new style this would look like this:
.. code-block:: ql
/** @kind path-problem */
import javascript
module MyConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node node) { ... }
predicate isSink(DataFlow::Node node) { ... }
}
module MyFlow = DataFlow::Global<MyConfig>;
import MyFlow::PathGraph
from MyFlow::PathNode source, MyFlow::PathNode sink
where MyFlow::flowPath(source, sink)
select sink, source, sink, "Flow found"
The changes can be summarized as:
- The ``DataFlow::Configuration`` class was replaced with a module implementing ``DataFlow::ConfigSig``.
- The characteristic predicate was removed (modules have no characteristic predicates).
- Predicates such as ``isSource`` no longer have the ``override`` keyword (as they are defined in a module now).
- The configuration module is being passed to ``DataFlow::Global``, resulting in a new module, called ``MyFlow`` in this example.
- The query imports ``MyFlow::PathGraph`` instead of ``DataFlow::PathGraph``.
- The ``MyConfig cfg`` variable was removed from the ``from`` clause.
- The ``hasFlowPath`` call was replaced with ``MyFlow::flowPath``.
- The type ``DataFlow::PathNode`` was replaced with ``MyFlow::PathNode``.
With these changes, we have produced an equivalent query that is backed by the new data flow library.
Taint tracking
--------------
For configuration classes extending ``TaintTracking::Configuration``, the migration is similar but with a few differences:
- The ``TaintTracking::Global`` module should be used instead of ``DataFlow::Global``.
- Some predicates originating from ``TaintTracking::Configuration`` should be renamed to match the ``DataFlow::ConfigSig`` interface:
- ``isSanitizer`` should be renamed to ``isBarrier``.
- ``isAdditionalTaintStep`` should be renamed to ``isAdditionalFlowStep``.
Note that there is no such thing as ``TaintTracking::ConfigSig``. The ``DataFlow::ConfigSig`` interface is used for both data flow and taint tracking.
For example:
.. code-block:: ql
class MyConfig extends TaintTracking::Configuration {
MyConfig() { this = "MyConfig" }
override predicate isSanitizer(DataFlow::Node node) { ... }
override predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { ... }
...
}
The above configuration can be migrated to the shared data flow library as follows:
.. code-block:: ql
module MyConfig implements DataFlow::ConfigSig {
predicate isBarrier(DataFlow::Node node) { ... }
predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) { ... }
...
}
module MyFlow = TaintTracking::Global<MyConfig>;
Flow labels and flow states
---------------------------
The ``DataFlow::FlowLabel`` class has been deprecated. Queries that relied on flow labels should use the new `flow state` concept instead.
This is done by implementing ``DataFlow::StateConfigSig`` instead of ``DataFlow::ConfigSig``, and passing the module to ``DataFlow::GlobalWithState``
or ``TaintTracking::GlobalWithState``. See :ref:`using flow state <using-flow-labels-for-precise-data-flow-analysis>` for more details about flow state.
Some changes to be aware of:
- The 4-argument version of ``isAdditionalFlowStep`` now takes parameters in a different order.
It now takes ``node1, state1, node2, state2`` instead of ``node1, node2, state1, state2``.
- Taint steps apply to all flow states, not just the ``taint`` flow label. See more details further down in this article.
Barrier guards
--------------
The predicates ``isBarrierGuard`` and ``isSanitizerGuard`` have been removed.
Instead, the ``isBarrier`` predicate must be used to define all barriers. To do this, barrier guards can be reduced to a set of barrier nodes using the ``DataFlow::MakeBarrierGuard`` module.
For example, consider this data flow configuration using a barrier guard:
.. code-block:: ql
class MyConfig extends DataFlow::Configuration {
override predicate isBarrierGuard(DataFlow::BarrierGuardNode node) {
node instanceof MyBarrierGuard
}
...
}
class MyBarrierGuard extends DataFlow::BarrierGuardNode {
MyBarrierGuard() { ... }
override predicate blocks(boolean outcome, Expr e) { ... }
}
This can be migrated to the shared data flow library as follows:
.. code-block:: ql
module MyConfig implements DataFlow::ConfigSig {
predicate isBarrier(DataFlow::Node node) {
node = DataFlow::MakeBarrierGuard<MyBarrierGuard>::getABarrierNode()
}
...
}
class MyBarrierGuard extends DataFlow::Node {
MyBarrierGuard() { ... }
predicate blocksExpr(boolean outcome, Expr e) { ... }
}
The changes can be summarized as:
- The contents of ``isBarrierGuard`` have been moved to ``isBarrier``.
- The ``node instanceof MyBarrierGuard`` check was replaced with ``node = DataFlow::MakeBarrierGuard<MyBarrierGuard>::getABarrierNode()``.
- The ``MyBarrierGuard`` class no longer has ``DataFlow::BarrierGuardNode`` as a base class. We simply use ``DataFlow::Node`` instead.
- The ``blocks`` predicate has been renamed to ``blocksExpr`` and no longer has the ``override`` keyword.
See :ref:`using flow state <using-flow-labels-for-precise-data-flow-analysis>` for examples of how to use barrier guards with flow state.
Query-specific load and store steps
-----------------------------------
The predicates ``isAdditionalLoadStep``, ``isAdditionalStoreStep``, and ``isAdditionalLoadStoreStep`` have been removed. There is no way to emulate the original behavior.
Library models can still contribute such steps, but they will be applicable to all queries. Also see the section on jump steps further down.
Changes in behavior
--------------------
When the query has been migrated to the new interface, it may seem to behave differently due to some technical differences in the internals of
the two data flow libraries. The most significant changes are described below.
Taint steps now propagate all flow states
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
There's an important change from the old data flow library when using flow state and taint-tracking together.
When using ``TaintTracking::GlobalWithState``, all flow states can propagate along taint steps.
In the old data flow library, only the ``taint`` flow label could propagate along taint steps.
A straightforward translation of such a query may therefore result in new flow paths being found, which might be unexpected.
To emulate the old behavior, use ``DataFlow::GlobalWithState`` instead of ``TaintTracking::GlobalWithState``,
and manually add taint steps using ``isAdditionalFlowStep``. The predicate ``TaintTracking::defaultTaintStep`` can be used to access the set of taint steps.
For example:
.. code-block:: ql
module MyConfig implements DataFlow::StateConfigSig {
class FlowState extends string {
FlowState() { this = ["taint", "foo"] }
}
predicate isAdditionalFlowStep(DataFlow::Node node1, FlowState state1, DataFlow::Node node2, FlowState state2) {
// Allow taint steps to propagate the "taint" flow state
TaintTracking::defaultTaintStep(node1, node2) and
state1 = "taint" and
state2 = "taint"
}
...
}
module MyFlow = DataFlow::GlobalWithState<MyConfig>;
Jump steps across function boundaries
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
When a flow step crosses a function boundary, that is, it starts and ends in two different functions, it will now be classified as a "jump" step.
Jump steps can be problematic in some cases. Roughly speaking, the data flow library will "forget" which call site it came from when following a jump step.
This can lead to spurious flow paths that go into a function through one call site, and back out of a different call site.
If the step was generated by a library model, that is, the step is applicable to all queries, this is best mitigated by converting the step to a flow summary.
For example, the following library model adds a taint step from ``x`` to ``y`` in ``foo.bar(x, y => {})``:
.. code-block:: ql
class MyStep extends TaintTracking::SharedTaintStep {
override predicate step(DataFlow::Node node1, DataFlow::Node node2) {
exists(DataFlow::CallNode call |
call = DataFlow::moduleMember("foo", "bar").getACall() and
node1 = call.getArgument(0) and
node2 = call.getCallback(1).getParameter(0)
)
}
}
Because this step crosses a function boundary, it becomes a jump step. This can be avoided by converting it to a flow summary as follows:
.. code-block:: ql
/**
 * A flow summary for `foo.bar(x, y => {})`, propagating taint from the first argument
 * of the call to the first parameter of the callback passed as the second argument.
 */
class MySummary extends DataFlow::SummarizedCallable {
  MySummary() { this = "MySummary" }

  /** Gets a call to `bar` from the `foo` module, to which this summary applies. */
  override DataFlow::CallNode getACall() {
    result = DataFlow::moduleMember("foo", "bar").getACall()
  }

  /**
   * Holds if flow is propagated from `input` to `output`; `preservesValue` is
   * `false` because this is a taint step rather than a value-preserving step.
   */
  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    input = "Argument[0]" and // `x`, the first argument of the call
    output = "Argument[1].Parameter[0]" and // `y`, the first parameter of the callback
    preservesValue = false // taint step
  }
}
See :ref:`customizing library models for JavaScript <customizing-library-models-for-javascript>` for details about the format of the ``input`` and ``output`` strings.
The aforementioned article also provides guidance on how to store the flow summary in a data extension.
For query-specific steps that cross function boundaries, that is, steps added with ``isAdditionalFlowStep``, there is currently no way to emulate the original behavior.
A possible workaround is to convert the query-specific step to a flow summary. In this case it should be stored in a data extension to avoid performance issues, although this also means
that all other queries will be able to use the flow summary.
Barriers block all flows
~~~~~~~~~~~~~~~~~~~~~~~~
In the shared data flow library, a barrier blocks all flows, even if the tracked value is inside a content.
In the old data flow library, only barriers specific to the ``data`` flow label blocked flows when the tracked value was inside a content.
This rarely has significant impact, but some users may observe some result changes because of this.
There is currently no way to emulate the original behavior.
Further reading
---------------
- :ref:`Analyzing data flow in JavaScript and TypeScript <analyzing-data-flow-in-javascript-and-typescript>` provides a general guide to the new data flow library.
- :ref:`Using flow state for precise data flow analysis <using-flow-labels-for-precise-data-flow-analysis>` provides a general guide on using flow state.

View File

@@ -1,9 +1,9 @@
.. _using-flow-labels-for-precise-data-flow-analysis:
Using flow labels for precise data flow analysis
Using flow state for precise data flow analysis
================================================
You can associate flow labels with each value tracked by the flow analysis to determine whether the flow contains potential vulnerabilities.
You can associate a flow state with each value tracked by the flow analysis to determine whether the flow contains potential vulnerabilities.
Overview
--------
@@ -16,9 +16,9 @@ program, and associates a flag with every data value telling us whether it might
source node.
In some cases, you may want to track more detailed information about data values. This can be done
by associating flow labels with data values, as shown in this tutorial. We will first discuss the
general idea behind flow labels and then show how to use them in practice. Finally, we will give an
overview of the API involved and provide some pointers to standard queries that use flow labels.
by associating flow states with data values, as shown in this tutorial. We will first discuss the
general idea behind flow states and then show how to use them in practice. Finally, we will give an
overview of the API involved and provide some pointers to standard queries that use flow states.
Limitations of basic data-flow analysis
---------------------------------------
@@ -47,22 +47,21 @@ contain ``..`` components. Untrusted user input has both bits set initially, ind
off individual bits, and if a value that has at least one bit set is interpreted as a path, a
potential vulnerability is flagged.
Using flow labels
Using flow states
-----------------
You can handle these cases and others like them by associating a set of `flow labels` (sometimes
also referred to as `taint kinds`) with each value being tracked by the analysis. Value-preserving
You can handle these cases and others like them by associating a set of `flow states` (sometimes
also referred to as `flow labels` or `taint kinds`) with each value being tracked by the analysis. Value-preserving
data-flow steps (such as flow steps from writes to a variable to its reads) preserve the set of flow
labels, but other steps may add or remove flow labels. Sanitizers, in particular, are simply flow
steps that remove some or all flow labels. The initial set of flow labels for a value is determined
states, but other steps may add or remove flow states. The initial set of flow states for a value is determined
by the source node that gives rise to it. Similarly, sink nodes can specify that an incoming value
needs to have a certain flow label (or one of a set of flow labels) in order for the flow to be
needs to have a certain flow state (or one of a set of flow states) in order for the flow to be
flagged as a potential vulnerability.
Example
-------
As an example of using flow labels, we will show how to write a query that flags property accesses
As an example of using flow state, we will show how to write a query that flags property accesses
on JSON values that come from user-controlled input where we have not checked whether the value is
``null``, so that the property access may cause a runtime exception.
@@ -88,8 +87,8 @@ This code, on the other hand, should not be flagged:
}
}
We will first try to write a query to find this kind of problem without flow labels, and use the
difficulties we encounter as a motivation for bringing flow labels into play, which will make the
We will first try to write a query to find this kind of problem without flow state, and use the
difficulties we encounter as a motivation for bringing flow state into play, which will make the
query much easier to implement.
To get started, let's write a query that simply flags any flow from ``JSON.parse`` into the base of
@@ -99,24 +98,24 @@ a property access:
import javascript
class JsonTrackingConfig extends DataFlow::Configuration {
JsonTrackingConfig() { this = "JsonTrackingConfig" }
override predicate isSource(DataFlow::Node nd) {
module JsonTrackingConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node nd) {
exists(JsonParserCall jpc |
nd = jpc.getOutput()
)
}
override predicate isSink(DataFlow::Node nd) {
predicate isSink(DataFlow::Node nd) {
exists(DataFlow::PropRef pr |
nd = pr.getBase()
)
}
}
from JsonTrackingConfig cfg, DataFlow::Node source, DataFlow::Node sink
where cfg.hasFlow(source, sink)
module JsonTrackingFlow = DataFlow::Global<JsonTrackingConfig>;
from DataFlow::Node source, DataFlow::Node sink
where JsonTrackingFlow::flow(source, sink)
select sink, "Property access on JSON value originating $@.", source, "here"
Note that we use the ``JsonParserCall`` class from the standard library to model various JSON
@@ -127,8 +126,7 @@ introduced any sanitizers yet.
There are many ways of checking for nullness directly or indirectly. Since this is not the main
focus of this tutorial, we will only show how to model one specific case: if some variable ``v`` is
known to be truthy, it cannot be ``null``. This kind of condition is easily expressed using a
``BarrierGuardNode`` (or its counterpart ``SanitizerGuardNode`` for taint-tracking configurations).
known to be truthy, it cannot be ``null``. This kind of condition is expressed using a "barrier guard".
A barrier guard node is a data-flow node ``b`` that blocks flow through some other node ``nd``,
provided that some condition checked at ``b`` is known to hold, that is, evaluate to a truthy value.
@@ -139,29 +137,29 @@ is a barrier guard blocking flow through the use of ``data`` on the right-hand s
At this point we know that ``data`` has evaluated to a truthy value, so it cannot be ``null``
anymore.
Implementing this additional condition is easy. We implement a subclass of ``DataFlow::BarrierGuardNode``:
Implementing this additional condition is easy. We implement a class with a predicate called ``blocksExpr``:
.. code-block:: ql
class TruthinessCheck extends DataFlow::BarrierGuardNode, DataFlow::ValueNode {
class TruthinessCheck extends DataFlow::ValueNode {
SsaVariable v;
TruthinessCheck() {
astNode = v.getAUse()
}
override predicate blocks(boolean outcome, Expr e) {
predicate blocksExpr(boolean outcome, Expr e) {
outcome = true and
e = astNode
}
}
and then use it to override predicate ``isBarrierGuard`` in our configuration class:
and then use it to implement the predicate ``isBarrier`` in our configuration module:
.. code-block:: ql
override predicate isBarrierGuard(DataFlow::BarrierGuardNode guard) {
guard instanceof TruthinessCheck
predicate isBarrier(DataFlow::Node node) {
node = DataFlow::MakeBarrierGuard<TruthinessCheck>::getABarrierNode()
}
With this change, we now flag the problematic case and don't flag the unproblematic case above.
@@ -182,11 +180,11 @@ checked for null-guardedness:
}
}
We could try to remedy the situation by overriding ``isAdditionalFlowStep`` in our configuration class to track values through property reads:
We could try to remedy the situation by adding ``isAdditionalFlowStep`` in our configuration module to track values through property reads:
.. code-block:: ql
override predicate isAdditionalFlowStep(DataFlow::Node pred, DataFlow::Node succ) {
predicate isAdditionalFlowStep(DataFlow::Node pred, DataFlow::Node succ) {
succ.(DataFlow::PropRead).getBase() = pred
}
@@ -199,79 +197,86 @@ altogether, it should simply record the fact that ``root`` itself is known to be
Any property read from ``root``, on the other hand, may well be null and needs to be checked
separately.
We can achieve this by introducing two different flow labels, ``json`` and ``maybe-null``. The former
We can achieve this by introducing two different flow states, ``json`` and ``maybe-null``. The former
means that the value we are dealing with comes from a JSON object, the latter that it may be
``null``. The result of any call to ``JSON.parse`` has both labels. A property read from a value
with label ``json`` also has both labels. Checking truthiness removes the ``maybe-null`` label.
Accessing a property on a value that has the ``maybe-null`` label should be flagged.
``null``. The result of any call to ``JSON.parse`` has both states. A property read from a value
with state ``json`` also results in a value with both states. Checking truthiness removes the ``maybe-null`` state.
Accessing a property on a value that has the ``maybe-null`` state should be flagged.
To implement this, we start by defining two new subclasses of the class ``DataFlow::FlowLabel``:
To implement this, we first change the signature of our configuration module to ``DataFlow::StateConfigSig``, and
replace ``DataFlow::Global<...>`` with ``DataFlow::GlobalWithState<...>``:
.. code-block:: ql
class JsonLabel extends DataFlow::FlowLabel {
JsonLabel() {
this = "json"
}
module JsonTrackingConfig implements DataFlow::StateConfigSig {
/* ... */
}
class MaybeNullLabel extends DataFlow::FlowLabel {
MaybeNullLabel() {
this = "maybe-null"
}
}
module JsonTrackingFlow = DataFlow::GlobalWithState<JsonTrackingConfig>;
Then we extend our ``isSource`` predicate from above to track flow labels by overriding the two-argument version instead of the one-argument version:
We then add a class called ``FlowState`` which has one value for each flow state:
.. code-block:: ql
override predicate isSource(DataFlow::Node nd, DataFlow::FlowLabel lbl) {
module JsonTrackingConfig implements DataFlow::StateConfigSig {
class FlowState extends string {
FlowState() {
this = ["json", "maybe-null"]
}
}
/* ... */
}
Then we extend our ``isSource`` predicate with an additional parameter to specify the flow state:
.. code-block:: ql
predicate isSource(DataFlow::Node nd, FlowState state) {
exists(JsonParserCall jpc |
nd = jpc.getOutput() and
(lbl instanceof JsonLabel or lbl instanceof MaybeNullLabel)
state = ["json", "maybe-null"] // start in either state
)
}
Similarly, we make ``isSink`` flow-label aware and require the base of the property read to have the ``maybe-null`` label:
Similarly, we update ``isSink`` and require the base of the property read to have the ``maybe-null`` state:
.. code-block:: ql
override predicate isSink(DataFlow::Node nd, DataFlow::FlowLabel lbl) {
predicate isSink(DataFlow::Node nd, FlowState state) {
exists(DataFlow::PropRef pr |
nd = pr.getBase() and
lbl instanceof MaybeNullLabel
state = "maybe-null"
)
}
Our overriding definition of ``isAdditionalFlowStep`` now needs to specify two flow labels, a
predecessor label ``predlbl`` and a successor label ``succlbl``. In addition to specifying flow from
the predecessor node ``pred`` to the successor node ``succ``, it requires that ``pred`` has label
``predlbl``, and adds label ``succlbl`` to ``succ``. In our case, we use this to add both the
``json`` label and the ``maybe-null`` label to any property read from a value labeled with ``json``
(no matter whether it has the ``maybe-null`` label):
Our definition of ``isAdditionalFlowStep`` now needs to specify two flow states, a
predecessor state ``predState`` and a successor state ``succState``. In addition to specifying flow from
the predecessor node ``pred`` to the successor node ``succ``, it requires that ``pred`` has state
``predState``, and adds state ``succState`` to ``succ``. In our case, we use this to add both the
``json`` state and the ``maybe-null`` state to any property read from a value in the ``json`` state
(no matter whether it has the ``maybe-null`` state):
.. code-block:: ql
override predicate isAdditionalFlowStep(DataFlow::Node pred, DataFlow::Node succ,
DataFlow::FlowLabel predlbl, DataFlow::FlowLabel succlbl) {
predicate isAdditionalFlowStep(DataFlow::Node pred, FlowState predState,
DataFlow::Node succ, FlowState succState) {
succ.(DataFlow::PropRead).getBase() = pred and
predlbl instanceof JsonLabel and
(succlbl instanceof JsonLabel or succlbl instanceof MaybeNullLabel)
predState = "json" and
succState = ["json", "maybe-null"]
}
Finally, we turn ``TruthinessCheck`` from a ``BarrierGuardNode`` into a ``LabeledBarrierGuardNode``,
specifying that it only removes the ``maybe-null`` label (but not the ``json`` label) from the
sanitized value:
Finally, we add an additional parameter to the ``isBarrier`` predicate to specify the flow state
to block at the ``TruthinessCheck`` barrier:
.. code-block:: ql
class TruthinessCheck extends DataFlow::LabeledBarrierGuardNode, DataFlow::ValueNode {
...
module JsonTrackingConfig implements DataFlow::StateConfigSig {
/* ... */
override predicate blocks(boolean outcome, Expr e, DataFlow::FlowLabel lbl) {
outcome = true and
e = astNode and
lbl instanceof MaybeNullLabel
predicate isBarrier(DataFlow::Node node, FlowState state) {
node = DataFlow::MakeBarrierGuard<TruthinessCheck>::getABarrierNode() and
state = "maybe-null"
}
}
@@ -283,66 +288,60 @@ step by step in the UI:
/** @kind path-problem */
import javascript
import DataFlow::PathGraph
class JsonLabel extends DataFlow::FlowLabel {
JsonLabel() {
this = "json"
}
}
class MaybeNullLabel extends DataFlow::FlowLabel {
MaybeNullLabel() {
this = "maybe-null"
}
}
class TruthinessCheck extends DataFlow::LabeledBarrierGuardNode, DataFlow::ValueNode {
class TruthinessCheck extends DataFlow::ValueNode {
SsaVariable v;
TruthinessCheck() {
astNode = v.getAUse()
}
override predicate blocks(boolean outcome, Expr e, DataFlow::FlowLabel lbl) {
predicate blocksExpr(boolean outcome, Expr e, JsonTrackingConfig::FlowState state) {
outcome = true and
e = astNode and
lbl instanceof MaybeNullLabel
state = "maybe-null"
}
}
class JsonTrackingConfig extends DataFlow::Configuration {
JsonTrackingConfig() { this = "JsonTrackingConfig" }
module JsonTrackingConfig implements DataFlow::StateConfigSig {
class FlowState extends string {
FlowState() {
this = ["json", "maybe-null"]
}
}
override predicate isSource(DataFlow::Node nd, DataFlow::FlowLabel lbl) {
predicate isSource(DataFlow::Node nd, FlowState state) {
exists(JsonParserCall jpc |
nd = jpc.getOutput() and
(lbl instanceof JsonLabel or lbl instanceof MaybeNullLabel)
state = ["json", "maybe-null"] // start in either state
)
}
override predicate isSink(DataFlow::Node nd, DataFlow::FlowLabel lbl) {
predicate isSink(DataFlow::Node nd, FlowState state) {
exists(DataFlow::PropRef pr |
nd = pr.getBase() and
lbl instanceof MaybeNullLabel
state = "maybe-null"
)
}
override predicate isAdditionalFlowStep(DataFlow::Node pred, DataFlow::Node succ,
DataFlow::FlowLabel predlbl, DataFlow::FlowLabel succlbl) {
predicate isAdditionalFlowStep(DataFlow::Node pred, FlowState predState,
DataFlow::Node succ, FlowState succState) {
succ.(DataFlow::PropRead).getBase() = pred and
predlbl instanceof JsonLabel and
(succlbl instanceof JsonLabel or succlbl instanceof MaybeNullLabel)
predState = "json" and
succState = ["json", "maybe-null"]
}
override predicate isBarrierGuard(DataFlow::BarrierGuardNode guard) {
guard instanceof TruthinessCheck
predicate isBarrier(DataFlow::Node node, FlowState state) {
node = DataFlow::MakeBarrierGuard<TruthinessCheck>::getABarrierNode() and
state = "maybe-null"
}
}
from JsonTrackingConfig cfg, DataFlow::PathNode source, DataFlow::PathNode sink
where cfg.hasFlowPath(source, sink)
select sink, source, sink, "Property access on JSON value originating $@.", source, "here"
module JsonTrackingFlow = DataFlow::GlobalWithState<JsonTrackingConfig>;
from DataFlow::Node source, DataFlow::Node sink
where JsonTrackingFlow::flow(source, sink)
select sink, "Property access on JSON value originating $@.", source, "here"
We ran this query on the https://github.com/finos/plexus-interop repository. Many of the
results were false positives since the query does not currently model many ways in which we can check
@@ -354,52 +353,30 @@ this tutorial.
API
---
Plain data-flow configurations implicitly use a single flow label "data", which indicates that a
data value originated from a source. You can use the predicate ``DataFlow::FlowLabel::data()``,
which returns this flow label, as a symbolic name for it.
Flow state can be used in modules implementing the ``DataFlow::StateConfigSig`` signature. Compared to a ``DataFlow::ConfigSig`` the main differences are:
Taint-tracking configurations add a second flow label "taint" (``DataFlow::FlowLabel::taint()``),
which is similar to "data", but includes values that have passed through non-value preserving steps
such as string operations.
- The module must be passed to ``DataFlow::GlobalWithState<...>`` or ``TaintTracking::GlobalWithState<...>``
instead of ``DataFlow::Global<...>`` or ``TaintTracking::Global<...>``.
- The module must contain a type named ``FlowState``.
- ``isSource`` expects an additional parameter specifying the flow state.
- ``isSink`` optionally can take an additional parameter specifying the flow state.
If omitted, the sinks are in effect for all flow states.
- ``isAdditionalFlowStep`` optionally can take two additional parameters specifying the predecessor and successor flow states.
If omitted, the generated steps apply for any flow state and preserve the current flow state.
- ``isBarrier`` optionally can take an additional parameter specifying the flow state to block.
If omitted, the barriers block all flow states.
Each of the three member predicates ``isSource``, ``isSink`` and
``isAdditionalFlowStep``/``isAdditionalTaintStep`` has one version that uses the default flow
labels, and one version that allows specifying custom flow labels through additional arguments.
For ``isSource``, there is one additional argument specifying which flow label(s) should be
associated with values originating from this source. If multiple flow labels are specified, each
value is associated with `all` of them.
For ``isSink``, the additional argument specifies which flow label(s) a value that flows into this
source may be associated with. If multiple flow labels are specified, then any value that is
associated with `at least one` of them will be considered by the configuration.
For ``isAdditionalFlowStep`` there are two additional arguments ``predlbl`` and ``succlbl``, which
allow flow steps to act as flow label transformers. If a value associated with ``predlbl`` arrives
at the start node of the additional step, it is propagated to the end node and associated with
``succlbl``. Of course, ``predlbl`` and ``succlbl`` may be the same, indicating that the flow step
preserves this label. There can also be multiple values of ``succlbl`` for a single ``predlbl`` or
vice versa.
Note that if you do not restrict ``succlbl`` then it will be allowed to range over all flow labels.
This may cause labels that were previously blocked on a path to reappear, which is not usually what
you want.
The flow label-aware version of ``isBarrier`` is called ``isLabeledBarrier``: unlike ``isBarrier``,
which prevents any flow past the given node, it only blocks flow of values associated with one of
the specified flow labels.
Standard queries using flow labels
Standard queries using flow state
----------------------------------
Some of our standard security queries use flow labels. You can look at their implementation
to get a feeling for how to use flow labels in practice.
Some of our standard security queries use flow state. You can look at their implementation
to get a feeling for how to use flow state in practice.
In particular, both of the examples mentioned in the section on limitations of basic data flow above
are from standard security queries that use flow labels. The `Prototype-polluting merge call
<https://codeql.github.com/codeql-query-help/javascript/js-prototype-pollution/>`_ query uses two flow labels to distinguish completely
are from standard security queries that use flow state. The `Prototype-polluting merge call
<https://codeql.github.com/codeql-query-help/javascript/js-prototype-pollution/>`_ query uses two flow states to distinguish completely
tainted objects from partially tainted objects. The `Uncontrolled data used in path expression
<https://codeql.github.com/codeql-query-help/javascript/js-path-injection/>`_ query uses four flow labels to track whether a user-controlled
<https://codeql.github.com/codeql-query-help/javascript/js-path-injection/>`_ query uses four flow states to track whether a user-controlled
string may be an absolute path and whether it may contain ``..`` components.
Further reading

View File

@@ -9,42 +9,42 @@
*/
import javascript
import DataFlow
import DataFlow::PathGraph
/**
* A taint-tracking configuration that tracks user-controlled values into a 'userId' property sent to a backend service.
*/
class IdorTaint extends TaintTracking::Configuration {
IdorTaint() { this = "IdorTaint" }
module IdorTaintConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node node) { node instanceof RemoteFlowSource }
override predicate isSource(Node node) { node instanceof RemoteFlowSource }
predicate isSink(DataFlow::Node node) { exists(ClientRequest req | node = req.getADataNode()) }
override predicate isSink(Node node) { exists(ClientRequest req | node = req.getADataNode()) }
override predicate isAdditionalTaintStep(Node pred, Node succ) {
predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
// Step from x -> { userId: x }
succ.(SourceNode).getAPropertyWrite("userId").getRhs() = pred
node2.(DataFlow::SourceNode).getAPropertyWrite("userId").getRhs() = node1
}
override predicate isSanitizerGuard(TaintTracking::SanitizerGuardNode node) {
predicate isBarrier(DataFlow::Node node) {
// After a check like `if (userId === session.user.id)`, the userId is considered safe.
node instanceof EqualityGuard
node = DataFlow::MakeBarrierGuard<EqualityGuard>::getABarrierNode()
}
}
/**
* A sanitizer for values that have successfully been compared to another value.
*/
class EqualityGuard extends TaintTracking::SanitizerGuardNode, ValueNode {
class EqualityGuard extends DataFlow::ValueNode {
override EqualityTest astNode;
override predicate sanitizes(boolean outcome, Expr e) {
predicate blocksExpr(boolean outcome, Expr e) {
e = astNode.getAnOperand() and
outcome = astNode.getPolarity()
}
}
from IdorTaint cfg, PathNode source, PathNode sink
where cfg.hasFlowPath(source, sink)
module IdorTaintFlow = TaintTracking::Global<IdorTaintConfig>;
import IdorTaintFlow::PathGraph
from IdorTaintFlow::PathNode source, IdorTaintFlow::PathNode sink
where IdorTaintFlow::flowPath(source, sink)
select sink.getNode(), source, sink, "Unauthenticated user ID from $@.", source.getNode(), "here"

View File

@@ -9,23 +9,25 @@
*/
import javascript
import DataFlow
import DataFlow::PathGraph
class DecodingAfterSanitization extends TaintTracking::Configuration {
DecodingAfterSanitization() { this = "DecodingAfterSanitization" }
module DecodingAfterSanitizationConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node node) {
node.(DataFlow::CallNode).getCalleeName() = "escapeHtml"
}
override predicate isSource(Node node) { node.(CallNode).getCalleeName() = "escapeHtml" }
override predicate isSink(Node node) {
exists(CallNode call |
predicate isSink(DataFlow::Node node) {
exists(DataFlow::CallNode call |
call.getCalleeName().matches("decodeURI%") and
node = call.getArgument(0)
)
}
}
from DecodingAfterSanitization cfg, PathNode source, PathNode sink
where cfg.hasFlowPath(source, sink)
module DecodingAfterSanitizationFlow = TaintTracking::Global<DecodingAfterSanitizationConfig>;
import DecodingAfterSanitizationFlow::PathGraph
from DecodingAfterSanitizationFlow::PathNode source, DecodingAfterSanitizationFlow::PathNode sink
where DecodingAfterSanitizationFlow::flowPath(source, sink)
select sink.getNode(), source, sink, "URI decoding invalidates the HTML sanitization performed $@.",
source.getNode(), "here"

View File

@@ -9,16 +9,14 @@
*/
import javascript
import DataFlow
import DataFlow::PathGraph
/**
* A call to a function that may introduce HTML meta-characters by
* replacing `%3C` or `\u003C` with `<`.
*/
class DecodingCall extends CallNode {
class DecodingCall extends DataFlow::CallNode {
string kind;
Node input;
DataFlow::Node input;
DecodingCall() {
this.getCalleeName().matches("decodeURI%") and
@@ -33,20 +31,24 @@ class DecodingCall extends CallNode {
string getKind() { result = kind }
/** Gets the input being decoded. */
Node getInput() { result = input }
DataFlow::Node getInput() { result = input }
}
class DecodingAfterSanitization extends TaintTracking::Configuration {
DecodingAfterSanitization() { this = "DecodingAfterSanitization" }
module DecodingAfterSanitizationConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node node) { node instanceof HtmlSanitizerCall }
override predicate isSource(Node node) { node instanceof HtmlSanitizerCall }
override predicate isSink(Node node) { node = any(DecodingCall c).getInput() }
predicate isSink(DataFlow::Node node) { node = any(DecodingCall c).getInput() }
}
from DecodingAfterSanitization cfg, PathNode source, PathNode sink, DecodingCall decoder
module DecodingAfterSanitizationFlow = TaintTracking::Global<DecodingAfterSanitizationConfig>;
import DecodingAfterSanitizationFlow::PathGraph
from
DecodingAfterSanitizationFlow::PathNode source, DecodingAfterSanitizationFlow::PathNode sink,
DecodingCall decoder
where
cfg.hasFlowPath(source, sink) and
DecodingAfterSanitizationFlow::flowPath(source, sink) and
decoder.getInput() = sink.getNode()
select sink.getNode(), source, sink, decoder.getKind() + " invalidates $@.", source.getNode(),
"this HTML sanitization"

View File

@@ -8,16 +8,17 @@
*/
import javascript
import DataFlow
class EvalTaint extends TaintTracking::Configuration {
EvalTaint() { this = "EvalTaint" }
module EvalTaintConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node node) { node instanceof RemoteFlowSource }
override predicate isSource(Node node) { node instanceof RemoteFlowSource }
override predicate isSink(Node node) { node = globalVarRef("eval").getACall().getArgument(0) }
predicate isSink(DataFlow::Node node) {
node = DataFlow::globalVarRef("eval").getACall().getArgument(0)
}
}
from EvalTaint cfg, Node source, Node sink
where cfg.hasFlow(source, sink)
module EvalTaintFlow = TaintTracking::Global<EvalTaintConfig>;
from DataFlow::Node source, DataFlow::Node sink
where EvalTaintFlow::flow(source, sink)
select sink, "Eval with user-controlled input from $@.", source, "here"

View File

@@ -9,18 +9,20 @@
*/
import javascript
import DataFlow
import DataFlow::PathGraph
class EvalTaint extends TaintTracking::Configuration {
EvalTaint() { this = "EvalTaint" }
module EvalTaintConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node node) { node instanceof RemoteFlowSource }
override predicate isSource(Node node) { node instanceof RemoteFlowSource }
override predicate isSink(Node node) { node = globalVarRef("eval").getACall().getArgument(0) }
predicate isSink(DataFlow::Node node) {
node = DataFlow::globalVarRef("eval").getACall().getArgument(0)
}
}
from EvalTaint cfg, PathNode source, PathNode sink
where cfg.hasFlowPath(source, sink)
module EvalTaintFlow = TaintTracking::Global<EvalTaintConfig>;
import EvalTaintFlow::PathGraph
from EvalTaintFlow::PathNode source, EvalTaintFlow::PathNode sink
where EvalTaintFlow::flowPath(source, sink)
select sink.getNode(), source, sink, "Eval with user-controlled input from $@.", source.getNode(),
"here"

View File

@@ -9,8 +9,6 @@
*/
import javascript
import DataFlow
import DataFlow::PathGraph
/**
* A dataflow configuration that tracks authentication tokens ("authKey")
@@ -26,33 +24,37 @@ import DataFlow::PathGraph
* }), '*');
* ```
*/
class AuthKeyTracking extends DataFlow::Configuration {
AuthKeyTracking() { this = "AuthKeyTracking" }
module AuthKeyTrackingConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node node) {
node.(DataFlow::PropRead).getPropertyName() = "authKey"
}
override predicate isSource(Node node) { node.(PropRead).getPropertyName() = "authKey" }
override predicate isSink(Node node) {
exists(MethodCallNode call |
predicate isSink(DataFlow::Node node) {
exists(DataFlow::MethodCallNode call |
call.getMethodName() = "postMessage" and
call.getArgument(1).getStringValue() = "*" and // no restriction on target origin
call.getArgument(0) = node
)
}
override predicate isAdditionalFlowStep(Node pred, Node succ) {
predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
// Step into objects: x -> { f: x }
succ.(SourceNode).getAPropertyWrite().getRhs() = pred
node2.(DataFlow::SourceNode).getAPropertyWrite().getRhs() = node1
or
// Step through JSON serialization: x -> JSON.stringify(x)
// Note: TaintTracking::Global includes this step by default, but not DataFlow::Global
exists(CallNode call |
call = globalVarRef("JSON").getAMethodCall("stringify") and
pred = call.getArgument(0) and
succ = call
exists(DataFlow::CallNode call |
call = DataFlow::globalVarRef("JSON").getAMethodCall("stringify") and
node1 = call.getArgument(0) and
node2 = call
)
}
}
from AuthKeyTracking cfg, PathNode source, PathNode sink
where cfg.hasFlowPath(source, sink)
module AuthKeyTracking = DataFlow::Global<AuthKeyTrackingConfig>;
import AuthKeyTracking::PathGraph
from AuthKeyTracking::PathNode source, AuthKeyTracking::PathNode sink
where AuthKeyTracking::flowPath(source, sink)
select sink.getNode(), source, sink, "Message leaks the authKey from $@.", source.getNode(), "here"

View File

@@ -9,7 +9,7 @@
import javascript
import semmle.javascript.security.dataflow.StoredXssQuery
import DataFlow::PathGraph
import StoredXssFlow::PathGraph
/**
* The data returned from a MySQL query, such as the `data` parameter in this example:
@@ -31,6 +31,6 @@ class MysqlSource extends Source {
}
}
from Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink
where cfg.hasFlowPath(source, sink)
from StoredXssFlow::PathNode source, StoredXssFlow::PathNode sink
where StoredXssFlow::flowPath(source, sink)
select sink.getNode(), source, sink, "Stored XSS from $@.", source.getNode(), "database value."

View File

@@ -10,7 +10,7 @@
import javascript
import semmle.javascript.security.dataflow.StoredXssQuery
import DataFlow::PathGraph
import StoredXssFlow::PathGraph
/**
* Gets an instance of `mysql.createConnection()`, tracked globally.
@@ -45,6 +45,6 @@ class MysqlSource extends Source {
MysqlSource() { this = mysqlConnection().getAMethodCall("query").getCallback(1).getParameter(1) }
}
from Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink
where cfg.hasFlowPath(source, sink)
from StoredXssFlow::PathNode source, StoredXssFlow::PathNode sink
where StoredXssFlow::flowPath(source, sink)
select sink.getNode(), source, sink, "Stored XSS from $@.", source.getNode(), "database value."

View File

@@ -8,8 +8,6 @@
*/
import javascript
import DataFlow
import DataFlow::PathGraph
/**
* Gets the name of an unescaped placeholder in a lodash template.
@@ -21,13 +19,11 @@ string getAPlaceholderInString(string s) {
result = s.regexpCapture(".*<%=\\s*([a-zA-Z0-9_]+)\\s*%>.*", 1)
}
class TemplateInjection extends TaintTracking::Configuration {
TemplateInjection() { this = "TemplateInjection" }
module TemplateInjectionConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node node) { node instanceof RemoteFlowSource }
override predicate isSource(Node node) { node instanceof RemoteFlowSource }
override predicate isSink(Node node) {
exists(CallNode call, string placeholder |
predicate isSink(DataFlow::Node node) {
exists(DataFlow::CallNode call, string placeholder |
call = LodashUnderscore::member("template").getACall() and
placeholder = getAPlaceholderInString(call.getArgument(0).getStringValue()) and
node = call.getOptionArgument(1, placeholder)
@@ -35,7 +31,11 @@ class TemplateInjection extends TaintTracking::Configuration {
}
}
from TemplateInjection cfg, PathNode source, PathNode sink
where cfg.hasFlowPath(source, sink)
module TemplateInjectionFlow = TaintTracking::Global<TemplateInjectionConfig>;
import TemplateInjectionFlow::PathGraph
from TemplateInjectionFlow::PathNode source, TemplateInjectionFlow::PathNode sink
where TemplateInjectionFlow::flowPath(source, sink)
select sink.getNode(), source, sink,
"User-controlled value from $@ occurs unescaped in a lodash template.", source.getNode(), "here."

View File

@@ -0,0 +1,6 @@
---
category: deprecated
---
* Custom data flow queries will need to be migrated in order to use the shared data flow library. Until migrated, such queries will compile with deprecation warnings and run with a
deprecated copy of the old data flow library. The deprecation layer will be removed in early 2026, after which any unmigrated queries will stop working.
See more information in the [migration guide](https://codeql.github.com/docs/codeql-language-guides/migrating-javascript-dataflow-queries).

View File

@@ -0,0 +1,5 @@
---
category: majorAnalysis
---
* All data flow queries now use the same underlying data flow library as the analyses for other languages, replacing the old one written specifically for JavaScript/TypeScript.
This is a significant change and users may consequently observe differences in the alerts generated by the analysis.

View File

@@ -9,8 +9,10 @@ dependencies:
codeql/dataflow: ${workspace}
codeql/mad: ${workspace}
codeql/regex: ${workspace}
codeql/ssa: ${workspace}
codeql/threat-models: ${workspace}
codeql/tutorial: ${workspace}
codeql/typetracking: ${workspace}
codeql/util: ${workspace}
codeql/xml: ${workspace}
codeql/yaml: ${workspace}

View File

@@ -6,6 +6,7 @@
import javascript
private import semmle.javascript.internal.CachedStages
private import Expressions.ExprHasNoEffect
private import semmle.javascript.dataflow.internal.DataFlowNode
/**
* Companion module to the `AmdModuleDefinition` class.
@@ -84,10 +85,15 @@ class AmdModuleDefinition extends CallExpr instanceof AmdModuleDefinition::Range
result instanceof DataFlow::ValueNode
}
private DataFlow::Node getFactoryNodeInternal() {
// To avoid recursion, this should not depend on `SourceNode`.
result = DataFlow::valueNode(this.getLastArgument()) or
result = this.getFactoryNodeInternal().getAPredecessor()
/**
* Gets the factory function of this module definition.
*/
Function getFactoryFunction() { TValueNode(result) = this.getFactoryNodeInternal() }
private EarlyStageNode getFactoryNodeInternal() {
result = TValueNode(this.getLastArgument())
or
DataFlow::localFlowStep(result, this.getFactoryNodeInternal())
}
/** Gets the expression defining this module. */
@@ -139,7 +145,10 @@ class AmdModuleDefinition extends CallExpr instanceof AmdModuleDefinition::Range
* Gets the `i`th parameter of the factory function of this module.
*/
private Parameter getFactoryParameter(int i) {
this.getFactoryNodeInternal().asExpr().(Function).getParameter(i) = result
exists(Function fun |
this.getFactoryNodeInternal() = TValueNode(fun) and
result = fun.getParameter(i)
)
}
/**

View File

@@ -9,7 +9,7 @@ module ArrayTaintTracking {
/**
* A taint propagating data flow edge caused by the builtin array functions.
*/
private class ArrayFunctionTaintStep extends TaintTracking::SharedTaintStep {
private class ArrayFunctionTaintStep extends TaintTracking::LegacyTaintStep {
override predicate arrayStep(DataFlow::Node pred, DataFlow::Node succ) {
arrayFunctionTaintStep(pred, succ, _)
}
@@ -130,7 +130,7 @@ private module ArrayDataFlow {
* A step modeling the creation of an Array using the `Array.from(x)` method.
* The step copies the elements of the argument (set, array, or iterator elements) into the resulting array.
*/
private class ArrayFrom extends PreCallGraphStep {
private class ArrayFrom extends LegacyPreCallGraphStep {
override predicate loadStoreStep(
DataFlow::Node pred, DataFlow::SourceNode succ, string fromProp, string toProp
) {
@@ -150,7 +150,7 @@ private module ArrayDataFlow {
*
* Such a step can occur both with the `push` and `unshift` methods, or when creating a new array.
*/
private class ArrayCopySpread extends PreCallGraphStep {
private class ArrayCopySpread extends LegacyPreCallGraphStep {
override predicate loadStoreStep(
DataFlow::Node pred, DataFlow::SourceNode succ, string fromProp, string toProp
) {
@@ -171,7 +171,7 @@ private module ArrayDataFlow {
/**
* A step for storing an element on an array using `arr.push(e)` or `arr.unshift(e)`.
*/
private class ArrayAppendStep extends PreCallGraphStep {
private class ArrayAppendStep extends LegacyPreCallGraphStep {
override predicate storeStep(DataFlow::Node element, DataFlow::SourceNode obj, string prop) {
prop = arrayElement() and
exists(DataFlow::MethodCallNode call |
@@ -202,7 +202,7 @@ private module ArrayDataFlow {
* A step for reading/writing an element from an array inside a for-loop.
* E.g. a read from `foo[i]` to `bar` in `for(var i = 0; i < arr.length; i++) {bar = foo[i]}`.
*/
private class ArrayIndexingStep extends PreCallGraphStep {
private class ArrayIndexingStep extends LegacyPreCallGraphStep {
override predicate loadStep(DataFlow::Node obj, DataFlow::Node element, string prop) {
exists(ArrayIndexingAccess access |
prop = arrayElement() and
@@ -224,7 +224,7 @@ private module ArrayDataFlow {
* A step for retrieving an element from an array using `.pop()`, `.shift()`, or `.at()`.
* E.g. `array.pop()`.
*/
private class ArrayPopStep extends PreCallGraphStep {
private class ArrayPopStep extends LegacyPreCallGraphStep {
override predicate loadStep(DataFlow::Node obj, DataFlow::Node element, string prop) {
exists(DataFlow::MethodCallNode call |
call.getMethodName() = ["pop", "shift", "at"] and
@@ -245,7 +245,7 @@ private module ArrayDataFlow {
*
* And the second parameter in the callback is the array itself, so there is a `loadStoreStep` from the array to that second parameter.
*/
private class ArrayIteration extends PreCallGraphStep {
private class ArrayIteration extends LegacyPreCallGraphStep {
override predicate loadStep(DataFlow::Node obj, DataFlow::Node element, string prop) {
exists(DataFlow::MethodCallNode call |
call.getMethodName() = ["map", "forEach"] and
@@ -277,7 +277,7 @@ private module ArrayDataFlow {
/**
* A step for creating an array and storing the elements in the array.
*/
private class ArrayCreationStep extends PreCallGraphStep {
private class ArrayCreationStep extends LegacyPreCallGraphStep {
override predicate storeStep(DataFlow::Node element, DataFlow::SourceNode obj, string prop) {
exists(DataFlow::ArrayCreationNode array, int i |
element = array.getElement(i) and
@@ -291,7 +291,7 @@ private module ArrayDataFlow {
* A step modeling that `splice` can insert elements into an array.
* For example in `array.splice(i, del, e1, e2, ...)`: if any item is tainted, then so is `array`
*/
private class ArraySpliceStep extends PreCallGraphStep {
private class ArraySpliceStep extends LegacyPreCallGraphStep {
override predicate storeStep(DataFlow::Node element, DataFlow::SourceNode obj, string prop) {
exists(DataFlow::MethodCallNode call |
call.getMethodName() = ["splice", "toSpliced"] and
@@ -319,7 +319,7 @@ private module ArrayDataFlow {
* A step for modeling `concat`.
* For example in `e = arr1.concat(arr2, arr3)`: if any of the `arr` is tainted, then so is `e`.
*/
private class ArrayConcatStep extends PreCallGraphStep {
private class ArrayConcatStep extends LegacyPreCallGraphStep {
override predicate loadStoreStep(DataFlow::Node pred, DataFlow::SourceNode succ, string prop) {
exists(DataFlow::MethodCallNode call |
call.getMethodName() = "concat" and
@@ -333,7 +333,7 @@ private module ArrayDataFlow {
/**
* A step for modeling that elements from an array `arr` also appear in the result from calling `slice`/`splice`/`filter`/`toSpliced`.
*/
private class ArraySliceStep extends PreCallGraphStep {
private class ArraySliceStep extends LegacyPreCallGraphStep {
override predicate loadStoreStep(DataFlow::Node pred, DataFlow::SourceNode succ, string prop) {
exists(DataFlow::MethodCallNode call |
call.getMethodName() = ["slice", "splice", "filter", "toSpliced"] and
@@ -347,7 +347,7 @@ private module ArrayDataFlow {
/**
* A step modeling that elements from an array `arr` are received by calling `find`.
*/
private class ArrayFindStep extends PreCallGraphStep {
private class ArrayFindStep extends LegacyPreCallGraphStep {
override predicate loadStep(DataFlow::Node pred, DataFlow::Node succ, string prop) {
exists(DataFlow::CallNode call |
call = arrayFindCall(pred) and
@@ -397,7 +397,7 @@ private module ArrayLibraries {
/**
* A taint step through the `arrify` library, or other libraries that (maybe) convert values into arrays.
*/
private class ArrayifyStep extends TaintTracking::SharedTaintStep {
private class ArrayifyStep extends TaintTracking::LegacyTaintStep {
override predicate step(DataFlow::Node pred, DataFlow::Node succ) {
exists(API::CallNode call | call = API::moduleImport(["arrify", "array-ify"]).getACall() |
pred = call.getArgument(0) and succ = call
@@ -417,7 +417,7 @@ private module ArrayLibraries {
/**
* A taint step for a library that copies the elements of an array into another array.
*/
private class ArrayCopyTaint extends TaintTracking::SharedTaintStep {
private class ArrayCopyTaint extends TaintTracking::LegacyTaintStep {
override predicate step(DataFlow::Node pred, DataFlow::Node succ) {
exists(DataFlow::CallNode call |
call = arrayCopyCall(pred) and
@@ -429,7 +429,7 @@ private module ArrayLibraries {
/**
* A loadStoreStep for a library that copies the elements of an array into another array.
*/
private class ArrayCopyLoadStore extends PreCallGraphStep {
private class ArrayCopyLoadStore extends LegacyPreCallGraphStep {
override predicate loadStoreStep(DataFlow::Node pred, DataFlow::SourceNode succ, string prop) {
exists(DataFlow::CallNode call |
call = arrayCopyCall(pred) and
@@ -442,7 +442,7 @@ private module ArrayLibraries {
/**
* A taint step through a call to `Array.prototype.flat` or a polyfill implementing array flattening.
*/
private class ArrayFlatStep extends TaintTracking::SharedTaintStep {
private class ArrayFlatStep extends TaintTracking::LegacyTaintStep {
override predicate step(DataFlow::Node pred, DataFlow::Node succ) {
exists(DataFlow::CallNode call | succ = call |
call.(DataFlow::MethodCallNode).getMethodName() = "flat" and

View File

@@ -3,356 +3,4 @@
* liveness information for local variables.
*/
import javascript
private import internal.StmtContainers
private import semmle.javascript.internal.CachedStages
/**
* Holds if `nd` starts a new basic block.
*/
private predicate startsBB(ControlFlowNode nd) {
not exists(nd.getAPredecessor()) and exists(nd.getASuccessor())
or
nd.isJoin()
or
nd.getAPredecessor().isBranch()
}
/**
* Holds if the first node of basic block `succ` is a control flow
* successor of the last node of basic block `bb`.
*/
private predicate succBB(BasicBlock bb, BasicBlock succ) { succ = bb.getLastNode().getASuccessor() }
/**
* Holds if the first node of basic block `bb` is a control flow
* successor of the last node of basic block `pre`.
*/
private predicate predBB(BasicBlock bb, BasicBlock pre) { succBB(pre, bb) }
/** Holds if `bb` is an entry basic block. */
private predicate entryBB(BasicBlock bb) { bb.getFirstNode() instanceof ControlFlowEntryNode }
/** Holds if `bb` is an exit basic block. */
private predicate exitBB(BasicBlock bb) { bb.getLastNode() instanceof ControlFlowExitNode }
/**
* Cached helper predicates for basic blocks: the position of each control flow
* node within its block, the length of a block, the positions at which variables
* are used and defined, and reachability of blocks from an entry block.
*/
cached
private module Internal {
/**
* Holds if `succ` is a control flow successor of `nd` within the same basic block.
*/
private predicate intraBBSucc(ControlFlowNode nd, ControlFlowNode succ) {
succ = nd.getASuccessor() and
not succ instanceof BasicBlock
}
/**
* Holds if `nd` is the `i`th node in basic block `bb`.
*
* In other words, `i` is the shortest distance from a node `bb`
* that starts a basic block to `nd` along the `intraBBSucc` relation.
*/
cached
predicate bbIndex(BasicBlock bb, ControlFlowNode nd, int i) =
shortestDistances(startsBB/1, intraBBSucc/2)(bb, nd, i)
/**
* Gets the number of control flow nodes that have an index in basic block `bb`.
*
* Because `strictcount` is used, there is no result for a block without any
* indexed node.
*/
cached
int bbLength(BasicBlock bb) { result = strictcount(ControlFlowNode nd | bbIndex(bb, nd, _)) }
/**
* Holds if `u` is a use of variable `v` and `u` is the `i`th node of basic block `bb`.
*/
cached
predicate useAt(BasicBlock bb, int i, Variable v, VarUse u) {
// NOTE(review): presumably this reference ties the predicate to the `BasicBlocks`
// cached stage; confirm against `semmle.javascript.internal.CachedStages`.
Stages::BasicBlocks::ref() and
v = u.getVariable() and
bbIndex(bb, u, i)
}
/**
* Holds if definition `d` binds variable `v` at index `i` of basic block `bb`.
*
* `v` is the variable of some variable reference `lhs` bound by the target of `d`.
* The index `i` is taken from a different node depending on where `lhs` occurs:
* - if `lhs` is the whole target of `d`, it is the index of `d` itself;
* - if `lhs` is the value pattern of a property pattern, it is the index of that
*   property pattern;
* - if `lhs` is the rest pattern of an object pattern or an element of an array
*   pattern, it is the index of `lhs` itself.
*/
cached
predicate defAt(BasicBlock bb, int i, Variable v, VarDef d) {
exists(VarRef lhs |
lhs = d.getTarget().(BindingPattern).getABindingVarRef() and
v = lhs.getVariable()
|
lhs = d.getTarget() and
bbIndex(bb, d, i)
or
exists(PropertyPattern pp |
lhs = pp.getValuePattern() and
bbIndex(bb, pp, i)
)
or
exists(ObjectPattern op |
lhs = op.getRest() and
bbIndex(bb, lhs, i)
)
or
exists(ArrayPattern ap |
lhs = ap.getAnElement() and
bbIndex(bb, lhs, i)
)
)
}
/**
* Holds if basic block `bb` is an entry basic block, or is a successor
* (via `succBB`) of a basic block for which this predicate holds.
*/
cached
predicate reachableBB(BasicBlock bb) {
entryBB(bb)
or
exists(BasicBlock predBB | succBB(predBB, bb) | reachableBB(predBB))
}
}
private import Internal
/** Holds if `dom` is an immediate dominator of `bb`. */
cached
private predicate bbIDominates(BasicBlock dom, BasicBlock bb) =
idominance(entryBB/1, succBB/2)(_, dom, bb)
/** Holds if `dom` is an immediate post-dominator of `bb`. */
cached
private predicate bbIPostDominates(BasicBlock dom, BasicBlock bb) =
idominance(exitBB/1, predBB/2)(_, dom, bb)
/**
* A basic block, that is, a maximal straight-line sequence of control flow nodes
* without branches or joins.
*
* At the database level, a basic block is represented by its first control flow node.
*/
class BasicBlock extends @cfg_node, NodeInStmtContainer {
cached
BasicBlock() { Stages::BasicBlocks::ref() and startsBB(this) }
/** Gets a basic block succeeding this one. */
BasicBlock getASuccessor() { succBB(this, result) }
/** Gets a basic block preceding this one. */
BasicBlock getAPredecessor() { result.getASuccessor() = this }
/** Gets a node in this block. */
ControlFlowNode getANode() { result = this.getNode(_) }
/** Gets the node at the given position in this block. */
ControlFlowNode getNode(int pos) { bbIndex(this, result, pos) }
/** Gets the first node in this block. */
ControlFlowNode getFirstNode() { result = this }
/** Gets the last node in this block. */
ControlFlowNode getLastNode() { result = this.getNode(this.length() - 1) }
/** Gets the length of this block. */
int length() { result = bbLength(this) }
/** Holds if this basic block uses variable `v` in its `i`th node `u`. */
predicate useAt(int i, Variable v, VarUse u) { useAt(this, i, v, u) }
/** Holds if this basic block defines variable `v` in its `i`th node `d`. */
predicate defAt(int i, Variable v, VarDef d) { defAt(this, i, v, d) }
/**
* Holds if `v` is live at entry to this basic block and `u` is a use of `v`
* witnessing the liveness.
*
* In other words, `u` is a use of `v` that is reachable from the
* entry node of this basic block without going through a redefinition
* of `v`. The use `u` may either be in this basic block, or in another
* basic block reachable from this one.
*
* NOTE(review): when `v` has no definition in the container of this basic block,
* the implication below holds vacuously, so in that case nothing in this predicate
* relates `u` to `v`: `u` is only required to be a use located in this block or in
* a block reachable from it. Callers presumably constrain `u` to be a use of `v`
* themselves; confirm before relying on the description above for such variables.
*/
predicate isLiveAtEntry(Variable v, VarUse u) {
// restrict `u` to be reachable from this basic block
u = this.getASuccessor*().getANode() and
(
// shortcut: if `v` is never defined, then it must be live
this.isDefinedInSameContainer(v)
implies
// otherwise, do full liveness computation
this.isLiveAtEntryImpl(v, u)
)
}
/**
* Holds if `v` is live at entry to this basic block and `u` is a use of `v`
* witnessing the liveness, where `v` is defined at least once in the enclosing
* function or script.
*/
private predicate isLiveAtEntryImpl(Variable v, VarUse u) {
this.isLocallyLiveAtEntry(v, u)
or
this.isDefinedInSameContainer(v) and
not this.defAt(_, v, _) and
this.getASuccessor().isLiveAtEntryImpl(v, u)
}
/**
* Holds if `v` is defined at least once in the function or script to which
* this basic block belongs.
*/
private predicate isDefinedInSameContainer(Variable v) {
exists(VarDef def | def.getAVariable() = v and def.getContainer() = this.getContainer())
}
/**
* Holds if `v` is a variable that is live at entry to this basic block.
*
* Note that this is equivalent to `bb.isLiveAtEntry(v, _)`, but may
* be more efficient on large databases.
*/
predicate isLiveAtEntry(Variable v) {
this.isLocallyLiveAtEntry(v, _)
or
not this.defAt(_, v, _) and this.getASuccessor().isLiveAtEntry(v)
}
/**
* Holds if local variable `v` is live at entry to this basic block and
* `u` is a use of `v` witnessing the liveness.
*/
predicate localIsLiveAtEntry(LocalVariable v, VarUse u) {
this.isLocallyLiveAtEntry(v, u)
or
not this.defAt(_, v, _) and this.getASuccessor().localIsLiveAtEntry(v, u)
}
/**
* Holds if local variable `v` is live at entry to this basic block.
*/
predicate localIsLiveAtEntry(LocalVariable v) {
this.isLocallyLiveAtEntry(v, _)
or
not this.defAt(_, v, _) and this.getASuccessor().localIsLiveAtEntry(v)
}
/**
* Holds if `d` is a definition of `v` that is reachable from the beginning of
* this basic block without going through a redefinition of `v`.
*/
predicate localMayBeOverwritten(LocalVariable v, VarDef d) {
this.isLocallyOverwritten(v, d)
or
not this.defAt(_, v, _) and this.getASuccessor().localMayBeOverwritten(v, d)
}
/**
* Gets the next index after `i` in this basic block at which `v` is
* defined or used, provided that `d` is a definition of `v` at index `i`.
* If there are no further uses or definitions of `v` after `i`, the
* result is the length of this basic block.
*/
private int nextDefOrUseAfter(PurelyLocalVariable v, int i, VarDef d) {
this.defAt(i, v, d) and
result =
min(int j |
(this.defAt(j, v, _) or this.useAt(j, v, _) or j = this.length()) and
j > i
)
}
/**
* Holds if `d` defines variable `v` at the `i`th node of this basic block, and
* the definition is live, that is, the variable may be read after this
* definition and before a re-definition.
*/
predicate localLiveDefAt(PurelyLocalVariable v, int i, VarDef d) {
exists(int j | j = this.nextDefOrUseAfter(v, i, d) |
this.useAt(j, v, _)
or
j = this.length() and this.getASuccessor().localIsLiveAtEntry(v)
)
}
/**
* Holds if `u` is a use of `v` in this basic block, and there are
* no definitions of `v` before it.
*/
private predicate isLocallyLiveAtEntry(Variable v, VarUse u) {
exists(int n | this.useAt(n, v, u) | not exists(int m | m < n | this.defAt(m, v, _)))
}
/**
* Holds if `d` is a definition of `v` in this basic block, and there are
* no other definitions of `v` before it.
*/
private predicate isLocallyOverwritten(Variable v, VarDef d) {
exists(int n | this.defAt(n, v, d) | not exists(int m | m < n | this.defAt(m, v, _)))
}
/**
* Gets the basic block that immediately dominates this basic block.
*/
ReachableBasicBlock getImmediateDominator() { bbIDominates(result, this) }
}
/**
* An unreachable basic block, that is, a basic block
* whose first node is unreachable.
*/
class UnreachableBlock extends BasicBlock {
UnreachableBlock() { this.getFirstNode().isUnreachable() }
}
/**
* An entry basic block, that is, a basic block
* whose first node is the entry node of a statement container.
*/
class EntryBasicBlock extends BasicBlock {
EntryBasicBlock() { entryBB(this) }
}
/**
* A basic block that is reachable from an entry basic block.
*/
class ReachableBasicBlock extends BasicBlock {
ReachableBasicBlock() { reachableBB(this) }
/**
* Holds if this basic block strictly dominates `bb`.
*/
pragma[inline]
predicate strictlyDominates(ReachableBasicBlock bb) { bbIDominates+(this, bb) }
/**
* Holds if this basic block dominates `bb`.
*
* This predicate is reflexive: each reachable basic block dominates itself.
*/
pragma[inline]
predicate dominates(ReachableBasicBlock bb) { bbIDominates*(this, bb) }
/**
* Holds if this basic block strictly post-dominates `bb`.
*/
pragma[inline]
predicate strictlyPostDominates(ReachableBasicBlock bb) { bbIPostDominates+(this, bb) }
/**
* Holds if this basic block post-dominates `bb`.
*
* This predicate is reflexive: each reachable basic block post-dominates itself.
*/
pragma[inline]
predicate postDominates(ReachableBasicBlock bb) { bbIPostDominates*(this, bb) }
}
/**
* A reachable basic block with more than one predecessor.
*/
class ReachableJoinBlock extends ReachableBasicBlock {
ReachableJoinBlock() { this.getFirstNode().isJoin() }
/**
* Holds if this basic block belongs to the dominance frontier of `b`, that is
* `b` dominates a predecessor of this block, but not this block itself.
*
* Algorithm from Cooper et al., "A Simple, Fast Dominance Algorithm" (Figure 5),
* who in turn attribute it to Ferrante et al., "The program dependence graph and
* its use in optimization".
*/
predicate inDominanceFrontierOf(ReachableBasicBlock b) {
b = this.getAPredecessor() and not b = this.getImmediateDominator()
or
exists(ReachableBasicBlock prev | this.inDominanceFrontierOf(prev) |
b = prev.getImmediateDominator() and
not b = this.getImmediateDominator()
)
}
}
import internal.BasicBlockInternal::Public

View File

@@ -16,7 +16,7 @@ private module CollectionDataFlow {
/**
* A step for `Set.add()` method, which adds an element to a Set.
*/
private class SetAdd extends PreCallGraphStep {
private class SetAdd extends LegacyPreCallGraphStep {
override predicate storeStep(DataFlow::Node element, DataFlow::SourceNode obj, string prop) {
exists(DataFlow::MethodCallNode call |
call = obj.getAMethodCall("add") and
@@ -29,7 +29,7 @@ private module CollectionDataFlow {
/**
* A step for the `Set` constructor, which copies any elements from the first argument into the resulting set.
*/
private class SetConstructor extends PreCallGraphStep {
private class SetConstructor extends LegacyPreCallGraphStep {
override predicate loadStoreStep(
DataFlow::Node pred, DataFlow::SourceNode succ, string fromProp, string toProp
) {
@@ -49,7 +49,7 @@ private module CollectionDataFlow {
* For sets and iterators, the l-value is an element of the set/iterator.
* For maps the l-value is a tuple containing a key and a value.
*/
private class ForOfStep extends PreCallGraphStep {
private class ForOfStep extends LegacyPreCallGraphStep {
override predicate loadStep(DataFlow::Node obj, DataFlow::Node e, string prop) {
exists(ForOfStmt forOf |
obj = forOf.getIterationDomain().flow() and
@@ -73,7 +73,7 @@ private module CollectionDataFlow {
/**
* A step for a call to `forEach` on a Set or Map.
*/
private class SetMapForEach extends PreCallGraphStep {
private class SetMapForEach extends LegacyPreCallGraphStep {
override predicate loadStep(DataFlow::Node obj, DataFlow::Node element, string prop) {
exists(DataFlow::MethodCallNode call |
call.getMethodName() = "forEach" and
@@ -88,7 +88,7 @@ private module CollectionDataFlow {
* A call to the `get` method on a Map.
* If the key of the call to `get` has a known string value, then only the value corresponding to that key will be retrieved. (The known string value is encoded as part of the pseudo-property)
*/
private class MapGet extends PreCallGraphStep {
private class MapGet extends LegacyPreCallGraphStep {
override predicate loadStep(DataFlow::Node obj, DataFlow::Node element, string prop) {
exists(DataFlow::MethodCallNode call |
call.getMethodName() = "get" and
@@ -108,7 +108,7 @@ private module CollectionDataFlow {
* Otherwise the value will be stored into a pseudo-property corresponding to values with unknown keys.
* The value will additionally be stored into a pseudo-property corresponding to all values.
*/
class MapSet extends PreCallGraphStep {
class MapSet extends LegacyPreCallGraphStep {
override predicate storeStep(DataFlow::Node element, DataFlow::SourceNode obj, string prop) {
exists(DataFlow::MethodCallNode call |
call = obj.getAMethodCall("set") and
@@ -121,7 +121,7 @@ private module CollectionDataFlow {
/**
* A step for a call to `values` on a Map or a Set.
*/
private class MapAndSetValues extends PreCallGraphStep {
private class MapAndSetValues extends LegacyPreCallGraphStep {
override predicate loadStoreStep(
DataFlow::Node pred, DataFlow::SourceNode succ, string fromProp, string toProp
) {
@@ -138,7 +138,7 @@ private module CollectionDataFlow {
/**
* A step for a call to `keys` on a Set.
*/
private class SetKeys extends PreCallGraphStep {
private class SetKeys extends LegacyPreCallGraphStep {
override predicate loadStoreStep(
DataFlow::Node pred, DataFlow::SourceNode succ, string fromProp, string toProp
) {

View File

@@ -11,7 +11,7 @@ private import semmle.javascript.dataflow.internal.PreCallGraphStep
private module GeneratorDataFlow {
private import DataFlow::PseudoProperties
private class ArrayIteration extends PreCallGraphStep {
private class ArrayIteration extends LegacyPreCallGraphStep {
override predicate storeStep(DataFlow::Node pred, DataFlow::SourceNode succ, string prop) {
exists(DataFlow::FunctionNode f | f.getFunction().isGenerator() |
prop = iteratorElement() and

View File

@@ -4,6 +4,7 @@ import javascript
private import NodeModuleResolutionImpl
private import semmle.javascript.DynamicPropertyAccess as DynamicPropertyAccess
private import semmle.javascript.internal.CachedStages
private import semmle.javascript.dataflow.internal.DataFlowNode
/**
* A Node.js module.
@@ -240,69 +241,78 @@ private class RequireVariable extends Variable {
*/
private predicate moduleInFile(Module m, File f) { m.getFile() = f }
private predicate isModuleModule(DataFlow::Node nd) {
exists(ImportDeclaration imp |
imp.getImportedPath().getValue() = "module" and
nd =
[
DataFlow::destructuredModuleImportNode(imp),
DataFlow::valueNode(imp.getASpecifier().(ImportNamespaceSpecifier))
]
private predicate isModuleModule(EarlyStageNode nd) {
exists(ImportDeclaration imp | imp.getImportedPath().getValue() = "module" |
nd = TDestructuredModuleImportNode(imp)
or
nd = TValueNode(imp.getASpecifier().(ImportNamespaceSpecifier))
)
or
isModuleModule(nd.getAPredecessor())
exists(EarlyStageNode other |
isModuleModule(other) and
DataFlow::localFlowStep(other, nd)
)
}
private predicate isCreateRequire(DataFlow::Node nd) {
private predicate isCreateRequire(EarlyStageNode nd) {
exists(PropAccess prop |
isModuleModule(prop.getBase().flow()) and
isModuleModule(TValueNode(prop.getBase())) and
prop.getPropertyName() = "createRequire" and
nd = prop.flow()
nd = TValueNode(prop)
)
or
exists(PropertyPattern prop |
isModuleModule(prop.getObjectPattern().flow()) and
isModuleModule(TValueNode(prop.getObjectPattern())) and
prop.getName() = "createRequire" and
nd = prop.getValuePattern().flow()
nd = TValueNode(prop.getValuePattern())
)
or
exists(ImportDeclaration decl, NamedImportSpecifier spec |
decl.getImportedPath().getValue() = "module" and
spec = decl.getASpecifier() and
spec.getImportedName() = "createRequire" and
nd = spec.flow()
nd = TValueNode(spec)
)
or
isCreateRequire(nd.getAPredecessor())
exists(EarlyStageNode other |
isCreateRequire(other) and
DataFlow::localFlowStep(other, nd)
)
}
/**
* Holds if `nd` may refer to `require`, either directly or modulo local data flow.
*/
cached
private predicate isRequire(DataFlow::Node nd) {
nd.asExpr() = any(RequireVariable req).getAnAccess() and
// `mjs` files explicitly disallow `require`
not nd.getFile().getExtension() = "mjs"
private predicate isRequire(EarlyStageNode nd) {
exists(VarAccess access |
access = any(RequireVariable v).getAnAccess() and
nd = TValueNode(access) and
// `mjs` files explicitly disallow `require`
not access.getFile().getExtension() = "mjs"
)
or
isRequire(nd.getAPredecessor())
exists(EarlyStageNode other |
isRequire(other) and
DataFlow::localFlowStep(other, nd)
)
or
// `import { createRequire } from 'module';`.
// specialized to ES2015 modules to avoid recursion in the `DataFlow::moduleImport()` predicate and to avoid
// negative recursion between `Import.getImportedModuleNode()` and `Import.getImportedModule()`, and
// to avoid depending on `SourceNode` as this would make `SourceNode::Range` recursive.
exists(CallExpr call |
isCreateRequire(call.getCallee().flow()) and
nd = call.flow()
isCreateRequire(TValueNode(call.getCallee())) and
nd = TValueNode(call)
)
or
// `$.require('underscore');`.
// NPM as supported in [XSJS files](https://www.npmjs.com/package/@sap/async-xsjs#npm-packages-support).
exists(MethodCallExpr require |
nd.getFile().getExtension() = ["xsjs", "xsjslib"] and
require.getFile().getExtension() = ["xsjs", "xsjslib"] and
require.getCalleeName() = "require" and
require.getReceiver().(GlobalVarAccess).getName() = "$" and
nd = require.getCallee().flow()
nd = TValueNode(require.getCallee())
)
}
@@ -316,7 +326,7 @@ private predicate isRequire(DataFlow::Node nd) {
* ```
*/
class Require extends CallExpr, Import {
Require() { isRequire(this.getCallee().flow()) }
Require() { isRequire(TValueNode(this.getCallee())) }
override PathExpr getImportedPath() { result = this.getArgument(0) }
@@ -410,7 +420,7 @@ private class RequirePath extends PathExprCandidate {
this = any(Require req).getArgument(0)
or
exists(MethodCallExpr reqres |
isRequire(reqres.getReceiver().flow()) and
isRequire(TValueNode(reqres.getReceiver())) and
reqres.getMethodName() = "resolve" and
this = reqres.getArgument(0)
)

View File

@@ -4,6 +4,7 @@
*/
import javascript
private import semmle.javascript.dataflow.internal.DataFlowNode
/**
* Internal representation of paths as lists of components.
@@ -381,16 +382,16 @@ private class PathExprString extends PathString {
}
pragma[nomagic]
private DataFlow::Node getAPathExprAlias(PathExpr expr) {
result.getImmediatePredecessor().asExpr() = expr
private EarlyStageNode getAPathExprAlias(PathExpr expr) {
DataFlow::Impl::earlyStageImmediateFlowStep(TValueNode(expr), result)
or
result.getImmediatePredecessor() = getAPathExprAlias(expr)
DataFlow::Impl::earlyStageImmediateFlowStep(getAPathExprAlias(expr), result)
}
private class PathExprFromAlias extends PathExpr {
private PathExpr other;
PathExprFromAlias() { this = getAPathExprAlias(other).asExpr() }
PathExprFromAlias() { TValueNode(this) = getAPathExprAlias(other) }
override string getValue() { result = other.getValue() }
@@ -435,13 +436,15 @@ abstract class PathExprCandidate extends Expr {
pragma[nomagic]
private Expr getAPart1() { result = this or result = this.getAPart().getAChildExpr() }
private EarlyStageNode getAnAliasedPart1() {
result = TValueNode(this.getAPart1())
or
DataFlow::Impl::earlyStageImmediateFlowStep(result, this.getAnAliasedPart1())
}
/**
* Gets an expression that is nested inside this expression.
*
* Equivalent to `getAChildExpr*()`, but useful to enforce a better join order (in spite of
* what the optimizer thinks, there are generally far fewer `PathExprCandidate`s than
* `ConstantString`s).
* Gets an expression that is depended on by an expression nested inside this expression.
*/
pragma[nomagic]
Expr getAPart() { result = this.getAPart1().flow().getImmediatePredecessor*().asExpr() }
Expr getAPart() { TValueNode(result) = this.getAnAliasedPart1() }
}

View File

@@ -6,7 +6,9 @@ import javascript
private import dataflow.internal.StepSummary
/**
* A definition of a `Promise` object.
* A call to the `Promise` constructor, such as `new Promise((resolve, reject) => { ... })`.
*
* This includes calls to the built-in `Promise` constructor as well as promise implementations from known libraries, such as `bluebird`.
*/
abstract class PromiseDefinition extends DataFlow::SourceNode {
/** Gets the executor function of this promise object. */
@@ -196,6 +198,8 @@ module Promises {
override string getAProperty() { result = [valueProp(), errorProp()] }
}
predicate promiseConstructorRef = getAPromiseObject/0;
}
/**
@@ -267,7 +271,7 @@ private import semmle.javascript.dataflow.internal.PreCallGraphStep
* These steps are for `await p`, `new Promise()`, `Promise.resolve()`,
* `Promise.then()`, `Promise.catch()`, and `Promise.finally()`.
*/
private class PromiseStep extends PreCallGraphStep {
private class PromiseStep extends LegacyPreCallGraphStep {
override predicate loadStep(DataFlow::Node obj, DataFlow::Node element, string prop) {
PromiseFlow::loadStep(obj, element, prop)
}
@@ -459,7 +463,7 @@ module PromiseFlow {
}
}
private class PromiseTaintStep extends TaintTracking::SharedTaintStep {
private class PromiseTaintStep extends TaintTracking::LegacyTaintStep {
override predicate promiseStep(DataFlow::Node pred, DataFlow::Node succ) {
// from `x` to `new Promise((res, rej) => res(x))`
pred = succ.(PromiseDefinition).getResolveParameter().getACall().getArgument(0)
@@ -530,7 +534,7 @@ private module AsyncReturnSteps {
/**
* A data-flow step for ordinary and exceptional returns from async functions.
*/
private class AsyncReturn extends PreCallGraphStep {
private class AsyncReturn extends LegacyPreCallGraphStep {
override predicate storeStep(DataFlow::Node pred, DataFlow::SourceNode succ, string prop) {
exists(DataFlow::FunctionNode f | f.getFunction().isAsync() |
// ordinary return
@@ -548,7 +552,7 @@ private module AsyncReturnSteps {
/**
* A data-flow step for ordinary return from an async function in a taint configuration.
*/
private class AsyncTaintReturn extends TaintTracking::SharedTaintStep {
private class AsyncTaintReturn extends TaintTracking::LegacyTaintStep {
override predicate step(DataFlow::Node pred, DataFlow::Node succ) {
exists(Function f |
f.isAsync() and
@@ -665,7 +669,7 @@ private module ClosurePromise {
/**
* Taint steps through closure promise methods.
*/
private class ClosurePromiseTaintStep extends TaintTracking::SharedTaintStep {
private class ClosurePromiseTaintStep extends TaintTracking::LegacyTaintStep {
override predicate step(DataFlow::Node pred, DataFlow::Node succ) {
// static methods in goog.Promise
exists(DataFlow::CallNode call, string name |
@@ -699,7 +703,7 @@ private module DynamicImportSteps {
* let Foo = await import('./foo');
* ```
*/
class DynamicImportStep extends PreCallGraphStep {
class DynamicImportStep extends LegacyPreCallGraphStep {
override predicate storeStep(DataFlow::Node pred, DataFlow::SourceNode succ, string prop) {
exists(DynamicImportExpr imprt |
pred = imprt.getImportedModule().getAnExportedValue("default") and

View File

@@ -69,7 +69,7 @@ private class ArrayIterationCallbackAsPartialInvoke extends DataFlow::PartialInv
* A flow step propagating the exception thrown from a callback to a method whose name coincides
* with a built-in Array iteration method, such as `forEach` or `map`.
*/
private class IteratorExceptionStep extends DataFlow::SharedFlowStep {
private class IteratorExceptionStep extends DataFlow::LegacyFlowStep {
override predicate step(DataFlow::Node pred, DataFlow::Node succ) {
exists(DataFlow::MethodCallNode call |
call.getMethodName() = ["forEach", "each", "map", "filter", "some", "every", "fold", "reduce"] and
@@ -160,6 +160,15 @@ class StringReplaceCall extends DataFlow::MethodCallNode {
new = ret.getStringValue()
)
}
/**
* Holds if this call takes a regexp containing a wildcard-like term such as `.`.
*
* Also see `RegExp::isWildcardLike`.
*/
final predicate hasRegExpContainingWildcard() {
// `getAChild*()` is the reflexive transitive closure, so the root term itself and every
// term nested below it are checked.
RegExp::isWildcardLike(this.getRegExp().getRoot().getAChild*())
}
}
/**

View File

@@ -0,0 +1,397 @@
/**
* This contains three step-contribution classes, in order to support graceful deprecation of the old data flow library.
*
* - `class AdditionalFlowStep`: steps used only by the new dataflow library
* - `class LegacyFlowStep`: steps used only by the old data flow library
* - `class SharedFlowStep`: steps used by both
*
* The latter two will be deprecated in the future, but are currently not marked as `deprecated`.
* This is because a library model should be able to support both data flow libraries simultaneously, without itself getting
* deprecation warnings.
*
* To simplify correct consumption of these steps there is a correspondingly-named module for each:
*
* - `module AdditionalFlowStep`: exposes steps from `AdditionalFlowStep` and `SharedFlowStep` subclasses.
* - `module LegacyFlowStep`: exposes steps from `LegacyFlowStep` and `SharedFlowStep` subclasses.
* - `module SharedFlowStep`: exposes steps from all three classes.
*
* This design is intended to simplify consumption of steps, and to ensure existing consumers of `SharedFlowStep`
* outside this codebase will continue to work with as few surprises as possible.
*/
private import javascript
private import semmle.javascript.internal.CachedStages
/**
* A value-preserving data flow edge that should be used in all data flow configurations in
* addition to standard data flow edges.
*
* This class is a singleton, and thus subclasses do not need to specify a characteristic predicate.
* Every member predicate defaults to `none()`, so a subclass only overrides the kinds of
* steps it contributes.
*
* As an alternative to this class, consider using `DataFlow::SummarizedCallable`.
*
* Note: For performance reasons, all subclasses of this class should be part
* of the standard library. Use `isAdditionalFlowStep` for query-specific flow steps.
*/
class AdditionalFlowStep extends Unit {
/**
* Holds if `pred` &rarr; `succ` should be considered a value-preserving data flow edge.
*/
predicate step(DataFlow::Node pred, DataFlow::Node succ) { none() }
/**
* Holds if `pred` &rarr; `succ` should be considered a value-preserving data flow edge that
* crosses calling contexts.
*/
predicate jumpStep(DataFlow::Node pred, DataFlow::Node succ) { none() }
/**
* Holds if `pred` should be stored in the given `contents` of the object `succ`.
*/
predicate storeStep(DataFlow::Node pred, DataFlow::ContentSet contents, DataFlow::Node succ) {
none()
}
/**
* Holds if the given `contents` of the object in `pred` should be read into `succ`.
*/
predicate readStep(DataFlow::Node pred, DataFlow::ContentSet contents, DataFlow::Node succ) {
none()
}
}
/**
 * Provides the combined step relations contributed by subclasses of `AdditionalFlowStep`
 * and `SharedFlowStep`.
 *
 * Steps from `SharedFlowStep` are adapted to the signatures used by `AdditionalFlowStep`:
 * plain steps are split into `step` and `jumpStep` depending on whether both end points
 * have the same container, and property names are converted to content sets.
 */
cached
module AdditionalFlowStep {
  cached
  private module Internal {
    // This predicate pins the module to the `FlowSteps` stage.
    // It is public, but lives in a private module, so that it does not trigger a warning
    // about being unused.
    cached
    predicate forceStage() { Stages::FlowSteps::ref() }
  }

  /** Holds if `a` and `b` have the same container. */
  bindingset[a, b]
  pragma[inline_late]
  private predicate sameContainer(DataFlow::Node a, DataFlow::Node b) {
    b.getContainer() = a.getContainer()
  }

  /**
   * Holds if `pred` &rarr; `succ` should be considered a data flow edge.
   *
   * A step contributed through `SharedFlowStep` is only included if `pred` and `succ`
   * have the same container; otherwise it is exposed by `jumpStep`.
   */
  cached
  predicate step(DataFlow::Node pred, DataFlow::Node succ) {
    exists(AdditionalFlowStep contribution | contribution.step(pred, succ))
    or
    exists(SharedFlowStep contribution | contribution.step(pred, succ)) and
    sameContainer(pred, succ)
  }

  /**
   * Holds if `pred` &rarr; `succ` should be considered a value-preserving data flow edge that
   * crosses calling contexts.
   *
   * This includes steps contributed through `SharedFlowStep` whose end points do not have
   * the same container.
   */
  cached
  predicate jumpStep(DataFlow::Node pred, DataFlow::Node succ) {
    exists(AdditionalFlowStep contribution | contribution.jumpStep(pred, succ))
    or
    exists(SharedFlowStep contribution | contribution.step(pred, succ)) and
    not sameContainer(pred, succ)
  }

  /**
   * Holds if `pred` should be stored in the given `contents` of the object `succ`.
   *
   * Store steps contributed through `SharedFlowStep` are included, with their property name
   * converted by `DataFlow::ContentSet::fromLegacyProperty`.
   */
  cached
  predicate storeStep(DataFlow::Node pred, DataFlow::ContentSet contents, DataFlow::Node succ) {
    exists(AdditionalFlowStep contribution | contribution.storeStep(pred, contents, succ))
    or
    exists(SharedFlowStep contribution, string legacyProp |
      contribution.storeStep(pred, succ, legacyProp) and
      contents = DataFlow::ContentSet::fromLegacyProperty(legacyProp)
    )
  }

  /**
   * Holds if the given `contents` of the object `pred` should be read into `succ`.
   *
   * Load steps contributed through `SharedFlowStep` are included, with their property name
   * converted by `DataFlow::ContentSet::fromLegacyProperty`.
   */
  cached
  predicate readStep(DataFlow::Node pred, DataFlow::ContentSet contents, DataFlow::Node succ) {
    exists(AdditionalFlowStep contribution | contribution.readStep(pred, contents, succ))
    or
    exists(SharedFlowStep contribution, string legacyProp |
      contribution.loadStep(pred, succ, legacyProp) and
      contents = DataFlow::ContentSet::fromLegacyProperty(legacyProp)
    )
  }
}
/**
* A data flow edge that is only seen by the old, deprecated data flow library.
*
* This class is typically used when a step has been replaced by a flow summary. Since the old data flow
* library does not support flow summaries, such a step should remain as a legacy step, until the old data flow
* library can be removed.
*
* Every member predicate defaults to `none()`, so a subclass only overrides the kinds of
* steps it contributes.
*
* Note: For performance reasons, all subclasses of this class should be part
* of the standard library. Override `Configuration::isAdditionalFlowStep`
* for analysis-specific flow steps.
*/
class LegacyFlowStep extends Unit {
/**
* Holds if `pred` &rarr; `succ` should be considered a data flow edge.
*/
predicate step(DataFlow::Node pred, DataFlow::Node succ) { none() }
/**
* DEPRECATED. The `FlowLabel` class and steps involving flow labels are no longer used by any queries.
*
* Holds if `pred` &rarr; `succ` should be considered a data flow edge
* transforming values with label `predlbl` to have label `succlbl`.
*/
deprecated predicate step(
DataFlow::Node pred, DataFlow::Node succ, DataFlow::FlowLabel predlbl,
DataFlow::FlowLabel succlbl
) {
none()
}
/**
* Holds if `pred` should be stored in the object `succ` under the property `prop`.
* The object `succ` must be a `DataFlow::SourceNode` for the object wherein the value is stored.
*/
predicate storeStep(DataFlow::Node pred, DataFlow::SourceNode succ, string prop) { none() }
/**
* Holds if the property `prop` of the object `pred` should be loaded into `succ`.
*/
predicate loadStep(DataFlow::Node pred, DataFlow::Node succ, string prop) { none() }
/**
* Holds if the property `prop` should be copied from the object `pred` to the object `succ`.
*/
predicate loadStoreStep(DataFlow::Node pred, DataFlow::Node succ, string prop) { none() }
/**
* Holds if the property `loadProp` should be copied from the object `pred` to the property `storeProp` of object `succ`.
*/
predicate loadStoreStep(
DataFlow::Node pred, DataFlow::Node succ, string loadProp, string storeProp
) {
none()
}
}
/**
 * Provides the combined step relations contributed by subclasses of `LegacyFlowStep`
 * and `SharedFlowStep`.
 */
cached
module LegacyFlowStep {
  /**
   * Holds if `pred` &rarr; `succ` should be considered a data flow edge.
   */
  cached
  predicate step(DataFlow::Node pred, DataFlow::Node succ) {
    exists(LegacyFlowStep legacy | legacy.step(pred, succ))
    or
    exists(SharedFlowStep shared | shared.step(pred, succ))
  }

  /**
   * DEPRECATED. The `FlowLabel` class and steps involving flow labels are no longer used by any queries.
   *
   * Holds if `pred` &rarr; `succ` should be considered a data flow edge that turns a value
   * labelled `predlbl` into a value labelled `succlbl`.
   */
  cached
  deprecated predicate step(
    DataFlow::Node pred, DataFlow::Node succ, DataFlow::FlowLabel predlbl,
    DataFlow::FlowLabel succlbl
  ) {
    exists(LegacyFlowStep legacy | legacy.step(pred, succ, predlbl, succlbl))
    or
    exists(SharedFlowStep shared | shared.step(pred, succ, predlbl, succlbl))
  }

  /**
   * Holds if `pred` should be stored in the object `succ` under the property `prop`.
   * The object `succ` must be a `DataFlow::SourceNode` for the object wherein the value is stored.
   */
  cached
  predicate storeStep(DataFlow::Node pred, DataFlow::SourceNode succ, string prop) {
    exists(LegacyFlowStep legacy | legacy.storeStep(pred, succ, prop))
    or
    exists(SharedFlowStep shared | shared.storeStep(pred, succ, prop))
  }

  /**
   * Holds if the property `prop` of the object `pred` should be loaded into `succ`.
   */
  cached
  predicate loadStep(DataFlow::Node pred, DataFlow::Node succ, string prop) {
    exists(LegacyFlowStep legacy | legacy.loadStep(pred, succ, prop))
    or
    exists(SharedFlowStep shared | shared.loadStep(pred, succ, prop))
  }

  /**
   * Holds if the property `prop` should be copied from the object `pred` to the object `succ`.
   */
  cached
  predicate loadStoreStep(DataFlow::Node pred, DataFlow::Node succ, string prop) {
    exists(LegacyFlowStep legacy | legacy.loadStoreStep(pred, succ, prop))
    or
    exists(SharedFlowStep shared | shared.loadStoreStep(pred, succ, prop))
  }

  /**
   * Holds if the property `loadProp` of the object `pred` should be copied to the property
   * `storeProp` of the object `succ`.
   */
  cached
  predicate loadStoreStep(
    DataFlow::Node pred, DataFlow::Node succ, string loadProp, string storeProp
  ) {
    exists(LegacyFlowStep legacy | legacy.loadStoreStep(pred, succ, loadProp, storeProp))
    or
    exists(SharedFlowStep shared | shared.loadStoreStep(pred, succ, loadProp, storeProp))
  }
}
/**
* A data flow edge that should be added to all data flow configurations in
* addition to standard data flow edges.
*
* This class is a singleton, and thus subclasses do not need to specify a characteristic predicate.
* Every member predicate defaults to `none()`, so a subclass only overrides the kinds of
* steps it contributes.
*
* Note: For performance reasons, all subclasses of this class should be part
* of the standard library. Override `Configuration::isAdditionalFlowStep`
* for analysis-specific flow steps.
*/
class SharedFlowStep extends Unit {
/**
* Holds if `pred` &rarr; `succ` should be considered a data flow edge.
*/
predicate step(DataFlow::Node pred, DataFlow::Node succ) { none() }
/**
* DEPRECATED. The `FlowLabel` class and steps involving flow labels are no longer used by any queries.
*
* Holds if `pred` &rarr; `succ` should be considered a data flow edge
* transforming values with label `predlbl` to have label `succlbl`.
*/
deprecated predicate step(
DataFlow::Node pred, DataFlow::Node succ, DataFlow::FlowLabel predlbl,
DataFlow::FlowLabel succlbl
) {
none()
}
/**
* Holds if `pred` should be stored in the object `succ` under the property `prop`.
* The object `succ` must be a `DataFlow::SourceNode` for the object wherein the value is stored.
*/
predicate storeStep(DataFlow::Node pred, DataFlow::SourceNode succ, string prop) { none() }
/**
* Holds if the property `prop` of the object `pred` should be loaded into `succ`.
*/
predicate loadStep(DataFlow::Node pred, DataFlow::Node succ, string prop) { none() }
/**
* Holds if the property `prop` should be copied from the object `pred` to the object `succ`.
*/
predicate loadStoreStep(DataFlow::Node pred, DataFlow::Node succ, string prop) { none() }
/**
* Holds if the property `loadProp` should be copied from the object `pred` to the property `storeProp` of object `succ`.
*/
predicate loadStoreStep(
DataFlow::Node pred, DataFlow::Node succ, string loadProp, string storeProp
) {
none()
}
}
/**
 * Provides the combined step relations contributed by subclasses of `SharedFlowStep`,
 * `LegacyFlowStep`, and `AdditionalFlowStep`.
 *
 * The label-aware `step` predicate and the `loadStoreStep` predicates only draw on
 * `SharedFlowStep` and `LegacyFlowStep`, as `AdditionalFlowStep` has no corresponding
 * member predicates.
 */
module SharedFlowStep {
  /**
   * Holds if `pred` &rarr; `succ` should be considered a data flow edge.
   */
  pragma[inline]
  predicate step(DataFlow::Node pred, DataFlow::Node succ) {
    exists(SharedFlowStep shared | shared.step(pred, succ))
    or
    exists(AdditionalFlowStep additional | additional.step(pred, succ))
    or
    exists(LegacyFlowStep legacy | legacy.step(pred, succ))
  }

  /**
   * Holds if `pred` should be stored in the object `succ` under the property `prop`.
   * The object `succ` must be a `DataFlow::SourceNode` for the object wherein the value is stored.
   *
   * A store step from `AdditionalFlowStep` is included when its content set is
   * `DataFlow::ContentSet::property(prop)` and its target is a local use of `succ`.
   */
  pragma[inline]
  predicate storeStep(DataFlow::Node pred, DataFlow::SourceNode succ, string prop) {
    exists(SharedFlowStep shared | shared.storeStep(pred, succ, prop))
    or
    exists(AdditionalFlowStep additional |
      additional.storeStep(pred, DataFlow::ContentSet::property(prop), succ.getALocalUse())
    )
    or
    exists(LegacyFlowStep legacy | legacy.storeStep(pred, succ, prop))
  }

  /**
   * Holds if the property `prop` of the object `pred` should be loaded into `succ`.
   *
   * A read step from `AdditionalFlowStep` is included when its content set is
   * `DataFlow::ContentSet::property(prop)`.
   */
  pragma[inline]
  predicate loadStep(DataFlow::Node pred, DataFlow::Node succ, string prop) {
    exists(SharedFlowStep shared | shared.loadStep(pred, succ, prop))
    or
    exists(AdditionalFlowStep additional |
      additional.readStep(pred, DataFlow::ContentSet::property(prop), succ)
    )
    or
    exists(LegacyFlowStep legacy | legacy.loadStep(pred, succ, prop))
  }

  // The predicates below are aliases for old step predicates that have no counterpart in
  // `AdditionalFlowStep`.
  /**
   * DEPRECATED. The `FlowLabel` class and steps involving flow labels are no longer used by any queries.
   *
   * Holds if `pred` &rarr; `succ` should be considered a data flow edge that turns a value
   * labelled `predlbl` into a value labelled `succlbl`.
   */
  deprecated predicate step(
    DataFlow::Node pred, DataFlow::Node succ, DataFlow::FlowLabel predlbl,
    DataFlow::FlowLabel succlbl
  ) {
    exists(SharedFlowStep shared | shared.step(pred, succ, predlbl, succlbl))
    or
    exists(LegacyFlowStep legacy | legacy.step(pred, succ, predlbl, succlbl))
  }

  /**
   * Holds if the property `prop` should be copied from the object `pred` to the object `succ`.
   */
  cached
  predicate loadStoreStep(DataFlow::Node pred, DataFlow::Node succ, string prop) {
    exists(SharedFlowStep shared | shared.loadStoreStep(pred, succ, prop))
    or
    exists(LegacyFlowStep legacy | legacy.loadStoreStep(pred, succ, prop))
  }

  /**
   * Holds if the property `loadProp` of the object `pred` should be copied to the property
   * `storeProp` of the object `succ`.
   */
  cached
  predicate loadStoreStep(
    DataFlow::Node pred, DataFlow::Node succ, string loadProp, string storeProp
  ) {
    exists(SharedFlowStep shared | shared.loadStoreStep(pred, succ, loadProp, storeProp))
    or
    exists(LegacyFlowStep legacy | legacy.loadStoreStep(pred, succ, loadProp, storeProp))
  }
}

View File

@@ -0,0 +1,424 @@
/**
* Note: The contents of this file are exposed with the `TaintTracking::` prefix, via an import in `TaintTracking.qll`.
*/
private import javascript
private import semmle.javascript.internal.CachedStages
/**
* A taint-propagating data flow edge that should be added to all taint tracking
* configurations, but only those that use the new data flow library.
*
* This class is a singleton, and thus subclasses do not need to specify a characteristic predicate.
* The `step` predicate defaults to `none()`; subclasses override it to contribute steps.
*
* As an alternative to this class, consider using `DataFlow::SummarizedCallable`.
*
* Note: For performance reasons, all subclasses of this class should be part
* of the standard library. Use `isAdditionalFlowStep` for query-specific taint steps.
*/
class AdditionalTaintStep extends Unit {
/**
* Holds if `pred` &rarr; `succ` should be considered a taint-propagating
* data flow edge.
*/
predicate step(DataFlow::Node pred, DataFlow::Node succ) { none() }
}
/**
* A taint-propagating data flow edge that should be added to all taint tracking
* configurations in addition to standard data flow edges.
*
* This class is a singleton, and thus subclasses do not need to specify a characteristic predicate.
*
* Note: For performance reasons, all subclasses of this class should be part
* of the standard library. Override `Configuration::isAdditionalTaintStep`
* for analysis-specific taint steps.
*
* This class has multiple kinds of `step` predicates; these all have the same
* effect on taint-tracking configurations. However, the categorization of steps
* allows some data-flow configurations to opt in to specific kinds of taint steps.
*/
class SharedTaintStep extends Unit {
// Each step relation in this class should have a cached version in the `Cached` module
// and be included in the `sharedTaintStep` predicate.
/**
* Holds if `pred` &rarr; `succ` should be considered a taint-propagating
* data flow edge.
*/
predicate step(DataFlow::Node pred, DataFlow::Node succ) { none() }
/**
* Holds if `pred` &rarr; `succ` should be considered a taint-propagating
* data flow edge through URI manipulation.
*
* Does not include string operations that aren't specific to URIs, such
* as concatenation and substring operations.
*/
predicate uriStep(DataFlow::Node pred, DataFlow::Node succ) { none() }
/**
* Holds if `pred` &rarr; `succ` should be considered a taint-propagating
* data flow edge contributed by the heuristics library.
*
* Such steps are provided by the `semmle.javascript.heuristics` libraries
* and will default to being empty if those libraries are not imported.
*/
predicate heuristicStep(DataFlow::Node pred, DataFlow::Node succ) { none() }
/**
* Holds if `pred` &rarr; `succ` should be considered a taint-propagating
* data flow edge through persistent storage.
*/
predicate persistentStorageStep(DataFlow::Node pred, DataFlow::Node succ) { none() }
/**
* Holds if `pred` &rarr; `succ` should be considered a taint-propagating
* data flow edge through the heap.
*/
predicate heapStep(DataFlow::Node pred, DataFlow::Node succ) { none() }
/**
* Holds if `pred` &rarr; `succ` should be considered a taint-propagating
* data flow edge through arrays.
*
* These steps consider an array to be tainted if it contains tainted elements.
*/
predicate arrayStep(DataFlow::Node pred, DataFlow::Node succ) { none() }
/**
* Holds if `pred` &rarr; `succ` should be considered a taint-propagating
* data flow edge through the `state` or `props` of a React component.
*/
predicate viewComponentStep(DataFlow::Node pred, DataFlow::Node succ) { none() }
/**
* Holds if `pred` &rarr; `succ` should be considered a taint-propagating
* data flow edge through string concatenation.
*/
predicate stringConcatenationStep(DataFlow::Node pred, DataFlow::Node succ) { none() }
/**
* Holds if `pred` &rarr; `succ` should be considered a taint-propagating
* data flow edge through string manipulation (other than concatenation).
*/
predicate stringManipulationStep(DataFlow::Node pred, DataFlow::Node succ) { none() }
/**
* Holds if `pred` &rarr; `succ` should be considered a taint-propagating
* data flow edge through data serialization, such as `JSON.stringify`.
*/
predicate serializeStep(DataFlow::Node pred, DataFlow::Node succ) { none() }
/**
* Holds if `pred` &rarr; `succ` should be considered a taint-propagating
* data flow edge through data deserialization, such as `JSON.parse`.
*/
predicate deserializeStep(DataFlow::Node pred, DataFlow::Node succ) { none() }
/**
* Holds if `pred` &rarr; `succ` should be considered a taint-propagating
* data flow edge through a promise.
*
* These steps consider a promise object to be tainted if it can resolve to
* a tainted value.
*/
predicate promiseStep(DataFlow::Node pred, DataFlow::Node succ) { none() }
}
/**
* A taint-propagating data flow edge that should be used with the old data flow library.
*
* This class is a singleton, and thus subclasses do not need to specify a characteristic predicate.
*
* Note: For performance reasons, all subclasses of this class should be part
* of the standard library. Override `Configuration::isAdditionalTaintStep`
* for analysis-specific taint steps.
*
* This class has multiple kinds of `step` predicates; these all have the same
* effect on taint-tracking configurations. However, the categorization of steps
* allows some data-flow configurations to opt in to specific kinds of taint steps.
*/
class LegacyTaintStep extends Unit {
// Each step relation in this class should have a cached version in the `Cached` module
// and be included in the `sharedTaintStep` predicate.
/**
* Holds if `pred` &rarr; `succ` should be considered a taint-propagating
* data flow edge.
*/
predicate step(DataFlow::Node pred, DataFlow::Node succ) { none() }
/**
* Holds if `pred` &rarr; `succ` should be considered a taint-propagating
* data flow edge through URI manipulation.
*
* Does not include string operations that aren't specific to URIs, such
* as concatenation and substring operations.
*/
predicate uriStep(DataFlow::Node pred, DataFlow::Node succ) { none() }
/**
* Holds if `pred` &rarr; `succ` should be considered a taint-propagating
* data flow edge contributed by the heuristics library.
*
* Such steps are provided by the `semmle.javascript.heuristics` libraries
* and will default to being empty if those libraries are not imported.
*/
predicate heuristicStep(DataFlow::Node pred, DataFlow::Node succ) { none() }
/**
* Holds if `pred` &rarr; `succ` should be considered a taint-propagating
* data flow edge through persistent storage.
*/
predicate persistentStorageStep(DataFlow::Node pred, DataFlow::Node succ) { none() }
/**
* Holds if `pred` &rarr; `succ` should be considered a taint-propagating
* data flow edge through the heap.
*/
predicate heapStep(DataFlow::Node pred, DataFlow::Node succ) { none() }
/**
* Holds if `pred` &rarr; `succ` should be considered a taint-propagating
* data flow edge through arrays.
*
* These steps consider an array to be tainted if it contains tainted elements.
*/
predicate arrayStep(DataFlow::Node pred, DataFlow::Node succ) { none() }
/**
* Holds if `pred` &rarr; `succ` should be considered a taint-propagating
* data flow edge through the `state` or `props` of a React component.
*/
predicate viewComponentStep(DataFlow::Node pred, DataFlow::Node succ) { none() }
/**
* Holds if `pred` &rarr; `succ` should be considered a taint-propagating
* data flow edge through string concatenation.
*/
predicate stringConcatenationStep(DataFlow::Node pred, DataFlow::Node succ) { none() }
/**
* Holds if `pred` &rarr; `succ` should be considered a taint-propagating
* data flow edge through string manipulation (other than concatenation).
*/
predicate stringManipulationStep(DataFlow::Node pred, DataFlow::Node succ) { none() }
/**
* Holds if `pred` &rarr; `succ` should be considered a taint-propagating
* data flow edge through data serialization, such as `JSON.stringify`.
*/
predicate serializeStep(DataFlow::Node pred, DataFlow::Node succ) { none() }
/**
* Holds if `pred` &rarr; `succ` should be considered a taint-propagating
* data flow edge through data deserialization, such as `JSON.parse`.
*/
predicate deserializeStep(DataFlow::Node pred, DataFlow::Node succ) { none() }
/**
* Holds if `pred` &rarr; `succ` should be considered a taint-propagating
* data flow edge through a promise.
*
* These steps consider a promise object to be tainted if it can resolve to
* a tainted value.
*/
predicate promiseStep(DataFlow::Node pred, DataFlow::Node succ) { none() }
}
/**
 * Module existing only to ensure that all taint steps are cached as a single stage,
 * and without the `Unit` type column.
 *
 * Each predicate combines the corresponding member predicates of `SharedTaintStep` and
 * `LegacyTaintStep`.
 */
cached
private module Cached {
  cached
  predicate forceStage() { Stages::Taint::ref() }

  /**
   * Holds if `pred` &rarr; `succ` should be considered a taint-propagating
   * data flow edge that does not fit into a more specific category.
   */
  cached
  predicate genericStep(DataFlow::Node pred, DataFlow::Node succ) {
    exists(SharedTaintStep shared | shared.step(pred, succ))
    or
    exists(LegacyTaintStep legacy | legacy.step(pred, succ))
  }

  /**
   * Holds if `pred` &rarr; `succ` should be considered a taint-propagating
   * data flow edge contributed by the heuristics library.
   */
  cached
  predicate heuristicStep(DataFlow::Node pred, DataFlow::Node succ) {
    exists(SharedTaintStep shared | shared.heuristicStep(pred, succ))
    or
    exists(LegacyTaintStep legacy | legacy.heuristicStep(pred, succ))
  }

  /**
   * Public taint step relations.
   */
  cached
  module Public {
    /**
     * Holds if `pred` &rarr; `succ` should be considered a taint-propagating
     * data flow edge through a URI library function.
     */
    cached
    predicate uriStep(DataFlow::Node pred, DataFlow::Node succ) {
      exists(SharedTaintStep shared | shared.uriStep(pred, succ))
      or
      exists(LegacyTaintStep legacy | legacy.uriStep(pred, succ))
    }

    /**
     * Holds if `pred` &rarr; `succ` is a taint-propagating data flow edge through
     * persistent storage.
     */
    cached
    predicate persistentStorageStep(DataFlow::Node pred, DataFlow::Node succ) {
      exists(SharedTaintStep shared | shared.persistentStorageStep(pred, succ))
      or
      exists(LegacyTaintStep legacy | legacy.persistentStorageStep(pred, succ))
    }

    /**
     * Holds if `pred` &rarr; `succ` is a taint-propagating data flow edge through the heap.
     */
    cached
    predicate heapStep(DataFlow::Node pred, DataFlow::Node succ) {
      exists(SharedTaintStep shared | shared.heapStep(pred, succ))
      or
      exists(LegacyTaintStep legacy | legacy.heapStep(pred, succ))
    }

    /**
     * Holds if `pred` &rarr; `succ` is a taint-propagating data flow edge through an array.
     */
    cached
    predicate arrayStep(DataFlow::Node pred, DataFlow::Node succ) {
      exists(SharedTaintStep shared | shared.arrayStep(pred, succ))
      or
      exists(LegacyTaintStep legacy | legacy.arrayStep(pred, succ))
    }

    /**
     * Holds if `pred` &rarr; `succ` is a taint-propagating data flow edge through the
     * properties of a view component, such as the `state` or `props` of a React component.
     */
    cached
    predicate viewComponentStep(DataFlow::Node pred, DataFlow::Node succ) {
      exists(SharedTaintStep shared | shared.viewComponentStep(pred, succ))
      or
      exists(LegacyTaintStep legacy | legacy.viewComponentStep(pred, succ))
    }

    /**
     * Holds if `pred` &rarr; `succ` is a taint-propagating data flow edge through string
     * concatenation.
     */
    cached
    predicate stringConcatenationStep(DataFlow::Node pred, DataFlow::Node succ) {
      exists(SharedTaintStep shared | shared.stringConcatenationStep(pred, succ))
      or
      exists(LegacyTaintStep legacy | legacy.stringConcatenationStep(pred, succ))
    }

    /**
     * Holds if `pred` &rarr; `succ` is a taint-propagating data flow edge through string
     * manipulation (other than concatenation).
     */
    cached
    predicate stringManipulationStep(DataFlow::Node pred, DataFlow::Node succ) {
      exists(SharedTaintStep shared | shared.stringManipulationStep(pred, succ))
      or
      exists(LegacyTaintStep legacy | legacy.stringManipulationStep(pred, succ))
    }

    /**
     * Holds if `pred` &rarr; `succ` should be considered a taint-propagating
     * data flow edge through data serialization, such as `JSON.stringify`.
     */
    cached
    predicate serializeStep(DataFlow::Node pred, DataFlow::Node succ) {
      exists(SharedTaintStep shared | shared.serializeStep(pred, succ))
      or
      exists(LegacyTaintStep legacy | legacy.serializeStep(pred, succ))
    }

    /**
     * Holds if `pred` &rarr; `succ` should be considered a taint-propagating
     * data flow edge through data deserialization, such as `JSON.parse`.
     */
    cached
    predicate deserializeStep(DataFlow::Node pred, DataFlow::Node succ) {
      exists(SharedTaintStep shared | shared.deserializeStep(pred, succ))
      or
      exists(LegacyTaintStep legacy | legacy.deserializeStep(pred, succ))
    }

    /**
     * Holds if `pred` &rarr; `succ` should be considered a taint-propagating
     * data flow edge through a promise.
     *
     * These steps consider a promise object to be tainted if it can resolve to
     * a tainted value.
     */
    cached
    predicate promiseStep(DataFlow::Node pred, DataFlow::Node succ) {
      exists(SharedTaintStep shared | shared.promiseStep(pred, succ))
      or
      exists(LegacyTaintStep legacy | legacy.promiseStep(pred, succ))
    }
  }
}
import Cached::Public
/**
 * Holds if `pred` &rarr; `succ` is an edge used by all taint-tracking configurations in
 * the old data flow library.
 *
 * This is the union of the generic and heuristic steps from the `Cached` module and all
 * of the categorized public step relations.
 *
 * The new data flow library uses a different set of steps, exposed by `AdditionalTaintStep::step`.
 */
predicate sharedTaintStep(DataFlow::Node pred, DataFlow::Node succ) {
  Cached::genericStep(pred, succ)
  or
  Cached::heuristicStep(pred, succ)
  or
  uriStep(pred, succ)
  or
  persistentStorageStep(pred, succ)
  or
  heapStep(pred, succ)
  or
  arrayStep(pred, succ)
  or
  viewComponentStep(pred, succ)
  or
  stringConcatenationStep(pred, succ)
  or
  stringManipulationStep(pred, succ)
  or
  serializeStep(pred, succ)
  or
  deserializeStep(pred, succ)
  or
  promiseStep(pred, succ)
}
/**
 * Provides the taint steps used by taint-tracking configurations in the new data flow library.
 */
module AdditionalTaintStep {
  /**
   * Holds if `pred` &rarr; `succ` is considered a taint-propagating data flow edge when
   * using the new data flow library.
   *
   * This covers steps from `AdditionalTaintStep` subclasses together with every category
   * of step from `SharedTaintStep` subclasses.
   */
  cached
  predicate step(DataFlow::Node pred, DataFlow::Node succ) {
    exists(AdditionalTaintStep additional | additional.step(pred, succ))
    or
    exists(SharedTaintStep shared |
      shared.step(pred, succ)
      or
      shared.heuristicStep(pred, succ)
      or
      shared.uriStep(pred, succ)
      or
      shared.persistentStorageStep(pred, succ)
      or
      shared.heapStep(pred, succ)
      or
      shared.arrayStep(pred, succ)
      or
      shared.viewComponentStep(pred, succ)
      or
      shared.stringConcatenationStep(pred, succ)
      or
      shared.stringManipulationStep(pred, succ)
      or
      shared.serializeStep(pred, succ)
      or
      shared.deserializeStep(pred, succ)
      or
      shared.promiseStep(pred, succ)
    )
  }
}

View File

@@ -1,5 +1,6 @@
/**
* Alias for the library `semmle.javascript.explore.BackwardDataFlow`.
*/
deprecated module;
import semmle.javascript.explore.BackwardDataFlow

View File

@@ -25,6 +25,8 @@ private import internal.DataFlowNode
private import internal.AnalyzedParameters
private import internal.PreCallGraphStep
private import semmle.javascript.internal.CachedStages
private import semmle.javascript.dataflow.internal.DataFlowPrivate as Private
private import semmle.javascript.dataflow.internal.VariableOrThis
module DataFlow {
/**
@@ -182,29 +184,8 @@ module DataFlow {
*/
cached
DataFlow::Node getImmediatePredecessor() {
lvalueFlowStep(result, this) and
not lvalueDefaultFlowStep(_, this)
or
immediateFlowStep(result, this)
or
// Refinement of variable -> original definition of variable
exists(SsaRefinementNode refinement |
this = TSsaDefNode(refinement) and
result = TSsaDefNode(refinement.getAnInput())
)
or
exists(SsaPhiNode phi |
this = TSsaDefNode(phi) and
result = TSsaDefNode(phi.getRephinedVariable())
)
or
// IIFE call -> return value of IIFE
exists(Function fun |
localCall(this.asExpr(), fun) and
result = unique(Expr ret | ret = fun.getAReturnedExpr()).flow() and
not fun.getExit().isJoin() // can only reach exit by the return statement
)
or
FlowSteps::identityFunctionStep(result, this)
}
@@ -271,6 +252,11 @@ module DataFlow {
or
this.getFallbackTypeAnnotation().getAnUnderlyingType().hasQualifiedName(moduleName, typeName)
}
/**
 * Gets the post-update node whose pre-update node is this node, if any.
 */
final PostUpdateNode getPostUpdateNode() { this = result.getPreUpdateNode() }
}
/**
@@ -744,9 +730,7 @@ module DataFlow {
private class ParameterFieldAsPropWrite extends PropWrite, PropNode {
override ParameterField prop;
override Node getBase() {
thisNode(result, prop.getDeclaringClass().getConstructor().getBody())
}
override Node getBase() { result = TImplicitThisUse(prop, false) }
override Expr getPropertyNameExpr() {
none() // The parameter value is not the name of the field
@@ -754,16 +738,11 @@ module DataFlow {
override string getPropertyName() { result = prop.getName() }
override Node getRhs() {
exists(Parameter param, Node paramNode |
param = prop.getParameter() and
parameterNode(paramNode, param)
|
result = paramNode
)
}
override Node getRhs() { result = TValueNode(prop.getParameter()) }
override ControlFlowNode getWriteNode() { result = prop.getParameter() }
override StmtContainer getContainer() { parameter_fields(prop, result, _) }
}
/**
@@ -778,9 +757,7 @@ module DataFlow {
exists(prop.getInit())
}
override Node getBase() {
thisNode(result, prop.getDeclaringClass().getConstructor().getBody())
}
override Node getBase() { result = TImplicitThisUse(prop, false) }
override Expr getPropertyNameExpr() { result = prop.getNameExpr() }
@@ -971,6 +948,12 @@ module DataFlow {
override BasicBlock getBasicBlock() { result = function.getExit().getBasicBlock() }
override StmtContainer getContainer() {
// Overridden so that a container exists even when the return is unreachable:
// `getBasicBlock()` goes through `function.getExit()`, and an unreachable exit
// CFG node will not have a basic block.
result = function
}
/**
* Gets the function corresponding to this exceptional return node.
*/
@@ -993,6 +976,12 @@ module DataFlow {
override BasicBlock getBasicBlock() { result = function.getExit().getBasicBlock() }
override StmtContainer getContainer() {
// Overridden so that a container exists even when the return is unreachable:
// `getBasicBlock()` goes through `function.getExit()`, and an unreachable exit
// CFG node will not have a basic block.
result = function
}
/**
* Gets the function corresponding to this return node.
*/
@@ -1052,6 +1041,41 @@ module DataFlow {
override string toString() { result = "global access path" }
}
/**
 * A data flow node for the value that a `new` call passes as its `this` argument.
 */
class NewCallThisArgumentNode extends TNewCallThisArgument, DataFlow::Node {
  private NewExpr newExpr;

  NewCallThisArgumentNode() { TNewCallThisArgument(newExpr) = this }

  override Location getLocation() { result = newExpr.getLocation() }

  override StmtContainer getContainer() { result = newExpr.getContainer() }

  override string toString() { result = "implicit 'this' argument of " + newExpr }
}
/**
 * A data flow node for an implicit use of `this`, or for the post-update node of such a use.
 */
private class ImplicitThisUseNode extends TImplicitThisUse, DataFlow::Node {
  private ImplicitThisUse use;
  private boolean isPost;

  ImplicitThisUseNode() { this = TImplicitThisUse(use, isPost) }

  override Location getLocation() { result = use.getLocation() }

  override StmtContainer getContainer() { result = use.getUseContainer() }

  override string toString() {
    // `isPost` distinguishes the use itself from its post-update node.
    isPost = false and result = "implicit 'this'"
    or
    isPost = true and result = "[post-update] implicit 'this'"
  }
}
/**
* INTERNAL. DO NOT USE.
*
@@ -1076,6 +1100,14 @@ module DataFlow {
* instead.
*/
module Impl {
/**
* INTERNAL. DO NOT USE.
*
* An alias for `Node.getImmediatePredecessor` that can be used at an earlier stage
* that does not depend on `DataFlow::Node`.
*/
predicate earlyStageImmediateFlowStep = immediateFlowStep/2;
/**
* A data flow node representing a function invocation, either explicitly or reflectively,
* and either with or without `new`.
@@ -1342,6 +1374,61 @@ module DataFlow {
override Location getLocation() { result = this.getTag().getLocation() }
override string toString() { result = this.getTag().toString() }
override StmtContainer getContainer() { result = this.getTag().getInnerTopLevel() }
}
/**
 * A data flow node for the hidden parameter through which a function can refer to itself.
 */
class FunctionSelfReferenceNode extends DataFlow::Node, TFunctionSelfReferenceNode {
  private Function fun;

  FunctionSelfReferenceNode() { TFunctionSelfReferenceNode(fun) = this }

  /** Gets the function whose self-reference this node represents. */
  Function getFunction() { result = fun }

  override Location getLocation() { result = fun.getLocation() }

  override string toString() { result = "[function self-reference] " + fun.toString() }

  override BasicBlock getBasicBlock() { result = fun.getEntryBB() }

  override StmtContainer getContainer() { result = fun }
}
/**
 * A post-update node whose pre-update node corresponds to an expression.
 * See `DataFlow::PostUpdateNode` for more details.
 */
class ExprPostUpdateNode extends DataFlow::Node, TExprPostUpdateNode, Private::PostUpdateNode {
  private AST::ValueNode preExpr;

  ExprPostUpdateNode() { TExprPostUpdateNode(preExpr) = this }

  /** Gets the expression that this node is the post-update node for. */
  AST::ValueNode getExpr() { result = preExpr }

  override string toString() { result = "[post update] " + preExpr.toString() }

  override Location getLocation() { result = preExpr.getLocation() }

  override StmtContainer getContainer() { result = preExpr.getContainer() }
}
/**
* A post-update node.
*
* This is a data-flow node that represents the new state of an object after its contents have been mutated.
* Most notably such nodes exist for arguments to a call and for the base of a property reference.
*
* A node belongs to this class exactly when `Private::postUpdatePair` relates some
* pre-update node to it.
*/
class PostUpdateNode extends DataFlow::Node {
PostUpdateNode() { Private::postUpdatePair(_, this) }
/**
* Gets the corresponding pre-update node, which is usually the argument to a call or the base of a property reference.
*
* This is the node that `Private::postUpdatePair` pairs with this post-update node.
*/
final DataFlow::Node getPreUpdateNode() { Private::postUpdatePair(result, this) }
}
/**
@@ -1374,12 +1461,12 @@ module DataFlow {
/**
* INTERNAL: Use `parameterNode(Parameter)` instead.
*/
predicate parameterNode(DataFlow::Node nd, Parameter p) { nd = valueNode(p) }
predicate parameterNode(EarlyStageNode nd, Parameter p) { nd = TValueNode(p) }
/**
* INTERNAL: Use `thisNode(StmtContainer container)` instead.
*/
predicate thisNode(DataFlow::Node node, StmtContainer container) { node = TThisNode(container) }
predicate thisNode(EarlyStageNode node, StmtContainer container) { node = TThisNode(container) }
/**
* Gets the node representing the receiver of the given function, or `this` in the given top-level.
@@ -1441,7 +1528,15 @@ module DataFlow {
* _before_ the l-value is assigned to, whereas `DataFlow::lvalueNode()`
* represents the value _after_ the assignment.
*/
Node lvalueNode(BindingPattern lvalue) {
Node lvalueNode(BindingPattern lvalue) { result = lvalueNodeInternal(lvalue) }
/**
* INTERNAL: Do not use outside standard library.
*
* Same as `lvalueNode()` except the return type is `EarlyStageNode`, which allows it to be used
* before all data flow nodes have been materialised.
*/
EarlyStageNode lvalueNodeInternal(BindingPattern lvalue) {
exists(SsaExplicitDefinition ssa |
ssa.defines(lvalue.(LValue).getDefNode(), lvalue.(VarRef).getVariable()) and
result = TSsaDefNode(ssa)
@@ -1489,31 +1584,31 @@ module DataFlow {
* Holds if there is a step from `pred -> succ` due to an assignment
* to an expression in l-value position.
*/
private predicate lvalueFlowStep(Node pred, Node succ) {
private predicate lvalueFlowStep(EarlyStageNode pred, EarlyStageNode succ) {
exists(VarDef def |
pred = valueNode(defSourceNode(def)) and
succ = lvalueNode(def.getTarget())
pred = TValueNode(defSourceNode(def)) and
succ = lvalueNodeInternal(def.getTarget())
)
or
exists(SimpleParameter param |
pred = valueNode(param) and // The value node represents the incoming argument
succ = lvalueNode(param) // The SSA node represents the parameters's local variable
pred = TValueNode(param) and // The value node represents the incoming argument
succ = lvalueNodeInternal(param) // The SSA node represents the parameters's local variable
)
or
exists(Expr arg, Parameter param |
localArgumentPassing(arg, param) and
pred = valueNode(arg) and
succ = valueNode(param)
pred = TValueNode(arg) and
succ = TValueNode(param)
)
or
exists(PropertyPattern pattern |
pred = TPropNode(pattern) and
succ = lvalueNode(pattern.getValuePattern())
succ = lvalueNodeInternal(pattern.getValuePattern())
)
or
exists(Expr element |
pred = TElementPatternNode(_, element) and
succ = lvalueNode(element)
succ = lvalueNodeInternal(element)
)
}
@@ -1521,37 +1616,37 @@ module DataFlow {
* Holds if there is a step from `pred -> succ` from the default
* value of a destructuring pattern or parameter.
*/
private predicate lvalueDefaultFlowStep(Node pred, Node succ) {
private predicate lvalueDefaultFlowStep(EarlyStageNode pred, EarlyStageNode succ) {
exists(PropertyPattern pattern |
pred = TValueNode(pattern.getDefault()) and
succ = lvalueNode(pattern.getValuePattern())
succ = lvalueNodeInternal(pattern.getValuePattern())
)
or
exists(ArrayPattern array, int i |
pred = TValueNode(array.getDefault(i)) and
succ = lvalueNode(array.getElement(i))
succ = lvalueNodeInternal(array.getElement(i))
)
or
exists(Parameter param |
pred = TValueNode(param.getDefault()) and
parameterNode(succ, param)
succ = TValueNode(param)
)
}
/**
* Flow steps shared between `getImmediatePredecessor` and `localFlowStep`.
* Flow steps shared between `immediateFlowStep` and `localFlowStep`.
*
* Inlining is forced because the two relations are indexed differently.
*/
pragma[inline]
private predicate immediateFlowStep(Node pred, Node succ) {
private predicate immediateFlowStepShared(EarlyStageNode pred, EarlyStageNode succ) {
exists(SsaVariable v |
pred = TSsaDefNode(v.getDefinition()) and
succ = valueNode(v.getAUse())
succ = TValueNode(v.getAUse())
)
or
exists(Expr predExpr, Expr succExpr |
pred = valueNode(predExpr) and succ = valueNode(succExpr)
pred = TValueNode(predExpr) and succ = TValueNode(succExpr)
|
predExpr = succExpr.(ParExpr).getExpression()
or
@@ -1581,25 +1676,61 @@ module DataFlow {
// flow from 'this' parameter into 'this' expressions
exists(ThisExpr thiz |
pred = TThisNode(thiz.getBindingContainer()) and
succ = valueNode(thiz)
succ = TValueNode(thiz)
)
or
// `f.call(...)` and `f.apply(...)` evaluate to the result of the reflective call they perform
pred = TReflectiveCallNode(succ.asExpr(), _)
exists(MethodCallExpr call |
pred = TReflectiveCallNode(call, _) and
succ = TValueNode(call)
)
or
// Pass 'this' into implicit uses of 'this'
exists(ImplicitThisUse use |
pred = TThisNode(use.getBindingContainer()) and
succ = TImplicitThisUse(use, false)
)
}
/**
* Holds if `pred` is an immediate predecessor of `succ`.
*
* This covers:
* - l-value flow steps into a `succ` that has no default-value step (`lvalueDefaultFlowStep`),
* - the steps in `immediateFlowStepShared`,
* - steps from an input of an SSA refinement node to the refinement node,
* - steps from the rephined variable of an SSA phi node to the phi node, and
* - steps from the unique returned expression of a function to an expression
* for which `localCall` holds with that function.
*/
pragma[nomagic]
private predicate immediateFlowStep(EarlyStageNode pred, EarlyStageNode succ) {
lvalueFlowStep(pred, succ) and
not lvalueDefaultFlowStep(_, succ)
or
immediateFlowStepShared(pred, succ)
or
// Refinement of variable -> original definition of variable
exists(SsaRefinementNode refinement |
succ = TSsaDefNode(refinement) and
pred = TSsaDefNode(refinement.getAnInput())
)
or
// Phi node -> its rephined variable, when it has one
exists(SsaPhiNode phi |
succ = TSsaDefNode(phi) and
pred = TSsaDefNode(phi.getRephinedVariable())
)
or
// IIFE call -> return value of IIFE
exists(Function fun, Expr expr |
succ = TValueNode(expr) and
localCall(expr, fun) and
// `unique` only has a value when the function has exactly one returned expression
pred = TValueNode(unique(Expr ret | ret = fun.getAReturnedExpr())) and
not fun.getExit().isJoin() // can only reach exit by the return statement
)
}
/**
* Holds if data can flow from `pred` to `succ` in one local step.
*/
cached
predicate localFlowStep(Node pred, Node succ) {
Stages::DataFlowStage::ref() and
predicate localFlowStep(EarlyStageNode pred, EarlyStageNode succ) {
Stages::EarlyDataFlowStage::ref() and
// flow from RHS into LHS
lvalueFlowStep(pred, succ)
or
lvalueDefaultFlowStep(pred, succ)
or
immediateFlowStep(pred, succ)
immediateFlowStepShared(pred, succ)
or
// From an assignment or implicit initialization of a captured variable to its flow-insensitive node.
exists(SsaDefinition predDef |
@@ -1623,7 +1754,7 @@ module DataFlow {
)
or
exists(Expr predExpr, Expr succExpr |
pred = valueNode(predExpr) and succ = valueNode(succExpr)
pred = TValueNode(predExpr) and succ = TValueNode(succExpr)
|
predExpr = succExpr.(LogicalOrExpr).getAnOperand()
or
@@ -1641,18 +1772,17 @@ module DataFlow {
or
// from returned expr to the FunctionReturnNode.
exists(Function f | not f.isAsyncOrGenerator() |
DataFlow::functionReturnNode(succ, f) and pred = valueNode(f.getAReturnedExpr())
succ = TFunctionReturnNode(f) and pred = TValueNode(f.getAReturnedExpr())
)
or
// from a reflective params node to a reference to the arguments object.
exists(DataFlow::ReflectiveParametersNode params, Function f | f = params.getFunction() |
succ = f.getArgumentsVariable().getAnAccess().flow() and
pred = params
exists(Function f |
pred = TReflectiveParametersNode(f) and
succ = TValueNode(f.getArgumentsVariable().getAnAccess())
)
}
/** A load step from a reflective parameter node to each parameter. */
private class ReflectiveParamsStep extends PreCallGraphStep {
private class ReflectiveParamsStep extends LegacyPreCallGraphStep {
override predicate loadStep(DataFlow::Node obj, DataFlow::Node element, string prop) {
exists(DataFlow::ReflectiveParametersNode params, DataFlow::FunctionNode f, int i |
f.getFunction() = params.getFunction() and
@@ -1664,7 +1794,7 @@ module DataFlow {
}
/** A taint step from the reflective parameters node to any parameter. */
private class ReflectiveParamsTaintStep extends TaintTracking::SharedTaintStep {
private class ReflectiveParamsTaintStep extends TaintTracking::LegacyTaintStep {
override predicate step(DataFlow::Node obj, DataFlow::Node element) {
exists(DataFlow::ReflectiveParametersNode params, DataFlow::FunctionNode f |
f.getFunction() = params.getFunction() and
@@ -1799,7 +1929,11 @@ module DataFlow {
import Nodes
import Sources
import TypeInference
import Configuration
deprecated import Configuration
import TypeTracking
import AdditionalFlowSteps
import internal.FunctionWrapperSteps
import internal.sharedlib.DataFlow
import internal.BarrierGuards
import FlowSummary
}

View File

@@ -0,0 +1,83 @@
/** Provides classes and predicates for defining flow summaries. */
private import javascript
private import semmle.javascript.dataflow.internal.sharedlib.FlowSummaryImpl as Impl
private import semmle.javascript.dataflow.internal.FlowSummaryPrivate
private import semmle.javascript.dataflow.internal.sharedlib.DataFlowImplCommon as DataFlowImplCommon
private import semmle.javascript.dataflow.internal.DataFlowPrivate
/**
* A model for a function that can propagate data flow.
*
* This class makes it possible to model flow through functions, using the same mechanism as
* `summaryModel` as described in the [library customization docs](https://codeql.github.com/docs/codeql-language-guides/customizing-library-models-for-javascript).
*
* Extend this class to define summary models directly in CodeQL.
* Data extensions and `summaryModel` are usually preferred; but there are a few cases where direct use of this class may be needed:
*
* - The relevant call sites cannot be matched by the access path syntax, and require the full power of CodeQL.
* For example, complex overloading patterns might require more local reasoning at the call site.
* - The input/output behaviour cannot be described statically in the access path syntax, but the relevant access paths
* can be generated dynamically in CodeQL, based on the usages found in the codebase.
*
* Subclasses should bind `this` to a unique identifier for the function being modelled. There is no special
* interpretation of the `this` value; it should just not clash with the `this` value used by other classes.
*
* For example, this models flow through calls such as `require("my-library").myFunction()`:
* ```codeql
* class MyFunction extends SummarizedCallable {
* MyFunction() { this = "MyFunction" }
*
* override predicate propagatesFlow(string input, string output, boolean preservesValue) {
* input = "Argument[0]" and
* output = "ReturnValue" and
* preservesValue = false
* }
*
* override DataFlow::InvokeNode getACall() {
* result = API::moduleImport("my-library").getMember("myFunction").getACall()
* }
* }
* ```
* This would be equivalent to the following model written as a data extension:
* ```yaml
* extensions:
* - addsTo:
* pack: codeql/javascript-all
* extensible: summaryModel
* data:
* - ["my-library", "Member[myFunction]", "Argument[0]", "ReturnValue", "taint"]
* ```
*/
abstract class SummarizedCallable extends LibraryCallable, Impl::Public::SummarizedCallable {
bindingset[this]
SummarizedCallable() { any() }
/**
* Holds if data may flow from `input` to `output` through this callable.
*
* `preservesValue` indicates whether this is a value-preserving step or a taint-step.
*
* See the [library customization docs](https://codeql.github.com/docs/codeql-language-guides/customizing-library-models-for-javascript) for
* the syntax of the `input` and `output` parameters.
*/
pragma[nomagic]
predicate propagatesFlow(string input, string output, boolean preservesValue) { none() }
// Forwards to the three-argument `propagatesFlow` above, using `this` as the `model` value.
override predicate propagatesFlow(
string input, string output, boolean preservesValue, string model
) {
this.propagatesFlow(input, output, preservesValue) and model = this
}
/**
* Gets the synthesized parameter that results from an input specification
* that starts with `Argument[s]` for this library callable.
*/
DataFlow::ParameterNode getParameter(string s) {
exists(ParameterPosition pos |
DataFlowImplCommon::parameterNode(result, MkLibraryCallable(this), pos) and
s = encodeParameterPosition(pos)
)
}
}

View File

@@ -1,5 +1,6 @@
/**
* Alias for the library `semmle.javascript.explore.ForwardDataFlow`.
*/
deprecated module;
import semmle.javascript.explore.ForwardDataFlow

View File

@@ -1611,7 +1611,12 @@ class RegExpConstructorInvokeNode extends DataFlow::InvokeNode {
* Gets the AST of the regular expression created here, provided that the
* first argument is a string literal.
*/
RegExpTerm getRoot() { result = this.getArgument(0).asExpr().(StringLiteral).asRegExp() }
RegExpTerm getRoot() {
result = this.getArgument(0).asExpr().(StringLiteral).asRegExp()
or
// In case someone writes `new RegExp(/foo/)` for some reason
result = this.getArgument(0).asExpr().(RegExpLiteral).getRoot()
}
/**
* Gets the flags provided in the second argument, or an empty string if no
@@ -1703,3 +1708,18 @@ class RegExpCreationNode extends DataFlow::SourceNode {
result = this.getAReference(DataFlow::TypeTracker::end())
}
}
/**
 * A guard node for a variable in a negative condition, such as `x` in `if(!x)`.
 * Can be added to an `isBarrier` predicate in a data-flow configuration to block flow
 * through such checks.
 */
class VarAccessBarrier extends DataFlow::Node {
  VarAccessBarrier() {
    exists(SsaRefinementNode refined, ConditionGuardNode cond |
      cond = refined.getGuard() and
      // the guard tests a plain variable access and takes the `false` outcome
      cond.getOutcome() = false and
      cond.getTest() instanceof VarAccess and
      this = DataFlow::ssaDefinitionNode(refined)
    )
  }
}

View File

@@ -8,9 +8,6 @@
* substrings. As for data flow configurations, additional flow edges can be
* specified, and conversely certain nodes or edges can be designated as taint
* _sanitizers_ that block flow.
*
* NOTE: The API of this library is not stable yet and may change in
* the future.
*/
import javascript
@@ -18,19 +15,29 @@ private import semmle.javascript.dataflow.internal.FlowSteps as FlowSteps
private import semmle.javascript.Unit
private import semmle.javascript.dataflow.InferredTypes
private import semmle.javascript.internal.CachedStages
private import semmle.javascript.dataflow.internal.TaintTrackingPrivate as TaintTrackingPrivate
/**
* Provides classes for modeling taint propagation.
*/
module TaintTracking {
import AdditionalTaintSteps
/**
* DEPRECATED.
* Subclasses of this class should be replaced by a module implementing the new `ConfigSig` or `StateConfigSig` interface.
* See the [migration guide](https://codeql.github.com/docs/codeql-language-guides/migrating-javascript-dataflow-queries) for more details.
*
* When migrating a `TaintTracking::Configuration` to `DataFlow::ConfigSig`, use `TaintTracking::Global<...>` instead of `DataFlow::Global<...>`.
*
* #### Legacy documentation
* A data flow tracking configuration that considers taint propagation through
* objects, arrays, promises and strings in addition to standard data flow.
*
* If a different set of flow edges is desired, extend this class and override
* `isAdditionalTaintStep`.
*/
abstract class Configuration extends DataFlow::Configuration {
abstract deprecated class Configuration extends DataFlow::Configuration {
bindingset[this]
Configuration() { any() }
@@ -171,20 +178,88 @@ module TaintTracking {
}
/**
* A `SanitizerGuardNode` that controls which taint tracking
* configurations it is used in.
* A barrier guard that applies to all taint-tracking configurations.
*
* Note: For performance reasons, all subclasses of this class should be part
* of the standard library. Override `Configuration::isSanitizerGuard`
* for analysis-specific taint sanitizer guards.
* of the standard library. To define a query-specific barrier guard, instead override
* `isBarrier` and use the `DataFlow::MakeBarrierGuard` module. For example:
* ```codeql
* module MyConfig implements DataFlow::ConfigSig {
* predicate isBarrier(DataFlow::Node node) {
* node = DataFlow::MakeBarrierGuard<MyGuard>::getABarrierNode()
* }
* }
* class MyGuard extends DataFlow::Node {
* MyGuard() { ... }
* predicate blocksExpr(boolean outcome, Expr e) { ... }
* }
* ```
*/
abstract class AdditionalBarrierGuard extends DataFlow::Node {
/**
* Holds if this node blocks expression `e`, provided it evaluates to `outcome`.
*/
abstract predicate blocksExpr(boolean outcome, Expr e);
}
/**
* Internal barrier guard class that populates both the new `AdditionalBarrierGuard` class
* and the legacy `AdditionalSanitizerGuardNode` class.
*
* It exposes the member predicates of `AdditionalSanitizerGuardNode` for backwards compatibility.
*/
abstract private class LegacyAdditionalBarrierGuard extends AdditionalBarrierGuard,
AdditionalSanitizerGuardNodeDeprecated
{
deprecated override predicate sanitizes(boolean outcome, Expr e) { this.blocksExpr(outcome, e) }
deprecated override predicate appliesTo(Configuration cfg) { any() }
}
/**
* DEPRECATED. This class was part of the old data flow library which is now deprecated.
* Use `TaintTracking::AdditionalBarrierGuard` instead.
*/
deprecated class AdditionalSanitizerGuardNode = AdditionalSanitizerGuardNodeDeprecated;
cached
abstract class AdditionalSanitizerGuardNode extends SanitizerGuardNode {
abstract private class AdditionalSanitizerGuardNodeDeprecated extends DataFlow::Node {
// For backwards compatibility, this contains a copy of the SanitizerGuard interface,
// but it does not inherit from it as that would cause re-evaluation of cached barriers.
/**
* Holds if this node blocks expression `e`, provided it evaluates to `outcome`.
*/
cached
deprecated predicate blocks(boolean outcome, Expr e) { none() }
/**
* Holds if this node sanitizes expression `e`, provided it evaluates
* to `outcome`.
*/
cached
abstract deprecated predicate sanitizes(boolean outcome, Expr e);
/**
* Holds if this node blocks expression `e` from flow of type `label`, provided it evaluates to `outcome`.
*/
cached
deprecated predicate blocks(boolean outcome, Expr e, DataFlow::FlowLabel label) {
this.sanitizes(outcome, e) and label.isTaint()
or
this.sanitizes(outcome, e, label)
}
/**
* Holds if this node sanitizes expression `e`, provided it evaluates
* to `outcome`.
*/
cached
deprecated predicate sanitizes(boolean outcome, Expr e, DataFlow::FlowLabel label) { none() }
/**
* Holds if this guard applies to the flow in `cfg`.
*/
cached
abstract predicate appliesTo(Configuration cfg);
abstract deprecated predicate appliesTo(Configuration cfg);
}
/**
@@ -199,7 +274,7 @@ module TaintTracking {
* implementations of `sanitizes` will _both_ apply to any configuration that includes either of
* them.
*/
abstract class SanitizerGuardNode extends DataFlow::BarrierGuardNode {
abstract deprecated class SanitizerGuardNode extends DataFlow::BarrierGuardNode {
override predicate blocks(boolean outcome, Expr e) { none() }
/**
@@ -224,255 +299,12 @@ module TaintTracking {
/**
* A sanitizer guard node that only blocks specific flow labels.
*/
abstract class LabeledSanitizerGuardNode extends SanitizerGuardNode, DataFlow::BarrierGuardNode {
abstract deprecated class LabeledSanitizerGuardNode extends SanitizerGuardNode,
DataFlow::BarrierGuardNode
{
override predicate sanitizes(boolean outcome, Expr e) { none() }
}
/**
* A taint-propagating data flow edge that should be added to all taint tracking
* configurations in addition to standard data flow edges.
*
* This class is a singleton, and thus subclasses do not need to specify a characteristic predicate.
*
* Note: For performance reasons, all subclasses of this class should be part
* of the standard library. Override `Configuration::isAdditionalTaintStep`
* for analysis-specific taint steps.
*
* This class has multiple kinds of `step` predicates; these all have the same
* effect on taint-tracking configurations. However, the categorization of steps
* allows some data-flow configurations to opt in to specific kinds of taint steps.
*/
class SharedTaintStep extends Unit {
// Each step relation in this class should have a cached version in the `Cached` module
// and be included in the `sharedTaintStep` predicate.
/**
* Holds if `pred` &rarr; `succ` should be considered a taint-propagating
* data flow edge.
*/
predicate step(DataFlow::Node pred, DataFlow::Node succ) { none() }
/**
* Holds if `pred` &rarr; `succ` should be considered a taint-propagating
* data flow edge through URI manipulation.
*
* Does not include string operations that aren't specific to URIs, such
* as concatenation and substring operations.
*/
predicate uriStep(DataFlow::Node pred, DataFlow::Node succ) { none() }
/**
* Holds if `pred` &rarr; `succ` should be considered a taint-propagating
* data flow edge contributed by the heuristics library.
*
* Such steps are provided by the `semmle.javascript.heuristics` libraries
* and will default to being empty if those libraries are not imported.
*/
predicate heuristicStep(DataFlow::Node pred, DataFlow::Node succ) { none() }
/**
* Holds if `pred` &rarr; `succ` should be considered a taint-propagating
* data flow edge through persistent storage.
*/
predicate persistentStorageStep(DataFlow::Node pred, DataFlow::Node succ) { none() }
/**
* Holds if `pred` &rarr; `succ` should be considered a taint-propagating
* data flow edge through the heap.
*/
predicate heapStep(DataFlow::Node pred, DataFlow::Node succ) { none() }
/**
* Holds if `pred` &rarr; `succ` should be considered a taint-propagating
* data flow edge through arrays.
*
* These steps consider an array to be tainted if it contains tainted elements.
*/
predicate arrayStep(DataFlow::Node pred, DataFlow::Node succ) { none() }
/**
* Holds if `pred` &rarr; `succ` should be considered a taint-propagating
* data flow edge through the `state` or `props` of a React component.
*/
predicate viewComponentStep(DataFlow::Node pred, DataFlow::Node succ) { none() }
/**
* Holds if `pred` &rarr; `succ` should be considered a taint-propagating
* data flow edge through string concatenation.
*/
predicate stringConcatenationStep(DataFlow::Node pred, DataFlow::Node succ) { none() }
/**
* Holds if `pred` &rarr; `succ` should be considered a taint-propagating
* data flow edge through string manipulation (other than concatenation).
*/
predicate stringManipulationStep(DataFlow::Node pred, DataFlow::Node succ) { none() }
/**
* Holds if `pred` &rarr; `succ` should be considered a taint-propagating
* data flow edge through data serialization, such as `JSON.stringify`.
*/
predicate serializeStep(DataFlow::Node pred, DataFlow::Node succ) { none() }
/**
* Holds if `pred` &rarr; `succ` should be considered a taint-propagating
* data flow edge through data deserialization, such as `JSON.parse`.
*/
predicate deserializeStep(DataFlow::Node pred, DataFlow::Node succ) { none() }
/**
* Holds if `pred` &rarr; `succ` should be considered a taint-propagating
* data flow edge through a promise.
*
* These steps consider a promise object to be tainted if it can resolve to
* a tainted value.
*/
predicate promiseStep(DataFlow::Node pred, DataFlow::Node succ) { none() }
}
/**
* Module existing only to ensure all taint steps are cached as a single stage,
* and without the `Unit` type column.
*/
cached
private module Cached {
cached
predicate forceStage() { Stages::Taint::ref() }
/**
* Holds if `pred` &rarr; `succ` should be considered a taint-propagating
* data flow edge, which doesn't fit into a more specific category.
*/
cached
predicate genericStep(DataFlow::Node pred, DataFlow::Node succ) {
any(SharedTaintStep step).step(pred, succ)
}
/**
* Holds if `pred` &rarr; `succ` should be considered a taint-propagating
* data flow edge, contributed by the heuristics library.
*/
cached
predicate heuristicStep(DataFlow::Node pred, DataFlow::Node succ) {
any(SharedTaintStep step).heuristicStep(pred, succ)
}
/**
* Public taint step relations.
*/
cached
module Public {
/**
* Holds if `pred` &rarr; `succ` should be considered a taint-propagating
* data flow edge through a URI library function.
*/
cached
predicate uriStep(DataFlow::Node pred, DataFlow::Node succ) {
any(SharedTaintStep step).uriStep(pred, succ)
}
/**
* Holds if `pred -> succ` is a taint propagating data flow edge through persistent storage.
*/
cached
predicate persistentStorageStep(DataFlow::Node pred, DataFlow::Node succ) {
any(SharedTaintStep step).persistentStorageStep(pred, succ)
}
/**
* Holds if `pred -> succ` is a taint propagating data flow edge through the heap.
*/
cached
predicate heapStep(DataFlow::Node pred, DataFlow::Node succ) {
any(SharedTaintStep step).heapStep(pred, succ)
}
/**
* Holds if `pred -> succ` is a taint propagating data flow edge through an array.
*/
cached
predicate arrayStep(DataFlow::Node pred, DataFlow::Node succ) {
any(SharedTaintStep step).arrayStep(pred, succ)
}
/**
* Holds if `pred -> succ` is a taint propagating data flow edge through the
* properties of a view component, such as the `state` or `props` of a React component.
*/
cached
predicate viewComponentStep(DataFlow::Node pred, DataFlow::Node succ) {
any(SharedTaintStep step).viewComponentStep(pred, succ)
}
/**
* Holds if `pred -> succ` is a taint propagating data flow edge through string
* concatenation.
*/
cached
predicate stringConcatenationStep(DataFlow::Node pred, DataFlow::Node succ) {
any(SharedTaintStep step).stringConcatenationStep(pred, succ)
}
/**
* Holds if `pred -> succ` is a taint propagating data flow edge through string manipulation
* (other than concatenation).
*/
cached
predicate stringManipulationStep(DataFlow::Node pred, DataFlow::Node succ) {
any(SharedTaintStep step).stringManipulationStep(pred, succ)
}
/**
* Holds if `pred` &rarr; `succ` should be considered a taint-propagating
* data flow edge through data serialization, such as `JSON.stringify`.
*/
cached
predicate serializeStep(DataFlow::Node pred, DataFlow::Node succ) {
any(SharedTaintStep step).serializeStep(pred, succ)
}
/**
* Holds if `pred` &rarr; `succ` should be considered a taint-propagating
* data flow edge through data deserialization, such as `JSON.parse`.
*/
cached
predicate deserializeStep(DataFlow::Node pred, DataFlow::Node succ) {
any(SharedTaintStep step).deserializeStep(pred, succ)
}
/**
* Holds if `pred` &rarr; `succ` should be considered a taint-propagating
* data flow edge through a promise.
*
* These steps consider a promise object to be tainted if it can resolve to
* a tainted value.
*/
cached
predicate promiseStep(DataFlow::Node pred, DataFlow::Node succ) {
any(SharedTaintStep step).promiseStep(pred, succ)
}
}
}
import Cached::Public
/**
 * Holds if `pred -> succ` is a taint edge that every taint-tracking configuration uses.
 *
 * This is the union of the generic and heuristic steps with each of the
 * per-category step predicates listed in the body.
 */
predicate sharedTaintStep(DataFlow::Node pred, DataFlow::Node succ) {
  Cached::genericStep(pred, succ)
  or
  Cached::heuristicStep(pred, succ)
  or
  uriStep(pred, succ)
  or
  persistentStorageStep(pred, succ)
  or
  heapStep(pred, succ)
  or
  arrayStep(pred, succ)
  or
  viewComponentStep(pred, succ)
  or
  stringConcatenationStep(pred, succ)
  or
  stringManipulationStep(pred, succ)
  or
  serializeStep(pred, succ)
  or
  deserializeStep(pred, succ)
  or
  promiseStep(pred, succ)
}
/** Gets a data flow node referring to the client side URL. */
private DataFlow::SourceNode clientSideUrlRef(DataFlow::TypeTracker t) {
t.start() and
@@ -497,11 +329,19 @@ module TaintTracking {
exists(StringSplitCall c |
c.getBaseString().getALocalSource() =
[DOM::locationRef(), DOM::locationRef().getAPropertyRead("href")] and
c.getSeparator() = "?" and
c.getSeparator() = ["?", "#"] and
read = c.getAPropertyRead("0")
)
}
/**
 * A legacy heap step from an element of an array creation to the array itself.
 */
private class HeapLegacyTaintStep extends LegacyTaintStep {
  override predicate heapStep(DataFlow::Node pred, DataFlow::Node succ) {
    exists(DataFlow::ArrayCreationNode array | array = succ |
      // in the old data flow, an array holding a tainted element is itself tainted ...
      pred = array.getAnElement() and
      // ... except when it is the array node of a `PromiseAllCreation`
      not exists(PromiseAllCreation promiseAll | promiseAll.getArrayNode() = array)
    )
  }
}
/**
* A taint propagating data flow edge through object or array elements and
* promises.
@@ -516,10 +356,6 @@ module TaintTracking {
// spreading a tainted value into an array literal gives a tainted array
succ.(DataFlow::ArrayCreationNode).getASpreadArgument() = pred
or
// arrays with tainted elements and objects with tainted property names are tainted
succ.(DataFlow::ArrayCreationNode).getAnElement() = pred and
not any(PromiseAllCreation call).getArrayNode() = succ
or
// reading from a tainted object yields a tainted result
succ.(DataFlow::PropRead).getBase() = pred and
not (
@@ -594,6 +430,16 @@ module TaintTracking {
}
}
/**
 * A legacy string-manipulation step from the receiver of a `split` method call
 * to the call itself.
 */
private class LegacySplitTaintStep extends LegacyTaintStep {
  override predicate stringManipulationStep(DataFlow::Node pred, DataFlow::Node target) {
    exists(DataFlow::MethodCallNode splitCall |
      target = splitCall and
      pred = splitCall.getReceiver() and
      splitCall.getMethodName() = "split"
    )
  }
}
/**
* A taint propagating data flow edge arising from string manipulation
* functions defined in the standard library.
@@ -610,9 +456,9 @@ module TaintTracking {
[
"anchor", "big", "blink", "bold", "concat", "fixed", "fontcolor", "fontsize",
"italics", "link", "padEnd", "padStart", "repeat", "replace", "replaceAll", "slice",
"small", "split", "strike", "sub", "substr", "substring", "sup",
"toLocaleLowerCase", "toLocaleUpperCase", "toLowerCase", "toUpperCase", "trim",
"trimLeft", "trimRight", "toWellFormed"
"small", "strike", "sub", "substr", "substring", "sup", "toLocaleLowerCase",
"toLocaleUpperCase", "toLowerCase", "toUpperCase", "trim", "trimLeft", "trimRight",
"toWellFormed"
]
or
// sorted, interesting, properties of Object.prototype
@@ -652,26 +498,29 @@ module TaintTracking {
]).getACall() and
pred = c.getArgument(0)
)
or
// In and out of .replace callbacks
exists(StringReplaceCall call |
// Into the callback if the regexp does not sanitize matches
hasWildcardReplaceRegExp(call) and
pred = call.getReceiver() and
succ = call.getReplacementCallback().getParameter(0)
or
// Out of the callback
pred = call.getReplacementCallback().getReturnNode() and
succ = call
)
)
}
}
/** Holds if the given call takes a regexp containing a wildcard. */
pragma[noinline]
private predicate hasWildcardReplaceRegExp(StringReplaceCall call) {
RegExp::isWildcardLike(call.getRegExp().getRoot().getAChild*())
/**
 * Taint steps into and out of the replacement callback of a string `replace` call.
 *
 * These are kept as legacy steps because they cross a function boundary, and would
 * therefore be converted to jump steps.
 */
private class ReplaceCallbackSteps extends LegacyTaintStep {
  override predicate step(DataFlow::Node pred, DataFlow::Node succ) {
    exists(StringReplaceCall replace |
      // callback -> call: whatever the callback returns ends up in the result of the call
      succ = replace and
      pred = replace.getReplacementCallback().getReturnNode()
      or
      // receiver -> callback: the matched text reaches the first callback parameter,
      // but only if the regexp does not sanitize matches
      replace.hasRegExpContainingWildcard() and
      replace.getReceiver() = pred and
      replace.getReplacementCallback().getParameter(0) = succ
    )
  }
}
/**
@@ -969,7 +818,7 @@ module TaintTracking {
* A conditional checking a tainted string against a regular expression, which is
* considered to be a sanitizer for all configurations.
*/
class SanitizingRegExpTest extends AdditionalSanitizerGuardNode, DataFlow::ValueNode {
class SanitizingRegExpTest extends LegacyAdditionalBarrierGuard, DataFlow::ValueNode {
Expr expr;
boolean sanitizedOutcome;
@@ -1002,12 +851,10 @@ module TaintTracking {
private boolean getSanitizedOutcome() { result = sanitizedOutcome }
override predicate sanitizes(boolean outcome, Expr e) {
override predicate blocksExpr(boolean outcome, Expr e) {
outcome = sanitizedOutcome and
e = expr
}
override predicate appliesTo(Configuration cfg) { any() }
}
/**
@@ -1017,14 +864,14 @@ module TaintTracking {
*
* Note that the `includes` method is covered by `MembershipTestSanitizer`.
*/
class WhitelistContainmentCallSanitizer extends AdditionalSanitizerGuardNode,
class WhitelistContainmentCallSanitizer extends LegacyAdditionalBarrierGuard,
DataFlow::MethodCallNode
{
WhitelistContainmentCallSanitizer() {
this.getMethodName() = ["contains", "has", "hasOwnProperty", "hasOwn"]
}
override predicate sanitizes(boolean outcome, Expr e) {
override predicate blocksExpr(boolean outcome, Expr e) {
exists(int propertyIndex |
if this.getMethodName() = "hasOwn" then propertyIndex = 1 else propertyIndex = 0
|
@@ -1032,8 +879,6 @@ module TaintTracking {
e = this.getArgument(propertyIndex).asExpr()
)
}
override predicate appliesTo(Configuration cfg) { any() }
}
/**
@@ -1043,33 +888,40 @@ module TaintTracking {
*
* This sanitizer is not enabled by default.
*/
class AdHocWhitelistCheckSanitizer extends SanitizerGuardNode, DataFlow::CallNode {
class AdHocWhitelistCheckSanitizer extends DataFlow::CallNode {
AdHocWhitelistCheckSanitizer() {
this.getCalleeName()
.regexpMatch("(?i).*((?<!un)safe|whitelist|(?<!in)valid|allow|(?<!un)auth(?!or\\b)).*") and
this.getNumArgument() = 1
}
override predicate sanitizes(boolean outcome, Expr e) {
/** Holds if this node blocks flow through `e`, provided it evaluates to `outcome`. */
predicate blocksExpr(boolean outcome, Expr e) {
outcome = true and
e = this.getArgument(0).asExpr()
}
}
/**
* Deprecated adapter that makes every `AdHocWhitelistCheckSanitizer` a `SanitizerGuardNode`,
* with `sanitizes` forwarding to `AdHocWhitelistCheckSanitizer.blocksExpr`.
*/
deprecated private class AdHocWhitelistCheckSanitizerAsSanitizerGuardNode extends SanitizerGuardNode instanceof AdHocWhitelistCheckSanitizer
{
override predicate sanitizes(boolean outcome, Expr e) { super.blocksExpr(outcome, e) }
}
/** Barrier nodes derived from the `AdHocWhitelistCheckSanitizer` class. */
module AdHocWhitelistCheckSanitizer = DataFlow::MakeBarrierGuard<AdHocWhitelistCheckSanitizer>;
/** A check of the form `if(x in o)`, which sanitizes `x` in its "then" branch. */
class InSanitizer extends AdditionalSanitizerGuardNode, DataFlow::ValueNode {
class InSanitizer extends LegacyAdditionalBarrierGuard, DataFlow::ValueNode {
override InExpr astNode;
override predicate sanitizes(boolean outcome, Expr e) {
override predicate blocksExpr(boolean outcome, Expr e) {
outcome = true and
e = astNode.getLeftOperand()
}
override predicate appliesTo(Configuration cfg) { any() }
}
/** A check of the form `if(o[x] != undefined)`, which sanitizes `x` in its "then" branch. */
class UndefinedCheckSanitizer extends AdditionalSanitizerGuardNode, DataFlow::ValueNode {
class UndefinedCheckSanitizer extends LegacyAdditionalBarrierGuard, DataFlow::ValueNode {
Expr x;
override EqualityTest astNode;
@@ -1085,27 +937,23 @@ module TaintTracking {
)
}
override predicate sanitizes(boolean outcome, Expr e) {
override predicate blocksExpr(boolean outcome, Expr e) {
outcome = astNode.getPolarity().booleanNot() and
e = x
}
override predicate appliesTo(Configuration cfg) { any() }
}
/** A check of the form `typeof x === "undefined"`, which sanitizes `x` in its "then" branch. */
class TypeOfUndefinedSanitizer extends AdditionalSanitizerGuardNode, DataFlow::ValueNode {
class TypeOfUndefinedSanitizer extends LegacyAdditionalBarrierGuard, DataFlow::ValueNode {
Expr x;
override EqualityTest astNode;
TypeOfUndefinedSanitizer() { isTypeofGuard(astNode, x, "undefined") }
override predicate sanitizes(boolean outcome, Expr e) {
override predicate blocksExpr(boolean outcome, Expr e) {
outcome = astNode.getPolarity() and
e = x
}
override predicate appliesTo(Configuration cfg) { any() }
}
/**
@@ -1166,7 +1014,7 @@ module TaintTracking {
/**
* A test of form `x.length === "0"`, preventing `x` from being tainted.
*/
class IsEmptyGuard extends AdditionalSanitizerGuardNode, DataFlow::ValueNode {
class IsEmptyGuard extends LegacyAdditionalBarrierGuard, DataFlow::ValueNode {
override EqualityTest astNode;
boolean polarity;
Expr operand;
@@ -1180,24 +1028,20 @@ module TaintTracking {
)
}
override predicate sanitizes(boolean outcome, Expr e) { polarity = outcome and e = operand }
override predicate appliesTo(Configuration cfg) { any() }
override predicate blocksExpr(boolean outcome, Expr e) { polarity = outcome and e = operand }
}
/**
* A check of the form `whitelist.includes(x)` or equivalent, which sanitizes `x` in its "then" branch.
*/
class MembershipTestSanitizer extends AdditionalSanitizerGuardNode {
class MembershipTestSanitizer extends LegacyAdditionalBarrierGuard {
MembershipCandidate candidate;
MembershipTestSanitizer() { this = candidate.getTest() }
override predicate sanitizes(boolean outcome, Expr e) {
override predicate blocksExpr(boolean outcome, Expr e) {
candidate = e.flow() and candidate.getTestPolarity() = outcome
}
override predicate appliesTo(Configuration cfg) { any() }
}
/**
@@ -1205,7 +1049,7 @@ module TaintTracking {
*
* The more typical case of `x.indexOf(y) >= 0` is covered by `MembershipTestSanitizer`.
*/
class PositiveIndexOfSanitizer extends AdditionalSanitizerGuardNode, DataFlow::ValueNode {
class PositiveIndexOfSanitizer extends LegacyAdditionalBarrierGuard, DataFlow::ValueNode {
MethodCallExpr indexOf;
override RelationalComparison astNode;
@@ -1218,19 +1062,17 @@ module TaintTracking {
)
}
override predicate sanitizes(boolean outcome, Expr e) {
override predicate blocksExpr(boolean outcome, Expr e) {
outcome = true and
e = indexOf.getArgument(0)
}
override predicate appliesTo(Configuration cfg) { any() }
}
/**
* An equality test on `e.origin` or `e.source` where `e` is a `postMessage` event object,
* considered as a sanitizer for `e`.
*/
private class PostMessageEventSanitizer extends AdditionalSanitizerGuardNode {
private class PostMessageEventSanitizer extends LegacyAdditionalBarrierGuard {
VarAccess event;
boolean polarity;
@@ -1247,11 +1089,29 @@ module TaintTracking {
)
}
override predicate sanitizes(boolean outcome, Expr e) {
override predicate blocksExpr(boolean outcome, Expr e) {
outcome = polarity and
e = event
}
}
override predicate appliesTo(Configuration cfg) { any() }
import internal.sharedlib.TaintTracking
/**
* Holds if there is a taint step from `node1` to `node2`.
*
* This includes steps between synthesized nodes generated by flow summaries.
*
* This is a thin, inlined wrapper around `TaintTrackingPrivate::defaultAdditionalTaintStep`.
*/
pragma[inline]
predicate defaultTaintStep(DataFlow::Node node1, DataFlow::Node node2) {
TaintTrackingPrivate::defaultAdditionalTaintStep(node1, node2)
}
/**
* Holds if `node` is seen as a barrier for taint-tracking.
*
* This is a thin, inlined wrapper around `TaintTrackingPrivate::defaultTaintSanitizer`.
*/
pragma[inline]
predicate defaultSanitizer(DataFlow::Node node) {
TaintTrackingPrivate::defaultTaintSanitizer(node)
}
}

View File

@@ -92,7 +92,7 @@ class AccessPath extends TAccessPath {
* Gets an expression in `bb` represented by this access path.
*/
cached
Expr getAnInstanceIn(BasicBlock bb) {
Expr getAnInstanceIn(ReachableBasicBlock bb) {
Stages::DataFlowStage::ref() and
exists(SsaVariable var |
this = MkSsaRoot(var) and

View File

@@ -0,0 +1,34 @@
private import javascript
private import semmle.javascript.dataflow.internal.DataFlowNode
private import semmle.javascript.dataflow.internal.DataFlowPrivate
/**
* Gets a data-flow node synthesized using `AdditionalFlowInternal#needsSynthesizedNode`.
*
* The result is the `TGenericSynthesizedNode` for the pair `(node, tag)`; the third
* component of that node is left unconstrained here.
*/
DataFlow::Node getSynthesizedNode(AstNode node, string tag) {
result = TGenericSynthesizedNode(node, tag, _)
}
/**
* An extension to `AdditionalFlowStep` with additional internal-only predicates.
*
* Every predicate declared here defaults to `none()`; subclasses override the ones they need.
*/
class AdditionalFlowInternal extends DataFlow::AdditionalFlowStep {
/**
* Holds if a data-flow node should be synthesized for the pair `(node, tag)`.
*
* The node can be obtained using `getSynthesizedNode(node, tag)`.
*
* `container` will be seen as the node's enclosing container.
*
* Holds for nothing unless overridden.
*/
predicate needsSynthesizedNode(AstNode node, string tag, DataFlowCallable container) { none() }
/**
* Holds if `node` should only permit flow of values stored in `contents`.
*
* Holds for nothing unless overridden.
*/
predicate expectsContent(DataFlow::Node node, DataFlow::ContentSet contents) { none() }
/**
* Holds if `node` should not permit flow of values stored in `contents`.
*
* Holds for nothing unless overridden.
*/
predicate clearsContent(DataFlow::Node node, DataFlow::ContentSet contents) { none() }
}

View File

@@ -0,0 +1,474 @@
/**
* A copy of the barrier guard logic from `Configuration.qll` in the JS data flow library.
*
* This version considers all barrier guards to be relevant.
*/
private import javascript
private import semmle.javascript.dataflow.internal.AccessPaths
private import semmle.javascript.dataflow.internal.DataFlowPrivate as DataFlowPrivate
private import semmle.javascript.dataflow.internal.sharedlib.Ssa as Ssa2
/**
* The signature of a barrier guard class accepted by `MakeBarrierGuard`: a data-flow node
* type that provides a two-argument `blocksExpr` member predicate.
*/
private signature class BarrierGuardSig extends DataFlow::Node {
/**
* Holds if this node acts as a barrier for data flow, blocking further flow from `e` if `this` evaluates to `outcome`.
*/
predicate blocksExpr(boolean outcome, Expr e);
}
/**
* Converts a barrier guard class to a set of nodes to include in an implementation of `isBarrier(node)`.
*
* Implemented on top of `MakeStateBarrierGuard`, using `Unit` as a dummy flow state.
*/
module MakeBarrierGuard<BarrierGuardSig BaseGuard> {
final private class FinalBaseGuard = BaseGuard;
/**
* Lifts the two-argument `blocksExpr` of `BaseGuard` to the three-argument form
* taken by `MakeStateBarrierGuard`.
*/
private class Adapter extends FinalBaseGuard {
predicate blocksExpr(boolean outcome, Expr e, Unit state) {
// `exists(state)` only serves to bind `state`; the guard does not depend on it
super.blocksExpr(outcome, e) and exists(state)
}
}
/**
* Gets a node that is blocked by a barrier guard.
*/
DataFlow::Node getABarrierNode() {
result = MakeStateBarrierGuard<Unit, Adapter>::getABarrierNode(_)
}
}
/**
* Holds the signature used by `MakeLabeledBarrierGuard`.
*
* NOTE(review): presumably wrapped in a deprecated module so that the signature can mention
* `DataFlow::FlowLabel` without a deprecation warning (compare `DeprecatedSigs`) - confirm.
*/
deprecated private module DeprecationWrapper {
/** The signature of a barrier guard class whose `blocksExpr` takes a flow label. */
signature class LabeledBarrierGuardSig extends DataFlow::Node {
/**
* Holds if this node acts as a barrier for `label`, blocking further flow from `e` if `this` evaluates to `outcome`.
*/
predicate blocksExpr(boolean outcome, Expr e, DataFlow::FlowLabel label);
}
}
/**
* Converts a barrier guard class to a set of nodes to include in an implementation of `isBarrier(node, label)`.
*
* Implemented on top of `MakeStateBarrierGuard`, using `DataFlow::FlowLabel` as the flow state.
*/
deprecated module MakeLabeledBarrierGuard<DeprecationWrapper::LabeledBarrierGuardSig BaseGuard> {
final private class FinalBaseGuard = BaseGuard;
/** Forwards `blocksExpr` unchanged to the underlying `BaseGuard`. */
private class Adapter extends FinalBaseGuard {
predicate blocksExpr(boolean outcome, Expr e, DataFlow::FlowLabel label) {
super.blocksExpr(outcome, e, label)
}
}
/**
* Gets a node and flow label that is blocked by a barrier guard.
*/
DataFlow::Node getABarrierNode(DataFlow::FlowLabel label) {
result = MakeStateBarrierGuard<DataFlow::FlowLabel, Adapter>::getABarrierNode(label)
}
}
/**
* Contains deprecated signatures.
*
* This module is a workaround for the fact that deprecated signatures can't refer to deprecated classes
* without getting a deprecation warning.
*/
deprecated private module DeprecatedSigs {
/** The signature of a predicate that selects the `DataFlow::BarrierGuardNode`s to take into account. */
signature predicate isBarrierGuardSig(DataFlow::BarrierGuardNode node);
}
/**
* Converts the legacy barrier guard nodes selected by the `isBarrierGuard` predicate to a set of
* nodes to include in an implementation of `isBarrier(node)` and `isBarrier(node, label)`
* in a `DataFlow::StateConfigSig` implementation.
*/
deprecated module MakeLegacyBarrierGuardLabeled<DeprecatedSigs::isBarrierGuardSig/1 isBarrierGuard> {
final private class FinalNode = DataFlow::Node;
/**
* A `DataFlow::BarrierGuardNode` selected by `isBarrierGuard`, exposing its `blocks`
* predicates in the `blocksExpr` form taken by `MakeStateBarrierGuard`.
*/
private class Adapter extends FinalNode instanceof DataFlow::BarrierGuardNode {
Adapter() { isBarrierGuard(this) }
predicate blocksExpr(boolean outcome, Expr e, string label) {
super.blocks(outcome, e, label)
or
// the unlabelled form of `blocks` is encoded using the empty string as label
super.blocks(outcome, e) and label = ""
}
}
private module Guards = MakeStateBarrierGuard<string, Adapter>;
/**
* Gets a node that is blocked by a barrier guard.
*
* Only guards using the unlabelled form of `blocks` (encoded as label `""`) contribute here.
*/
DataFlow::Node getABarrierNode() { result = Guards::getABarrierNode("") }
/**
* Gets a node and flow label that is blocked by a barrier guard.
*/
DataFlow::Node getABarrierNode(DataFlow::FlowLabel label) {
result = Guards::getABarrierNode(label)
}
}
/**
* Converts the legacy barrier guard nodes selected by the `isBarrierGuard` predicate to a set of
* nodes to include in an implementation of `isBarrier(node)` in a `DataFlow::ConfigSig` implementation.
*/
deprecated module MakeLegacyBarrierGuard<DeprecatedSigs::isBarrierGuardSig/1 isBarrierGuard> {
final private class FinalNode = DataFlow::Node;
/**
* A `DataFlow::BarrierGuardNode` selected by `isBarrierGuard`, exposing its `blocks`
* predicates in the `blocksExpr` form taken by `MakeStateBarrierGuard`.
*/
private class Adapter extends FinalNode instanceof DataFlow::BarrierGuardNode {
Adapter() { isBarrierGuard(this) }
predicate blocksExpr(boolean outcome, Expr e, string label) {
super.blocks(outcome, e, label)
or
// the unlabelled form of `blocks` is encoded using the empty string as label
super.blocks(outcome, e) and label = ""
}
}
private module Guards = MakeStateBarrierGuard<string, Adapter>;
/**
* Gets a node that is blocked by a barrier guard.
*
* A node is included if it is blocked for the label `""` (the unlabelled form of `blocks`),
* `"data"`, or `"taint"`.
*/
DataFlow::Node getABarrierNode() { result = Guards::getABarrierNode(["", "data", "taint"]) }
}
/**
* The signature of a type usable as flow state by the barrier guard modules in this file.
*
* The `bindingset[this]` annotation lets the type be instantiated with `string`,
* as the legacy adapter modules in this file do.
*/
bindingset[this]
private signature class FlowStateSig;
/**
* Provides the barrier guard signature for a given `FlowState` type, so that
* `MakeStateBarrierGuard` can relate its two module parameters.
*/
private module WithFlowState<FlowStateSig FlowState> {
/** The signature of a barrier guard class whose `blocksExpr` takes a flow state. */
signature class BarrierGuardSig extends DataFlow::Node {
/**
* Holds if this node acts as a barrier for `state`, blocking further flow from `e` if `this` evaluates to `outcome`.
*/
predicate blocksExpr(boolean outcome, Expr e, FlowState state);
}
}
/**
* Projects the dominator tree onto a tree that only considers dominance between `ConditionGuardNode`s.
*
* This exists to speed up the dominance check for barrier guards acting on an access path, avoiding the following two
* bad join orders:
*
* - Enumerate all basic blocks dominated by a barrier guard, and then find uses of the access path in those blocks.
* - Enumerate all uses of an access path and then select those that are in a dominated block.
*
* Both joins have pathological cases in different benchmarks.
*
* We use a join order that is essentially the first one above, except we only enumerate condition guards, not all the blocks.
*/
cached
private module ConditionGuardDominators {
/**
* Gets the condition guard that most-immediately dominates `bb`.
*
* A condition guard located in `bb` itself takes precedence; only if `bb` contains no
* condition guard is the search continued in the immediate dominator of `bb`.
*/
private ConditionGuardNode getDominatingCondition(ReachableBasicBlock bb) {
result.getBasicBlock() = bb
or
not bb = any(ConditionGuardNode guard).getBasicBlock() and
result = getDominatingCondition(bb.getImmediateDominator())
}
/**
* Holds if `dominator` is the condition guard that most-immediately dominates the
* immediate dominator of the basic block of `dominated`, or if the two guards are the same.
*/
private predicate immediateDom(ConditionGuardNode dominator, ConditionGuardNode dominated) {
dominator = getDominatingCondition(dominated.getBasicBlock().getImmediateDominator())
or
dominator = dominated // make the fastTC below reflexive
}
/** Gets a condition guard dominated by `node`, including `node` itself. */
cached
ConditionGuardNode getADominatedConditionGuard(ConditionGuardNode node) =
fastTC(immediateDom/2)(node, result)
/**
* Gets a use of `ap` and binds `guard` to its immediately-dominating condition guard.
*
* Uses that are not dominated by any condition guard are not included.
*/
cached
Expr getAnAccessPathUseUnderCondition(AccessPath ap, ConditionGuardNode guard) {
exists(ReachableBasicBlock bb |
result = ap.getAnInstanceIn(bb) and
guard = getDominatingCondition(bb)
)
}
}
/**
* Converts a barrier guard class to a set of nodes to include in an implementation of `isBarrier(node, state)`.
*/
module MakeStateBarrierGuard<
FlowStateSig FlowState, WithFlowState<FlowState>::BarrierGuardSig BaseGuard>
{
final private class FinalNode = DataFlow::Node;
/**
* A node that blocks flow through an expression for a given flow state, depending on its outcome.
*
* Besides the guards supplied through `BaseGuard`, this module adds derived guards in the
* subclasses `AdditionalBarrierGuardCall` and `CallAgainstEqualityCheck`.
*/
abstract private class BarrierGuard extends FinalNode {
/** Holds if this guard blocks flow through `test` for `state`, provided it evaluates to `outcome`. */
abstract predicate blocksExpr(boolean outcome, Expr test, FlowState state);
}
/** A barrier guard supplied through the `BaseGuard` module parameter. */
private class ExplicitBarrierGuard extends BarrierGuard instanceof BaseGuard {
override predicate blocksExpr(boolean outcome, Expr test, FlowState state) {
BaseGuard.super.blocksExpr(outcome, test, state)
}
}
/**
* Gets a node and flow state that is blocked by a barrier guard.
*/
pragma[nomagic]
DataFlow::Node getABarrierNode(FlowState state) { barrierGuardBlocksNode(result, state) }
//
// ================================================================================================
// NOTE
// The rest of this file is a copy of the barrier-guard logic in Configuration.qll except:
// - FlowLabel is replaced by FlowState
// - BarrierGuardNode and AdditionalBarrierGuardNode are replaced by the BarrierGuard class defined above
// - `barrierGuardBlocksEdge` is missing as dataflow2 does not support barrier edges
// - `barrierGuardIsRelevant` does not check pruning results as we can't access that from here
// - `barrierGuardBlocksNode` has been rewritten to perform better without pruning.
// ================================================================================================
//
/**
* Holds if data flow node `guard` acts as a barrier for data flow.
*
* `state` is bound to the blocked state, or the empty FlowState if all labels should be blocked.
*/
pragma[nomagic]
private predicate barrierGuardBlocksExpr(
BarrierGuard guard, boolean outcome, Expr test, FlowState state
) {
guard.blocksExpr(outcome, test, state)
}
/**
* Holds if `guard` blocks at least one expression.
*
* Unlike the corresponding predicate in the main JS library, this does not consult the results of
* exploratory flow, so every guard that blocks something is considered relevant.
*/
pragma[nomagic]
private predicate barrierGuardIsRelevant(BarrierGuard guard) {
exists(Expr e |
barrierGuardBlocksExpr(guard, _, e, _)
// All guards are considered relevant (this is the difference from the main JS lib)
// isRelevantForward(e.flow(), _)
)
}
/**
* Holds if data flow node `guard` acts as a barrier for data flow due to aliasing through
* an access path.
*
* `state` is bound to the blocked state, or the empty FlowState if all labels should be blocked.
*/
pragma[nomagic]
private predicate barrierGuardBlocksAccessPath(
BarrierGuard guard, boolean outcome, AccessPath ap, FlowState state
) {
barrierGuardIsRelevant(guard) and
barrierGuardBlocksExpr(guard, outcome, ap.getAnInstance(), state)
}
/**
* Holds if there exists an input variable of `ref` that blocks the state `state`.
*
* This predicate is outlined to give the optimizer a hint about the join ordering.
*/
pragma[nomagic]
private predicate barrierGuardBlocksSsaRefinement(
BarrierGuard guard, boolean outcome, SsaRefinementNode ref, FlowState state
) {
barrierGuardIsRelevant(guard) and
guard.getEnclosingExpr() = ref.getGuard().getTest() and
// `forex`: there must be at least one input, and the guard must block a use of every input
forex(SsaVariable input | input = ref.getAnInput() |
barrierGuardBlocksExpr(guard, outcome, input.getAUse(), state)
)
}
/**
* Holds if the result of `guard` is used in the branching condition `cond`.
*
* `outcome` is bound to the outcome of `cond` for join-ordering purposes.
*/
pragma[nomagic]
private predicate barrierGuardUsedInCondition(
BarrierGuard guard, ConditionGuardNode cond, boolean outcome
) {
barrierGuardIsRelevant(guard) and
outcome = cond.getOutcome() and
(
// the guard is the tested expression itself ...
cond.getTest() = guard.getEnclosingExpr()
or
// ... or it reaches the tested expression through one or more immediate-predecessor steps
cond.getTest().flow().getImmediatePredecessor+() = guard
)
}
/**
* Holds if some barrier guard whose expression is `guard` blocks `test` for `state` when it
* evaluates to `branch`.
*
* This has the shape expected by `Ssa2::BarrierGuardWithState`, to which it is passed below.
*/
private predicate ssa2GuardChecks(
Ssa2::SsaDataflowInput::Guard guard, Ssa2::SsaDataflowInput::Expr test, boolean branch,
FlowState state
) {
exists(BarrierGuard g |
g.asExpr() = guard and
g.blocksExpr(branch, test, state)
)
}
private module Ssa2Barrier = Ssa2::BarrierGuardWithState<FlowState, ssa2GuardChecks/4>;
/** Holds if `node` corresponds to a barrier node computed by `Ssa2Barrier` for `state`. */
private predicate ssa2BlocksNode(DataFlow::Node node, FlowState state) {
node = DataFlowPrivate::getNodeFromSsa2(Ssa2Barrier::getABarrierNode(state))
}
/** Holds if a barrier guard blocks uses of `ap` in basic blocks dominated by `cond`. */
pragma[nomagic]
private predicate barrierGuardBlocksAccessPathIn(
AccessPath ap, ConditionGuardNode cond, FlowState state
) {
exists(BarrierGuard guard, boolean outcome |
barrierGuardBlocksAccessPath(guard, outcome, ap, state) and
barrierGuardUsedInCondition(guard, cond, outcome)
)
}
/**
* Holds if `use` is an access path reference that is blocked by a barrier guard.
*
* The conjuncts below are evaluated in the order written (`pragma[noopt]`); see
* `ConditionGuardDominators` for the rationale behind this join order.
*/
pragma[noopt]
private predicate barrierGuardBlocksAccessPathUse(Expr use, FlowState state) {
exists(AccessPath p, ConditionGuardNode cond, ConditionGuardNode useDominator |
barrierGuardBlocksAccessPathIn(p, cond, state) and
useDominator = ConditionGuardDominators::getADominatedConditionGuard(cond) and
use = ConditionGuardDominators::getAnAccessPathUseUnderCondition(p, useDominator)
)
}
/**
* Holds if data flow node `nd` acts as a barrier for data flow, possibly due to aliasing
* through an access path.
*
* `state` is bound to the blocked state.
*/
pragma[nomagic]
private predicate barrierGuardBlocksNode(DataFlow::Node nd, FlowState state) {
// case 1: `nd` is an SSA refinement node whose guard blocks its inputs
exists(BarrierGuard guard, SsaRefinementNode ref, boolean outcome |
nd = DataFlow::ssaDefinitionNode(ref) and
outcome = ref.getGuard().(ConditionGuardNode).getOutcome() and
barrierGuardBlocksSsaRefinement(guard, outcome, ref, state)
)
or
// case 2: `nd` is a use of an access path blocked by a dominating condition guard
exists(Expr use |
barrierGuardBlocksAccessPathUse(use, state) and
nd = DataFlow::valueNode(use)
)
or
// case 3: `nd` is a barrier node computed by the `Ssa2` barrier guard module
ssa2BlocksNode(nd, state)
}
/**
* Gets a logical `and` expression, or parenthesized expression, that contains `guard`.
*
* The expression of `guard` itself is also included.
*/
private Expr getALogicalAndParent(BarrierGuard guard) {
barrierGuardIsRelevant(guard) and result = guard.asExpr()
or
result.(LogAndExpr).getAnOperand() = getALogicalAndParent(guard)
or
result.getUnderlyingValue() = getALogicalAndParent(guard)
}
/**
* Gets a logical `or` expression, or parenthesized expression, that contains `guard`.
*
* The expression of `guard` itself is also included.
*/
private Expr getALogicalOrParent(BarrierGuard guard) {
barrierGuardIsRelevant(guard) and result = guard.asExpr()
or
result.(LogOrExpr).getAnOperand() = getALogicalOrParent(guard)
or
result.getUnderlyingValue() = getALogicalOrParent(guard)
}
final private class FinalFunction = Function;
/**
* A function that returns the result of a barrier guard.
*
* The guard must block an expression that the parameter at index `paramIndex` flows to.
*/
private class BarrierGuardFunction extends FinalFunction {
DataFlow::ParameterNode sanitizedParameter;
BarrierGuard guard;
boolean guardOutcome;
FlowState state;
int paramIndex;
BarrierGuardFunction() {
barrierGuardIsRelevant(guard) and
exists(Expr e |
exists(Expr returnExpr |
returnExpr = guard.asExpr()
or
// ad hoc support for conjunctions:
getALogicalAndParent(guard) = returnExpr and guardOutcome = true
or
// ad hoc support for disjunctions:
getALogicalOrParent(guard) = returnExpr and guardOutcome = false
|
// the guard result is first stored in a variable and a use of that variable is returned ...
exists(SsaExplicitDefinition ssa |
ssa.getDef().getSource() = returnExpr and
ssa.getVariable().getAUse() = this.getAReturnedExpr()
)
or
// ... or the guard result is returned directly
returnExpr = this.getAReturnedExpr()
) and
sanitizedParameter.flowsToExpr(e) and
barrierGuardBlocksExpr(guard, guardOutcome, e, state)
) and
sanitizedParameter.getParameter() = this.getParameter(paramIndex)
}
/**
* Holds if this function sanitizes argument `e` of call `call`, provided the call evaluates to `outcome`.
*
* `st` is bound to the flow state blocked by the underlying guard.
*/
predicate isBarrierCall(DataFlow::CallNode call, Expr e, boolean outcome, FlowState st) {
exists(DataFlow::Node arg |
DataFlow::argumentPassingStep(pragma[only_bind_into](call), pragma[only_bind_into](arg),
pragma[only_bind_into](this), pragma[only_bind_into](sanitizedParameter)) and
arg.asExpr() = e and
arg = call.getArgument(paramIndex) and
outcome = guardOutcome and
state = st
)
}
}
/**
* A call that sanitizes an argument.
*
* That is, a call for which `isBarrierCall` holds for some `BarrierGuardFunction`.
*/
private class AdditionalBarrierGuardCall extends BarrierGuard instanceof DataFlow::CallNode {
BarrierGuardFunction f;
AdditionalBarrierGuardCall() { f.isBarrierCall(this, _, _, _) }
override predicate blocksExpr(boolean outcome, Expr e, FlowState state) {
f.isBarrierCall(this, e, outcome, state)
}
}
/**
* A sanitizer where an inner sanitizer is compared against a boolean.
* E.g. (assuming `sanitizes(e)` is an existing sanitizer):
* ```javascript
* if (sanitizes(e) === true) {
* // e is sanitized
* }
* ```
*/
private class CallAgainstEqualityCheck extends BarrierGuard {
BarrierGuard prev;
boolean polarity;
CallAgainstEqualityCheck() {
prev instanceof DataFlow::CallNode and
exists(EqualityTest test, BooleanLiteral bool |
this.asExpr() = test and
test.hasOperands(prev.asExpr(), bool) and
// `polarity` is `true` exactly when the test inverts the outcome of `prev`
// (e.g. `=== false` or `!== true`), and `false` when it preserves it
polarity = test.getPolarity().booleanXor(bool.getBoolValue())
)
}
override predicate blocksExpr(boolean outcome, Expr e, FlowState state) {
exists(boolean prevOutcome |
barrierGuardBlocksExpr(prev, prevOutcome, e, state) and
outcome = prevOutcome.booleanXor(polarity)
)
}
}
}

View File

@@ -0,0 +1,513 @@
private import javascript
private import semmle.javascript.frameworks.data.internal.ApiGraphModels as ApiGraphModels
private import semmle.javascript.dataflow.internal.FlowSummaryPrivate as FlowSummaryPrivate
private import semmle.javascript.dataflow.internal.VariableOrThis
private import codeql.dataflow.internal.AccessPathSyntax as AccessPathSyntax
/**
* Internal definitions behind `Content` and `ContentSet`: the limits on precisely tracked
* array indices, the set of known property names and map keys, and the underlying newtypes.
*/
module Private {
import Public
/**
* Gets the largest array index that should be propagated precisely through flow summaries.
*
* Note that all known array indices have a corresponding singleton content, but some will
* be collapsed in flow summaries that operate on array elements.
*/
int getMaxPreciseArrayIndex() { result = 9 }
/**
* Gets an array index that should be propagated precisely through flow summaries,
* that is, any index in the range `0..getMaxPreciseArrayIndex()`.
*/
int getAPreciseArrayIndex() { result = [0 .. getMaxPreciseArrayIndex()] }
/**
* Holds if a MaD access path token of form `name[arg]` exists.
*
* Tokens from both flow summary access paths and API graph model access paths are considered.
*/
predicate isAccessPathTokenPresent(string name, string arg) {
arg = any(FlowSummaryPrivate::AccessPathToken tok).getAnArgument(name)
or
arg = any(ApiGraphModels::AccessPathToken tok).getAnArgument(name)
}
/**
* Holds if values associated with `key` should be tracked as an individual content of a `Map` object.
*
* This is the case if `key` is the string value of the sole argument of some method call named `get`,
* or if it appears as the argument of a `MapValue` access path token.
*/
private predicate isKnownMapKey(string key) {
exists(MethodCallExpr call |
call.getMethodName() = "get" and
call.getNumArgument() = 1 and
call.getArgument(0).getStringValue() = key
)
or
isAccessPathTokenPresent("MapValue", key)
}
/**
* A known property name.
*/
class PropertyName extends string {
// Note: unlike the similarly-named class in StepSummary.qll, this class must not depend on DataFlow::Node
PropertyName() {
this = any(PropAccess access).getPropertyName()
or
this = any(Property p).getName()
or
this = any(PropertyPattern p).getName()
or
this = any(GlobalVariable v).getName()
or
this = getAPreciseArrayIndex().toString()
or
isAccessPathTokenPresent("Member", this)
}
/**
* Gets the array index corresponding to this property name.
*
* Only holds if this name is the canonical decimal representation of a non-negative integer;
* the round trip through `toString()` rules out other spellings of the same number.
*/
pragma[nomagic]
int asArrayIndex() { result = this.toInt() and result >= 0 and this = result.toString() }
}
/** The underlying type of `Content`. */
cached
newtype TContent =
MkPropertyContent(PropertyName name) or
MkArrayElementUnknown() or // note: array elements with known index are just properties
MkMapKey() or
MkMapValueWithUnknownKey() or
MkMapValueWithKnownKey(string key) { isKnownMapKey(key) } or
MkSetElement() or
MkIteratorElement() or
MkIteratorError() or
MkPromiseValue() or
MkPromiseError() or
MkCapturedContent(LocalVariableOrThis v) { v.isCaptured() }
/** The underlying type of `ContentSet`. */
cached
newtype TContentSet =
MkSingletonContent(Content content) or
MkArrayElementKnown(int index) { index = any(PropertyName name).asArrayIndex() } or
MkArrayElementLowerBound(int index) { index = [0 .. getMaxPreciseArrayIndex() + 1] } or
MkMapValueKnown(string key) { isKnownMapKey(key) } or
MkMapValueAll() or
MkPromiseFilter() or
MkIteratorFilter() or
MkAnyProperty() or
MkAnyCapturedContent() or
// The following content sets are used exclusively as an intermediate value in flow summaries.
// These are encoded as a ContentSummaryComponent, although the flow graphs we generate are different
// than an ordinary content component. These special content sets should never appear in a step.
MkAwaited() or
MkAnyPropertyDeep() or
MkArrayElementDeep() or
MkOptionalStep(string name) { isAccessPathTokenPresent("OptionalStep", name) } or
MkOptionalBarrier(string name) { isAccessPathTokenPresent("OptionalBarrier", name) }
/**
* Holds if `cs` is used to encode a special operation as a content component, but should not
* be treated as an ordinary content component.
*
* These are the `Awaited`, `AnyPropertyDeep`, `ArrayElementDeep`, `OptionalStep` and
* `OptionalBarrier` branches of `TContentSet`.
*/
predicate isSpecialContentSet(ContentSet cs) {
cs = MkAwaited() or
cs = MkAnyPropertyDeep() or
cs = MkArrayElementDeep() or
cs instanceof MkOptionalStep or
cs instanceof MkOptionalBarrier
}
}
module Public {
private import Private
/**
 * A location inside an object where a value can be stored, for instance a property name.
 */
class Content extends TContent {
  /**
   * Gets the text used to display this content.
   *
   * The text is user-visible: it appears in the access path of a `PathNode`.
   */
  cached
  string toString() {
    result = this.asPropertyName()
    or
    result = this.asCapturedVariable().getName()
    or
    result = "ArrayElement" and this.isUnknownArrayElement()
    or
    result = "MapKey" and this = MkMapKey()
    or
    result = "MapValue" and this = MkMapValueWithUnknownKey()
    or
    exists(string knownKey |
      result = "MapValue[" + knownKey + "]" and
      this = MkMapValueWithKnownKey(knownKey)
    )
    or
    result = "SetElement" and this = MkSetElement()
    or
    result = "IteratorElement" and this = MkIteratorElement()
    or
    result = "IteratorError" and this = MkIteratorError()
    or
    result = "PromiseValue" and this = MkPromiseValue()
    or
    result = "PromiseError" and this = MkPromiseError()
  }

  /** Gets the name of the property this content stands for, if it is a property content. */
  string asPropertyName() { MkPropertyContent(result) = this }

  /** Gets the array index this content stands for, if its property name is an array index. */
  pragma[nomagic]
  int asArrayIndex() {
    exists(PropertyName name | name = this.asPropertyName() | result = name.asArrayIndex())
  }

  /** Gets the captured variable this content stands for, if it is a captured-variable content. */
  LocalVariableOrThis asCapturedVariable() { MkCapturedContent(result) = this }

  /** Holds if this content stands for values stored at an unknown array index. */
  predicate isUnknownArrayElement() { MkArrayElementUnknown() = this }

  /** Holds if this content stands for values stored in a `Map` under an unknown key. */
  predicate isMapValueWithUnknownKey() { MkMapValueWithUnknownKey() = this }

  /** Holds if this content stands for values stored in a `Map` under the string key `key`. */
  predicate isMapValueWithKnownKey(string key) { MkMapValueWithKnownKey(key) = this }
}
/**
 * An entity that represents the set of `Content`s being accessed at a read or store operation.
 */
class ContentSet extends TContentSet {
  /**
   * Gets a content that may be stored into when storing into this set.
   *
   * Content sets not mentioned below (for example the filters and `anyProperty`)
   * have no store content.
   */
  pragma[inline]
  Content getAStoreContent() {
    result = this.asSingleton()
    or
    // For array element access with known lower bound, just store into the unknown array element
    this = ContentSet::arrayElementLowerBound(_) and
    result.isUnknownArrayElement()
    or
    exists(int n |
      this = ContentSet::arrayElementKnown(n) and
      result.asArrayIndex() = n
    )
    or
    exists(string key |
      this = ContentSet::mapValueWithKnownKey(key) and
      result.isMapValueWithKnownKey(key)
    )
    or
    this = ContentSet::mapValueAll() and
    result.isMapValueWithUnknownKey()
  }

  /** Gets a content that may be read from when reading from this set. */
  pragma[nomagic]
  Content getAReadContent() {
    result = this.asSingleton()
    or
    this = ContentSet::promiseFilter() and
    (
      result = MkPromiseValue()
      or
      result = MkPromiseError()
    )
    or
    this = ContentSet::iteratorFilter() and
    (
      result = MkIteratorElement()
      or
      result = MkIteratorError()
    )
    or
    // A lower bound reads the unknown element and every known index at or above the bound
    exists(int bound | this = ContentSet::arrayElementLowerBound(bound) |
      result.isUnknownArrayElement()
      or
      result.asArrayIndex() >= bound
    )
    or
    // A known index reads that index and the unknown element
    exists(int n | this = ContentSet::arrayElementKnown(n) |
      result.isUnknownArrayElement()
      or
      result.asArrayIndex() = n
    )
    or
    // A known map key reads that key and the unknown key
    exists(string key | this = ContentSet::mapValueWithKnownKey(key) |
      result.isMapValueWithUnknownKey()
      or
      result.isMapValueWithKnownKey(key)
    )
    or
    this = ContentSet::mapValueAll() and
    (
      result.isMapValueWithUnknownKey()
      or
      result.isMapValueWithKnownKey(_)
    )
    or
    this = ContentSet::anyProperty() and
    (
      result instanceof MkPropertyContent
      or
      result instanceof MkArrayElementUnknown
    )
    or
    this = ContentSet::anyCapturedContent() and
    result instanceof Private::MkCapturedContent
  }

  /** Gets the singleton content to be accessed. */
  Content asSingleton() { this = MkSingletonContent(result) }

  /** Gets the property name to be accessed, provided that this is a singleton content set. */
  PropertyName asPropertyName() { result = this.asSingleton().asPropertyName() }

  /**
   * Gets a string representation of this content set.
   *
   * Every branch of `TContentSet` is covered, so each content set has a string representation.
   */
  string toString() {
    result = this.asSingleton().toString()
    or
    this = ContentSet::promiseFilter() and result = "PromiseFilter"
    or
    this = ContentSet::iteratorFilter() and result = "IteratorFilter"
    or
    exists(int bound |
      this = ContentSet::arrayElementLowerBound(bound) and
      result = "ArrayElement[" + bound + "..]"
    )
    or
    exists(int n | this = ContentSet::arrayElementKnown(n) and result = "ArrayElement[" + n + "]")
    or
    // `MkMapValueKnown` is not a singleton, so it needs its own case here;
    // it is rendered like the known-key content it refers to.
    exists(string key |
      this = MkMapValueKnown(key) and
      result = "MapValue[" + key + "]"
    )
    or
    this = ContentSet::mapValueAll() and
    result = "MapValue"
    or
    this = ContentSet::anyProperty() and
    result = "AnyMember"
    or
    this = MkAwaited() and result = "Awaited (with coercion)"
    or
    this = MkAnyPropertyDeep() and result = "AnyMemberDeep"
    or
    this = MkArrayElementDeep() and result = "ArrayElementDeep"
    or
    this = MkAnyCapturedContent() and
    result = "AnyCapturedContent"
    or
    exists(string name |
      this = MkOptionalStep(name) and
      result = "OptionalStep[" + name + "]"
    )
    or
    exists(string name |
      this = MkOptionalBarrier(name) and
      result = "OptionalBarrier[" + name + "]"
    )
  }
}
/**
 * Companion module to the `ContentSet` class, providing access to various content sets.
 */
module ContentSet {
  /**
   * A content set containing only the given content.
   */
  pragma[inline]
  ContentSet singleton(Content content) { result.asSingleton() = content }

  /**
   * A content set corresponding to the given property name.
   */
  pragma[inline]
  ContentSet property(PropertyName name) { result.asSingleton().asPropertyName() = name }

  /**
   * A content set that should only be used in `withContent` and `withoutContent` steps, which
   * matches the two promise-related contents, `Awaited[value]` and `Awaited[error]`.
   */
  ContentSet promiseFilter() { result = MkPromiseFilter() }

  /**
   * A content set that should only be used in `withContent` and `withoutContent` steps, which
   * matches the two iterator-related contents, `IteratorElement` and `IteratorError`.
   */
  ContentSet iteratorFilter() { result = MkIteratorFilter() }

  /**
   * A content set describing the result of a resolved promise.
   */
  ContentSet promiseValue() { result = singleton(MkPromiseValue()) }

  /**
   * A content set describing the error stored in a rejected promise.
   */
  ContentSet promiseError() { result = singleton(MkPromiseError()) }

  /**
   * A content set describing all array elements, regardless of their index in the array.
   */
  ContentSet arrayElement() { result = MkArrayElementLowerBound(0) }

  /**
   * A content set describing array elements at index `bound` or greater.
   *
   * For `bound=0` this gets the same content set as `ContentSet::arrayElement()`, that is,
   * the content set describing all array elements.
   *
   * For large values of `bound` this has no result - see `ContentSet::arrayElementLowerBoundFromInt`.
   */
  ContentSet arrayElementLowerBound(int bound) { result = MkArrayElementLowerBound(bound) }

  /**
   * A content set describing an access to array index `n`.
   *
   * This content set reads from element `n` and the unknown element, and stores to index `n`.
   *
   * For large values of `n` this has no result - see `ContentSet::arrayElementFromInt`.
   */
  ContentSet arrayElementKnown(int n) { result = MkArrayElementKnown(n) }

  /**
   * The singleton content set describing array elements stored at an unknown index.
   */
  ContentSet arrayElementUnknown() { result = singleton(MkArrayElementUnknown()) }

  /**
   * Gets a content set describing array elements at index `bound` or greater.
   *
   * If `bound` is too large, it is truncated to the greatest lower bound we can represent.
   */
  bindingset[bound]
  ContentSet arrayElementLowerBoundFromInt(int bound) {
    result = arrayElementLowerBound(bound.minimum(getMaxPreciseArrayIndex() + 1))
  }

  /**
   * Gets the content set describing an access to array index `n`.
   *
   * If `n` is too large, it is truncated to the greatest lower bound we can represent.
   */
  bindingset[n]
  ContentSet arrayElementFromInt(int n) {
    result = arrayElementKnown(n)
    or
    not exists(arrayElementKnown(n)) and
    result = arrayElementLowerBoundFromInt(n)
  }

  /** Gets the content set describing the keys of a `Map` object. */
  ContentSet mapKey() { result = singleton(MkMapKey()) }

  /** Gets the content set describing the values of a `Map` object stored with an unknown key. */
  ContentSet mapValueWithUnknownKey() { result = singleton(MkMapValueWithUnknownKey()) }

  /**
   * Gets the content set describing the value of a `Map` object stored with the given known `key`.
   *
   * This is the singleton set for the known-key content, so it does not by itself
   * refer to values stored with an unknown key.
   *
   * This has no result if `key` is not one of the keys we track precisely. See also `mapValueFromKey`.
   */
  ContentSet mapValueWithKnownKeyStrict(string key) {
    result = singleton(MkMapValueWithKnownKey(key))
  }

  /**
   * Gets the content set describing an access to a map value with the given `key`.
   *
   * This content set also reads from a value stored with an unknown key. Use `mapValueWithKnownKeyStrict` to strictly
   * refer to known keys.
   *
   * This is the dedicated non-singleton set `MkMapValueKnown`, whose read and store
   * contents are defined in `ContentSet.getAReadContent` and `ContentSet.getAStoreContent`.
   *
   * This has no result if `key` is not one of the keys we track precisely. See also `mapValueFromKey`.
   */
  ContentSet mapValueWithKnownKey(string key) { result = MkMapValueKnown(key) }

  /** Gets the content set describing all values in a map (with known or unknown key). */
  ContentSet mapValueAll() { result = MkMapValueAll() }

  /**
   * Gets the content set describing the value in a `Map` object stored at the given `key`.
   *
   * If `key` is not one of the keys we track precisely, this is mapped to the unknown key instead.
   */
  bindingset[key]
  ContentSet mapValueFromKey(string key) {
    result = mapValueWithKnownKey(key)
    or
    not exists(mapValueWithKnownKey(key)) and
    result = mapValueWithUnknownKey()
  }

  /** Gets the content set describing the elements of a `Set` object. */
  ContentSet setElement() { result = singleton(MkSetElement()) }

  /** Gets the content set describing the elements of an iterator object. */
  ContentSet iteratorElement() { result = singleton(MkIteratorElement()) }

  /** Gets the content set describing the exception to be thrown when attempting to iterate over the given value. */
  ContentSet iteratorError() { result = singleton(MkIteratorError()) }

  /**
   * Gets a content set that reads from all ordinary properties.
   *
   * This includes array elements, but not the contents of `Map`, `Set`, `Promise`, or iterator objects.
   *
   * This content set has no effect if used in a store step.
   */
  ContentSet anyProperty() { result = MkAnyProperty() }

  /**
   * Gets a content set corresponding to the pseudo-property `propertyName`.
   */
  pragma[nomagic]
  private ContentSet fromLegacyPseudoProperty(string propertyName) {
    propertyName = Promises::valueProp() and
    result = promiseValue()
    or
    propertyName = Promises::errorProp() and
    result = promiseError()
    or
    propertyName = DataFlow::PseudoProperties::arrayElement() and
    result = arrayElement()
    or
    propertyName = DataFlow::PseudoProperties::iteratorElement() and
    result = iteratorElement()
    or
    propertyName = DataFlow::PseudoProperties::setElement() and
    result = setElement()
    or
    propertyName = DataFlow::PseudoProperties::mapValueAll() and
    result = mapValueAll()
    or
    propertyName = DataFlow::PseudoProperties::mapValueUnknownKey() and
    result = mapValueWithUnknownKey()
    or
    exists(string key |
      propertyName = DataFlow::PseudoProperties::mapValueKey(key) and
      result = mapValueWithKnownKey(key)
    )
  }

  /**
   * Gets the content set corresponding to the given property name, where legacy pseudo-properties
   * are mapped to their corresponding content sets (which are no longer seen as property names).
   */
  bindingset[propertyName]
  ContentSet fromLegacyProperty(string propertyName) {
    result = fromLegacyPseudoProperty(propertyName)
    or
    not exists(fromLegacyPseudoProperty(propertyName)) and
    (
      // In case a map-value key was contributed via a SharedFlowStep, but we don't have a ContentSet for it,
      // convert it to the unknown key.
      if DataFlow::PseudoProperties::isMapValueKey(propertyName)
      then result = mapValueWithUnknownKey()
      else result = property(propertyName)
    )
  }

  /**
   * Gets a content set that reads from all captured variables stored on a function.
   */
  ContentSet anyCapturedContent() { result = Private::MkAnyCapturedContent() }
}
}

View File

@@ -0,0 +1,56 @@
private import javascript
private import codeql.dataflow.internal.DataFlowImplConsistency
private import sharedlib.DataFlowArg
private import semmle.javascript.dataflow.internal.DataFlowPrivate
private import semmle.javascript.dataflow.internal.DataFlowNode
private module ConsistencyConfig implements InputSig<Location, JSDataFlow> {
  /** Holds if `node` is constructed directly from an AST node for which `isAmbient()` holds. */
  private predicate isAmbientNode(DataFlow::Node node) {
    exists(AstNode ambient |
      ambient.isAmbient() and
      (
        node = TDestructuredModuleImportNode(ambient) or
        node = TExceptionalInvocationReturnNode(ambient) or
        node = TExprPostUpdateNode(ambient) or
        node = TExceptionalFunctionReturnNode(ambient) or
        node = TFunctionSelfReferenceNode(ambient) or
        node = TPropNode(ambient) or
        node = TReflectiveParametersNode(ambient) or
        node = TThisNode(ambient) or
        node = TValueNode(ambient)
      )
    )
  }

  /** Holds if `n` is a kind of node that is allowed to lack a location. */
  predicate missingLocationExclude(DataFlow::Node n) {
    n = DataFlow::globalAccessPathRootPseudoNode()
    or
    n instanceof GenericSynthesizedNode
    or
    n instanceof FlowSummaryDefaultExceptionalReturn
    or
    n instanceof FlowSummaryDynamicParameterArrayNode
    or
    n instanceof FlowSummaryIntermediateAwaitStoreNode
    or
    n instanceof FlowSummaryNode
  }

  /** Holds if `n` is exempt from the unique-location check; same nodes as `missingLocationExclude`. */
  predicate uniqueNodeLocationExclude(DataFlow::Node n) { missingLocationExclude(n) }

  /** Holds if `n` is exempt from the unique-enclosing-callable check, which is the case for ambient nodes. */
  predicate uniqueEnclosingCallableExclude(DataFlow::Node n) { isAmbientNode(n) }

  /** Holds if `call` is exempt from the unique-enclosing-callable check because its underlying node is ambient. */
  predicate uniqueCallEnclosingCallableExclude(DataFlowCall call) {
    isAmbientNode(call.asAccessorCall())
    or
    isAmbientNode(call.asOrdinaryCall())
  }

  /** Holds if argument `node` is not required to have a post-update node. */
  predicate argHasPostUpdateExclude(ArgumentNode node) {
    // Side-effects directly on these can't propagate back to the caller, and for longer access paths it's too imprecise
    node instanceof TDynamicArgumentArrayNode
    or
    node instanceof TStaticArgumentArrayNode
  }

  /** Holds if `node` is exempt from the reverse-read check. */
  predicate reverseReadExclude(DataFlow::Node node) {
    node instanceof FlowSummaryDynamicParameterArrayNode
  }
}
/**
 * The shared data-flow consistency checks, instantiated for `JSDataFlow` and `JSTaintFlow`
 * with the exclusions defined in `ConsistencyConfig`.
 */
module Consistency = MakeConsistency<Location, JSDataFlow, JSTaintFlow, ConsistencyConfig>;

View File

@@ -5,38 +5,157 @@
*/
private import javascript
private import codeql.util.Boolean
private import semmle.javascript.dataflow.internal.AdditionalFlowInternal
private import semmle.javascript.dataflow.internal.Contents::Private
private import semmle.javascript.dataflow.internal.sharedlib.DataFlowImplCommon as DataFlowImplCommon
private import semmle.javascript.dataflow.internal.sharedlib.Ssa as Ssa2
private import semmle.javascript.dataflow.internal.DataFlowPrivate as DataFlowPrivate
private import semmle.javascript.dataflow.internal.sharedlib.FlowSummaryImpl as FlowSummaryImpl
private import semmle.javascript.dataflow.internal.FlowSummaryPrivate as FlowSummaryPrivate
private import semmle.javascript.dataflow.internal.VariableCapture as VariableCapture
private import semmle.javascript.dataflow.internal.VariableOrThis
/**
 * Holds the cached `TNode` newtype together with a `Backref` module that refers
 * back to predicates defined on `DataFlow::Node` and `Content`.
 */
cached
private module Cached {
/**
 * Gets a content for one of the array indices 0 through 10, or the unknown array element.
 *
 * Used below to bound the `storeContent` column of `TRestParameterStoreNode`
 * and `TDynamicArgumentStoreNode`.
 */
private Content dynamicArgumentsContent() {
result.asArrayIndex() = [0 .. 10]
or
result.isUnknownArrayElement()
}
/**
 * The raw data type underlying `DataFlow::Node`.
 */
cached
newtype TNode =
TValueNode(AST::ValueNode nd) or
/** An SSA node from the legacy SSA library */
TSsaDefNode(SsaDefinition d) or
/** Use of a variable or 'this', with flow from a post-update node (from an earlier use) */
TSsaUseNode(ControlFlowNode use) { use = any(Ssa2::SsaConfig::SourceVariable v).getAUse() } or
/** Phi-read node (new SSA library). Ordinary phi nodes are represented by TSsaDefNode. */
TSsaPhiReadNode(Ssa2::PhiReadNode phi) or
/** Input to a phi node (new SSA library) */
TSsaInputNode(Ssa2::SsaInputNode input) or
TCapturedVariableNode(LocalVariable v) { v.isCaptured() } or
TPropNode(@property p) or
TRestPatternNode(DestructuringPattern dp, Expr rest) { rest = dp.getRest() } or
TElementPatternNode(ArrayPattern ap, Expr p) { p = ap.getElement(_) } or
TElementNode(ArrayExpr arr, Expr e) { e = arr.getAnElement() } or
TReflectiveCallNode(MethodCallExpr ce, string kind) {
ce.getMethodName() = kind and
(kind = "call" or kind = "apply")
} or
TThisNode(StmtContainer f) { f.(Function).getThisBinder() = f or f instanceof TopLevel } or
TFunctionSelfReferenceNode(Function f) or
TStaticArgumentArrayNode(InvokeExpr node) or
TDynamicArgumentArrayNode(InvokeExpr node) { node.isSpreadArgument(_) } or
TStaticParameterArrayNode(Function f) {
f.getAParameter().isRestParameter() or f.usesArgumentsObject()
} or
TDynamicParameterArrayNode(Function f) or
/** Data about to be stored in the rest parameter object. Needed for shifting array indices. */
TRestParameterStoreNode(Function f, Content storeContent) {
f.getRestParameter().getIndex() > 0 and
storeContent = dynamicArgumentsContent()
} or
/** Data about to be stored in the dynamic argument array of an invocation. Needed for shifting array indices. */
TDynamicArgumentStoreNode(InvokeExpr invoke, Content storeContent) {
invoke.isSpreadArgument(_) and
storeContent = dynamicArgumentsContent()
} or
TApplyCallTaintNode(MethodCallExpr node) {
node.getMethodName() = "apply" and exists(node.getArgument(1))
} or
TDestructuredModuleImportNode(ImportDeclaration decl) {
exists(decl.getASpecifier().getImportedName())
} or
THtmlAttributeNode(HTML::Attribute attr) or
TXmlAttributeNode(XmlAttribute attr) or
TFunctionReturnNode(Function f) or
TExceptionalFunctionReturnNode(Function f) or
TExceptionalInvocationReturnNode(InvokeExpr e) or
TGlobalAccessPathRoot() or
TTemplatePlaceholderTag(Templating::TemplatePlaceholderTag tag) or
TReflectiveParametersNode(Function f) { f.usesArgumentsObject() } or
TExprPostUpdateNode(AST::ValueNode e) {
e = any(InvokeExpr invoke).getAnArgument() or
e = any(PropAccess access).getBase() or
e = any(DestructuringPattern pattern) or
e = any(InvokeExpr invoke).getCallee() or
// We have read steps out of the await operand, so it technically needs a post-update
e = any(AwaitExpr a).getOperand() or
e = any(Function f) or // functions are passed as their own self-reference argument
// The RHS of an assignment can be an argument to a setter-call, so it needs a post-update node
e = any(Assignment asn | asn.getTarget() instanceof PropAccess).getRhs()
} or
TNewCallThisArgument(NewExpr e) or
TImplicitThisUse(ImplicitThisUse use, Boolean isPost) or
TFlowSummaryNode(FlowSummaryImpl::Private::SummaryNode sn) or
TFlowSummaryDynamicParameterArrayNode(FlowSummaryImpl::Public::SummarizedCallable callable) or
TFlowSummaryIntermediateAwaitStoreNode(FlowSummaryImpl::Private::SummaryNode sn) {
// NOTE: This dependency goes through the 'Steps' module whose instantiation depends on the call graph,
// but the specific predicate we're referring to does not use that information.
// So it doesn't cause negative recursion but it might look a bit surprising.
FlowSummaryPrivate::Steps::summaryStoreStep(sn, MkAwaited(), _)
} or
TFlowSummaryDefaultExceptionalReturn(FlowSummaryImpl::Public::SummarizedCallable callable) {
not DataFlowPrivate::mentionsExceptionalReturn(callable)
} or
TSynthCaptureNode(VariableCapture::VariableCaptureOutput::SynthesizedCaptureNode node) or
TGenericSynthesizedNode(AstNode node, string tag, DataFlowPrivate::DataFlowCallable container) {
any(AdditionalFlowInternal flow).needsSynthesizedNode(node, tag, container)
} or
// This branch is never populated (`none()`); it exists only for the dependency described below.
TForbiddenRecursionGuard() {
none() and
// We want to prune irrelevant models before materialising data flow nodes, so types contributed
// directly from CodeQL must expose their pruning info without depending on data flow nodes.
(any(ModelInput::TypeModel tm).isTypeUsed("") implies any())
}
// NOTE(review): presumably this pulls the referenced predicates into the same cached stage as `TNode` - confirm.
cached
private module Backref {
cached
predicate backref() {
DataFlowImplCommon::forceCachingInSameStage() or
exists(any(DataFlow::Node node).toString()) or
exists(any(DataFlow::Node node).getContainer()) or
any(DataFlow::Node node).hasLocationInfo(_, _, _, _, _) or
exists(any(Content c).toString())
}
}
}
import Cached
/**
 * The union of the `TNode` branches that `EarlyStageNode` is built from.
 *
 * Branches that are not listed (for example `TFlowSummaryNode` and the other
 * flow-summary branches) are not part of this type.
 */
private class TEarlyStageNode =
TValueNode or TSsaDefNode or TCapturedVariableNode or TPropNode or TRestPatternNode or
TElementPatternNode or TElementNode or TReflectiveCallNode or TThisNode or
TFunctionSelfReferenceNode or TDestructuredModuleImportNode or THtmlAttributeNode or
TFunctionReturnNode or TExceptionalFunctionReturnNode or TExceptionalInvocationReturnNode or
TGlobalAccessPathRoot or TTemplatePlaceholderTag or TReflectiveParametersNode or
TExprPostUpdateNode or TNewCallThisArgument or TStaticArgumentArrayNode or
TDynamicArgumentArrayNode or TStaticParameterArrayNode or TDynamicParameterArrayNode or
TImplicitThisUse;
/**
* The raw data type underlying `DataFlow::Node`.
* A data-flow node that is not a flow summary node.
*
* This node exists to avoid an unwanted dependency on flow summaries in some parts of the codebase
* that should not depend on them.
*
* In particular, this dependency chain must not result in negative recursion:
* - Flow summaries can only be created after pruning irrelevant flow summaries
* - To prune irrelevant flow summaries, we must know which packages are imported
* - To know which packages are imported, module systems must be evaluated
* - The AMD and NodeJS module systems rely on data flow to find calls to `require` and similar.
* These module systems must therefore use `EarlyStageNode` instead of `DataFlow::Node`.
*/
cached
newtype TNode =
TValueNode(AST::ValueNode nd) or
TSsaDefNode(SsaDefinition d) or
TCapturedVariableNode(LocalVariable v) { v.isCaptured() } or
TPropNode(@property p) or
TRestPatternNode(DestructuringPattern dp, Expr rest) { rest = dp.getRest() } or
TElementPatternNode(ArrayPattern ap, Expr p) { p = ap.getElement(_) } or
TElementNode(ArrayExpr arr, Expr e) { e = arr.getAnElement() } or
TReflectiveCallNode(MethodCallExpr ce, string kind) {
ce.getMethodName() = kind and
(kind = "call" or kind = "apply")
} or
TThisNode(StmtContainer f) { f.(Function).getThisBinder() = f or f instanceof TopLevel } or
TDestructuredModuleImportNode(ImportDeclaration decl) {
exists(decl.getASpecifier().getImportedName())
} or
THtmlAttributeNode(HTML::Attribute attr) or
TXmlAttributeNode(XmlAttribute attr) or
TFunctionReturnNode(Function f) or
TExceptionalFunctionReturnNode(Function f) or
TExceptionalInvocationReturnNode(InvokeExpr e) or
TGlobalAccessPathRoot() or
TTemplatePlaceholderTag(Templating::TemplatePlaceholderTag tag) or
TReflectiveParametersNode(Function f) or
TForbiddenRecursionGuard() {
none() and
// We want to prune irrelevant models before materialising data flow nodes, so types contributed
// directly from CodeQL must expose their pruning info without depending on data flow nodes.
(any(ModelInput::TypeModel tm).isTypeUsed("") implies any())
}
class EarlyStageNode extends TEarlyStageNode {
  /** Gets the string form of this node, as given by the corresponding `DataFlow::Node`. */
  string toString() {
    exists(DataFlow::Node self | self = this | result = self.toString())
  }

  /** Gets the location of this node, as given by the corresponding `DataFlow::Node`. */
  Location getLocation() {
    exists(DataFlow::Node self | self = this | result = self.getLocation())
  }
}

File diff suppressed because it is too large Load Diff

View File

@@ -5,7 +5,7 @@
*/
import javascript
import semmle.javascript.dataflow.Configuration
deprecated import semmle.javascript.dataflow.Configuration
import semmle.javascript.dataflow.internal.CallGraphs
private import semmle.javascript.internal.CachedStages
@@ -30,20 +30,36 @@ predicate returnExpr(Function f, DataFlow::Node source, DataFlow::Node sink) {
not f = any(SetterMethodDeclaration decl).getBody()
}
/**
 * Holds if `pred` is the post-update node of some node and `succ` is a local source of
 * that same (pre-update) node.
 *
 * With this step in place, store steps can be generated with `getPostUpdateNode()` where
 * `getALocalSource()` was used before, and the resulting step works in both data flow analyses.
 */
pragma[nomagic]
private predicate legacyPostUpdateStep(DataFlow::Node pred, DataFlow::Node succ) {
  exists(DataFlow::Node preUpdate | pred = preUpdate.getPostUpdateNode() |
    succ = preUpdate.getALocalSource()
  )
}
/**
* Holds if data can flow in one step from `pred` to `succ`, taking
* additional steps from the configuration into account.
*/
pragma[inline]
predicate localFlowStep(
deprecated predicate localFlowStep(
DataFlow::Node pred, DataFlow::Node succ, DataFlow::Configuration configuration,
FlowLabel predlbl, FlowLabel succlbl
) {
pred = succ.getAPredecessor() and predlbl = succlbl
or
DataFlow::SharedFlowStep::step(pred, succ) and predlbl = succlbl
legacyPostUpdateStep(pred, succ) and predlbl = succlbl
or
DataFlow::SharedFlowStep::step(pred, succ, predlbl, succlbl)
DataFlow::LegacyFlowStep::step(pred, succ) and predlbl = succlbl
or
DataFlow::LegacyFlowStep::step(pred, succ, predlbl, succlbl)
or
exists(boolean vp | configuration.isAdditionalFlowStep(pred, succ, vp) |
vp = true and
@@ -529,9 +545,9 @@ class Boolean extends boolean {
/**
* A summary of an inter-procedural data flow path.
*/
newtype TPathSummary =
deprecated newtype TPathSummary =
/** A summary of an inter-procedural data flow path. */
MkPathSummary(Boolean hasReturn, Boolean hasCall, FlowLabel start, FlowLabel end)
deprecated MkPathSummary(Boolean hasReturn, Boolean hasCall, FlowLabel start, FlowLabel end)
/**
* A summary of an inter-procedural data flow path.
@@ -544,7 +560,7 @@ newtype TPathSummary =
* We only want to build properly matched call/return sequences, so if a path has both
* call steps and return steps, all return steps must precede all call steps.
*/
class PathSummary extends TPathSummary {
deprecated class PathSummary extends TPathSummary {
Boolean hasReturn;
Boolean hasCall;
FlowLabel start;
@@ -618,7 +634,7 @@ class PathSummary extends TPathSummary {
}
}
module PathSummary {
deprecated module PathSummary {
/**
* Gets a summary describing a path without any calls or returns.
*/

View File

@@ -0,0 +1,254 @@
/**
* Provides JS specific classes and predicates for defining flow summaries.
*/
private import javascript
private import semmle.javascript.dataflow.internal.DataFlowPrivate
private import semmle.javascript.dataflow.internal.Contents::Private
private import sharedlib.DataFlowImplCommon
private import sharedlib.FlowSummaryImpl::Private as Private
private import sharedlib.FlowSummaryImpl::Public
private import codeql.dataflow.internal.AccessPathSyntax as AccessPathSyntax
private import semmle.javascript.internal.flow_summaries.ExceptionFlow
/**
 * A class of callables that are candidates for flow summary modeling.
 *
 * This is an alias for `string`.
 */
class SummarizedCallableBase = string;
/**
 * Gets the position used to represent a callback itself, if any.
 *
 * This is the position for which `isFunctionSelfReference()` holds.
 */
ArgumentPosition callbackSelfParameterPosition() {
  exists(ArgumentPosition self | self.isFunctionSelfReference() | result = self)
}
/**
 * Gets the content set denoted by `Awaited[arg]`, where `arg` is either `value` or `error`.
 */
private ContentSet getPromiseContent(string arg) {
  result = ContentSet::promiseError() and arg = "error"
  or
  result = ContentSet::promiseValue() and arg = "value"
}
/**
 * Holds if `operand` is the textual form of position `pos`:
 * the index of a positional position, `this`, `function` for the function
 * self-reference, or `N..` for a positional lower bound `N`.
 */
pragma[nomagic]
private predicate positionName(ParameterPosition pos, string operand) {
  pos.isFunctionSelfReference() and operand = "function"
  or
  pos.isThis() and operand = "this"
  or
  exists(int lower | lower = pos.asPositionalLowerBound() | operand = lower + "..")
  or
  exists(int index | index = pos.asPositional() | operand = index.toString())
}
/**
 * Holds if `operand` is shorthand that expands to position `pos`. Only used for parsing.
 */
bindingset[operand]
private predicate desugaredPositionName(ParameterPosition pos, string operand) {
  // parse closed intervals
  AccessPathSyntax::parseInt(operand) = pos.asPositional()
  or
  // `any` stands for the positional lower bound 0
  pos.asPositionalLowerBound() = 0 and
  operand = "any"
}
/**
 * Gets the MaD token name for content set `cs`, with `arg` being the token's argument
 * (the empty string when the token takes no argument).
 *
 * Has no result for content sets without a dedicated token; `encodeContent` handles those.
 */
private string encodeContentAux(ContentSet cs, string arg) {
  result = "ArrayElement" and
  (
    cs = ContentSet::arrayElement() and arg = ""
    or
    cs = ContentSet::arrayElementUnknown() and arg = "?"
    or
    exists(int lower |
      cs = ContentSet::arrayElementLowerBound(lower) and
      lower > 0 and // n=0 is just 'ArrayElement'
      arg = lower + ".."
    )
    or
    exists(int index |
      cs = ContentSet::arrayElementKnown(index) and
      arg = index.toString()
    )
    or
    // A property whose name converts to a non-negative integer is written with a trailing `!`
    exists(int index |
      index = cs.asPropertyName().toInt() and
      index >= 0 and
      arg = index + "!"
    )
  )
  or
  arg = "" and
  (
    result = "MapValue" and cs = ContentSet::mapValueAll()
    or
    result = "MapKey" and cs = ContentSet::mapKey()
    or
    result = "SetElement" and cs = ContentSet::setElement()
    or
    result = "IteratorElement" and cs = ContentSet::iteratorElement()
    or
    result = "IteratorError" and cs = ContentSet::iteratorError()
    or
    result = "Awaited" and cs = MkAwaited()
    or
    result = "AnyMemberDeep" and cs = MkAnyPropertyDeep()
    or
    result = "ArrayElementDeep" and cs = MkArrayElementDeep()
  )
  or
  result = "Awaited" and cs = getPromiseContent(arg)
  or
  result = "OptionalStep" and cs = MkOptionalStep(arg)
  or
  result = "OptionalBarrier" and cs = MkOptionalBarrier(arg)
}
/**
 * Gets the textual representation of content `cs` used in MaD.
 *
 * `arg` will be printed in square brackets (`[]`) after the result, unless
 * `arg` is the empty string.
 *
 * Content sets without a dedicated token are encoded as `Member`, with the string
 * form of their singleton content as the argument.
 */
string encodeContent(ContentSet cs, string arg) {
  if exists(encodeContentAux(cs, _))
  then result = encodeContentAux(cs, arg)
  else (
    result = "Member" and
    arg = cs.asSingleton().toString()
  )
}
/**
 * Gets the text that denotes parameter position `pos` in flow summaries.
 *
 * The text `any` is never produced.
 */
string encodeParameterPosition(ParameterPosition pos) {
  result != "any" and
  positionName(pos, result)
}
/**
 * Gets the text that denotes argument position `pos` in flow summaries.
 *
 * The text `any` is never produced.
 */
string encodeArgumentPosition(ArgumentPosition pos) {
  result != "any" and
  positionName(pos, result)
}
/**
 * Gets the return kind that the specification `"ReturnValue"` refers to, namely the normal return kind.
 *
 * Also references `Stage::ref()`, which always holds.
 */
ReturnKind getStandardReturnValueKind() {
  Stage::ref() and
  result = MkNormalReturnKind()
}
private module FlowSummaryStepInput implements Private::StepsInputSig {
  /**
   * Gets a call to summarized callable `sc`, found through any of the
   * `getACall`, `getACallSimple`, or `getACallStage2` predicates of the library callable.
   */
  DataFlowCall getACall(SummarizedCallable sc) {
    exists(LibraryCallable lib | lib = sc |
      result.asOrdinaryCall() = lib.getACall()
      or
      result.asOrdinaryCall() = lib.getACallSimple()
      or
      result.asOrdinaryCall() = lib.(LibraryCallableInternal).getACallStage2()
    )
  }
}
/** The shared flow-summary steps, instantiated with `FlowSummaryStepInput`. */
module Steps = Private::Steps<FlowSummaryStepInput>;
/** The shared `RenderSummarizedCallable` module, instantiated with `FlowSummaryStepInput`. */
module RenderSummarizedCallable = Private::RenderSummarizedCallable<FlowSummaryStepInput>;
/** Alias for `Private::AccessPath`. */
class AccessPath = Private::AccessPath;
/** Alias for `Private::AccessPathToken`. */
class AccessPathToken = Private::AccessPathToken;
/**
 * Gets the textual representation of return kind `rk` used in MaD.
 *
 * `arg` will be printed in square brackets (`[]`) after the result, unless
 * `arg` is the empty string.
 *
 * The result is always `ReturnValue`; the exceptional return kind uses the argument `exception`.
 */
string encodeReturn(ReturnKind rk, string arg) {
  rk = MkNormalReturnKind() and arg = "" and result = "ReturnValue"
  or
  rk = MkExceptionalReturnKind() and arg = "exception" and result = "ReturnValue"
}
/**
 * Gets the textual representation of without-content `c` used in MaD, which is
 * the encoding of `c` prefixed by `Without`.
 *
 * `arg` will be printed in square brackets (`[]`) after the result, unless
 * `arg` is the empty string.
 */
string encodeWithoutContent(ContentSet c, string arg) {
  exists(string encoded | encoded = encodeContent(c, arg) | result = "Without" + encoded)
}
/**
 * Gets the textual representation of with-content `c` used in MaD, which is
 * the encoding of `c` prefixed by `With`.
 *
 * `arg` will be printed in square brackets (`[]`) after the result, unless
 * `arg` is the empty string.
 */
string encodeWithContent(ContentSet c, string arg) {
  exists(string encoded | encoded = encodeContent(c, arg) | result = "With" + encoded)
}
/**
 * Gets a parameter position corresponding to the unknown token `token`.
 *
 * A token is unknown when `encodeParameterPosition` cannot produce it, for example
 * because one token stands for several parameter positions, such as a range `0..n`.
 *
 * Only tokens named `Argument` are decoded here.
 */
bindingset[token]
ParameterPosition decodeUnknownParameterPosition(AccessPathSyntax::AccessPathTokenBase token) {
  exists(string operand |
    token.getName() = "Argument" and
    operand = token.getAnArgument()
  |
    desugaredPositionName(result, operand)
  )
}
/**
 * Gets an argument position corresponding to the unknown token `token`.
 *
 * A token is unknown when `encodeArgumentPosition` cannot produce it, for example
 * because one token stands for several argument positions, such as a range `0..n`.
 *
 * Only tokens named `Parameter` are decoded here.
 */
bindingset[token]
ArgumentPosition decodeUnknownArgumentPosition(AccessPathSyntax::AccessPathTokenBase token) {
  exists(string operand |
    token.getName() = "Parameter" and
    operand = token.getAnArgument()
  |
    desugaredPositionName(result, operand)
  )
}
/**
 * Gets a content corresponding to the unknown token `token`.
 *
 * The token is unknown because it could not be reverse-encoded using the
 * `encodeContent` predicate.
 *
 * This implementation never has a result.
 */
bindingset[token]
ContentSet decodeUnknownContent(AccessPathSyntax::AccessPathTokenBase token) { none() }
/**
 * Gets a return kind corresponding to the unknown token `token`.
 *
 * The token is unknown because it could not be reverse-encoded using the
 * `encodeReturn` predicate.
 *
 * This implementation never has a result.
 */
bindingset[token]
ReturnKind decodeUnknownReturn(AccessPathSyntax::AccessPathTokenBase token) { none() }
/**
 * Gets a without-content corresponding to the unknown token `token`.
 *
 * The token is unknown because it could not be reverse-encoded using the
 * `encodeWithoutContent` predicate.
 *
 * This implementation never has a result.
 */
bindingset[token]
ContentSet decodeUnknownWithoutContent(AccessPathSyntax::AccessPathTokenBase token) { none() }
/**
 * Gets a with-content corresponding to the unknown token `token`.
 *
 * The token is unknown because it could not be reverse-encoded using the
 * `encodeWithContent` predicate.
 *
 * This implementation never has a result.
 */
bindingset[token]
ContentSet decodeUnknownWithContent(AccessPathSyntax::AccessPathTokenBase token) { none() }
/**
 * A pair of cached marker predicates for this file.
 *
 * NOTE(review): presumably this is the usual `ref()`/`backref()` arrangement for grouping
 * cached predicates into one evaluation stage - confirm.
 */
cached
module Stage {
/** Always holds. Referenced from `getStandardReturnValueKind`. */
cached
predicate ref() { 1 = 1 }
/** Holds if `optionalStep` has any result; refers back to that predicate. */
cached
predicate backref() { optionalStep(_, _, _) }
}

View File

@@ -46,6 +46,7 @@ class PreCallGraphStep extends Unit {
}
}
cached
module PreCallGraphStep {
/**
* Holds if there is a step from `pred` to `succ`.
@@ -83,6 +84,7 @@ module PreCallGraphStep {
/**
* Holds if there is a step from the `loadProp` property of `pred` to the `storeProp` property in `succ`.
*/
cached
predicate loadStoreStep(
DataFlow::Node pred, DataFlow::SourceNode succ, string loadProp, string storeProp
) {
@@ -90,6 +92,91 @@ module PreCallGraphStep {
}
}
/**
 * Internal extension point for adding legacy flow edges prior to call graph construction
 * and type tracking, but where the steps should not be used by the new data flow library.
 *
 * Steps added here will be added to both `LegacyFlowStep` and `SharedTypeTrackingStep`.
 *
 * Contributing steps that rely on type tracking will lead to negative recursion.
 *
 * Every member predicate defaults to `none()`; a subclass overrides the ones it
 * wants to contribute tuples to.
 */
class LegacyPreCallGraphStep extends Unit {
/**
 * Holds if there is a step from `pred` to `succ`.
 */
predicate step(DataFlow::Node pred, DataFlow::Node succ) { none() }
/**
 * Holds if there is a step from `pred` into the `prop` property of `succ`.
 */
predicate storeStep(DataFlow::Node pred, DataFlow::SourceNode succ, string prop) { none() }
/**
 * Holds if there is a step from the `prop` property of `pred` to `succ`.
 */
predicate loadStep(DataFlow::Node pred, DataFlow::Node succ, string prop) { none() }
/**
 * Holds if there is a step from the `prop` property of `pred` to the same property in `succ`.
 */
predicate loadStoreStep(DataFlow::Node pred, DataFlow::SourceNode succ, string prop) { none() }
/**
 * Holds if there is a step from the `loadProp` property of `pred` to the `storeProp` property in `succ`.
 */
predicate loadStoreStep(
DataFlow::Node pred, DataFlow::SourceNode succ, string loadProp, string storeProp
) {
none()
}
}
/**
 * Cached union of the steps contributed by all instances of the
 * `LegacyPreCallGraphStep` class.
 */
cached
module LegacyPreCallGraphStep {
  /**
   * Holds if some contributor provides a step from `pred` to `succ`.
   */
  cached
  predicate step(DataFlow::Node pred, DataFlow::Node succ) {
    exists(LegacyPreCallGraphStep contributor | contributor.step(pred, succ))
  }

  /**
   * Holds if some contributor provides a step from `pred` into the `prop` property of `succ`.
   */
  cached
  predicate storeStep(DataFlow::Node pred, DataFlow::SourceNode succ, string prop) {
    exists(LegacyPreCallGraphStep contributor | contributor.storeStep(pred, succ, prop))
  }

  /**
   * Holds if some contributor provides a step from the `prop` property of `pred` to `succ`.
   */
  cached
  predicate loadStep(DataFlow::Node pred, DataFlow::Node succ, string prop) {
    exists(LegacyPreCallGraphStep contributor | contributor.loadStep(pred, succ, prop)) and
    // Reference to the type-tracking stage marker, kept from the original definition.
    Stages::TypeTracking::ref()
  }

  /**
   * Holds if some contributor provides a step from the `prop` property of `pred`
   * to the same property of `succ`.
   */
  cached
  predicate loadStoreStep(DataFlow::Node pred, DataFlow::SourceNode succ, string prop) {
    exists(LegacyPreCallGraphStep contributor | contributor.loadStoreStep(pred, succ, prop))
  }

  /**
   * Holds if some contributor provides a step from the `loadProp` property of `pred`
   * to the `storeProp` property of `succ`.
   */
  cached
  predicate loadStoreStep(
    DataFlow::Node pred, DataFlow::SourceNode succ, string loadProp, string storeProp
  ) {
    exists(LegacyPreCallGraphStep contributor |
      contributor.loadStoreStep(pred, succ, loadProp, storeProp)
    )
  }
}
private class SharedFlowStepFromPreCallGraph extends DataFlow::SharedFlowStep {
override predicate step(DataFlow::Node pred, DataFlow::Node succ) {
PreCallGraphStep::step(pred, succ)
@@ -114,26 +201,60 @@ private class SharedFlowStepFromPreCallGraph extends DataFlow::SharedFlowStep {
}
}
/**
 * Contributes every step collected in the `LegacyPreCallGraphStep` module
 * to `DataFlow::LegacyFlowStep`, one override per step kind.
 */
private class LegacyFlowStepFromPreCallGraph extends DataFlow::LegacyFlowStep {
override predicate step(DataFlow::Node pred, DataFlow::Node succ) {
LegacyPreCallGraphStep::step(pred, succ)
}
override predicate storeStep(DataFlow::Node pred, DataFlow::SourceNode succ, string prop) {
LegacyPreCallGraphStep::storeStep(pred, succ, prop)
}
override predicate loadStep(DataFlow::Node pred, DataFlow::Node succ, string prop) {
LegacyPreCallGraphStep::loadStep(pred, succ, prop)
}
// `succ` is declared as `DataFlow::Node` here, while the forwarded predicate takes a `DataFlow::SourceNode`.
override predicate loadStoreStep(DataFlow::Node pred, DataFlow::Node succ, string prop) {
LegacyPreCallGraphStep::loadStoreStep(pred, succ, prop)
}
override predicate loadStoreStep(
DataFlow::Node pred, DataFlow::Node succ, string loadProp, string storeProp
) {
LegacyPreCallGraphStep::loadStoreStep(pred, succ, loadProp, storeProp)
}
}
/**
 * Contributes to `DataFlow::SharedTypeTrackingStep` the union of the steps from
 * the `PreCallGraphStep` and `LegacyPreCallGraphStep` modules.
 */
private class SharedTypeTrackingStepFromPreCallGraph extends DataFlow::SharedTypeTrackingStep {
  override predicate step(DataFlow::Node pred, DataFlow::Node succ) {
    LegacyPreCallGraphStep::step(pred, succ)
    or
    PreCallGraphStep::step(pred, succ)
  }

  override predicate storeStep(DataFlow::Node pred, DataFlow::SourceNode succ, string prop) {
    LegacyPreCallGraphStep::storeStep(pred, succ, prop)
    or
    PreCallGraphStep::storeStep(pred, succ, prop)
  }

  override predicate loadStep(DataFlow::Node pred, DataFlow::Node succ, string prop) {
    LegacyPreCallGraphStep::loadStep(pred, succ, prop)
    or
    PreCallGraphStep::loadStep(pred, succ, prop)
  }

  override predicate loadStoreStep(DataFlow::Node pred, DataFlow::SourceNode succ, string prop) {
    LegacyPreCallGraphStep::loadStoreStep(pred, succ, prop)
    or
    PreCallGraphStep::loadStoreStep(pred, succ, prop)
  }

  override predicate loadStoreStep(
    DataFlow::Node pred, DataFlow::SourceNode succ, string loadProp, string storeProp
  ) {
    LegacyPreCallGraphStep::loadStoreStep(pred, succ, loadProp, storeProp)
    or
    PreCallGraphStep::loadStoreStep(pred, succ, loadProp, storeProp)
  }
}

View File

@@ -1,6 +1,8 @@
import javascript
private import semmle.javascript.dataflow.TypeTracking
private import semmle.javascript.internal.CachedStages
private import semmle.javascript.dataflow.internal.Contents as Contents
private import sharedlib.SummaryTypeTracker as SummaryTypeTracker
private import FlowSteps
cached
@@ -29,6 +31,8 @@ private module Cached {
SharedTypeTrackingStep::loadStoreStep(_, _, _, this)
or
this = DataFlow::PseudoProperties::arrayLikeElement()
or
this instanceof Contents::Private::PropertyName
}
}
@@ -46,6 +50,12 @@ private module Cached {
LoadStoreStep(PropertyName fromProp, PropertyName toProp) {
SharedTypeTrackingStep::loadStoreStep(_, _, fromProp, toProp)
or
exists(DataFlow::ContentSet loadContent, DataFlow::ContentSet storeContent |
SummaryTypeTracker::basicLoadStoreStep(_, _, loadContent, storeContent) and
fromProp = loadContent.asPropertyName() and
toProp = storeContent.asPropertyName()
)
or
summarizedLoadStoreStep(_, _, fromProp, toProp)
} or
WithoutPropStep(PropertySet props) { SharedTypeTrackingStep::withoutPropStep(_, _, props) }
@@ -205,6 +215,21 @@ private module Cached {
succ = getACallbackSource(parameter).getParameter(i) and
summary = ReturnStep()
)
or
SummaryTypeTracker::levelStepNoCall(pred, succ) and summary = LevelStep()
or
exists(DataFlow::ContentSet content |
SummaryTypeTracker::basicLoadStep(pred, succ, content) and
summary = LoadStep(content.asPropertyName())
or
SummaryTypeTracker::basicStoreStep(pred, succ, content) and
summary = StoreStep(content.asPropertyName())
)
or
exists(DataFlow::ContentSet loadContent, DataFlow::ContentSet storeContent |
SummaryTypeTracker::basicLoadStoreStep(pred, succ, loadContent, storeContent) and
summary = LoadStoreStep(loadContent.asPropertyName(), storeContent.asPropertyName())
)
}
}

View File

@@ -0,0 +1,119 @@
private import javascript
private import semmle.javascript.dataflow.internal.DataFlowPrivate
private import semmle.javascript.dataflow.internal.DataFlowNode
private import semmle.javascript.dataflow.internal.Contents::Public
private import semmle.javascript.dataflow.internal.sharedlib.FlowSummaryImpl as FlowSummaryImpl
private import semmle.javascript.dataflow.internal.FlowSummaryPrivate as FlowSummaryPrivate
private import semmle.javascript.dataflow.internal.BarrierGuards
private import semmle.javascript.dataflow.internal.sharedlib.Ssa as Ssa2
/**
 * Holds if taint propagates from `node1` to `node2` by default, through one of:
 * a registered `TaintTracking::AdditionalTaintStep`, a non-value-preserving local
 * flow-summary step, a flow-summary read or store of array elements, a tainted
 * spread argument, or a tainted array passed to `.apply()`.
 */
cached
predicate defaultAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) {
  // If the array in an .apply() call is tainted (not inside a content), box it in an
  // array element (similar to the spread-argument case below).
  node1 = node2.(ApplyCallTaintNode).getArrayNode()
  or
  // If the spread argument itself is tainted (not inside a content), store it into the
  // dynamic argument array.
  exists(InvokeExpr call, Content elementContent |
    elementContent.isUnknownArrayElement() and
    node2 = TDynamicArgumentStoreNode(call, elementContent) and
    node1 = TValueNode(call.getAnArgument().stripParens().(SpreadElement).getOperand())
  )
  or
  // Convert steps into and out of array elements to plain taint steps
  FlowSummaryPrivate::Steps::summaryStoreStep(node1.(FlowSummaryNode).getSummaryNode(),
    ContentSet::arrayElement(), node2.(FlowSummaryNode).getSummaryNode())
  or
  FlowSummaryPrivate::Steps::summaryReadStep(node1.(FlowSummaryNode).getSummaryNode(),
    ContentSet::arrayElement(), node2.(FlowSummaryNode).getSummaryNode())
  or
  // TODO: preserve 'model' parameter
  FlowSummaryPrivate::Steps::summaryLocalStep(node1.(FlowSummaryNode).getSummaryNode(),
    node2.(FlowSummaryNode).getSummaryNode(), false, _)
  or
  TaintTracking::AdditionalTaintStep::step(node1, node2)
}
/**
 * Holds if `defaultAdditionalTaintStep(node1, node2)` holds; `model` is always
 * the empty string for now.
 */
predicate defaultAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2, string model) {
  // TODO: set model
  model = "" and
  defaultAdditionalTaintStep(node1, node2)
}
/**
 * Gets the basic block associated with the SSA data-flow node `node`: the block
 * an SSA input node is an input into, or the block of an expression node's expression.
 */
bindingset[node]
pragma[inline_late]
private BasicBlock getBasicBlockFromSsa2(Ssa2::Node node) {
  node.(Ssa2::SsaInputNode).isInputInto(_, result)
  or
  result = node.(Ssa2::ExprNode).getExpr().getBasicBlock()
}
/**
 * Holds if `node` should block taint because it is reached from a variable access
 * that was tested by a condition guard with a `false` outcome, i.e. the variable
 * is known to be falsy at `node`.
 *
 * For example:
 * ```js
 * if (!x) {
 *   use(x); // <-- 'x' is a varAccessBarrier
 * }
 * ```
 *
 * This matters in particular for query-specific barrier guards that are placed
 * after a truthiness check:
 * ```js
 * if (x && !isSafe(x)) {
 *   throw new Error()
 * }
 * use(x); // both inputs to the phi-read for 'x' are blocked (one by varAccessBarrier, one by isSafe(x))
 * ```
 */
private predicate varAccessBarrier(DataFlow::Node node) {
  exists(ConditionGuardNode falsyGuard, Ssa2::ExprNode checked, Ssa2::Node reached |
    // `checked` is the variable access used as the guard's test ...
    checked.getExpr() = falsyGuard.getTest().(VarAccess) and
    falsyGuard.getOutcome() = false and
    // ... `reached` is one SSA flow step after it, inside the region dominated by the guard
    Ssa2::localFlowStep(_, checked, reached, true) and
    falsyGuard.dominates(getBasicBlockFromSsa2(reached)) and
    node = getNodeFromSsa2(reached)
  )
}
/**
 * Holds if `node` acts as a sanitizer in every global taint flow configuration,
 * but not in local taint.
 */
cached
predicate defaultTaintSanitizer(DataFlow::Node node) {
  node = MakeBarrierGuard<TaintTracking::AdditionalBarrierGuard>::getABarrierNode()
  or
  varAccessBarrier(node)
  or
  node instanceof DataFlow::VarAccessBarrier
}
/**
 * Holds if default taint-tracking should permit an implicit read of `c`
 * at sinks and at inputs to additional taint steps.
 *
 * Only promise values and array elements qualify, and never at a node that is
 * the source of an optional step.
 */
bindingset[node]
predicate defaultImplicitTaintRead(DataFlow::Node node, ContentSet c) {
  // Optional steps are added through isAdditionalFlowStep but we don't want the implicit reads
  not optionalStep(node, _, _) and
  (
    c = ContentSet::promiseValue()
    or
    c = ContentSet::arrayElement()
  ) and
  exists(node)
}
/** Holds if `arg` is an argument of some call that has at least one viable callee. */
private predicate isArgumentToResolvedCall(DataFlow::Node arg) {
  exists(DataFlowCall resolved |
    isArgumentNode(arg, resolved, _) and
    exists(viableCallable(resolved))
  )
}
/**
 * Holds if taint may speculatively flow from `node1`, an argument of a call,
 * to `node2`, the call itself, because no call that takes `node1` as an argument
 * has a known target.
 */
predicate speculativeTaintStep(DataFlow::Node node1, DataFlow::Node node2) {
  exists(DataFlow::CallNode call |
    call = node2 and
    call.getAnArgument() = node1 and
    // A given node can appear as argument in more than one call. For example `x` in `fn.call(x)`
    // is argument 0 of the `fn.call` call, but also the receiver of a reflective call to `fn`.
    // It is thus not enough to check if `call` has a known target; we need to ensure that none of
    // the calls involving `node1` have a known target.
    not isArgumentToResolvedCall(node1)
  )
}

View File

@@ -0,0 +1,308 @@
private import javascript as js
private import semmle.javascript.dataflow.internal.DataFlowNode
private import semmle.javascript.dataflow.internal.VariableOrThis
private import codeql.dataflow.VariableCapture
private import semmle.javascript.dataflow.internal.sharedlib.DataFlowImplCommon as DataFlowImplCommon
module VariableCaptureConfig implements InputSig<js::DbLocation> {
/**
 * Gets a function reachable through `variable`: the constructor body of a class
 * declaration bound to `variable`, the underlying value of an expression assigned
 * to `variable`, or a function whose own variable is `variable`.
 */
private js::Function getLambdaFromVariable(js::LocalVariable variable) {
  exists(js::ClassDeclStmt classDecl |
    classDecl.getVariable() = variable and
    classDecl.getConstructor().getBody() = result
  )
  or
  variable.getAnAssignedExpr().getUnderlyingValue() = result
  or
  variable = result.getVariable()
}
/**
 * Holds if `container` is treated like a top-level: an actual top-level, the factory
 * function of an AMD module, an immediately-invoked function whose enclosing container
 * is itself top-level-like, or a container declaring more than 100 captured variables.
 */
additional predicate isTopLevelLike(js::StmtContainer container) {
  // Containers declaring >100 captured variables tend to be singletons and are too expensive anyway
  100 <
    strictcount(js::LocalVariable v | v.isCaptured() and v.getDeclaringContainer() = container)
  or
  exists(js::ImmediatelyInvokedFunctionExpr iife |
    iife = container and
    isTopLevelLike(iife.getEnclosingContainer())
  )
  or
  exists(js::AmdModuleDefinition amd | container = amd.getFactoryFunction())
  or
  container instanceof js::TopLevel
}
/**
 * A local variable or `this` that is captured, and whose declaring container
 * is not top-level-like according to `isTopLevelLike`.
 */
class CapturedVariable extends LocalVariableOrThis {
CapturedVariable() {
// NOTE(review): judging by its name, this call only exists to place the class in the
// same cached stage as `DataFlowImplCommon` - confirm.
DataFlowImplCommon::forceCachingInSameStage() and
this.isCaptured() and
not isTopLevelLike(this.getDeclaringContainer())
}
/** Gets the function boundary of the container that declares this variable. */
Callable getCallable() { result = this.getDeclaringContainer().getFunctionBoundary() }
}
/**
 * Holds if `fun` captures `variable`: `variable` is not declared by `fun` itself, and
 * `fun` either is the function boundary of an access to it, contains a use of the
 * captured `this`, or contains a reference to another function that captures it.
 */
additional predicate captures(js::Function fun, CapturedVariable variable) {
  not variable.getDeclaringContainer() = fun and
  (
    exists(js::Function nested |
      containsReferenceTo(fun, nested) and
      captures(nested, variable)
    )
    or
    fun = variable.getAThisUse().getUseContainer()
    or
    fun = variable.asLocalVariable().getAnAccess().getContainer().getFunctionBoundary()
  )
}
/**
 * Holds if `fun` refers to the function `other`: either `fun` is the enclosing
 * container of `other`, or `fun` accesses a local variable that leads to `other`
 * (see `getLambdaFromVariable`), where `other` is a different function and the
 * enclosing container of `fun` is the enclosing container of `other` or one of
 * its transitive enclosing containers.
 */
private predicate containsReferenceTo(js::Function fun, js::Function other) {
  exists(js::LocalVariable holder |
    fun != other and
    holder.getAnAccess().getEnclosingFunction() = fun and
    getLambdaFromVariable(holder) = other and
    other.getEnclosingContainer().getEnclosingContainer*() = fun.getEnclosingContainer()
  )
  or
  fun = other.getEnclosingContainer()
}
/**
 * Gets a function in the AST rooted at `e` that captures some variable.
 *
 * The search descends through children of non-function nodes only, so it stops at
 * the first function on each path; `e` itself is a result if it is such a function.
 */
private js::Function getACapturingFunctionInTree(js::AstNode e) {
  result = getACapturingFunctionInTree(e.getAChild()) and
  not e instanceof js::Function
  or
  captures(e, _) and
  result = e
}
/**
 * Holds if the variable declared by `decl` is captured by a function inside the
 * initializer of `decl`.
 *
 * For example, `obj` below is captured by its own initializer:
 * ```js
 * const obj = {
 *   method: () => { ...obj... }
 * }
 * ```
 *
 * The lambda can only observe values of `obj` at one of the aliases of that lambda. Due to
 * limited alias analysis, the only alias we can see is the lambda itself. At that point `obj`
 * is still unassigned, so the lambda only sees the implicit initialization and fails to
 * capture any real flow through `obj`.
 *
 * The following similar example does not have this problem:
 *
 * ```js
 * const obj = {};
 * obj.method = () => { ...obj... };
 * ```
 *
 * Here `obj` has already been assigned when the lambda is created, so the correct value is
 * propagated into the lambda.
 *
 * The workaround is to make the first example look like the second one, by placing the
 * assignment of `obj` before the object literal. This is done whenever a variable captures
 * itself in its initializer.
 */
private predicate isCapturedByOwnInitializer(js::VariableDeclarator decl) {
  exists(js::Function closure |
    captures(closure,
      LocalVariableOrThis::variable(decl.getBindingPattern().(js::VarDecl).getVariable())) and
    closure = getACapturingFunctionInTree(decl.getInit())
  )
}
/** A control-flow node; an alias for `js::ControlFlowNode`. */
class ControlFlowNode = js::ControlFlowNode;
/** A basic block, extended with its enclosing callable. */
class BasicBlock extends js::BasicBlock {
/** Gets the function boundary of the container of this basic block. */
Callable getEnclosingCallable() { result = this.getContainer().getFunctionBoundary() }
}
/** A callable; every `js::StmtContainer` is one. */
class Callable extends js::StmtContainer {
/** Never holds. */
predicate isConstructor() {
// JS constructors should not be seen as "constructors" in this context.
none()
}
}
/** A captured variable that is either a captured `this` or a captured parameter. */
class CapturedParameter extends CapturedVariable {
  CapturedParameter() {
    exists(this.asThisContainer())
    or
    this.asLocalVariable().isParameter()
  }
}
/** An expression; every `js::AST::ValueNode` is one. */
class Expr extends js::AST::ValueNode {
/** Holds if the `i`th node of basic block `bb` evaluates this expression. */
predicate hasCfgNode(BasicBlock bb, int i) {
// Note: this is overridden for FunctionDeclStmt
bb.getNode(i) = this
}
}
/** A use of a captured variable (or of a captured `this`). */
class VariableRead extends Expr instanceof js::ControlFlowNode {
private CapturedVariable variable;
VariableRead() { this = variable.getAUse() }
/** Gets the captured variable being read. */
CapturedVariable getVariable() { result = variable }
}
/** A function that captures at least one variable. */
class ClosureExpr extends Expr {
  ClosureExpr() { captures(this, _) }

  /** Holds if `c` is this closure itself, which serves as its own body. */
  predicate hasBody(Callable c) { c = this }

  /**
   * Holds if `e` may refer to this closure: it is the closure, its underlying value is
   * the closure, or its underlying value is an access to a variable that leads to the
   * closure (see `getLambdaFromVariable`).
   */
  predicate hasAliasedAccess(Expr e) {
    exists(js::LocalVariable holder |
      e.(js::Expr).getUnderlyingValue() = holder.getAnAccess() and
      this = getLambdaFromVariable(holder)
    )
    or
    this = e.(js::Expr).getUnderlyingValue()
    or
    this = e
  }
}
/**
 * The representation of writes to captured variables:
 * - an explicit write, for a variable reference `pattern` that has an lvalue node and
 *   refers to a captured local variable;
 * - an implicit initialization, for a captured variable that is not a `CapturedParameter`.
 */
private newtype TVariableWrite =
MkExplicitVariableWrite(js::VarRef pattern) {
exists(js::DataFlow::lvalueNodeInternal(pattern)) and
any(CapturedVariable v).asLocalVariable() = pattern.getVariable()
} or
MkImplicitVariableInit(CapturedVariable v) { not v instanceof CapturedParameter }
/**
 * A write to a captured variable.
 *
 * All members have no results here and are overridden in the subclasses.
 */
class VariableWrite extends TVariableWrite {
/** Gets the captured variable being written. */
CapturedVariable getVariable() { none() } // Overridden in subclass
/** Gets a string representation of this write. */
string toString() { none() } // Overridden in subclass
/** Gets the location of this write. */
js::DbLocation getLocation() { none() } // Overridden in subclass
/** Holds if this write happens at the `i`th node of basic block `bb`. */
predicate hasCfgNode(BasicBlock bb, int i) { none() } // Overridden in subclass
// note: language-specific
/** Gets the data-flow node holding the value being written, if any. */
js::DataFlow::Node getSource() { none() } // Overridden in subclass
}
/** A write to a captured local variable through the variable reference `pattern`. */
additional class ExplicitVariableWrite extends VariableWrite, MkExplicitVariableWrite {
private js::VarRef pattern;
ExplicitVariableWrite() { this = MkExplicitVariableWrite(pattern) }
override CapturedVariable getVariable() { result.asLocalVariable() = pattern.getVariable() }
override string toString() { result = pattern.toString() }
/** Gets the location of this write. */
override js::DbLocation getLocation() { result = pattern.getLocation() }
override js::DataFlow::Node getSource() {
// Note: there is not always an expression corresponding to the RHS of the assignment.
// We do however have a data-flow node for this purpose (the lvalue-node),
// so the lvalue node of the written pattern is used as the source.
result = js::DataFlow::lvalueNodeInternal(pattern)
}
/**
 * Gets a CFG node that should act at the place where this variable write happens, overriding its "true" CFG node.
 *
 * This is the first control-flow node of the initializer, for a declarator whose
 * variable is captured by its own initializer (see `isCapturedByOwnInitializer`).
 */
private js::ControlFlowNode getCfgNodeOverride() {
exists(js::VariableDeclarator decl |
decl.getBindingPattern() = pattern and
isCapturedByOwnInitializer(decl) and
result = decl.getInit().getFirstControlFlowNode()
)
}
/**
 * Holds if this write happens at the `i`th node of basic block `bb`: the override
 * node if there is one, and otherwise the definition node of the pattern.
 */
override predicate hasCfgNode(BasicBlock bb, int i) {
bb.getNode(i) = this.getCfgNodeOverride()
or
not exists(this.getCfgNodeOverride()) and
bb.getNode(i) = pattern.(js::LValue).getDefNode()
}
}
/** The implicit initialization of a captured variable that is not a `CapturedParameter`. */
additional class ImplicitVariableInit extends VariableWrite, MkImplicitVariableInit {
  private CapturedVariable initialized;

  ImplicitVariableInit() { this = MkImplicitVariableInit(initialized) }

  override CapturedVariable getVariable() { result = initialized }

  override js::DbLocation getLocation() { result = initialized.getLocation() }

  override string toString() { result = "[implicit init] " + initialized }

  /**
   * Holds if this initialization happens at index `i` of `bb`, where `i` is always -1
   * and `bb` is the block of the variable's implicit SSA init, or the entry block of
   * the container binding a captured `this`.
   */
  override predicate hasCfgNode(BasicBlock bb, int i) {
    // The index would normally be 0, but it is lowered to -1 so that FunctionDeclStmts
    // can be evaluated at index 0.
    i = -1 and
    (
      bb.(js::EntryBasicBlock).getContainer() = initialized.asThisContainer()
      or
      any(js::SsaImplicitInit def).definesAt(bb, _, initialized.asLocalVariable())
    )
  }
}
/** Gets a successor of basic block `bb`. */
BasicBlock getABasicBlockSuccessor(BasicBlock bb) { result = bb.getASuccessor() }
/** Gets the immediate dominator of basic block `bb`. */
BasicBlock getImmediateBasicBlockDominator(BasicBlock bb) { result = bb.getImmediateDominator() }
/** Holds if `bb` is an entry basic block. */
predicate entryBlock(BasicBlock bb) { bb instanceof js::EntryBasicBlock }
/** Holds if the last node of `bb` is a control-flow exit node. */
predicate exitBlock(BasicBlock bb) { bb.getLastNode() instanceof js::ControlFlowExitNode }
}
/** The `Flow` module from `codeql.dataflow.VariableCapture`, instantiated with `VariableCaptureConfig`. */
module VariableCaptureOutput = Flow<js::DbLocation, VariableCaptureConfig>;
/**
 * Gets the JavaScript data-flow node corresponding to `node`, a node of the
 * variable-capture library.
 */
js::DataFlow::Node getNodeFromClosureNode(VariableCaptureOutput::ClosureNode node) {
  result = node.(VariableCaptureOutput::VariableWriteSourceNode).getVariableWrite().getSource()
  or
  result = TSynthCaptureNode(node.(VariableCaptureOutput::SynthesizedCaptureNode))
  or
  // Note: the `this` parameter in the capture library is expected to be a parameter that refers
  // to the lambda object itself, which for JS means the `TFunctionSelfReferenceNode`, not
  // `TThisNode` as one might expect.
  result = TFunctionSelfReferenceNode(node.(VariableCaptureOutput::ThisParameterNode).getCallable())
  or
  result = TExprPostUpdateNode(node.(VariableCaptureOutput::ExprPostUpdateNode).getExpr())
  or
  exists(VariableCaptureOutput::ParameterNode paramNode | paramNode = node |
    result = TThisNode(paramNode.getParameter().asThisContainer())
    or
    result = TValueNode(paramNode.getParameter().asLocalVariable().getADeclaration())
  )
  or
  result = TValueNode(node.(VariableCaptureOutput::ExprNode).getExpr())
}
/**
 * Gets a node of the variable-capture library that `getNodeFromClosureNode`
 * maps to `node`.
 */
VariableCaptureOutput::ClosureNode getClosureNode(js::DataFlow::Node node) {
  getNodeFromClosureNode(result) = node
}
/** Predicates for inspecting the variable-capture instantiation while debugging. */
private module Debug {
private import VariableCaptureConfig
/**
 * Holds if `container` is, or is transitively enclosed by, a function named `exists`.
 * The name is hard-coded.
 */
predicate relevantContainer(js::StmtContainer container) {
container.getEnclosingContainer*().(js::Function).getName() = "exists"
}
/** Holds if the capture library has a local flow step from `node1` to `node2`. */
predicate localFlowStep(
VariableCaptureOutput::ClosureNode node1, VariableCaptureOutput::ClosureNode node2
) {
VariableCaptureOutput::localFlowStep(node1, node2)
}
/** Same as `localFlowStep`, but on JS data-flow nodes and restricted to relevant containers. */
predicate localFlowStepMapped(js::DataFlow::Node node1, js::DataFlow::Node node2) {
localFlowStep(getClosureNode(node1), getClosureNode(node2)) and
relevantContainer(node1.getContainer())
}
/** Holds if `read` is located at index `i` of basic block `bb`. */
predicate readBB(VariableRead read, BasicBlock bb, int i) { read.hasCfgNode(bb, i) }
/** Holds if `write` is located at index `i` of basic block `bb`. */
predicate writeBB(VariableWrite write, BasicBlock bb, int i) { write.hasCfgNode(bb, i) }
/** Gets the number of variables captured by `fun`; has no result if there are none. */
int captureDegree(js::Function fun) {
result = strictcount(CapturedVariable v | captures(fun, v))
}
/** Gets the largest `captureDegree` of any function. */
int maxDegree() { result = max(captureDegree(_)) }
/** Gets the capture degree of `fun`, provided it equals `maxDegree()`. */
int captureMax(js::Function fun) { result = captureDegree(fun) and result = maxDegree() }
/** Gets the capture degree of `fun`, provided it equals `maxDegree()` and `fun` captures `v`. */
int captureMax(js::Function fun, CapturedVariable v) {
result = captureDegree(fun) and result = maxDegree() and captures(fun, v)
}
}

View File

@@ -0,0 +1,130 @@
private import javascript
private import DataFlowNode
/**
 * The representation behind `LocalVariableOrThis`: either a local variable, or the
 * `this` of a statement container that is not an arrow function.
 */
cached
private newtype TLocalVariableOrThis =
TLocalVariable(LocalVariable var) or
TThis(StmtContainer container) { not container instanceof ArrowFunctionExpr }
/** A local variable, or the `this` value of a particular container. */
class LocalVariableOrThis extends TLocalVariableOrThis {
  /** Gets the local variable this value stands for, if it is a variable. */
  LocalVariable asLocalVariable() { this = TLocalVariable(result) }

  /** Gets the container whose `this` this value stands for, if it is a `this`. */
  StmtContainer asThisContainer() { this = TThis(result) }

  /** Gets the variable name, or the string `"this"`. */
  string toString() { result = this.getName() }

  /** Gets the variable name, or the string `"this"`. */
  string getName() {
    this instanceof TThis and result = "this"
    or
    result = this.asLocalVariable().getName()
  }

  /** Gets the location of the variable, or of the container if this is a `this`. */
  DbLocation getLocation() {
    result = this.asThisContainer().getLocation()
    or
    result = this.asLocalVariable().getLocation()
  }

  /** Holds if this is a captured variable or a captured `this`. */
  predicate isCaptured() {
    hasCapturedThis(this.asThisContainer())
    or
    this.asLocalVariable().isCaptured()
  }

  /** Gets the container that declares this variable, or the container whose `this` this is. */
  StmtContainer getDeclaringContainer() {
    result = this.asThisContainer()
    or
    result = this.asLocalVariable().getDeclaringContainer()
  }

  /** Gets an explicit `this` expression bound to the container of this `this`. */
  ThisExpr getAThisExpr() { this.asThisContainer() = result.getBindingContainer() }

  /** Gets an implicit or explicit use of the `this` represented by this value. */
  ThisUse getAThisUse() { this.asThisContainer() = result.getBindingContainer() }

  /** Gets a control-flow node that accesses this variable or uses this `this`. */
  ControlFlowNode getAUse() {
    result = this.getAThisUse()
    or
    result = this.asLocalVariable().getAnAccess()
  }
}
/**
 * Holds if `c1` and `c2` are the same container.
 *
 * Both arguments must be bound by the caller (`bindingset[c1, c2]`), and the
 * predicate is annotated `pragma[inline_late]`.
 */
bindingset[c1, c2]
pragma[inline_late]
private predicate sameContainer(StmtContainer c1, StmtContainer c2) { c1 = c2 }
/**
 * Holds if the `this` of container `c` is referenced by a `this` expression
 * located in a different container.
 */
pragma[nomagic]
private predicate hasCapturedThis(StmtContainer c) {
  exists(ThisExpr thisExpr |
    not sameContainer(c, thisExpr.getContainer()) and
    c = thisExpr.getBindingContainer()
  )
}
/** Constructors for `LocalVariableOrThis` values. */
module LocalVariableOrThis {
  /** Gets the `LocalVariableOrThis` that stands for the local variable `v`. */
  LocalVariableOrThis variable(LocalVariable v) { result = TLocalVariable(v) }

  /** Gets the `LocalVariableOrThis` that stands for `this` in container `c`. */
  LocalVariableOrThis thisInContainer(StmtContainer c) { result.asThisContainer() = c }
}
/**
 * An explicit or implicit use of `this`.
 *
 * Implicit uses include `super()` calls and instance field initializers (which includes TypeScript parameter fields).
 */
abstract class ThisUse instanceof ControlFlowNode {
/** Gets the container binding the `this` being accessed. */
abstract StmtContainer getBindingContainer();
/** Gets the container in which `this` is being accessed. */
abstract StmtContainer getUseContainer();
/** Gets a string representation of this element. */
string toString() { result = super.toString() }
/** Gets the location of this use of `this`. */
DbLocation getLocation() { result = super.getLocation() }
}
/**
 * Holds if `node` implicitly uses the `this` bound by `thisBinder`: `node` is a
 * non-static field definition and `thisBinder` is the constructor body of its class,
 * or `node` is a `super` expression and `thisBinder` is its binder.
 */
private predicate implicitThisUse(ControlFlowNode node, StmtContainer thisBinder) {
  exists(FieldDefinition instanceField |
    instanceField = node and
    thisBinder = instanceField.getDeclaringClass().getConstructor().getBody() and
    not instanceField.isStatic()
  )
  or
  node.(SuperExpr).getBinder() = thisBinder
}
/** An implicit use of `this`, as determined by `implicitThisUse`. */
class ImplicitThisUse extends ThisUse {
  ImplicitThisUse() { implicitThisUse(this, _) }

  override StmtContainer getBindingContainer() { implicitThisUse(this, result) }

  override StmtContainer getUseContainer() {
    result = this.(SuperExpr).getContainer()
    or
    // The following differs from FieldDefinition.getContainer() which returns the container
    // enclosing the class, not the class constructor.
    // TODO: consider changing this in FieldDefinition.getContainer()
    result = this.(FieldDefinition).getDeclaringClass().getConstructor().getBody()
  }
}
/** An explicit use of `this`, that is, a `this` expression. */
private class ExplicitThisUse extends ThisUse instanceof ThisExpr {
override StmtContainer getBindingContainer() { result = ThisExpr.super.getBindingContainer() }
override StmtContainer getUseContainer() { result = ThisExpr.super.getContainer() }
}

View File

@@ -0,0 +1,7 @@
/** Provides the instantiation of the shared data flow library. */
private import semmle.javascript.Locations
private import codeql.dataflow.DataFlow
private import DataFlowArg
import DataFlowMake<Location, JSDataFlow>
import DataFlowImplSpecific::Public

View File

@@ -0,0 +1,53 @@
private import semmle.javascript.Locations
private import DataFlowImplSpecific
private import codeql.dataflow.DataFlow as SharedDataFlow
private import codeql.dataflow.TaintTracking as SharedTaintTracking
private import codeql.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl
/**
 * The JavaScript input to the shared data flow library, assembled from the
 * `Private` and `Public` modules of `DataFlowImplSpecific`.
 */
module JSDataFlow implements SharedDataFlow::InputSig<Location> {
import Private
import Public
// Explicitly implement signature members that have a default
predicate typeStrongerThan = Private::typeStrongerThan/2;
predicate neverSkipInPathGraph = Private::neverSkipInPathGraph/1;
predicate accessPathLimit = Private::accessPathLimit/0;
predicate viableImplInCallContext = Private::viableImplInCallContext/2;
predicate mayBenefitFromCallContext = Private::mayBenefitFromCallContext/1;
}
/** The JavaScript input to the shared taint tracking library, taken from `TaintTrackingPrivate`. */
module JSTaintFlow implements SharedTaintTracking::InputSig<Location, JSDataFlow> {
import semmle.javascript.dataflow.internal.TaintTrackingPrivate
}
/** The JavaScript input to the shared flow summary library, taken from `FlowSummaryPrivate`. */
module JSFlowSummary implements FlowSummaryImpl::InputSig<Location, JSDataFlow> {
private import semmle.javascript.dataflow.internal.FlowSummaryPrivate as FlowSummaryPrivate
import FlowSummaryPrivate
// Explicitly implement signature members that have a default
predicate callbackSelfParameterPosition = FlowSummaryPrivate::callbackSelfParameterPosition/0;
predicate encodeContent = FlowSummaryPrivate::encodeContent/2;
predicate encodeReturn = FlowSummaryPrivate::encodeReturn/2;
predicate encodeWithoutContent = FlowSummaryPrivate::encodeWithoutContent/2;
predicate encodeWithContent = FlowSummaryPrivate::encodeWithContent/2;
predicate decodeUnknownParameterPosition = FlowSummaryPrivate::decodeUnknownParameterPosition/1;
predicate decodeUnknownArgumentPosition = FlowSummaryPrivate::decodeUnknownArgumentPosition/1;
predicate decodeUnknownContent = FlowSummaryPrivate::decodeUnknownContent/1;
predicate decodeUnknownReturn = FlowSummaryPrivate::decodeUnknownReturn/1;
predicate decodeUnknownWithoutContent = FlowSummaryPrivate::decodeUnknownWithoutContent/1;
predicate decodeUnknownWithContent = FlowSummaryPrivate::decodeUnknownWithContent/1;
}

View File

@@ -0,0 +1,4 @@
private import semmle.javascript.Locations
private import codeql.dataflow.internal.DataFlowImpl
private import DataFlowArg
import MakeImpl<Location, JSDataFlow>

View File

@@ -0,0 +1,4 @@
private import semmle.javascript.Locations
private import DataFlowArg
private import codeql.dataflow.internal.DataFlowImplCommon
import MakeImplCommon<Location, JSDataFlow>

View File

@@ -0,0 +1,12 @@
private import javascript
// This file provides the input to FlowSummaryImpl.qll, which is shared via identical-files.json.
/** Re-exports the contents of `DataFlowPrivate`. */
module Private {
import semmle.javascript.dataflow.internal.DataFlowPrivate
}
/** Re-exports the public part of `Contents` and defines `Node` as `DataFlow::Node`. */
module Public {
import semmle.javascript.dataflow.internal.Contents::Public
class Node = DataFlow::Node;
}

View File

@@ -0,0 +1,4 @@
private import semmle.javascript.Locations
private import codeql.dataflow.internal.FlowSummaryImpl
private import DataFlowArg
import Make<Location, JSDataFlow, JSFlowSummary>

View File

@@ -0,0 +1,109 @@
/**
* Instantiates the shared SSA library for JavaScript, but only to establish use-use flow.
*
* JavaScript's old SSA library is still responsible for the ordinary SSA flow.
*/
private import javascript as js
private import codeql.ssa.Ssa
private import semmle.javascript.internal.BasicBlockInternal as BasicBlockInternal
private import semmle.javascript.dataflow.internal.VariableOrThis
/**
 * The input to the shared SSA library: source variables are the local variables
 * and `this` values that are not captured.
 */
module SsaConfig implements InputSig<js::DbLocation> {
  class ControlFlowNode = js::ControlFlowNode;

  class BasicBlock = js::BasicBlock;

  /** A basic block that is an exit block. */
  class ExitBasicBlock extends BasicBlock {
    ExitBasicBlock() { this.isExitBlock() }
  }

  /** A local variable or `this` that is not captured. */
  class SourceVariable extends LocalVariableOrThis {
    SourceVariable() { not this.isCaptured() }
  }

  /** Gets the entry basic block of `container`. */
  pragma[nomagic]
  private js::EntryBasicBlock getEntryBlock(js::StmtContainer container) {
    container = result.getContainer()
  }

  /**
   * Holds if `v` is written at index `i` of `bb`. All writes are certain.
   */
  predicate variableWrite(BasicBlock bb, int i, SourceVariable v, boolean certain) {
    (
      // Implicit initialization and function parameters
      i = -1 and
      bb = getEntryBlock(v.getDeclaringContainer())
      or
      bb.defAt(i, v.asLocalVariable(), _)
    ) and
    certain = true
  }

  /**
   * Holds if `v` is read at index `i` of `bb`, either as a use of a local variable
   * or as a use of `this`. All reads are certain.
   */
  predicate variableRead(BasicBlock bb, int i, SourceVariable v, boolean certain) {
    certain = true and
    (
      v.asThisContainer() = bb.getNode(i).(ThisUse).getBindingContainer()
      or
      bb.useAt(i, v.asLocalVariable(), _)
    )
  }

  predicate getImmediateBasicBlockDominator = BasicBlockInternal::immediateDominator/1;

  /** Gets a successor of basic block `bb`. */
  pragma[inline]
  BasicBlock getABasicBlockSuccessor(BasicBlock bb) { result = bb.getASuccessor() }
}
import Make<js::DbLocation, SsaConfig>
/**
 * The input to the data-flow integration layer of the shared SSA library.
 *
 * Definitions never assign values or initialize parameters here, since the
 * instantiation is only used for use-use flow.
 */
module SsaDataflowInput implements DataFlowIntegrationInputSig {
/** A use of some SSA source variable. */
class Expr extends js::ControlFlowNode {
Expr() { this = any(SsaConfig::SourceVariable v).getAUse() }
/** Holds if this node is the `i`th node of basic block `bb`. */
predicate hasCfgNode(js::BasicBlock bb, int i) { this = bb.getNode(i) }
}
/** Never holds. */
predicate ssaDefAssigns(WriteDefinition def, Expr value) {
// This library only handles use-use flow after a post-update, there are no definitions, only uses.
none()
}
class Parameter = js::Parameter;
/** Never holds. */
predicate ssaDefInitializesParam(WriteDefinition def, Parameter p) {
// This library only handles use-use flow after a post-update, there are no definitions, only uses.
none()
}
/** Gets a certain read that `def` reaches. */
cached
Expr getARead(Definition def) {
// Copied from implementation so we can cache it here
exists(SsaConfig::SourceVariable v, js::BasicBlock bb, int i |
ssaDefReachesRead(v, def, bb, i) and
SsaConfig::variableRead(bb, i, v, true) and
result.hasCfgNode(bb, i)
)
}
/** The test of some condition guard node. */
class Guard extends js::ControlFlowNode {
Guard() { this = any(js::ConditionGuardNode g).getTest() }
/** Holds if this node is the `i`th node of basic block `bb`. */
predicate hasCfgNode(js::BasicBlock bb, int i) { this = bb.getNode(i) }
}
/**
 * Holds if a condition guard node that tests `guard` with outcome `branch`
 * dominates basic block `bb`.
 */
pragma[inline]
predicate guardControlsBlock(Guard guard, js::BasicBlock bb, boolean branch) {
exists(js::ConditionGuardNode g |
g.getTest() = guard and
g.dominates(bb) and
branch = g.getOutcome()
)
}
/**
 * Gets the basic block of a condition guard node with outcome `branch`
 * whose test is located in basic block `bb`.
 */
js::BasicBlock getAConditionalBasicBlockSuccessor(js::BasicBlock bb, boolean branch) {
exists(js::ConditionGuardNode g |
bb = g.getTest().getBasicBlock() and
result = g.getBasicBlock() and
branch = g.getOutcome()
)
}
}
import DataFlowIntegration<SsaDataflowInput>

View File

@@ -0,0 +1,83 @@
private import semmle.javascript.Locations
private import codeql.typetracking.internal.SummaryTypeTracker
private import semmle.javascript.dataflow.internal.DataFlowPrivate as DataFlowPrivate
private import semmle.javascript.dataflow.FlowSummary as FlowSummary
private import FlowSummaryImpl as FlowSummaryImpl
private import DataFlowArg
/**
 * The input module used to instantiate `SummaryFlow` from the shared
 * summary type-tracker library.
 */
private module SummaryFlowConfig implements Input {
  import JSDataFlow
  import FlowSummaryImpl::Public
  import FlowSummaryImpl::Private
  import FlowSummaryImpl::Private::SummaryComponent

  class Content = DataFlow::ContentSet;

  /** A content filter. The characteristic predicate is `none()`, so this class has no instances. */
  class ContentFilter extends Unit {
    ContentFilter() { none() }
  }

  ContentFilter getFilterFromWithoutContentStep(Content content) { none() }

  ContentFilter getFilterFromWithContentStep(Content content) { none() }

  predicate singleton = SummaryComponentStack::singleton/1;

  predicate push = SummaryComponentStack::push/2;

  /** Gets the summary component for a normal return. */
  SummaryComponent return() {
    result = SummaryComponent::return(DataFlowPrivate::MkNormalReturnKind())
  }

  /**
   * Gets the node for the argument of `call` described by `arg`: the argument
   * node itself when `isPostUpdate` is `false`, or its post-update node when
   * `isPostUpdate` is `true`.
   */
  Node argumentOf(Node call, SummaryComponent arg, boolean isPostUpdate) {
    // Note: we cannot rely on DataFlowPrivate::DataFlowCall here because that depends on the call graph.
    exists(Node argNode, ParameterPosition paramPos, ArgumentPosition argPos |
      parameterMatch(paramPos, argPos) and
      arg = argument(paramPos) and
      (
        argPos.isThis() and
        argNode = call.(DataFlow::CallNode).getReceiver()
        or
        argNode = call.(DataFlow::InvokeNode).getArgument(argPos.asPositional())
      ) and
      (
        isPostUpdate = false and result = argNode
        or
        isPostUpdate = true and result = argNode.getPostUpdateNode()
      )
    )
  }

  /**
   * Gets the parameter node (positional parameter or receiver) of the function
   * `callable` described by `param`.
   */
  Node parameterOf(Node callable, SummaryComponent param) {
    exists(DataFlow::FunctionNode func, ParameterPosition paramPos, ArgumentPosition argPos |
      func = callable and
      parameterMatch(paramPos, argPos) and
      param = parameter(argPos) and
      (
        paramPos.isThis() and
        result = func.getReceiver()
        or
        result = func.getParameter(paramPos.asPositional())
      )
    )
  }

  /** Gets the return node of the function `callable`, where `return` is the normal-return component. */
  Node returnOf(Node callable, SummaryComponent return) {
    result = callable.(DataFlow::FunctionNode).getReturnNode() and
    return = return()
  }

  /** A summarized callable, exposing flow without the model-provenance column. */
  class SummarizedCallable instanceof SummarizedCallableImpl {
    predicate propagatesFlow(
      SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
    ) {
      super.propagatesFlow(input, output, preservesValue, _)
    }

    string toString() { result = super.toString() }
  }

  /** Gets a call to `callable`, as reported by `getACallSimple()`. */
  Node callTo(SummarizedCallable callable) {
    callable.(FlowSummary::SummarizedCallable).getACallSimple() = result
  }
}
import SummaryFlow<SummaryFlowConfig>

View File

@@ -0,0 +1,6 @@
/** Provides the instantiation of the shared taint tracking library. */
private import semmle.javascript.Locations
private import codeql.dataflow.TaintTracking
private import DataFlowArg
import TaintFlowMake<Location, JSDataFlow, JSTaintFlow>

View File

@@ -12,10 +12,11 @@
* Backward exploration in particular does not scale on non-trivial code bases and hence is of limited
* usefulness as it stands.
*/
deprecated module;
import javascript
private class BackwardExploringConfiguration extends DataFlow::Configuration {
deprecated private class BackwardExploringConfiguration extends DataFlow::Configuration {
BackwardExploringConfiguration() { this = any(DataFlow::Configuration cfg) }
override predicate isSource(DataFlow::Node node) { any() }

View File

@@ -10,10 +10,11 @@
*
* NOTE: This library should only be used for debugging and exploration, not in production code.
*/
deprecated module;
import javascript
private class ForwardExploringConfiguration extends DataFlow::Configuration {
deprecated private class ForwardExploringConfiguration extends DataFlow::Configuration {
ForwardExploringConfiguration() { this = any(DataFlow::Configuration cfg) }
override predicate isSink(DataFlow::Node node) { any() }

View File

@@ -61,6 +61,8 @@ predicate isTestFile(File f) {
)
or
f.getAbsolutePath().regexpMatch(".*/__(mocks|tests)__/.*")
or
f.getBaseName().matches("%.test.%")
}
/**

View File

@@ -142,7 +142,7 @@ module AsyncPackage {
override predicate step(DataFlow::Node pred, DataFlow::Node succ) {
exists(DataFlow::FunctionNode iteratee, IterationCall call |
iteratee = call.getIteratorCallback() and // Require a closure to avoid spurious call/return mismatch.
pred = call.getCollection() and
pred = call.getCollection() and // TODO: needs a flow summary to ensure ArrayElement content is unfolded
succ = iteratee.getParameter(0)
)
}

View File

@@ -52,6 +52,7 @@ module Markdown {
private class MarkdownTableStep extends MarkdownStep {
override predicate step(DataFlow::Node pred, DataFlow::Node succ) {
exists(DataFlow::CallNode call | call = DataFlow::moduleImport("markdown-table").getACall() |
// TODO: needs a flow summary to ensure ArrayElement content is unfolded
succ = call and
pred = call.getArgument(0)
)

View File

@@ -56,13 +56,15 @@ predicate parseTypeString(string rawType, string package, string qualifiedName)
* Holds if models describing `package` may be relevant for the analysis of this database.
*/
predicate isPackageUsed(string package) {
exists(DataFlow::moduleImport(package))
or
exists(JS::PackageJson json | json.getPackageName() = package)
or
package = "global"
or
any(DataFlow::SourceNode sn).hasUnderlyingType(package, _)
package = any(JS::Import imp).getImportedPath().getValue()
or
any(JS::TypeName t).hasQualifiedName(package, _)
or
any(JS::TypeAnnotation t).hasQualifiedName(package, _)
or
exists(JS::PackageJson json | json.getPackageName() = package)
}
bindingset[type]

View File

@@ -0,0 +1,365 @@
/**
* Provides classes for working with basic blocks, and predicates for computing
* liveness information for local variables.
*/
import javascript
private import semmle.javascript.internal.StmtContainers
private import semmle.javascript.internal.CachedStages
/**
 * Holds if `nd` is the first node of a basic block: it is a join node, it
 * follows a branch node, or it has successors but no predecessors.
 */
private predicate startsBB(ControlFlowNode nd) {
  nd.isJoin()
  or
  nd.getAPredecessor().isBranch()
  or
  exists(nd.getASuccessor()) and
  not exists(nd.getAPredecessor())
}
/**
 * Holds if the last node of basic block `bb` has the first node of basic
 * block `succ` as a control flow successor.
 */
private predicate succBB(BasicBlock bb, BasicBlock succ) {
  bb.getLastNode().getASuccessor() = succ
}
/**
 * Holds if basic block `pre` precedes basic block `bb`, that is, `succBB(pre, bb)`
 * holds. This is `succBB` with its arguments swapped.
 */
private predicate predBB(BasicBlock bb, BasicBlock pre) {
  succBB(pre, bb)
}
/** Holds if the first node of `bb` is a control flow entry node. */
private predicate entryBB(BasicBlock bb) {
  exists(ControlFlowEntryNode entry | entry = bb.getFirstNode())
}
/** Holds if the last node of `bb` is a control flow exit node. */
private predicate exitBB(BasicBlock bb) {
  exists(ControlFlowExitNode exit | exit = bb.getLastNode())
}
/**
 * The cached predicates underlying `BasicBlock`: node indexing, block length,
 * variable uses/definitions by index, and reachability from an entry block.
 */
cached
private module Cached {
/**
 * Holds if `succ` is a control flow successor of `nd` within the same basic block.
 */
private predicate intraBBSucc(ControlFlowNode nd, ControlFlowNode succ) {
succ = nd.getASuccessor() and
// a successor that itself starts a basic block belongs to the next block
not succ instanceof BasicBlock
}
/**
 * Holds if `nd` is the `i`th node in basic block `bb`.
 *
 * In other words, `i` is the shortest distance from a node `bb`
 * that starts a basic block to `nd` along the `intraBBSucc` relation.
 */
cached
predicate bbIndex(BasicBlock bb, ControlFlowNode nd, int i) =
shortestDistances(startsBB/1, intraBBSucc/2)(bb, nd, i)
/** Gets the number of control flow nodes in basic block `bb`. */
cached
int bbLength(BasicBlock bb) { result = strictcount(ControlFlowNode nd | bbIndex(bb, nd, _)) }
/** Holds if `u` is a use of variable `v` and is the `i`th node of basic block `bb`. */
cached
predicate useAt(BasicBlock bb, int i, Variable v, VarUse u) {
Stages::BasicBlocks::ref() and
v = u.getVariable() and
bbIndex(bb, u, i)
}
/**
 * Holds if `d` is a definition whose target binds variable `v` through a
 * variable reference `lhs`, and the definition of `v` is located at index `i`
 * of basic block `bb`.
 *
 * The index depends on where `lhs` occurs: if `lhs` is the whole target, it
 * is the index of `d`; if `lhs` is the value pattern of a property pattern,
 * it is the index of that property pattern; if `lhs` is the rest pattern of
 * an object pattern or an element of an array pattern, it is the index of
 * `lhs` itself.
 */
cached
predicate defAt(BasicBlock bb, int i, Variable v, VarDef d) {
exists(VarRef lhs |
lhs = d.getTarget().(BindingPattern).getABindingVarRef() and
v = lhs.getVariable()
|
lhs = d.getTarget() and
bbIndex(bb, d, i)
or
exists(PropertyPattern pp |
lhs = pp.getValuePattern() and
bbIndex(bb, pp, i)
)
or
exists(ObjectPattern op |
lhs = op.getRest() and
bbIndex(bb, lhs, i)
)
or
exists(ArrayPattern ap |
lhs = ap.getAnElement() and
bbIndex(bb, lhs, i)
)
)
}
/**
 * Holds if `bb` is an entry basic block or is reachable from one via `succBB`.
 */
cached
predicate reachableBB(BasicBlock bb) {
entryBB(bb)
or
exists(BasicBlock predBB | succBB(predBB, bb) | reachableBB(predBB))
}
}
private import Cached
/**
 * Gets the immediate dominator of `bb`.
 *
 * This instantiates the built-in `idominance` higher-order predicate with
 * `entryBB` as the entry predicate and `succBB` as the successor relation.
 */
cached
BasicBlock immediateDominator(BasicBlock bb) = idominance(entryBB/1, succBB/2)(_, result, bb)
/**
 * Gets the immediate post-dominator of `bb`.
 *
 * This instantiates the built-in `idominance` higher-order predicate on the
 * reversed graph: `exitBB` is the entry predicate and `predBB` the successor relation.
 */
cached
BasicBlock immediatePostDominator(BasicBlock bb) = idominance(exitBB/1, predBB/2)(_, result, bb)
import Public
module Public {
/**
 * A basic block, that is, a maximal straight-line sequence of control flow nodes
 * without branches or joins.
 *
 * At the database level, a basic block is represented by its first control flow node.
 */
class BasicBlock extends @cfg_node, NodeInStmtContainer {
cached
BasicBlock() { Stages::BasicBlocks::ref() and startsBB(this) }
/** Gets a basic block succeeding this one. */
BasicBlock getASuccessor() { succBB(this, result) }
/** Gets a basic block preceding this one. */
BasicBlock getAPredecessor() { result.getASuccessor() = this }
/** Gets a node in this block. */
ControlFlowNode getANode() { result = this.getNode(_) }
/** Gets the node at the given position in this block. */
ControlFlowNode getNode(int pos) { bbIndex(this, result, pos) }
/** Gets the first node in this block, which is the block itself. */
ControlFlowNode getFirstNode() { result = this }
/** Gets the last node in this block, that is, the node at index `length() - 1`. */
ControlFlowNode getLastNode() { result = this.getNode(this.length() - 1) }
/** Gets the length of this block, that is, the number of nodes it contains. */
int length() { result = bbLength(this) }
/** Holds if this basic block uses variable `v` in its `i`th node `u`. */
predicate useAt(int i, Variable v, VarUse u) { useAt(this, i, v, u) }
/** Holds if this basic block defines variable `v` in its `i`th node `d`. */
predicate defAt(int i, Variable v, VarDef d) { defAt(this, i, v, d) }
/**
 * Holds if `v` is live at entry to this basic block and `u` is a use of `v`
 * witnessing the liveness.
 *
 * In other words, `u` is a use of `v` that is reachable from the
 * entry node of this basic block without going through a redefinition
 * of `v`. The use `u` may either be in this basic block, or in another
 * basic block reachable from this one.
 */
predicate isLiveAtEntry(Variable v, VarUse u) {
// restrict `u` to be reachable from this basic block
u = this.getASuccessor*().getANode() and
(
// shortcut: if `v` is never defined, then it must be live
this.isDefinedInSameContainer(v)
implies
// otherwise, do full liveness computation
this.isLiveAtEntryImpl(v, u)
)
}
/**
 * Holds if `v` is live at entry to this basic block and `u` is a use of `v`
 * witnessing the liveness, where `v` is defined at least once in the enclosing
 * function or script.
 */
private predicate isLiveAtEntryImpl(Variable v, VarUse u) {
this.isLocallyLiveAtEntry(v, u)
or
this.isDefinedInSameContainer(v) and
not this.defAt(_, v, _) and
this.getASuccessor().isLiveAtEntryImpl(v, u)
}
/**
 * Holds if `v` is defined at least once in the function or script to which
 * this basic block belongs.
 */
private predicate isDefinedInSameContainer(Variable v) {
exists(VarDef def | def.getAVariable() = v and def.getContainer() = this.getContainer())
}
/**
 * Holds if `v` is a variable that is live at entry to this basic block.
 *
 * Note that this is equivalent to `bb.isLiveAtEntry(v, _)`, but may
 * be more efficient on large databases.
 */
predicate isLiveAtEntry(Variable v) {
this.isLocallyLiveAtEntry(v, _)
or
not this.defAt(_, v, _) and this.getASuccessor().isLiveAtEntry(v)
}
/**
 * Holds if local variable `v` is live at entry to this basic block and
 * `u` is a use of `v` witnessing the liveness.
 */
predicate localIsLiveAtEntry(LocalVariable v, VarUse u) {
this.isLocallyLiveAtEntry(v, u)
or
not this.defAt(_, v, _) and this.getASuccessor().localIsLiveAtEntry(v, u)
}
/**
 * Holds if local variable `v` is live at entry to this basic block.
 */
predicate localIsLiveAtEntry(LocalVariable v) {
this.isLocallyLiveAtEntry(v, _)
or
not this.defAt(_, v, _) and this.getASuccessor().localIsLiveAtEntry(v)
}
/**
 * Holds if `d` is a definition of `v` that is reachable from the beginning of
 * this basic block without going through a redefinition of `v`.
 */
predicate localMayBeOverwritten(LocalVariable v, VarDef d) {
this.isLocallyOverwritten(v, d)
or
not this.defAt(_, v, _) and this.getASuccessor().localMayBeOverwritten(v, d)
}
/**
 * Gets the next index after `i` in this basic block at which `v` is
 * defined or used, provided that `d` is a definition of `v` at index `i`.
 * If there are no further uses or definitions of `v` after `i`, the
 * result is the length of this basic block.
 */
private int nextDefOrUseAfter(PurelyLocalVariable v, int i, VarDef d) {
this.defAt(i, v, d) and
result =
min(int j |
// `this.length()` acts as a sentinel so that the minimum always exists
(this.defAt(j, v, _) or this.useAt(j, v, _) or j = this.length()) and
j > i
)
}
/**
 * Holds if `d` defines variable `v` at the `i`th node of this basic block, and
 * the definition is live, that is, the variable may be read after this
 * definition and before a re-definition.
 */
predicate localLiveDefAt(PurelyLocalVariable v, int i, VarDef d) {
exists(int j | j = this.nextDefOrUseAfter(v, i, d) |
this.useAt(j, v, _)
or
// no further use or definition in this block: defer to liveness in a successor
j = this.length() and this.getASuccessor().localIsLiveAtEntry(v)
)
}
/**
 * Holds if `u` is a use of `v` in this basic block, and there are
 * no definitions of `v` before it.
 */
private predicate isLocallyLiveAtEntry(Variable v, VarUse u) {
exists(int n | this.useAt(n, v, u) | not exists(int m | m < n | this.defAt(m, v, _)))
}
/**
 * Holds if `d` is a definition of `v` in this basic block, and there are
 * no other definitions of `v` before it.
 */
private predicate isLocallyOverwritten(Variable v, VarDef d) {
exists(int n | this.defAt(n, v, d) | not exists(int m | m < n | this.defAt(m, v, _)))
}
/**
 * Gets the basic block that immediately dominates this basic block.
 */
ReachableBasicBlock getImmediateDominator() { result = immediateDominator(this) }
/**
 * Holds if this is a basic block whose last node is an exit node.
 */
predicate isExitBlock() { exitBB(this) }
}
/**
 * A basic block whose first node is unreachable.
 */
class UnreachableBlock extends BasicBlock {
  UnreachableBlock() {
    exists(ControlFlowNode first | first = this.getFirstNode() | first.isUnreachable())
  }
}
/**
 * A basic block for which `entryBB` holds, that is, one whose first node
 * is a control flow entry node.
 */
class EntryBasicBlock extends BasicBlock {
  EntryBasicBlock() {
    entryBB(this)
  }
}
/**
 * A basic block that can be reached from an entry basic block by following
 * successor edges.
 */
class ReachableBasicBlock extends BasicBlock {
  ReachableBasicBlock() { reachableBB(this) }

  /**
   * Holds if this block is a strict dominator of `bb`: it can be reached from
   * `bb` by one or more immediate-dominator steps.
   */
  pragma[inline]
  predicate strictlyDominates(ReachableBasicBlock bb) { immediateDominator+(bb) = this }

  /**
   * Holds if this block dominates `bb`.
   *
   * The relation is reflexive: every reachable basic block dominates itself.
   */
  pragma[inline]
  predicate dominates(ReachableBasicBlock bb) { immediateDominator*(bb) = this }

  /**
   * Holds if this block is a strict post-dominator of `bb`: it can be reached
   * from `bb` by one or more immediate-post-dominator steps.
   */
  pragma[inline]
  predicate strictlyPostDominates(ReachableBasicBlock bb) { immediatePostDominator+(bb) = this }

  /**
   * Holds if this block post-dominates `bb`.
   *
   * The relation is reflexive: every reachable basic block post-dominates itself.
   */
  pragma[inline]
  predicate postDominates(ReachableBasicBlock bb) { immediatePostDominator*(bb) = this }
}
/**
 * A reachable basic block whose first node is a join node, that is, a block
 * with more than one predecessor.
 */
class ReachableJoinBlock extends ReachableBasicBlock {
  ReachableJoinBlock() { this.getFirstNode().isJoin() }

  /**
   * Holds if this basic block belongs to the dominance frontier of `b`, that is
   * `b` dominates a predecessor of this block, but not this block itself.
   *
   * Algorithm from Cooper et al., "A Simple, Fast Dominance Algorithm" (Figure 5),
   * who in turn attribute it to Ferrante et al., "The program dependence graph and
   * its use in optimization".
   */
  predicate inDominanceFrontierOf(ReachableBasicBlock b) {
    // In both cases `b` must not be the immediate dominator of this block.
    not b = this.getImmediateDominator() and
    (
      // Base case: `b` is a direct predecessor.
      b = this.getAPredecessor()
      or
      // Step: walk up the dominator tree from a block already known to qualify.
      exists(ReachableBasicBlock below |
        this.inDominanceFrontierOf(below) and
        b = below.getImmediateDominator()
      )
    )
  }
}
}

View File

@@ -25,6 +25,7 @@ private import StmtContainers
private import semmle.javascript.dataflow.internal.PreCallGraphStep
private import semmle.javascript.dataflow.internal.FlowSteps
private import semmle.javascript.dataflow.internal.AccessPaths
private import semmle.javascript.dataflow.internal.TaintTrackingPrivate as TaintTrackingPrivate
/**
* Contains a `cached module` for each stage.
@@ -106,6 +107,30 @@ module Stages {
}
}
/**
 * The part of data flow computed before flow summary nodes.
 */
cached
module EarlyDataFlowStage {
/**
 * Always holds.
 * Ensures that a predicate is evaluated as part of the early DataFlow stage.
 */
cached
predicate ref() { 1 = 1 }
/**
 * DO NOT USE!
 * Contains references to each predicate that uses the above `ref` predicate.
 */
cached
predicate backref() {
1 = 1
or
DataFlow::localFlowStep(_, _)
}
}
/**
* The `dataflow` stage.
*/
@@ -128,8 +153,6 @@ module Stages {
or
exists(AmdModule a)
or
DataFlow::localFlowStep(_, _)
or
exists(any(DataFlow::SourceNode s).getAPropertyReference("foo"))
or
exists(any(Expr e).getExceptionTarget())
@@ -322,19 +345,7 @@ module Stages {
or
any(RegExpTerm t).isUsedAsRegExp()
or
any(TaintTracking::AdditionalSanitizerGuardNode e).appliesTo(_)
}
cached
class DummySanitizer extends TaintTracking::AdditionalSanitizerGuardNode {
cached
DummySanitizer() { none() }
cached
override predicate appliesTo(TaintTracking::Configuration cfg) { none() }
cached
override predicate sanitizes(boolean outcome, Expr e) { none() }
TaintTrackingPrivate::defaultTaintSanitizer(_)
}
}
}

View File

@@ -0,0 +1,13 @@
private import AmbiguousCoreMethods
private import Arrays
private import AsyncAwait
private import ExceptionFlow
private import ForOfLoops
private import Generators
private import Iterators
private import JsonStringify
private import Maps
private import Promises
private import Sets
private import Strings
private import DynamicImportStep

View File

@@ -0,0 +1,151 @@
/**
* Contains flow summaries for methods with a name that can be found on more than one of the core types: Array, String, Map, Set, Promise.
*
* This is an overview of the ambiguous methods and the classes that contain them (not all of these require a flow summary):
* ```
* at: String, Array
* concat: String, Array
* includes: String, Array
* indexOf: String, Array
* lastIndexOf: String, Array
* slice: String, Array
* entries: Array, Map, Set
* forEach: Array, Map, Set
* keys: Array, Map, Set
* values: Array, Map, Set
* clear: Map, Set
* delete: Map, Set
* has: Map, Set
* ```
*
* (Promise is absent in the table above as there currently are no name clashes with Promise methods)
*/
private import javascript
private import semmle.javascript.dataflow.internal.DataFlowNode
private import semmle.javascript.dataflow.FlowSummary
private import FlowSummaryUtil
/**
 * A flow summary for calls to a method named `at`: array elements of the
 * receiver flow to the return value.
 */
class At extends SummarizedCallable {
  At() { this = "Array#at / String#at" }

  override InstanceCall getACallSimple() { result.getMethodName() = "at" }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    // Only the array case is modelled. There is deliberately no flow for String#at,
    // since we currently consider single-character extraction to be too restrictive.
    input = "Argument[this].ArrayElement" and
    output = "ReturnValue" and
    preservesValue = true
  }
}
/**
 * A flow summary for calls to a method named `concat`.
 *
 * Array elements of the receiver and of every argument flow to array elements
 * of the result (value-preserving); the receiver and arguments themselves
 * flow to the result as a non-value-preserving step.
 */
class Concat extends SummarizedCallable {
  Concat() { this = "Array#concat / String#concat" }

  override InstanceCall getACallSimple() { result.getMethodName() = "concat" }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    // non-value-preserving step from the receiver/arguments to the result
    input = "Argument[this,0..]" and
    output = "ReturnValue" and
    preservesValue = false
    or
    // element-wise value flow
    input = "Argument[this,0..].ArrayElement" and
    output = "ReturnValue.ArrayElement" and
    preservesValue = true
  }
}
/**
 * A flow summary for calls to a method named `slice`.
 *
 * Array elements of the receiver flow to array elements of the result
 * (value-preserving); the receiver itself flows to the result as a
 * non-value-preserving step.
 */
class Slice extends SummarizedCallable {
  Slice() { this = "Array#slice / String#slice" }

  override InstanceCall getACallSimple() { result.getMethodName() = "slice" }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    // non-value-preserving step from the receiver to the result
    input = "Argument[this]" and
    output = "ReturnValue" and
    preservesValue = false
    or
    // element-wise value flow
    input = "Argument[this].ArrayElement" and
    output = "ReturnValue.ArrayElement" and
    preservesValue = true
  }
}
/**
 * A flow summary for zero-argument calls to a method named `entries`.
 *
 * The result is modelled as an iterator of pairs: map keys and set elements
 * flow to `Member[0]` of each pair, while array elements, set elements and map
 * values flow to `Member[1]`.
 */
class Entries extends SummarizedCallable {
  Entries() { this = "Array#entries / Map#entries / Set#entries" }

  // The call is identified purely by method name and arity, so it is exposed via
  // `getACallSimple()`, like the `Keys` and `Values` summaries in this file.
  override InstanceCall getACallSimple() {
    result.getMethodName() = "entries" and
    result.getNumArgument() = 0
  }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    preservesValue = true and
    (
      input = "Argument[this]." + ["MapKey", "SetElement"] and
      output = "ReturnValue.IteratorElement.Member[0]"
      or
      input = "Argument[this]." + ["ArrayElement", "SetElement", "MapValue"] and
      output = "ReturnValue.IteratorElement.Member[1]"
    )
  }
}
/**
 * A flow summary for calls to a method named `forEach`, describing the flow
 * from the receiver and the `thisArg` argument into the callback.
 */
class ForEach extends SummarizedCallable {
  ForEach() { this = "Array#forEach / Map#forEach / Set#forEach" }

  override InstanceCall getACallSimple() { result.getMethodName() = "forEach" }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    // Shape of the modelled call:
    //   array.forEach(callbackfn, thisArg)
    //   callbackfn(value, index, array)
    (
      // thisArg becomes the receiver of the callback
      input = "Argument[1]" and
      output = "Argument[0].Parameter[this]"
      or
      // the object being iterated over is passed as the third callback parameter
      input = "Argument[this]" and
      output = "Argument[0].Parameter[2]"
      or
      // map keys and set elements are passed as the second callback parameter
      input = "Argument[this]." + ["MapKey", "SetElement"] and
      output = "Argument[0].Parameter[1]"
      or
      // array elements, set elements and map values are passed as the first callback parameter
      input = "Argument[this]." + ["ArrayElement", "SetElement", "MapValue"] and
      output = "Argument[0].Parameter[0]"
    ) and
    preservesValue = true
  }
}
/**
 * A flow summary for zero-argument calls to a method named `keys`: map keys
 * and set elements of the receiver flow to the elements of the returned iterator.
 */
class Keys extends SummarizedCallable {
  Keys() { this = "Array#keys / Map#keys / Set#keys" }

  override InstanceCall getACallSimple() {
    result.getNumArgument() = 0 and
    result.getMethodName() = "keys"
  }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    input = "Argument[this]." + ["MapKey", "SetElement"] and
    output = "ReturnValue.IteratorElement" and
    preservesValue = true
  }
}
/**
 * A flow summary for zero-argument calls to a method named `values`: array
 * elements, set elements and map values of the receiver flow to the elements
 * of the returned iterator.
 */
class Values extends SummarizedCallable {
  Values() { this = "Array#values / Map#values / Set#values" }

  override InstanceCall getACallSimple() {
    result.getNumArgument() = 0 and
    result.getMethodName() = "values"
  }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    input = "Argument[this]." + ["ArrayElement", "SetElement", "MapValue"] and
    output = "ReturnValue.IteratorElement" and
    preservesValue = true
  }
}

View File

@@ -0,0 +1,593 @@
/**
* Contains a summary for relevant methods on arrays.
*
* Note that some Array methods are modelled in `AmbiguousCoreMethods.qll`, and `toString` is special-cased elsewhere.
*/
private import javascript
private import semmle.javascript.dataflow.FlowSummary
private import semmle.javascript.dataflow.InferredTypes
private import semmle.javascript.dataflow.internal.DataFlowPrivate as Private
private import FlowSummaryUtil
/** Gets a reference to the global variable `Array`. */
pragma[nomagic]
DataFlow::SourceNode arrayConstructorRef() {
  DataFlow::globalVarRef("Array") = result
}
/**
 * Gets the smallest index at which `expr` has a spread element.
 * Has no result if `expr` contains no spread element.
 */
pragma[nomagic]
private int firstSpreadIndex(ArrayExpr expr) {
  result = min(int idx | exists(SpreadElement spread | spread = expr.getElement(idx)))
}
/**
 * Store and read steps for array literals. Literals are not seen as calls, so
 * these are expressed as additional flow steps rather than as a flow summary.
 *
 * For spread elements, as in `[x, ...y]`, there is a read step `y -> ...y`
 * followed by a store step from `...y` into the array literal (to ensure
 * constant indices get broken up).
 */
class ArrayLiteralStep extends DataFlow::AdditionalFlowStep {
  override predicate storeStep(
    DataFlow::Node pred, DataFlow::ContentSet contents, DataFlow::Node succ
  ) {
    exists(ArrayExpr literal, int index |
      succ = literal.flow() and
      pred = literal.getElement(index).flow() and
      (
        if index >= firstSpreadIndex(literal)
        then
          // at or after a spread operator, store into unknown indices
          contents = DataFlow::ContentSet::arrayElement()
        else contents = DataFlow::ContentSet::arrayElementFromInt(index)
      )
    )
  }

  override predicate readStep(
    DataFlow::Node pred, DataFlow::ContentSet contents, DataFlow::Node succ
  ) {
    exists(SpreadElement spread, ArrayExpr literal |
      spread = literal.getAnElement() and
      contents = DataFlow::ContentSet::arrayElement() and
      succ = spread.flow() and
      pred = spread.getOperand().flow()
    )
  }
}
/**
 * Holds if `v` is declared by the initializer of a `for` loop, either through
 * a declaration statement or through a declaration that is itself the initializer.
 */
pragma[nomagic]
private predicate isForLoopVariable(Variable v) {
  exists(ForStmt loop |
    // Handle the somewhat rare case: `for (v; ...; ++v) { ... }`
    loop.getInit() = v.getADeclaration()
    or
    loop.getInit() = v.getADeclarationStatement()
  )
}
/**
 * Holds if `e` is likely to be used as an array index: either it is a
 * property access named `length`, or it is an access to a for-loop variable
 * whose only inferred type is number.
 */
private predicate isLikelyArrayIndex(Expr e) {
  e.(PropAccess).getPropertyName() = "length"
  or
  // Require that 'e' is of type number and refers to a for-loop variable.
  // TODO: This is here to mirror the old behaviour. Experiment with turning the 'and' into an 'or'.
  isForLoopVariable(e.(VarAccess).getVariable()) and
  unique(InferredType type | type = e.flow().analyze().getAType()) = TTNumber()
}
/**
 * A dynamic property write `obj[e] = rhs`, treated as a potential store into
 * an array element.
 *
 * This is restricted to cases where `e` is likely to be an array index, since
 * propagating data between arbitrary unknown property accesses is too imprecise.
 */
class DynamicArrayStoreStep extends DataFlow::AdditionalFlowStep {
  override predicate storeStep(
    DataFlow::Node pred, DataFlow::ContentSet contents, DataFlow::Node succ
  ) {
    exists(Assignment assignment, IndexExpr target |
      target = assignment.getLhs() and
      // only truly dynamic accesses: the property name must not be known
      not exists(target.getPropertyName()) and
      isLikelyArrayIndex(target.getPropertyNameExpr()) and
      // the store targets the post-update node of the base object
      succ.(DataFlow::ExprPostUpdateNode).getPreUpdateNode() = target.getBase().flow() and
      contents = DataFlow::ContentSet::arrayElement() and
      (
        // for compound assignments, use the result of the operator
        pred = assignment.(CompoundAssignExpr).flow()
        or
        pred = assignment.getRhs().flow()
      )
    )
  }
}
/**
 * A flow summary for invocations of the `Array` constructor: every argument
 * flows to an array element of the result (value-preserving) and to the
 * result itself (non-value-preserving).
 */
class ArrayConstructorSummary extends SummarizedCallable {
  ArrayConstructorSummary() { this = "Array constructor" }

  override DataFlow::InvokeNode getACallSimple() {
    arrayConstructorRef().getAnInvocation() = result
  }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    input = "Argument[0..]" and
    (
      output = "ReturnValue.ArrayElement" and
      preservesValue = true
      or
      output = "ReturnValue" and
      preservesValue = false
    )
  }
}
/**
 * A call to a method named `join` with zero or one arguments, that is, with or
 * without an explicit separator argument.
 *
 * Array elements of the receiver flow to the return value as a
 * non-value-preserving step.
 *
 * NOTE(review): this comment previously stated that only calls with a separator are
 * covered here and that calls without separators are modelled in
 * `StringConcatenation.qll`, but zero-argument calls are matched below as well.
 * Confirm whether the overlap is intended.
 */
class Join extends SummarizedCallable {
Join() { this = "Array#join" }
override InstanceCall getACallSimple() {
result.getMethodName() = "join" and
result.getNumArgument() = [0, 1]
}
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
preservesValue = false and
input = "Argument[this].ArrayElement" and
output = "ReturnValue"
}
}
/**
 * A flow summary for calls to a method named `copyWithin`: the array elements
 * of the receiver are carried over to the return value, and the receiver also
 * flows to the return value as a non-value-preserving step.
 */
class CopyWithin extends SummarizedCallable {
  CopyWithin() { this = "Array#copyWithin" }

  override InstanceCall getACallSimple() { result.getMethodName() = "copyWithin" }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    output = "ReturnValue" and
    (
      // Explicitly add a taint step since WithArrayElement is not implicitly converted to a taint step
      input = "Argument[this]" and
      preservesValue = false
      or
      input = "Argument[this].WithArrayElement" and
      preservesValue = true
    )
  }
}
/**
 * A flow summary for calls to the methods `every`, `findIndex`, `findLastIndex`
 * and `some`, which only have flow into the callback: array elements of the
 * receiver flow to the first callback parameter, and the second argument flows
 * to the callback's receiver.
 */
class FlowIntoCallback extends SummarizedCallable {
  FlowIntoCallback() { this = "Array method with flow into callback" }

  override InstanceCall getACallSimple() {
    result.getMethodName() = ["every", "findIndex", "findLastIndex", "some"]
  }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    (
      input = "Argument[1]" and
      output = "Argument[0].Parameter[this]"
      or
      input = "Argument[this].ArrayElement" and
      output = "Argument[0].Parameter[0]"
    ) and
    preservesValue = true
  }
}
/**
 * A flow summary for calls to a method named `filter`.
 *
 * Array elements of the receiver flow to the first callback parameter and are
 * carried over to the result; the second argument flows to the callback's
 * receiver; and the receiver flows to the result as a non-value-preserving step.
 */
class Filter extends SummarizedCallable {
  Filter() { this = "Array#filter" }

  override InstanceCall getACallSimple() { result.getMethodName() = "filter" }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    // Explicitly add a taint step since WithArrayElement is not implicitly converted to a taint step
    input = "Argument[this]" and
    output = "ReturnValue" and
    preservesValue = false
    or
    preservesValue = true and
    (
      // Note: in case the filter condition acts as a barrier/sanitizer,
      // it is up to the query to mark the 'filter' call as a barrier/sanitizer
      input = "Argument[this].WithArrayElement" and
      output = "ReturnValue"
      or
      input = "Argument[1]" and
      output = "Argument[0].Parameter[this]"
      or
      input = "Argument[this].ArrayElement" and
      output = "Argument[0].Parameter[0]"
    )
  }
}
/**
 * A flow summary for calls to a method named `fill`: the fill value flows to
 * the array elements of both the receiver and the return value.
 */
class Fill extends SummarizedCallable {
  Fill() { this = "Array#fill" } // TODO: clear contents if no interval is given

  override InstanceCall getACallSimple() { result.getMethodName() = "fill" }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    preservesValue = true and
    // The signature is `fill(value, start, end)`: only the first argument is stored
    // in the array; `start` and `end` are indices and must not flow into the elements.
    input = "Argument[0]" and
    output = ["ReturnValue.ArrayElement", "Argument[this].ArrayElement"]
  }
}
/**
 * A flow summary for calls to the methods `find` and `findLast`: array
 * elements of the receiver flow to the first callback parameter and to the
 * return value, and the second argument flows to the callback's receiver.
 */
class FindLike extends SummarizedCallable {
  FindLike() { this = "Array#find / Array#findLast" }

  override InstanceCall getACallSimple() { result.getMethodName() = ["find", "findLast"] }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    (
      input = "Argument[1]" and
      output = "Argument[0].Parameter[this]"
      or
      input = "Argument[this].ArrayElement" and
      output = ["Argument[0].Parameter[0]", "ReturnValue"]
    ) and
    preservesValue = true
  }
}
/**
 * A flow summary for calls to the modules `array.prototype.find` and
 * `array-find`. It mirrors `FindLike` with all positions shifted by one: the
 * array is the first argument, the callback the second, and the third argument
 * flows to the callback's receiver.
 */
class FindLibrary extends SummarizedCallable {
  FindLibrary() { this = "'array.prototype.find' / 'array-find'" }

  override DataFlow::CallNode getACallSimple() {
    DataFlow::moduleImport(["array.prototype.find", "array-find"]).getACall() = result
  }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    (
      input = "Argument[2]" and
      output = "Argument[1].Parameter[this]"
      or
      input = "Argument[0].ArrayElement" and
      output = ["Argument[1].Parameter[0]", "ReturnValue"]
    ) and
    preservesValue = true
  }
}
/**
 * A flow summary for calls to a method named `flat` with a known depth.
 *
 * There is one instance of this class per depth in the range 1 to 3.
 */
class Flat extends SummarizedCallable {
// The flattening depth modelled by this instance; bound by the characteristic predicate.
private int depth;
Flat() { this = "Array#flat(" + depth + ")" and depth in [1 .. 3] }
/**
 * Gets a `flat` call with exactly one argument whose integer value is `depth`,
 * or, for depth 1, a `flat` call without arguments.
 */
override InstanceCall getACallSimple() {
result.getMethodName() = "flat" and
(
result.getNumArgument() = 1 and
result.getArgument(0).getIntValue() = depth
or
depth = 1 and
result.getNumArgument() = 0
)
}
/**
 * Holds if values flow from `input` to array elements of the return value.
 *
 * `input` is `Argument[this]` followed by `depth + 1` copies of `.ArrayElement`,
 * or, for each `partialDepth` from 1 to `depth - 1`, `Argument[this]` followed by
 * `partialDepth + 1` copies of `.ArrayElement` and a final `.WithoutArrayElement`.
 */
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
preservesValue = true and
(
// `n` ranges over `depth + 1` values, so `.ArrayElement` is repeated `depth + 1` times
input = "Argument[this]" + concat(int n | n in [0 .. depth] | ".ArrayElement")
or
exists(int partialDepth | partialDepth in [1 .. depth - 1] |
input =
"Argument[this]" + concat(int n | n in [0 .. partialDepth] | ".ArrayElement") +
".WithoutArrayElement"
)
) and
output = "ReturnValue.ArrayElement"
}
}
/**
 * A flow summary for calls to a method named `flatMap`.
 *
 * The modelled call has the shape:
 * ```
 * array.flatMap(callbackFn, thisArg)
 * callbackFn(element, index, array)
 * ```
 */
class FlatMap extends SummarizedCallable {
  FlatMap() { this = "Array#flatMap" }

  override InstanceCall getACallSimple() { result.getMethodName() = "flatMap" }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    preservesValue = true and
    (
      // array elements are passed as the first callback parameter
      input = "Argument[this].ArrayElement" and
      output = "Argument[0].Parameter[0]"
      or
      // the array being traversed is passed as the third callback parameter
      input = "Argument[this]" and
      output = "Argument[0].Parameter[2]"
      or
      // `thisArg` becomes the receiver of the callback; the second callback
      // parameter is the element index, which receives no flow
      input = "Argument[1]" and
      output = "Argument[0].Parameter[this]"
      or
      // the callback's result is flattened by one level into the returned array
      input = "Argument[0].ReturnValue." + ["ArrayElement", "WithoutArrayElement"] and
      output = "ReturnValue.ArrayElement"
    )
  }
}
/**
 * Gets a call to the `from` or `fromAsync` member of the array constructor,
 * or a call to the function imported from the `array-from` package.
 */
private DataFlow::CallNode arrayFromCall() {
  result = DataFlow::moduleImport("array-from").getACall()
  or
  // TODO: update fromAsync model when async iterators are supported
  exists(string memberName | memberName = ["from", "fromAsync"] |
    result = arrayConstructorRef().getAMemberCall(memberName)
  )
}
/**
 * A flow summary for `Array.from`-like calls (see `arrayFromCall`) that pass exactly one argument.
 */
class From1Arg extends SummarizedCallable {
  From1Arg() { this = "Array.from(arg)" }

  override DataFlow::CallNode getACallSimple() {
    result.getNumArgument() = 1 and
    result = arrayFromCall()
  }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    // Explicitly add a taint step since WithArrayElement is not implicitly converted to a taint step
    input = "Argument[0]" and
    output = "ReturnValue" and
    preservesValue = false
    or
    preservesValue = true and
    (
      // Errors raised by the iterator surface as an exception from the call
      input = "Argument[0].IteratorError" and
      output = "ReturnValue[exception]"
      or
      // Map entries become two-element members of the resulting array's elements
      input = "Argument[0].MapValue" and
      output = "ReturnValue.ArrayElement.Member[1]"
      or
      input = "Argument[0].MapKey" and
      output = "ReturnValue.ArrayElement.Member[0]"
      or
      // Set and iterator elements become array elements
      input = "Argument[0]." + ["SetElement", "IteratorElement"] and
      output = "ReturnValue.ArrayElement"
      or
      // Array elements are carried over to the return value
      input = "Argument[0].WithArrayElement" and
      output = "ReturnValue"
    )
  }
}
/**
 * A flow summary for `Array.from`-like calls (see `arrayFromCall`) that pass more than one argument,
 * that is, calls of the form `Array.from(arg, callback, [thisArg])`.
 */
class FromManyArg extends SummarizedCallable {
  FromManyArg() { this = "Array.from(arg, callback, [thisArg])" }

  override DataFlow::CallNode getACallSimple() {
    result.getNumArgument() > 1 and
    result = arrayFromCall()
  }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    preservesValue = true and
    (
      // Errors raised by the iterator surface as an exception from the call
      input = "Argument[0].IteratorError" and
      output = "ReturnValue[exception]"
      or
      // The third argument becomes the receiver of the callback
      input = "Argument[2]" and
      output = "Argument[1].Parameter[this]"
      or
      // Whatever the callback returns becomes an element of the result
      input = "Argument[1].ReturnValue" and
      output = "ReturnValue.ArrayElement"
      or
      // Map entries reach the callback as members 0 (key) and 1 (value) of its first parameter
      input = "Argument[0].MapValue" and
      output = "Argument[1].Parameter[0].Member[1]"
      or
      input = "Argument[0].MapKey" and
      output = "Argument[1].Parameter[0].Member[0]"
      or
      // Array, set and iterator elements reach the callback's first parameter
      input = "Argument[0]." + ["ArrayElement", "SetElement", "IteratorElement"] and
      output = "Argument[1].Parameter[0]"
    )
  }
}
/**
 * A flow summary for `Array#map(callback, [thisArg])`, matched purely by the method name `map`.
 */
class Map extends SummarizedCallable {
  Map() { this = "Array#map" }

  override InstanceCall getACallSimple() {
    // This also matches unrelated library methods called `map`, such as those from lodash/underscore.
    // That does not introduce spurious flow: for those functions the first argument is an array
    // rather than a callback, and every part of the summary below uses Argument[0] in a way that
    // requires it to be a callback.
    result.getMethodName() = "map"
  }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    preservesValue = true and
    (
      // Whatever the callback returns becomes an element of the result
      input = "Argument[0].ReturnValue" and
      output = "ReturnValue.ArrayElement"
      or
      // The second argument becomes the receiver of the callback
      input = "Argument[1]" and
      output = "Argument[0].Parameter[this]"
      or
      // The array itself is passed as the third callback parameter
      input = "Argument[this]" and
      output = "Argument[0].Parameter[2]"
      or
      // Each element is passed as the first callback parameter
      input = "Argument[this].ArrayElement" and
      output = "Argument[0].Parameter[0]"
    )
  }
}
/**
 * A flow summary for `Array.of`: every argument becomes an element of the returned array.
 */
class Of extends SummarizedCallable {
  Of() { this = "Array.of" }

  override DataFlow::CallNode getACallSimple() {
    exists(string memberName | memberName = "of" |
      result = arrayConstructorRef().getAMemberCall(memberName)
    )
  }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    output = "ReturnValue.ArrayElement" and
    input = "Argument[0..]" and
    preservesValue = true
  }
}
/**
 * A flow summary for `Array#pop`: an element of the receiver flows to the return value.
 */
class Pop extends SummarizedCallable {
  Pop() { this = "Array#pop" }

  override InstanceCall getACallSimple() {
    exists(string methodName | methodName = "pop" | result.getMethodName() = methodName)
  }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    output = "ReturnValue" and
    input = "Argument[this].ArrayElement" and
    preservesValue = true
  }
}
/**
 * A flow summary for `Array#push` and `Array#unshift`: every argument is stored
 * as an element of the receiver.
 */
class PushLike extends SummarizedCallable {
  PushLike() { this = "Array#push / Array#unshift" }

  override InstanceCall getACallSimple() {
    exists(string methodName | methodName = ["push", "unshift"] |
      result.getMethodName() = methodName
    )
  }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    output = "Argument[this].ArrayElement" and
    input = "Argument[0..]" and
    preservesValue = true
  }
}
/**
 * A flow summary for `Array#reduce` and `Array#reduceRight`.
 *
 * Signatures assumed by the summary:
 * - `reduce(callbackFn, [initialValue])`
 * - `callbackFn(accumulator, currentValue, index, array)`
 */
class ReduceLike extends SummarizedCallable {
  ReduceLike() { this = "Array#reduce / Array#reduceRight" }

  override InstanceCall getACallSimple() {
    exists(string methodName | methodName = ["reduce", "reduceRight"] |
      result.getMethodName() = methodName
    )
  }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    preservesValue = true and
    (
      // The final callback result is the result of the call
      input = "Argument[0].ReturnValue" and
      output = "ReturnValue"
      or
      // array
      input = "Argument[this]" and
      output = "Argument[0].Parameter[3]"
      or
      // currentValue
      input = "Argument[this].ArrayElement" and
      output = "Argument[0].Parameter[1]"
      or
      // accumulator: seeded by the initial value, then fed by each callback result
      input = ["Argument[1]", "Argument[0].ReturnValue"] and
      output = "Argument[0].Parameter[0]"
    )
  }
}
/**
 * A flow summary for `Array#reverse` and `Array#toReversed`: elements of the receiver
 * flow to elements of the returned array.
 */
class Reverse extends SummarizedCallable {
  Reverse() { this = "Array#reverse / Array#toReversed" }

  override InstanceCall getACallSimple() {
    exists(string methodName | methodName = ["reverse", "toReversed"] |
      result.getMethodName() = methodName
    )
  }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    output = "ReturnValue.ArrayElement" and
    input = "Argument[this].ArrayElement" and
    preservesValue = true
  }
}
/**
 * A flow summary for `Array#shift`: the element at index 0 of the receiver flows to
 * the return value, and the receiver as a whole taints the return value.
 */
class Shift extends SummarizedCallable {
  Shift() { this = "Array#shift" }

  override InstanceCall getACallSimple() {
    exists(string methodName | methodName = "shift" | result.getMethodName() = methodName)
  }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    // ArrayElement[0] in the value-preserving summary below is not automatically converted to a
    // taint step, so manually add one from the array to the return value.
    input = "Argument[this]" and
    output = "ReturnValue" and
    preservesValue = false
    or
    input = "Argument[this].ArrayElement[0]" and
    output = "ReturnValue" and
    preservesValue = true
  }
}
/**
 * A flow summary for `Array#sort` and `Array#toSorted`: elements of the receiver flow
 * to elements of the result and to both parameters of the callback passed as the first argument.
 */
class Sort extends SummarizedCallable {
  Sort() { this = "Array#sort / Array#toSorted" }

  override InstanceCall getACallSimple() {
    exists(string methodName | methodName = ["sort", "toSorted"] |
      result.getMethodName() = methodName
    )
  }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    preservesValue = true and
    input = "Argument[this].ArrayElement" and
    (
      // Elements are handed to the callback in either parameter position
      output = "Argument[0].Parameter[0,1]"
      or
      // Elements are retained in the returned array
      output = "ReturnValue.ArrayElement"
    )
  }
}
/**
 * A flow summary for `Array#splice`.
 *
 * Elements of the receiver flow to elements of the return value, and arguments from
 * index 2 onwards flow to elements of both the receiver and the return value.
 */
class Splice extends SummarizedCallable {
  Splice() { this = "Array#splice" }

  override InstanceCall getACallSimple() {
    exists(string methodName | methodName = "splice" | result.getMethodName() = methodName)
  }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    preservesValue = true and
    (
      // NOTE(review): inserted items are also modelled as elements of the return value, although
      // `splice` returns only the removed elements - confirm this over-approximation is intended.
      input = "Argument[2..]" and
      output = ["Argument[this].ArrayElement", "ReturnValue.ArrayElement"]
      or
      input = "Argument[this].ArrayElement" and
      output = "ReturnValue.ArrayElement"
    )
  }
}
/**
 * A flow summary for `Array#toSpliced`: elements of the receiver, and arguments from
 * index 2 onwards, flow to elements of the returned array.
 */
class ToSpliced extends SummarizedCallable {
  ToSpliced() { this = "Array#toSpliced" }

  override InstanceCall getACallSimple() {
    exists(string methodName | methodName = "toSpliced" | result.getMethodName() = methodName)
  }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    preservesValue = true and
    output = "ReturnValue.ArrayElement" and
    (
      input = "Argument[2..]"
      or
      input = "Argument[this].ArrayElement"
    )
  }
}
/**
 * A flow summary for the functions exported by the `arrify` and `array-ify` packages.
 *
 * Array elements of the argument are carried over to the return value, and anything else
 * in the argument is stored as an element of the return value.
 */
class ArrayCoercionPackage extends FunctionalPackageSummary {
  ArrayCoercionPackage() { this = "ArrayCoercionPackage" }

  override string getAPackageName() { result = ["arrify", "array-ify"] }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    // Explicitly add a taint step since WithArrayElement is not implicitly converted to a taint step
    input = "Argument[0]" and
    output = "ReturnValue" and
    preservesValue = false
    or
    preservesValue = true and
    (
      input = "Argument[0].WithoutArrayElement" and
      output = "ReturnValue.ArrayElement"
      or
      input = "Argument[0].WithArrayElement" and
      output = "ReturnValue"
    )
  }
}
/**
 * A flow summary for the functions exported by the `array-union`, `array-uniq` and `uniq`
 * packages: array elements of any argument flow to elements of the return value.
 */
class ArrayCopyingPackage extends FunctionalPackageSummary {
  ArrayCopyingPackage() { this = "ArrayCopyingPackage" }

  override string getAPackageName() { result = ["array-union", "array-uniq", "uniq"] }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    output = "ReturnValue.ArrayElement" and
    input = "Argument[0..].ArrayElement" and
    preservesValue = true
  }
}
/**
 * A flow summary for the functions exported by the `array-flatten`, `arr-flatten`, `flatten`
 * and `array.prototype.flat` packages, currently modelled only as a taint step from any
 * argument to the return value.
 */
class ArrayFlatteningPackage extends FunctionalPackageSummary {
  ArrayFlatteningPackage() { this = "ArrayFlatteningPackage" }

  override string getAPackageName() {
    result = ["array-flatten", "arr-flatten", "flatten", "array.prototype.flat"]
  }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    // TODO: properly support these. For the moment we're just adding parity with the old model
    output = "ReturnValue" and
    input = "Argument[0..]" and
    preservesValue = false
  }
}

View File

@@ -0,0 +1,104 @@
/**
* Contains flow steps to model flow through `async` functions and the `await` operator.
*/
private import javascript
private import semmle.javascript.dataflow.internal.DataFlowNode
private import semmle.javascript.dataflow.internal.AdditionalFlowInternal
private import semmle.javascript.dataflow.internal.DataFlowPrivate
/**
 * Flow steps for `async` functions and `await` expressions.
 *
 * Promise coercion and flattening work as follows:
 * - `await` leaves non-promise values untouched, e.g. `await "foo"` is just `"foo"`.
 * - `return` leaves existing promise values untouched, and boxes other values in a promise.
 *
 * `expectsContent` and `clearsContent` are used to handle coercion/flattening without risk of
 * creating a nested promise object.
 *
 * Overview of the generated steps:
 * ```js
 * async function foo() {
 *   await x; // x --- READ[promise-value] ---> await x
 *   await x; // x --- VALUE -----------------> await x (has clearsContent)
 *   await x; // x --- READ[promise-error] ---> exception target
 *
 *   return x; // x --- VALUE --> return node (has expectsContent)
 *   return x; // x --- VALUE --> synthetic node (clearsContent) --- STORE[promise-value] --> return node
 *
 *   // exceptional return node --> STORE[promise-error] --> return node
 * }
 * ```
 */
class AsyncAwait extends AdditionalFlowInternal {
  override predicate needsSynthesizedNode(AstNode node, string tag, DataFlowCallable container) {
    // Each async function gets a synthetic clearsContent node holding the values that must be
    // boxed in a promise before being returned.
    exists(Function asyncFunction |
      asyncFunction = node and
      asyncFunction.isAsync() and
      container.asSourceCallable() = asyncFunction
    ) and
    tag = "async-raw-return"
  }

  override predicate clearsContent(DataFlow::Node node, DataFlow::ContentSet contents) {
    contents = DataFlow::ContentSet::promiseFilter() and
    (
      // The result of 'await' cannot be a promise. This is needed for the local flow step into 'await'
      node.asExpr() instanceof AwaitExpr
      or
      // The raw return node only holds values that still need boxing
      node = getSynthesizedNode(_, "async-raw-return")
    )
  }

  override predicate expectsContent(DataFlow::Node node, DataFlow::ContentSet contents) {
    // The final return value must be a promise. This is needed for the local flow step into the return node.
    exists(Function asyncFunction | asyncFunction.isAsync() |
      contents = DataFlow::ContentSet::promiseFilter() and
      node = TFunctionReturnNode(asyncFunction)
    )
  }

  override predicate step(DataFlow::Node pred, DataFlow::Node succ) {
    exists(Function asyncFunction |
      // To avoid creating a nested promise, flow to two different nodes which only permit
      // promises/non-promises respectively
      pred = asyncFunction.getAReturnedExpr().flow() and
      asyncFunction.isAsync()
    |
      succ = TFunctionReturnNode(asyncFunction) // expects promise-content
      or
      succ = getSynthesizedNode(asyncFunction, "async-raw-return") // clears promise-content
    )
    or
    exists(AwaitExpr awaitExpr |
      // Allow non-promise values to propagate through await.
      succ = awaitExpr.flow() and // clears promise-content
      pred = awaitExpr.getOperand().flow()
    )
  }

  override predicate readStep(
    DataFlow::Node pred, DataFlow::ContentSet contents, DataFlow::Node succ
  ) {
    exists(AwaitExpr awaitExpr | pred = awaitExpr.getOperand().flow() |
      // A rejected promise raises its error at the await expression
      succ = awaitExpr.getExceptionTarget() and
      contents = DataFlow::ContentSet::promiseError()
      or
      // A resolved promise yields its value as the result of the await expression
      succ = awaitExpr.flow() and
      contents = DataFlow::ContentSet::promiseValue()
    )
  }

  override predicate storeStep(
    DataFlow::Node pred, DataFlow::ContentSet contents, DataFlow::Node succ
  ) {
    exists(Function asyncFunction |
      asyncFunction.isAsync() and
      succ = TFunctionReturnNode(asyncFunction)
    |
      // Store thrown exceptions in promise-error
      contents = DataFlow::ContentSet::promiseError() and
      pred = TExceptionalFunctionReturnNode(asyncFunction)
      or
      // Box returned non-promise values in a promise
      contents = DataFlow::ContentSet::promiseValue() and
      pred = getSynthesizedNode(asyncFunction, "async-raw-return")
    )
  }
}

View File

@@ -0,0 +1,39 @@
/**
* Contains flow steps to model flow from a module into a dynamic `import()` expression.
*/
private import javascript
private import semmle.javascript.dataflow.internal.DataFlowNode
private import semmle.javascript.dataflow.internal.AdditionalFlowInternal
private import semmle.javascript.dataflow.internal.DataFlowPrivate
/**
 * Flow steps for dynamic `import()` expressions.
 *
 * The default export of the imported module has to be boxed in a promise, so it is
 * routed through a synthetic node before being stored into the promise value of the
 * `import()` expression.
 */
class DynamicImportStep extends AdditionalFlowInternal {
  override predicate needsSynthesizedNode(AstNode node, string tag, DataFlowCallable container) {
    tag = "imported-value" and
    container.asSourceCallable() = node.getContainer() and
    node instanceof DynamicImportExpr
  }

  override predicate jumpStep(DataFlow::Node pred, DataFlow::Node succ) {
    // From the default export of the imported module to the synthetic node
    exists(DynamicImportExpr importExpr |
      succ = getSynthesizedNode(importExpr, "imported-value") and
      pred = importExpr.getImportedModule().getAnExportedValue("default")
    )
  }

  override predicate storeStep(
    DataFlow::Node pred, DataFlow::ContentSet contents, DataFlow::Node succ
  ) {
    // From the synthetic node into the promise value of the `import()` expression
    exists(DynamicImportExpr importExpr |
      succ = TValueNode(importExpr) and
      contents = DataFlow::ContentSet::promiseValue() and
      pred = getSynthesizedNode(importExpr, "imported-value")
    )
  }
}

View File

@@ -0,0 +1,48 @@
/**
* Contains a summary for propagating exceptions out of callbacks
*/
private import javascript
private import FlowSummaryUtil
private import semmle.javascript.dataflow.internal.AdditionalFlowInternal
private import semmle.javascript.dataflow.internal.DataFlowPrivate
private import semmle.javascript.dataflow.FlowSummary
private import semmle.javascript.internal.flow_summaries.Promises
/**
 * Holds if `node` is a function node, a partial invocation node, or the target of an
 * argument-passing step whose source is a local use of another such node.
 */
private predicate isCallback(DataFlow::SourceNode node) {
  exists(DataFlow::SourceNode earlier |
    DataFlow::argumentPassingStep(_, earlier.getALocalUse(), _, node) and
    isCallback(earlier)
  )
  or
  node instanceof DataFlow::PartialInvokeNode
  or
  node instanceof DataFlow::FunctionNode
}
/**
 * A summary that propagates exceptions thrown by callbacks back to the caller.
 *
 * It is only applied to calls without any other call target.
 * See also `FlowSummaryDefaultExceptionalReturn`, which handles calls that have a summary target,
 * but where the summary does not mention `ReturnValue[exception]`.
 */
private class ExceptionFlowSummary extends SummarizedCallable, LibraryCallableInternal {
  ExceptionFlowSummary() { this = "Exception propagator" }

  override DataFlow::CallNode getACallStage2() {
    // Restrict to cases where a callback is known to flow in, as lambda flow in DataFlowImplCommon blows up otherwise
    isCallback(result.getAnArgument().getALocalSource()) and
    // Avoid a few common cases where the exception should not propagate back
    not result = promiseConstructorRef().getAnInvocation() and
    not result.getCalleeName() = ["addEventListener", EventEmitter::on()] and
    // Only calls with neither a known callee nor another summary
    not exists(SummarizedCallable c | result = [c.getACall(), c.getACallSimple()]) and
    not exists(result.getACallee())
  }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    output = "ReturnValue[exception]" and
    input = "Argument[0..].ReturnValue[exception]" and
    preservesValue = true
  }
}

View File

@@ -0,0 +1,51 @@
private import javascript
private import semmle.javascript.dataflow.FlowSummary
private import semmle.javascript.dataflow.internal.Contents::Private
/**
 * A call that has a receiver: either a method call or a reflective invocation
 * (`call` or `apply`).
 *
 * Unlike this class, `DataFlow::MethodCallNode` does not include reflective invocations.
 */
class InstanceCall extends DataFlow::CallNode {
  InstanceCall() { exists(DataFlow::Node receiver | receiver = this.getReceiver()) }

  /** Gets the name of the method being invoked. */
  string getMethodName() {
    exists(string calleeName | calleeName = this.getCalleeName() | result = calleeName)
  }
}
/**
 * A summary for a function that is the default export of an NPM package.
 *
 * Subclasses only need to provide the package names and the flow to propagate.
 */
abstract class FunctionalPackageSummary extends SummarizedCallable {
  bindingset[this]
  FunctionalPackageSummary() { any() }

  /** Gets a name of a package for which this summary applies. */
  abstract string getAPackageName();

  override DataFlow::InvokeNode getACallSimple() {
    // Invocations of a direct import of the package
    exists(string packageName | packageName = this.getAPackageName() |
      result = DataFlow::moduleImport(packageName).getAnInvocation()
    )
  }

  override DataFlow::InvokeNode getACall() {
    // Invocations found through the API graph
    exists(string packageName | packageName = this.getAPackageName() |
      result = API::moduleImport(packageName).getAnInvocation()
    )
  }
}
/**
 * Gets one of a set of content tokens which, taken together, represent all valid array indices.
 *
 * Use this to build flow summaries that need to preserve precise array indices
 * when `WithArrayElement` is not sufficient.
 */
string getAnArrayContent() {
  // Values stored at an unknown index
  result = "ArrayElement[?]"
  or
  // Values stored at a known, but large index
  result = "ArrayElement[" + (getMaxPreciseArrayIndex() + 1) + "..]"
  or
  // Values stored at a known, small index
  result = "ArrayElement[" + getAPreciseArrayIndex() + "!]"
}

View File

@@ -0,0 +1,59 @@
/**
* Contains flow steps to model flow through `for..of` loops.
*/
private import javascript
private import semmle.javascript.dataflow.internal.DataFlowNode
private import semmle.javascript.dataflow.internal.AdditionalFlowInternal
private import semmle.javascript.dataflow.internal.DataFlowPrivate
/**
 * Flow steps for `for..of` loops.
 *
 * For the loop `for (let lvalue of domain)`, array, set and iterator elements of `domain`
 * are read directly into `lvalue`. Map keys and values go through synthetic nodes so that
 * they can be stored at indices 0 and 1 of `lvalue`:
 *
 *   domain --- READ[MapKey]   ---> synthetic node 1 --- STORE[0] ---> lvalue
 *   domain --- READ[MapValue] ---> synthetic node 2 --- STORE[1] ---> lvalue
 */
class ForOfLoopStep extends AdditionalFlowInternal {
  override predicate needsSynthesizedNode(AstNode node, string tag, DataFlowCallable container) {
    // Intermediate nodes to convert (MapKey, MapValue) to a `[key, value]` array.
    tag = ["for-of-map-key", "for-of-map-value"] and
    container.asSourceCallable() = node.getContainer() and
    node instanceof ForOfStmt
  }

  override predicate readStep(
    DataFlow::Node pred, DataFlow::ContentSet contents, DataFlow::Node succ
  ) {
    exists(ForOfStmt loop, Expr domain |
      domain = loop.getIterationDomain() and
      pred = domain.flow()
    |
      // Iterator errors are raised at the iteration domain
      succ = domain.getExceptionTarget() and
      contents = DataFlow::ContentSet::iteratorError()
      or
      // Map values and keys are read into their synthetic nodes
      succ = getSynthesizedNode(loop, "for-of-map-value") and
      contents = DataFlow::ContentSet::mapValueAll()
      or
      succ = getSynthesizedNode(loop, "for-of-map-key") and
      contents = DataFlow::ContentSet::mapKey()
      or
      // Plain elements are read straight into the loop variable
      succ = DataFlow::lvalueNode(loop.getLValue()) and
      contents =
        [
          DataFlow::ContentSet::arrayElement(), DataFlow::ContentSet::setElement(),
          DataFlow::ContentSet::iteratorElement()
        ]
    )
  }

  override predicate storeStep(
    DataFlow::Node pred, DataFlow::ContentSet contents, DataFlow::Node succ
  ) {
    exists(ForOfStmt loop, string tag, int index |
      tag = "for-of-map-key" and index = 0
      or
      tag = "for-of-map-value" and index = 1
    |
      pred = getSynthesizedNode(loop, tag) and
      contents.asSingleton().asArrayIndex() = index and
      succ = DataFlow::lvalueNode(loop.getLValue())
    )
  }
}

View File

@@ -0,0 +1,59 @@
/**
* Contains flow steps to model flow through generator functions.
*/
private import javascript
private import semmle.javascript.dataflow.internal.DataFlowNode
private import semmle.javascript.dataflow.internal.AdditionalFlowInternal
/**
 * Flow steps out of a generator function:
 * ```js
 * function* foo() {
 *   yield x;  // 'x' is stored in the IteratorElement of the return value
 *   yield* y; // flows directly to the return value, which has expectsContent, so only iterator contents can pass through.
 *   throw z;  // 'z' is stored in the IteratorError of the return value
 * }
 * ```
 */
class GeneratorFunctionStep extends AdditionalFlowInternal {
  override predicate expectsContent(DataFlow::Node node, DataFlow::ContentSet contents) {
    // Ensure that the return value can only return iterator contents. This is needed for 'yield*'.
    exists(Function generator | generator.isGenerator() |
      contents = DataFlow::ContentSet::iteratorFilter() and
      node = TFunctionReturnNode(generator)
    )
  }

  override predicate storeStep(
    DataFlow::Node pred, DataFlow::ContentSet contents, DataFlow::Node succ
  ) {
    exists(Function generator |
      generator.isGenerator() and
      succ = TFunctionReturnNode(generator)
    |
      // Store thrown exceptions in the iterator-error
      contents = DataFlow::ContentSet::iteratorError() and
      pred = TExceptionalFunctionReturnNode(generator)
      or
      // `yield x`. Store into the return value's iterator element.
      exists(YieldExpr yieldExpr |
        yieldExpr.getContainer() = generator and
        not yieldExpr.isDelegating() and
        pred = yieldExpr.getOperand().flow()
      ) and
      contents = DataFlow::ContentSet::iteratorElement()
    )
  }

  override predicate step(DataFlow::Node pred, DataFlow::Node succ) {
    // `yield* x`. Flow into the return value, which has expectsContent, so only iterator contents can pass through.
    exists(Function generator, YieldExpr yieldExpr |
      succ = TFunctionReturnNode(generator) and
      pred = yieldExpr.getOperand().flow() and
      yieldExpr.isDelegating() and
      yieldExpr.getContainer() = generator and
      generator.isGenerator()
    )
  }
}

View File

@@ -0,0 +1,29 @@
/**
* Contains flow summaries and steps modelling flow through iterators.
*/
private import javascript
private import semmle.javascript.dataflow.internal.DataFlowNode
private import semmle.javascript.dataflow.FlowSummary
private import semmle.javascript.dataflow.internal.AdditionalFlowInternal
private import FlowSummaryUtil
/**
 * A flow summary for zero-argument method calls named `next`, modelled as `Iterator#next`.
 */
class IteratorNext extends SummarizedCallable {
  IteratorNext() { this = "Iterator#next" }

  override DataFlow::MethodCallNode getACallSimple() {
    result.getNumArgument() = 0 and
    result.getMethodName() = "next"
  }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    preservesValue = true and
    (
      // Iterator errors are raised as an exception from the call
      input = "Argument[this].IteratorError" and
      output = "ReturnValue[exception]"
      or
      // Iterator elements end up in the `value` member of the result
      input = "Argument[this].IteratorElement" and
      output = "ReturnValue.Member[value]"
    )
  }
}

View File

@@ -0,0 +1,20 @@
/**
* Contains implicit read steps at the input to any function that converts a deep object to a string, such as `JSON.stringify`.
*/
private import javascript
private import FlowSummaryUtil
private import semmle.javascript.dataflow.internal.AdditionalFlowInternal
private import semmle.javascript.dataflow.FlowSummary
/**
 * A taint summary for `JsonStringifyCall`s: the first argument, and anything stored
 * deeply within its members, taints the return value.
 */
private class JsonStringifySummary extends SummarizedCallable {
  JsonStringifySummary() { this = "JSON.stringify" }

  override DataFlow::InvokeNode getACall() { result instanceof JsonStringifyCall }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    output = "ReturnValue" and
    input = ["Argument[0]", "Argument[0].AnyMemberDeep"] and
    preservesValue = false
  }
}

View File

@@ -0,0 +1,140 @@
/**
* Contains flow summaries and steps modelling flow through `Map` objects.
*/
private import javascript
private import semmle.javascript.dataflow.FlowSummary
private import FlowSummaryUtil
/** Gets a reference to the global variable `Map`. */
private DataFlow::SourceNode mapConstructorRef() {
  exists(string globalName | globalName = "Map" | result = DataFlow::globalVarRef(globalName))
}
/**
 * A flow summary for instantiations of `Map`.
 *
 * Members 0 and 1 of each array, set or iterator element of the argument become map keys
 * and map values, respectively. Map keys and values already present in the argument are
 * carried over to the new map.
 */
class MapConstructor extends SummarizedCallable {
  MapConstructor() { this = "Map constructor" }

  override DataFlow::InvokeNode getACallSimple() {
    exists(DataFlow::SourceNode mapRef | mapRef = mapConstructorRef() |
      result = mapRef.getAnInstantiation()
    )
  }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    preservesValue = true and
    (
      // Copying from an existing map
      input = ["Argument[0].WithMapKey", "Argument[0].WithMapValue"] and
      output = "ReturnValue"
      or
      // Second member of each entry: the value
      input = "Argument[0]." + ["ArrayElement", "SetElement", "IteratorElement"] + ".Member[1]" and
      output = "ReturnValue.MapValue"
      or
      // First member of each entry: the key
      input = "Argument[0]." + ["ArrayElement", "SetElement", "IteratorElement"] + ".Member[0]" and
      output = "ReturnValue.MapKey"
    )
  }
}
/**
 * A read step for `Map#get`, matched as any one-argument method call named `get`.
 *
 * When the key argument has a known string value, only the map value for that key is read;
 * otherwise all map values are read.
 *
 * This is implemented as a step instead of a flow summary, as we currently do not expose a MaD syntax
 * for map values with a known key.
 */
class MapGetStep extends DataFlow::AdditionalFlowStep {
  override predicate readStep(
    DataFlow::Node pred, DataFlow::ContentSet contents, DataFlow::Node succ
  ) {
    exists(DataFlow::MethodCallNode getCall |
      succ = getCall and
      pred = getCall.getReceiver() and
      getCall.getNumArgument() = 1 and
      getCall.getMethodName() = "get"
    |
      // Unknown key: read any map value
      not exists(getCall.getArgument(0).getStringValue()) and
      contents = DataFlow::ContentSet::mapValueAll()
      or
      // Known key: read the value stored under that key
      contents = DataFlow::ContentSet::mapValueFromKey(getCall.getArgument(0).getStringValue())
    )
  }
}
/**
 * A store step for `Map#set`, matched as any two-argument method call named `set`.
 *
 * The second argument is stored into the post-update node of the receiver, under the
 * key's string value when it is known, and as a map value with unknown key otherwise.
 *
 * This is implemented as a step instead of a flow summary, as we currently do not expose a MaD syntax
 * for map values with a known key.
 */
class MapSetStep extends DataFlow::AdditionalFlowStep {
  override predicate storeStep(
    DataFlow::Node pred, DataFlow::ContentSet contents, DataFlow::Node succ
  ) {
    exists(DataFlow::MethodCallNode setCall |
      succ.(DataFlow::ExprPostUpdateNode).getPreUpdateNode() = setCall.getReceiver() and
      pred = setCall.getArgument(1) and
      setCall.getNumArgument() = 2 and
      setCall.getMethodName() = "set"
    |
      // Unknown key
      not exists(setCall.getArgument(0).getStringValue()) and
      contents = DataFlow::ContentSet::mapValueWithUnknownKey()
      or
      // Known key
      contents = DataFlow::ContentSet::mapValueFromKey(setCall.getArgument(0).getStringValue())
    )
  }
}
/**
 * A flow summary for `Map#get`.
 *
 * The summary currently matches no calls, as `getACallSimple` is disabled with `none()`.
 */
class MapGet extends SummarizedCallable {
  MapGet() { this = "Map#get" }

  override DataFlow::MethodCallNode getACallSimple() {
    none() and // TODO: Disabled for now - need MaD syntax for known map values
    result.getNumArgument() = 1 and
    result.getMethodName() = "get"
  }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    output = "ReturnValue" and
    input = "Argument[this].MapValue" and
    preservesValue = true
  }
}
/**
 * A flow summary for `Map#set`, matched as any two-argument method call named `set`.
 *
 * Map keys and values of the receiver are carried over to the return value. The part
 * that stores the arguments into the receiver is currently disabled with `none()`.
 */
class MapSet extends SummarizedCallable {
  MapSet() { this = "Map#set" }

  override DataFlow::MethodCallNode getACallSimple() {
    result.getNumArgument() = 2 and
    result.getMethodName() = "set"
  }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    preservesValue = true and
    none() and // TODO: Disabled for now - need MaD syntax for known map values
    (
      input = "Argument[1]" and
      output = "Argument[this].MapValue"
      or
      input = "Argument[0]" and
      output = "Argument[this].MapKey"
    )
    or
    // The return value carries the receiver's map contents
    output = "ReturnValue" and
    input = ["Argument[this].WithMapKey", "Argument[this].WithMapValue"] and
    preservesValue = true
  }
}
/**
 * A flow summary for two-argument calls to `Map.groupBy`.
 *
 * Elements of the first argument are passed to the callback and end up as array elements
 * of the map values; the callback's return value becomes a map key.
 */
class MapGroupBy extends SummarizedCallable {
  MapGroupBy() { this = "Map#groupBy" }

  override DataFlow::CallNode getACallSimple() {
    result.getNumArgument() = 2 and
    result = mapConstructorRef().getAMemberCall("groupBy")
  }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    preservesValue = true and
    (
      // The grouping key comes from the callback
      input = "Argument[1].ReturnValue" and
      output = "ReturnValue.MapKey"
      or
      // Elements go to the callback and into the grouped arrays
      input = "Argument[0].ArrayElement" and
      output = ["Argument[1].Parameter[0]", "ReturnValue.MapValue.ArrayElement"]
    )
  }
}

View File

@@ -0,0 +1,324 @@
/**
* Contains flow summaries and steps modelling flow through `Promise` objects.
*/
private import javascript
private import semmle.javascript.dataflow.FlowSummary
private import FlowSummaryUtil
/**
 * Gets a reference to a promise constructor: the one from `Promises::promiseConstructorRef()`,
 * an import of `bluebird`, the `Promise` member of `q`, `kew` or `bluebird`, or the
 * Closure module `goog.Promise`.
 */
DataFlow::SourceNode promiseConstructorRef() {
  result = Closure::moduleImport("goog.Promise")
  or
  result = DataFlow::moduleMember(["q", "kew", "bluebird"], "Promise") // note: bluebird.Promise == bluebird
  or
  result = DataFlow::moduleImport("bluebird")
  or
  result = Promises::promiseConstructorRef()
}
//
// Note that the 'Awaited' token has a special interpretation.
// See a write-up here: https://github.com/github/codeql-javascript-team/issues/423
//
/**
 * A flow summary for `new Promise()`.
 *
 * The summary currently matches no calls, as `getACallSimple` is `none()`;
 * see `PromiseConstructorWorkaround` for the summaries used in its place.
 */
private class PromiseConstructor extends SummarizedCallable {
  PromiseConstructor() { this = "new Promise()" }

  override DataFlow::InvokeNode getACallSimple() {
    // Disabled for now. The field-flow branch limit will be negatively affected by having
    // calls to multiple variants of `new Promise()`.
    none()
  }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    // TODO: when FlowSummaryImpl.qll supports these summaries, remove the workaround in PromiseConstructorWorkaround
    preservesValue = true and
    (
      // throw from executor
      input = "Argument[0].ReturnValue[exception]" and
      output = "ReturnValue.Awaited[error]"
      or
      // reject(value)
      input = "Argument[0].Parameter[1].Argument[0]" and
      output = "ReturnValue.Awaited[error]"
      or
      // resolve(value)
      input = "Argument[0].Parameter[0].Argument[0]" and
      output = "ReturnValue.Awaited"
    )
  }
}
/**
 * A workaround for the `PromiseConstructor` summary, to be used until FlowSummaryImpl.qll has
 * sufficient support for callbacks.
 *
 * Calls to the `resolve` and `reject` callbacks store their argument in a member of the
 * called function (`resolve-value` / `reject-value`), and the constructor summary reads
 * those members back out of the executor's parameters.
 */
module PromiseConstructorWorkaround {
  /** A summary for calls to the first parameter of the executor passed to `new Promise()`. */
  class ResolveSummary extends SummarizedCallable {
    ResolveSummary() { this = "new Promise() resolve callback" }

    override DataFlow::InvokeNode getACallSimple() {
      exists(DataFlow::FunctionNode executor |
        executor = promiseConstructorRef().getAnInstantiation().getCallback(0) and
        result = executor.getParameter(0).getACall()
      )
    }

    override predicate propagatesFlow(string input, string output, boolean preservesValue) {
      output = "Argument[function].Member[resolve-value]" and
      input = "Argument[0]" and
      preservesValue = true
    }
  }

  /** A summary for calls to the second parameter of the executor passed to `new Promise()`. */
  class RejectCallback extends SummarizedCallable {
    RejectCallback() { this = "new Promise() reject callback" }

    override DataFlow::InvokeNode getACallSimple() {
      exists(DataFlow::FunctionNode executor |
        executor = promiseConstructorRef().getAnInstantiation().getCallback(0) and
        result = executor.getParameter(1).getACall()
      )
    }

    override predicate propagatesFlow(string input, string output, boolean preservesValue) {
      output = "Argument[function].Member[reject-value]" and
      input = "Argument[0]" and
      preservesValue = true
    }
  }

  /** A summary for `new Promise()` that reads the values stored by the two summaries above. */
  class ConstructorSummary extends SummarizedCallable {
    ConstructorSummary() { this = "new Promise() workaround" }

    override DataFlow::InvokeNode getACallSimple() {
      exists(DataFlow::SourceNode promiseRef | promiseRef = promiseConstructorRef() |
        result = promiseRef.getAnInstantiation()
      )
    }

    override predicate propagatesFlow(string input, string output, boolean preservesValue) {
      preservesValue = true and
      (
        // Exceptions thrown by the executor
        input = "Argument[0].ReturnValue[exception]" and
        output = "ReturnValue.Awaited[error]"
        or
        // Values passed to the reject callback
        input = "Argument[0].Parameter[1].Member[reject-value]" and
        output = "ReturnValue.Awaited[error]"
        or
        // Values passed to the resolve callback
        input = "Argument[0].Parameter[0].Member[resolve-value]" and
        output = "ReturnValue.Awaited"
      )
    }
  }
}
/**
 * A flow summary for two-argument calls to a method named `then`.
 *
 * The resolved value of the receiver is passed to the first callback and its error to the
 * second. Return values and exceptions of either callback determine the returned promise.
 */
private class PromiseThen2Arguments extends SummarizedCallable {
  PromiseThen2Arguments() { this = "Promise#then() with 2 arguments" }

  override InstanceCall getACallSimple() {
    result.getNumArgument() = 2 and
    result.getMethodName() = "then"
  }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    preservesValue = true and
    (
      // Receiver rejected: the error goes to the second callback
      input = "Argument[this].Awaited[error]" and
      output = "Argument[1].Parameter[0]"
      or
      // Receiver resolved: the value goes to the first callback
      input = "Argument[this].Awaited[value]" and
      output = "Argument[0].Parameter[0]"
      or
      // A callback throwing rejects the returned promise
      input = "Argument[0,1].ReturnValue[exception]" and
      output = "ReturnValue.Awaited[error]"
      or
      // A callback returning determines the returned promise
      input = "Argument[0,1].ReturnValue" and
      output = "ReturnValue.Awaited"
    )
  }
}
/**
 * A flow summary for one-argument calls to a method named `then`.
 *
 * The resolved value of the receiver is passed to the callback, while an error in the
 * receiver is carried over to the return value.
 */
private class PromiseThen1Argument extends SummarizedCallable {
  PromiseThen1Argument() { this = "Promise#then() with 1 argument" }

  override InstanceCall getACallSimple() {
    result.getNumArgument() = 1 and
    result.getMethodName() = "then"
  }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    preservesValue = true and
    (
      // Receiver rejected: the error is kept in the returned promise
      input = "Argument[this].WithAwaited[error]" and
      output = "ReturnValue"
      or
      // Receiver resolved: the value goes to the callback
      input = "Argument[this].Awaited[value]" and
      output = "Argument[0].Parameter[0]"
      or
      // The callback throwing rejects the returned promise
      input = "Argument[0].ReturnValue[exception]" and
      output = "ReturnValue.Awaited[error]"
      or
      // The callback returning determines the returned promise
      input = "Argument[0].ReturnValue" and
      output = "ReturnValue.Awaited"
    )
  }
}
/**
 * A flow summary for calls to a method named `catch`.
 *
 * An error in the receiver is passed to the callback, while a resolved value of the
 * receiver is carried over to the returned promise.
 */
private class PromiseCatch extends SummarizedCallable {
  PromiseCatch() { this = "Promise#catch()" }

  override InstanceCall getACallSimple() {
    exists(string methodName | methodName = "catch" | result.getMethodName() = methodName)
  }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    preservesValue = true and
    (
      // Receiver rejected: the error goes to the callback
      input = "Argument[this].Awaited[error]" and
      output = "Argument[0].Parameter[0]"
      or
      // Receiver resolved: the value is kept in the returned promise
      input = "Argument[this].Awaited[value]" and
      output = "ReturnValue.Awaited[value]"
      or
      // The callback throwing rejects the returned promise
      input = "Argument[0].ReturnValue[exception]" and
      output = "ReturnValue.Awaited[error]"
      or
      // The callback returning determines the returned promise
      input = "Argument[0].ReturnValue" and
      output = "ReturnValue.Awaited"
    )
  }
}
/**
 * Flow summary for calls to a method named `finally`, as in `promise.finally(onFinally)`.
 */
private class PromiseFinally extends SummarizedCallable {
  PromiseFinally() { this = "Promise#finally()" }

  override InstanceCall getACallSimple() { result.getMethodName() = "finally" }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    preservesValue = true and
    (
      // The receiver's `Awaited[value]` and `Awaited[error]` contents are forwarded
      // to the return value.
      input = "Argument[this].WithAwaited[value,error]" and output = "ReturnValue"
      or
      // An exception thrown by the callback, or the `Awaited[error]` content of the value it
      // returns, becomes the `Awaited[error]` content of the returned promise.
      input = "Argument[0].ReturnValue[exception]" and output = "ReturnValue.Awaited[error]"
      or
      input = "Argument[0].ReturnValue.Awaited[error]" and output = "ReturnValue.Awaited[error]"
    )
  }
}
/**
 * Flow summary for calls to the `resolve` member of a promise constructor reference,
 * as in `Promise.resolve(x)`.
 */
private class PromiseResolve extends SummarizedCallable {
  PromiseResolve() { this = "Promise.resolve()" }

  override InstanceCall getACallSimple() {
    promiseConstructorRef().getAMemberCall("resolve") = result
  }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    // The first argument flows to `ReturnValue.Awaited`.
    input = "Argument[0]" and
    output = "ReturnValue.Awaited" and
    preservesValue = true
  }
}
/**
 * Flow summary for calls to the `reject` member of a promise constructor reference,
 * as in `Promise.reject(x)`.
 */
private class PromiseReject extends SummarizedCallable {
  PromiseReject() { this = "Promise.reject()" }

  override InstanceCall getACallSimple() {
    promiseConstructorRef().getAMemberCall("reject") = result
  }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    // The first argument becomes the `Awaited[error]` content of the returned promise.
    input = "Argument[0]" and
    output = "ReturnValue.Awaited[error]" and
    preservesValue = true
  }
}
/**
 * Flow summary for calls to the `all` member of a promise constructor reference,
 * as in `Promise.all(promises)`.
 */
private class PromiseAll extends SummarizedCallable {
  PromiseAll() { this = "Promise.all()" }

  override DataFlow::InvokeNode getACallSimple() {
    promiseConstructorRef().getAMemberCall("all") = result
  }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    // Non-value-preserving (taint) flow from the argument itself to the return value.
    preservesValue = false and
    input = "Argument[0]" and
    output = "ReturnValue"
    or
    preservesValue = true and
    (
      // The `Awaited[error]` content of an array element is forwarded to the return value.
      input = "Argument[0].ArrayElement.WithAwaited[error]" and
      output = "ReturnValue"
      or
      // For each array content given by `getAnArrayContent()`, the awaited element is stored
      // under the same content in the `Awaited[value]` of the return value.
      exists(string content | content = getAnArrayContent() |
        input = "Argument[0]." + content + ".Awaited" and
        output = "ReturnValue.Awaited[value]." + content
      )
    )
  }
}
/**
 * Flow summary for calls to the `any`, `race` or `firstFulfilled` member of a promise
 * constructor reference.
 */
private class PromiseAnyLike extends SummarizedCallable {
  PromiseAnyLike() { this = "Promise.any() or Promise.race()" }

  override DataFlow::InvokeNode getACallSimple() {
    promiseConstructorRef().getAMemberCall(["any", "race", "firstFulfilled"]) = result
  }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    // An array element of the first argument flows to `ReturnValue.Awaited`.
    input = "Argument[0].ArrayElement" and
    output = "ReturnValue.Awaited" and
    preservesValue = true
  }
}
/**
 * Flow summary for calls to the `allSettled` member of a promise constructor reference,
 * and for calls to the function imported from the `promise.allsettled` module.
 */
private class PromiseAllSettled extends SummarizedCallable {
  PromiseAllSettled() { this = "Promise.allSettled()" }

  override DataFlow::InvokeNode getACallSimple() {
    result = DataFlow::moduleImport("promise.allsettled").getACall()
    or
    result = promiseConstructorRef().getAMemberCall("allSettled")
  }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    preservesValue = true and
    exists(string content | content = getAnArrayContent() |
      // The `Awaited[error]` content of an element is stored in the `reason` member
      // of the corresponding result entry.
      input = "Argument[0]." + content + ".Awaited[error]" and
      output = "ReturnValue.Awaited[value]." + content + ".Member[reason]"
      or
      // The awaited element is stored in the `value` member of the corresponding result entry.
      input = "Argument[0]." + content + ".Awaited" and
      output = "ReturnValue.Awaited[value]." + content + ".Member[value]"
    )
  }
}
/**
 * Flow summary for calls to the `mapSeries` member of a promise constructor reference,
 * as in `Promise.mapSeries(input, callback)`.
 */
private class BluebirdMapSeries extends SummarizedCallable {
  BluebirdMapSeries() { this = "bluebird.mapSeries" }

  override DataFlow::InvokeNode getACallSimple() {
    promiseConstructorRef().getAMemberCall("mapSeries") = result
  }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    preservesValue = true and
    (
      // Awaited elements of the (awaited) first argument go to the callback's first parameter.
      input = "Argument[0].Awaited.ArrayElement.Awaited" and
      output = "Argument[1].Parameter[0]"
      or
      // The awaited result of the callback becomes an array element of the awaited return value.
      input = "Argument[1].ReturnValue.Awaited" and
      output = "ReturnValue.Awaited.ArrayElement"
      or
      // `Awaited[error]` content from the first argument, from one of its elements, or from
      // the callback's return value is forwarded to the return value.
      input =
        [
          "Argument[0].WithAwaited[error]", "Argument[0].Awaited.ArrayElement.WithAwaited[error]",
          "Argument[1].ReturnValue.WithAwaited[error]"
        ] and
      output = "ReturnValue"
    )
  }
}
/**
 * Flow summary for calls to the `withResolver`, `withResolvers` or `defer` member of a
 * promise constructor reference. This covers:
 *
 * - `Promise.withResolvers`, a method pending standardization,
 * - `goog.Closure.withResolver()` (non-plural spelling)
 * - `bluebird.Promise.defer()`
 */
private class PromiseWithResolversLike extends SummarizedCallable {
  PromiseWithResolversLike() { this = "Promise.withResolvers()" }

  override DataFlow::InvokeNode getACallSimple() {
    promiseConstructorRef().getAMemberCall(["withResolver", "withResolvers", "defer"]) = result
  }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    preservesValue = true and
    (
      // TODO: not currently supported by FlowSummaryImpl.qll
      //
      // An argument passed to the returned `reject` member becomes the `Awaited[error]`
      // content of the returned `promise` member.
      input = "ReturnValue.Member[reject].Argument[0]" and
      output = "ReturnValue.Member[promise].Awaited[error]"
      or
      // An argument passed to the returned `resolve` member flows to the `Awaited`
      // content of the returned `promise` member.
      input = "ReturnValue.Member[resolve].Argument[0]" and
      output = "ReturnValue.Member[promise].Awaited"
    )
  }
}

View File

@@ -0,0 +1,46 @@
/**
* Contains flow summaries and steps modelling flow through `Set` objects.
*/
private import javascript
private import semmle.javascript.dataflow.FlowSummary
private import FlowSummaryUtil
/** Gets a reference to the global variable `Set`. */
private DataFlow::SourceNode setConstructorRef() {
  DataFlow::globalVarRef("Set") = result
}
/**
 * Flow summary for instantiations of the global `Set`, as in `new Set(iterable)`.
 */
class SetConstructor extends SummarizedCallable {
  SetConstructor() { this = "Set constructor" }

  override DataFlow::InvokeNode getACallSimple() {
    setConstructorRef().getAnInstantiation() = result
  }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    preservesValue = true and
    (
      // Map keys and map values of the argument are stored in members `0` and `1`
      // of a set element, respectively.
      input = "Argument[0].MapKey" and
      output = "ReturnValue.SetElement.Member[0]"
      or
      input = "Argument[0].MapValue" and
      output = "ReturnValue.SetElement.Member[1]"
      or
      // Array, set and iterator elements of the argument become set elements of the result.
      input = "Argument[0]." + ["ArrayElement", "SetElement", "IteratorElement"] and
      output = "ReturnValue.SetElement"
    )
  }
}
/**
 * Flow summary for calls to a method named `add` with exactly one argument,
 * as in `set.add(x)`.
 */
class SetAdd extends SummarizedCallable {
  SetAdd() { this = "Set#add" }

  override DataFlow::MethodCallNode getACallSimple() {
    result.getNumArgument() = 1 and
    result.getMethodName() = "add"
  }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    // The argument is stored as a set element of the receiver.
    input = "Argument[0]" and
    output = "Argument[this].SetElement" and
    preservesValue = true
  }
}

View File

@@ -0,0 +1,101 @@
/**
* Contains flow summaries and steps modelling flow through string methods.
*/
private import javascript
private import semmle.javascript.dataflow.FlowSummary
/**
 * Summary for `.replace` / `.replaceAll` calls whose pattern is not a regexp
 * containing a wildcard.
 */
private class StringReplaceNoWildcard extends SummarizedCallable {
  StringReplaceNoWildcard() {
    this = "String#replace / String#replaceAll (without wildcard pattern)"
  }

  override StringReplaceCall getACall() { not result.hasRegExpContainingWildcard() }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    // Both the receiver and the return value of the replacement callback
    // taint the result of the call.
    preservesValue = false and
    input = ["Argument[this]", "Argument[1].ReturnValue"] and
    output = "ReturnValue"
  }
}
/**
 * Summary for `.replace` / `.replaceAll` calls whose pattern is a regexp containing a wildcard.
 *
 * Unlike the non-wildcard case, the receiver is also considered to flow into the first
 * parameter of the replacement callback.
 */
private class StringReplaceWithWildcard extends SummarizedCallable {
  StringReplaceWithWildcard() {
    this = "String#replace / String#replaceAll (with wildcard pattern)"
  }

  override StringReplaceCall getACall() { result.hasRegExpContainingWildcard() }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    preservesValue = false and
    (
      // The receiver taints the first parameter of the replacement callback.
      output = "Argument[1].Parameter[0]" and
      input = "Argument[this]"
      or
      // Both the receiver and the callback's return value taint the result of the call.
      output = "ReturnValue" and
      input = ["Argument[this]", "Argument[1].ReturnValue"]
    )
  }
}
/**
 * Summary for calls to a method named `split` with one or two arguments, where the first
 * argument is not known to be the string `"#"` or `"?"`.
 */
class StringSplit extends SummarizedCallable {
  StringSplit() { this = "String#split" }

  override DataFlow::MethodCallNode getACallSimple() {
    result.getNumArgument() = [1, 2] and
    result.getMethodName() = "split" and
    not result.getArgument(0).getStringValue() = ["#", "?"]
  }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    // The receiver taints every element of the resulting array.
    input = "Argument[this]" and
    output = "ReturnValue.ArrayElement" and
    preservesValue = false
  }
}
/**
 * A call of form `x.split("#")` or `x.split("?")`.
 *
 * These are of special significance when tracking a tainted URL suffix, such as `window.location.href`,
 * because the first element of the resulting array should not be considered tainted.
 *
 * Each flow in this summary passes through an optional barrier or optional step:
 * the barrier `split-url-suffix` and the steps `split-url-suffix-pre` and
 * `split-url-suffix-post`. These are intended to be used when tracking a tainted URL suffix.
 */
class StringSplitHashOrQuestionMark extends SummarizedCallable {
  StringSplitHashOrQuestionMark() { this = "String#split with '#' or '?'" }

  override DataFlow::MethodCallNode getACallSimple() {
    result.getNumArgument() = [1, 2] and
    result.getMethodName() = "split" and
    result.getArgument(0).getStringValue() = ["#", "?"]
  }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    preservesValue = false and
    (
      // Receiver to array element `0`, via the `split-url-suffix-pre` optional step.
      input = "Argument[this].OptionalStep[split-url-suffix-pre]" and
      output = "ReturnValue.ArrayElement[0]"
      or
      // Receiver to array element `1`, via the `split-url-suffix-post` optional step.
      input = "Argument[this].OptionalStep[split-url-suffix-post]" and
      output = "ReturnValue.ArrayElement[1]" // TODO: support ArrayElement[1..]
      or
      // Receiver to any array element, passing through the `split-url-suffix` optional barrier.
      input = "Argument[this].OptionalBarrier[split-url-suffix]" and
      output = "ReturnValue.ArrayElement"
    )
  }
}

View File

@@ -0,0 +1,90 @@
/**
* Contains a class with flow states that are used by multiple queries.
*/
private import javascript
private import TaintedUrlSuffixCustomizations
private import TaintedObjectCustomizations
/**
 * The underlying algebraic type of `FlowState`, with one branch per flow state.
 */
private newtype TFlowState =
TTaint() or
TTaintedUrlSuffix() or
TTaintedPrefix() or
TTaintedObject()
/**
 * A flow state describing which part of a value is tainted.
 */
class FlowState extends TFlowState {
  /**
   * Holds if this state denotes a value that is considered entirely tainted, except that
   * the first character might not be user-controlled.
   */
  predicate isTaint() { TTaint() = this }

  /**
   * Holds if this state denotes a URL whose fragment and/or query parts are considered tainted.
   */
  predicate isTaintedUrlSuffix() { TTaintedUrlSuffix() = this }

  /**
   * Holds if this state denotes a string whose prefix is known to be tainted.
   */
  predicate isTaintedPrefix() { TTaintedPrefix() = this }

  /**
   * Holds if this state denotes a deeply tainted object, such as a JSON object
   * parsed from user-controlled data.
   */
  predicate isTaintedObject() { TTaintedObject() = this }

  /** Gets a string representation of this flow state. */
  string toString() {
    result = "taint" and this.isTaint()
    or
    result = "tainted-url-suffix" and this.isTaintedUrlSuffix()
    or
    result = "tainted-prefix" and this.isTaintedPrefix()
    or
    result = "tainted-object" and this.isTaintedObject()
  }

  /** DEPRECATED. Gets the corresponding flow label. */
  deprecated DataFlow::FlowLabel toFlowLabel() {
    result.isTaint() and this.isTaint()
    or
    result = TaintedUrlSuffix::label() and this.isTaintedUrlSuffix()
    or
    result = "PrefixString" and this.isTaintedPrefix()
    or
    result = TaintedObject::label() and this.isTaintedObject()
  }
}
/** Convenience predicates for obtaining the common flow states. */
module FlowState {
  /**
   * Gets the flow state for a value that is considered entirely tainted, except that
   * the first character might not be user-controlled.
   */
  FlowState taint() { result = any(FlowState state | state.isTaint()) }

  /**
   * Gets the flow state for a URL whose fragment and/or query parts are considered tainted.
   */
  FlowState taintedUrlSuffix() { result = any(FlowState state | state.isTaintedUrlSuffix()) }

  /**
   * Gets the flow state for a string whose prefix is known to be tainted.
   */
  FlowState taintedPrefix() { result = any(FlowState state | state.isTaintedPrefix()) }

  /**
   * Gets the flow state for a deeply tainted object, such as a JSON object
   * parsed from user-controlled data.
   */
  FlowState taintedObject() { result = any(FlowState state | state.isTaintedObject()) }

  /** DEPRECATED. Gets the flow state corresponding to `label`. */
  deprecated FlowState fromFlowLabel(DataFlow::FlowLabel label) { label = result.toFlowLabel() }
}

View File

@@ -7,10 +7,10 @@
*
* To track deeply tainted objects, a flow-tracking configuration should generally include the following:
*
* 1. One or more sinks associated with the label `TaintedObject::label()`.
* 2. The sources from `TaintedObject::isSource`.
* 3. The flow steps from `TaintedObject::step`.
* 4. The sanitizing guards `TaintedObject::SanitizerGuard`.
* 1. One or more sinks associated with the flow state `FlowState::taintedObject()`.
* 2. The sources from `TaintedObject::Source`.
* 3. The flow steps from `TaintedObject::isAdditionalFlowStep`.
* 4. The barriers from `TaintedObject::SanitizerGuard::getABarrierNode(state)`.
*/
import javascript
@@ -22,56 +22,67 @@ module TaintedObject {
import TaintedObjectCustomizations::TaintedObject
// Materialize flow labels
private class ConcreteTaintedObjectLabel extends TaintedObjectLabel {
deprecated private class ConcreteTaintedObjectLabel extends TaintedObjectLabel {
ConcreteTaintedObjectLabel() { this = this }
}
/**
* DEPRECATED. Use `isAdditionalFlowStep(node1, state1, node2, state2)` instead.
*/
deprecated predicate step(Node src, Node trg, FlowLabel inlbl, FlowLabel outlbl) {
isAdditionalFlowStep(src, FlowState::fromFlowLabel(inlbl), trg, FlowState::fromFlowLabel(outlbl))
}
/**
* Holds for the flow steps that are relevant for tracking user-controlled JSON objects.
*/
predicate step(Node src, Node trg, FlowLabel inlbl, FlowLabel outlbl) {
predicate isAdditionalFlowStep(Node node1, FlowState state1, Node node2, FlowState state2) {
// JSON parsers map tainted inputs to tainted JSON
inlbl.isDataOrTaint() and
outlbl = label() and
state1.isTaint() and
state2.isTaintedObject() and
exists(JsonParserCall parse |
src = parse.getInput() and
trg = parse.getOutput()
node1 = parse.getInput() and
node2 = parse.getOutput()
)
or
// Property reads preserve deep object taint.
inlbl = label() and
outlbl = label() and
trg.(PropRead).getBase() = src
state1.isTaintedObject() and
state2.isTaintedObject() and
node2.(PropRead).getBase() = node1
or
// Property projection preserves deep object taint
inlbl = label() and
outlbl = label() and
trg.(PropertyProjection).getObject() = src
state1.isTaintedObject() and
state2.isTaintedObject() and
node2.(PropertyProjection).getObject() = node1
or
// Extending objects preserves deep object taint
inlbl = label() and
outlbl = label() and
state1.isTaintedObject() and
state2.isTaintedObject() and
exists(ExtendCall call |
src = call.getAnOperand() and
trg = call
node1 = call.getAnOperand() and
node2 = call
or
src = call.getASourceOperand() and
trg = call.getDestinationOperand().getALocalSource()
node1 = call.getASourceOperand() and
node2 = call.getDestinationOperand().getALocalSource()
)
or
// Spreading into an object preserves deep object taint: `p -> { ...p }`
inlbl = label() and
outlbl = label() and
state1.isTaintedObject() and
state2.isTaintedObject() and
exists(ObjectLiteralNode obj |
src = obj.getASpreadProperty() and
trg = obj
node1 = obj.getASpreadProperty() and
node2 = obj
)
}
/**
* DEPRECATED. Use the `Source` class and `FlowState#isTaintedObject()` directly.
*
* Holds if `node` is a source of JSON taint and label is the JSON taint label.
*/
predicate isSource(Node source, FlowLabel label) { source instanceof Source and label = label() }
deprecated predicate isSource(Node source, FlowLabel label) {
source instanceof Source and label = label()
}
/** Request input accesses as a JSON source. */
private class RequestInputAsSource extends Source {
@@ -81,7 +92,37 @@ module TaintedObject {
/**
* A sanitizer guard that blocks deep object taint.
*/
abstract class SanitizerGuard extends TaintTracking::LabeledSanitizerGuardNode { }
abstract class SanitizerGuard extends DataFlow::Node {
/**
 * Holds if this node blocks flow through `e`, provided it evaluates to `outcome`.
 *
 * Holds for nothing by default; subclasses override it to contribute barriers.
 */
predicate blocksExpr(boolean outcome, Expr e) { none() }
/**
 * Holds if this node blocks flow of `state` through `e`, provided it evaluates to `outcome`.
 *
 * Holds for nothing by default; subclasses override it to contribute state-specific barriers.
 */
predicate blocksExpr(boolean outcome, Expr e, FlowState state) { none() }
/** DEPRECATED. Use `blocksExpr` instead. Delegates to `blocksExpr` with the flow state corresponding to `label`. */
deprecated predicate sanitizes(boolean outcome, Expr e, FlowLabel label) {
this.blocksExpr(outcome, e, FlowState::fromFlowLabel(label))
}
/** DEPRECATED. Use `blocksExpr` instead. Delegates to the two-argument `blocksExpr`. */
deprecated predicate sanitizes(boolean outcome, Expr e) { this.blocksExpr(outcome, e) }
}
/**
 * Exposes every `SanitizerGuard` as a `TaintTracking::LabeledSanitizerGuardNode`
 * by forwarding both `sanitizes` predicates to the deprecated ones on `SanitizerGuard`.
 */
deprecated private class SanitizerGuardLegacy extends TaintTracking::LabeledSanitizerGuardNode instanceof SanitizerGuard
{
deprecated override predicate sanitizes(boolean outcome, Expr e, FlowLabel label) {
SanitizerGuard.super.sanitizes(outcome, e, label)
}
deprecated override predicate sanitizes(boolean outcome, Expr e) {
SanitizerGuard.super.sanitizes(outcome, e)
}
}
/**
* A sanitizer guard that blocks deep object taint.
*/
module SanitizerGuard = DataFlow::MakeStateBarrierGuard<FlowState, SanitizerGuard>;
/**
* A test of form `typeof x === "something"`, preventing `x` from being an object in some cases.
@@ -103,10 +144,10 @@ module TaintedObject {
)
}
override predicate sanitizes(boolean outcome, Expr e, FlowLabel label) {
override predicate blocksExpr(boolean outcome, Expr e, FlowState state) {
polarity = outcome and
e = operand and
label = label()
state.isTaintedObject()
}
}
@@ -117,7 +158,7 @@ module TaintedObject {
NumberGuard() { TaintTracking::isNumberGuard(this, x, polarity) }
override predicate sanitizes(boolean outcome, Expr e) { e = x and outcome = polarity }
override predicate blocksExpr(boolean outcome, Expr e) { e = x and outcome = polarity }
}
/** A guard that checks whether an input is a valid string identifier using `mongoose.Types.ObjectId.isValid` */
@@ -131,8 +172,8 @@ module TaintedObject {
.getACall()
}
override predicate sanitizes(boolean outcome, Expr e, FlowLabel lbl) {
e = super.getAnArgument().asExpr() and outcome = true and lbl = label()
override predicate blocksExpr(boolean outcome, Expr e, FlowState state) {
e = super.getAnArgument().asExpr() and outcome = true and state.isTaintedObject()
}
}
@@ -145,10 +186,10 @@ module TaintedObject {
JsonSchemaValidationGuard() { this = call.getAValidationResultAccess(polarity) }
override predicate sanitizes(boolean outcome, Expr e, FlowLabel label) {
override predicate blocksExpr(boolean outcome, Expr e, FlowState state) {
outcome = polarity and
e = call.getInput().asExpr() and
label = label()
state.isTaintedObject()
}
}
}

View File

@@ -7,8 +7,10 @@ import javascript
/** Provides classes and predicates for reasoning about deeply tainted objects. */
module TaintedObject {
import CommonFlowState
/** A flow label representing a deeply tainted object. */
abstract class TaintedObjectLabel extends DataFlow::FlowLabel {
abstract deprecated class TaintedObjectLabel extends DataFlow::FlowLabel {
TaintedObjectLabel() { this = "tainted-object" }
}
@@ -19,7 +21,7 @@ module TaintedObject {
*
* Note that the presence of this label generally implies the presence of the `taint` label as well.
*/
DataFlow::FlowLabel label() { result instanceof TaintedObjectLabel }
deprecated DataFlow::FlowLabel label() { result instanceof TaintedObjectLabel }
/**
* A source of a user-controlled deep object.

View File

@@ -10,105 +10,9 @@ import javascript
* which we collectively refer to as the "suffix" of the URL.
*/
module TaintedUrlSuffix {
private import DataFlow
import TaintedUrlSuffixCustomizations::TaintedUrlSuffix
/**
* The flow label representing a URL with a tainted query and fragment part.
*
* Can also be accessed using `TaintedUrlSuffix::label()`.
*/
class TaintedUrlSuffixLabel extends FlowLabel {
TaintedUrlSuffixLabel() { this = "tainted-url-suffix" }
}
/**
* Gets the flow label representing a URL with a tainted query and fragment part.
*/
FlowLabel label() { result instanceof TaintedUrlSuffixLabel }
/** Gets a remote flow source that is a tainted URL query or fragment part from `window.location`. */
ClientSideRemoteFlowSource source() {
result = DOM::locationRef().getAPropertyRead(["search", "hash"])
or
result = DOM::locationSource()
or
result.getKind().isUrl()
}
/** Holds for `pred -> succ` is a step of form `x -> x.p` */
private predicate isSafeLocationProp(DataFlow::PropRead read) {
// Ignore properties that refer to the scheme, domain, port, auth, or path.
read.getPropertyName() =
[
"protocol", "scheme", "host", "hostname", "domain", "origin", "port", "path", "pathname",
"username", "password", "auth"
]
}
/**
* Holds if there is a flow step `src -> dst` involving the URL suffix taint label.
*
* This handles steps through string operations, promises, URL parsers, and URL accessors.
*/
predicate step(Node src, Node dst, FlowLabel srclbl, FlowLabel dstlbl) {
// Inherit all ordinary taint steps except `x -> x.p` steps
srclbl = label() and
dstlbl = label() and
TaintTracking::sharedTaintStep(src, dst) and
not isSafeLocationProp(dst)
or
// Transition from URL suffix to full taint when extracting the query/fragment part.
srclbl = label() and
dstlbl.isTaint() and
(
exists(MethodCallNode call, string name |
src = call.getReceiver() and
dst = call and
name = call.getMethodName()
|
// Substring that is not a prefix
name = StringOps::substringMethodName() and
not call.getArgument(0).getIntValue() = 0
or
// Split around '#' or '?' and extract the suffix
name = "split" and
call.getArgument(0).getStringValue() = ["#", "?"] and
not exists(call.getAPropertyRead("0")) // Avoid false flow to the prefix
or
// Replace '#' and '?' with nothing
name = "replace" and
call.getArgument(0).getStringValue() = ["#", "?"] and
call.getArgument(1).getStringValue() = ""
or
// The `get` call in `url.searchParams.get(x)` and `url.hashParams.get(x)`
// The step should be safe since nothing else reachable by this flow label supports a method named 'get'.
name = "get"
or
// Methods on URL objects from the Closure library
name = "getDecodedQuery"
or
name = "getFragment"
or
name = "getParameterValue"
or
name = "getParameterValues"
or
name = "getQueryData"
)
or
exists(PropRead read |
src = read.getBase() and
dst = read and
// Unlike the `search` property, the `query` property from `url.parse` does not include the `?`.
read.getPropertyName() = "query"
)
or
// Assume calls to regexp.exec always extract query/fragment parameters.
exists(MethodCallNode call |
call = any(RegExpLiteral re).flow().(DataFlow::SourceNode).getAMethodCall("exec") and
src = call.getArgument(0) and
dst = call
)
)
deprecated private class ConcreteTaintedUrlSuffixLabel extends TaintedUrlSuffixLabel {
ConcreteTaintedUrlSuffixLabel() { this = this }
}
}

View File

@@ -0,0 +1,171 @@
/**
* Provides a flow state for reasoning about URLs with a tainted query and fragment part,
* which we collectively refer to as the "suffix" of the URL.
*/
import javascript
private import semmle.javascript.dataflow.internal.DataFlowPrivate as DataFlowPrivate
/**
 * Provides a flow state for reasoning about URLs with a tainted query and fragment part,
 * which we collectively refer to as the "suffix" of the URL.
 */
module TaintedUrlSuffix {
  private import DataFlow
  import CommonFlowState

  /**
   * The flow label representing a URL with a tainted query and fragment part.
   *
   * Can also be accessed using `TaintedUrlSuffix::label()`.
   */
  abstract deprecated class TaintedUrlSuffixLabel extends FlowLabel {
    TaintedUrlSuffixLabel() { this = "tainted-url-suffix" }
  }

  /**
   * Gets the flow label representing a URL with a tainted query and fragment part.
   */
  deprecated FlowLabel label() { result instanceof TaintedUrlSuffixLabel }

  /** Gets a remote flow source that is a tainted URL query or fragment part from `window.location`. */
  ClientSideRemoteFlowSource source() {
    result.getKind().isUrl()
    or
    result = DOM::locationSource()
    or
    result = DOM::locationRef().getAPropertyRead(["search", "hash"])
  }

  /**
   * DEPRECATED. Use `isStateBarrier(node, state)` instead.
   *
   * Holds if `node` should be a barrier for the given `label`.
   *
   * This should be used in the `isBarrier` predicate of a configuration that uses the tainted-url-suffix
   * label.
   */
  deprecated predicate isBarrier(Node node, FlowLabel label) {
    isStateBarrier(node, FlowState::fromFlowLabel(label))
  }

  /**
   * Holds if `node` should be blocked in `state`.
   *
   * This holds for the `split-url-suffix` optional barrier in the tainted-url-suffix state.
   */
  predicate isStateBarrier(Node node, FlowState state) {
    state.isTaintedUrlSuffix() and
    DataFlowPrivate::optionalBarrier(node, "split-url-suffix")
  }

  /**
   * DEPRECATED. Use `isAdditionalFlowStep` instead.
   */
  deprecated predicate step(Node src, Node dst, FlowLabel srclbl, FlowLabel dstlbl) {
    isAdditionalFlowStep(src, FlowState::fromFlowLabel(srclbl), dst,
      FlowState::fromFlowLabel(dstlbl))
  }

  /**
   * Holds if there is a flow step `node1 -> node2` involving the URL suffix flow state.
   *
   * This handles steps through string operations, promises, URL parsers, and URL accessors.
   * Every step goes from the tainted-url-suffix state to the general taint state.
   */
  predicate isAdditionalFlowStep(Node node1, FlowState state1, Node node2, FlowState state2) {
    state1.isTaintedUrlSuffix() and
    state2.isTaint() and
    (
      // Transition from tainted-url-suffix to general taint when entering the second array element
      // of a split('#') or split('?') array.
      //
      // x [tainted-url-suffix] --> x.split('#') [array element 1] [taint]
      //
      // Technically we should also preserve tainted-url-suffix when entering the first array element of such
      // a split, but this mostly leads to FPs since we currently don't track if the taint has been through URI-decoding.
      // (The query/fragment parts are often URI-decoded in practice, but the other URL parts are not)
      DataFlowPrivate::optionalStep(node1, "split-url-suffix-post", node2)
      or
      // Transition from URL suffix to full taint when extracting the query/fragment part
      // through a method call on the URL.
      exists(MethodCallNode call, string name |
        node1 = call.getReceiver() and
        node2 = call and
        name = call.getMethodName()
      |
        // Substring that is not a prefix
        name = StringOps::substringMethodName() and
        not call.getArgument(0).getIntValue() = 0
        or
        // Replace '#' and '?' with nothing
        name = "replace" and
        call.getArgument(0).getStringValue() = ["#", "?"] and
        call.getArgument(1).getStringValue() = ""
        or
        // - `get`: the `get` call in `url.searchParams.get(x)` and `url.hashParams.get(x)`.
        //   The step should be safe since nothing else reachable by this flow label supports a method named 'get'.
        // - The remaining names are methods on URL objects from the Closure library.
        name =
          [
            "get", "getDecodedQuery", "getFragment", "getParameterValue", "getParameterValues",
            "getQueryData"
          ]
      )
      or
      exists(PropRead read |
        node1 = read.getBase() and
        node2 = read and
        // Unlike the `search` property, the `query` property from `url.parse` does not include the `?`.
        read.getPropertyName() = "query"
      )
      or
      // Matching the URL against a regexp, via `re.exec(url)`, `url.match(re)` or `url.matchAll(re)`.
      exists(MethodCallNode call, DataFlow::RegExpCreationNode re |
        (
          call.getMethodName() = ["match", "matchAll"] and
          re.flowsTo(call.getArgument(0)) and
          node1 = call.getReceiver() and
          node2 = call
          or
          call = re.getAMethodCall("exec") and
          node1 = call.getArgument(0) and
          node2 = call
        )
      |
        // If the regexp is unknown, assume it will extract the URL suffix
        not exists(re.getRoot())
        or
        captureAfterSuffixIndicator(re.getRoot().getAChild*())
      )
    )
  }

  /** Holds if the `n`th child of `seq` contains a character indicating that everything thereafter is part of the suffix */
  private predicate containsSuffixIndicator(RegExpSequence seq, int n) {
    // Also include '=' as it usually only appears in the URL suffix
    exists(RegExpConstant constant | constant = seq.getChild(n).getAChild*() |
      constant.getValue().regexpMatch(".*[?#=].*")
    )
  }

  /** Holds if the `n`th child of `seq` contains a capture group. */
  private predicate containsCaptureGroup(RegExpSequence seq, int n) {
    exists(RegExpGroup group | group = seq.getChild(n).getAChild*() | group.isCapture())
  }

  /**
   * Holds if `seq` contains a capture group that will likely match part of the URL suffix,
   * thereby extracting tainted data.
   *
   * For example, `/#(.*)/.exec(url)` will extract the tainted URL suffix from `url`.
   */
  private predicate captureAfterSuffixIndicator(RegExpSequence seq) {
    exists(int indicatorIndex, int captureIndex |
      indicatorIndex < captureIndex and
      containsSuffixIndicator(seq, indicatorIndex) and
      containsCaptureGroup(seq, captureIndex)
    )
  }
}

View File

@@ -19,7 +19,23 @@ import BrokenCryptoAlgorithmCustomizations::BrokenCryptoAlgorithm
* added either by extending the relevant class, or by subclassing this configuration itself,
* and amending the sources and sinks.
*/
class Configuration extends TaintTracking::Configuration {
module BrokenCryptoAlgorithmConfig implements DataFlow::ConfigSig {
  /** Holds if `source` is a `Source`. */
  predicate isSource(DataFlow::Node source) {
    exists(Source src | src = source)
  }

  /** Holds if `sink` is a `Sink`. */
  predicate isSink(DataFlow::Node sink) {
    exists(Sink snk | snk = sink)
  }

  /** Holds if `node` is a `Sanitizer`, through which flow is blocked. */
  predicate isBarrier(DataFlow::Node node) {
    exists(Sanitizer sanitizer | sanitizer = node)
  }
}
/**
* Taint tracking flow for sensitive information in broken or weak cryptographic algorithms.
*/
module BrokenCryptoAlgorithmFlow = TaintTracking::Global<BrokenCryptoAlgorithmConfig>;
/**
* DEPRECATED. Use the `BrokenCryptoAlgorithmFlow` module instead.
*/
deprecated class Configuration extends TaintTracking::Configuration {
Configuration() { this = "BrokenCryptoAlgorithm" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }

View File

@@ -14,9 +14,11 @@ module BuildArtifactLeak {
*/
abstract class Sink extends DataFlow::Node {
/**
* DEPRECATED. This query no longer uses flow state.
*
* Gets a data-flow label that leaks information for this sink.
*/
DataFlow::FlowLabel getLabel() { result.isTaint() }
deprecated DataFlow::FlowLabel getLabel() { result.isTaint() }
}
/**

View File

@@ -14,7 +14,33 @@ import CleartextLoggingCustomizations::CleartextLogging as CleartextLogging
/**
* A taint tracking configuration for storage of sensitive information in build artifact.
*/
class Configuration extends TaintTracking::Configuration {
module BuildArtifactLeakConfig implements DataFlow::ConfigSig {
  /** Holds if `source` is a `CleartextLogging::Source`. */
  predicate isSource(DataFlow::Node source) {
    exists(CleartextLogging::Source src | src = source)
  }

  /** Holds if `sink` is a `Sink`. */
  predicate isSink(DataFlow::Node sink) {
    exists(Sink snk | snk = sink)
  }

  /** Holds if `node` is a `CleartextLogging::Barrier`, through which flow is blocked. */
  predicate isBarrier(DataFlow::Node node) {
    exists(CleartextLogging::Barrier barrier | barrier = node)
  }

  /** Holds if `node1 -> node2` is an additional taint step from `CleartextLogging`. */
  predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
    CleartextLogging::isAdditionalTaintStep(node1, node2)
  }

  /** Holds if any property may be implicitly read at `node`, which must be a sink. */
  predicate allowImplicitRead(DataFlow::Node node, DataFlow::ContentSet contents) {
    // All properties of a leaked object are themselves leaked.
    isSink(node) and
    contents = DataFlow::ContentSet::anyProperty()
  }
}
/**
* Taint tracking flow for storage of sensitive information in build artifact.
*/
module BuildArtifactLeakFlow = TaintTracking::Global<BuildArtifactLeakConfig>;
/**
* DEPRECATED. Use the `BuildArtifactLeakFlow` module instead.
*/
deprecated class Configuration extends TaintTracking::Configuration {
Configuration() { this = "BuildArtifactLeak" }
override predicate isSource(DataFlow::Node source, DataFlow::FlowLabel lbl) {

View File

@@ -16,14 +16,20 @@ module CleartextLogging {
/** Gets a string that describes the type of this data flow source. */
abstract string describe();
abstract DataFlow::FlowLabel getLabel();
/**
* DEPRECATED. Overriding this predicate no longer has any effect.
*/
deprecated DataFlow::FlowLabel getLabel() { result.isTaint() }
}
/**
* A data flow sink for clear-text logging of sensitive information.
*/
abstract class Sink extends DataFlow::Node {
DataFlow::FlowLabel getLabel() { result.isTaint() }
/**
* DEPRECATED. Overriding this predicate no longer has any effect.
*/
deprecated DataFlow::FlowLabel getLabel() { result.isTaint() }
}
/**
@@ -106,29 +112,28 @@ module CleartextLogging {
abstract private class NonCleartextPassword extends DataFlow::Node { }
/**
* An object with a property that may contain password information
*
* This is a source since `console.log(obj)` will show the properties of `obj`.
* A value stored in a property that may contain password information
*/
private class ObjectPasswordPropertySource extends DataFlow::ValueNode, Source {
string name;
ObjectPasswordPropertySource() {
exists(DataFlow::PropWrite write |
write.getPropertyName() = name and
name.regexpMatch(maybePassword()) and
not name.regexpMatch(notSensitiveRegexp()) and
write = this.(DataFlow::SourceNode).getAPropertyWrite(name) and
this = write.getRhs() and
// avoid safe values assigned to presumably unsafe names
not write.getRhs() instanceof NonCleartextPassword
not this instanceof NonCleartextPassword
)
}
override string describe() { result = "an access to " + name }
override DataFlow::FlowLabel getLabel() { result.isTaint() }
}
/** An access to a variable or property that might contain a password. */
/**
* An access to a variable or property that might contain a password.
*/
private class ReadPasswordSource extends DataFlow::ValueNode, Source {
string name;
@@ -150,8 +155,6 @@ module CleartextLogging {
}
override string describe() { result = "an access to " + name }
override DataFlow::FlowLabel getLabel() { result.isTaint() }
}
/** A call that might return a password. */
@@ -164,8 +167,6 @@ module CleartextLogging {
}
override string describe() { result = "a call to " + name }
override DataFlow::FlowLabel getLabel() { result.isTaint() }
}
/** An access to the sensitive object `process.env`. */
@@ -173,8 +174,28 @@ module CleartextLogging {
ProcessEnvSource() { this = NodeJSLib::process().getAPropertyRead("env") }
override string describe() { result = "process environment" }
}
override DataFlow::FlowLabel getLabel() { result.isTaint() }
/**
* Gets a data flow node referring to `process.env`, as seen by the type tracker `t`.
*
* Tracking starts at nodes that are a `ProcessEnvSource` and is extended one
* tracking step at a time.
*/
private DataFlow::SourceNode processEnv(DataFlow::TypeTracker t) {
// Base case: the tracker is at its start and the node is itself a `ProcessEnvSource`.
t.start() and
result instanceof ProcessEnvSource
or
// Recursive case: take one tracking step from a node already reached under tracker `t2`.
exists(DataFlow::TypeTracker t2 | result = processEnv(t2).track(t2, t))
}
/**
* Gets a data flow node referring to `process.env`.
*
* This is the result of the type-tracking overload of `processEnv` for the
* tracker `DataFlow::TypeTracker::end()`.
*/
DataFlow::SourceNode processEnv() { result = processEnv(DataFlow::TypeTracker::end()) }
/**
* A property access on `process.env`, seen as a barrier.
*
* Only reads whose property name is not reported by `nameIndicatesSensitiveData`
* are included.
*/
private class SafeEnvironmentVariableBarrier extends Barrier instanceof DataFlow::PropRead {
SafeEnvironmentVariableBarrier() {
// The read must be a property read on a node that refers to `process.env`.
this = processEnv().getAPropertyRead() and
// If the name is known, it should not be sensitive
// NOTE(review): a read without a statically known name presumably also satisfies this
// negation and is then treated as a barrier as well — confirm that this is intended.
not nameIndicatesSensitiveData(this.getPropertyName(), _)
}
}
/**
@@ -186,26 +207,10 @@ module CleartextLogging {
succ.(DataFlow::PropRead).getBase() = pred
}
private class PropReadAsBarrier extends Barrier {
PropReadAsBarrier() {
this = any(DataFlow::PropRead read).getBase() and
// the 'foo' in 'foo.bar()' may have flow, we only want to suppress plain property reads
not this = any(DataFlow::MethodCallNode call).getReceiver() and
// do not block custom taint steps from this node
not isAdditionalTaintStep(this, _)
}
}
/**
* Holds if the edge `src` -> `trg` is an additional taint-step for clear-text logging of sensitive information.
*/
predicate isAdditionalTaintStep(DataFlow::Node src, DataFlow::Node trg) {
// A taint propagating data flow edge through objects: a tainted write taints the entire object.
exists(DataFlow::PropWrite write |
write.getRhs() = src and
trg.(DataFlow::SourceNode).flowsTo(write.getBase())
)
or
// A property-copy step,
// dst[x] = src[x]
// dst[x] = JSON.stringify(src[x])
@@ -221,7 +226,7 @@ module CleartextLogging {
not exists(read.getPropertyName()) and
not isFilteredPropertyName(read.getPropertyNameExpr().flow().getALocalSource()) and
src = read.getBase() and
trg = write.getBase().getALocalSource()
trg = write.getBase().getPostUpdateNode()
)
or
// Taint through the arguments object.

View File

@@ -20,7 +20,38 @@ private import CleartextLoggingCustomizations::CleartextLogging as CleartextLogg
* added either by extending the relevant class, or by subclassing this configuration itself,
* and amending the sources and sinks.
*/
class Configuration extends TaintTracking::Configuration {
module CleartextLoggingConfig implements DataFlow::ConfigSig {
// Sources, sinks and barriers are exactly the nodes of the `Source`, `Sink` and `Barrier` classes.
predicate isSource(DataFlow::Node source) { source instanceof Source }
predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
predicate isBarrier(DataFlow::Node node) { node instanceof Barrier }
predicate isBarrierIn(DataFlow::Node node) {
// We rely on heuristic sources, which tend to overlap with one another,
// so every source is also reported through `isBarrierIn`.
isSource(node)
}
// Additional steps are delegated to `CleartextLogging::isAdditionalTaintStep`.
predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
CleartextLogging::isAdditionalTaintStep(node1, node2)
}
predicate allowImplicitRead(DataFlow::Node node, DataFlow::ContentSet contents) {
// Assume all properties of a logged object are themselves logged.
// This holds only at sinks, and only for the content set standing for any property.
contents = DataFlow::ContentSet::anyProperty() and
isSink(node)
}
}
/**
* Taint tracking flow for clear-text logging of sensitive information.
*/
module CleartextLoggingFlow = TaintTracking::Global<CleartextLoggingConfig>;
/**
* DEPRECATED. Use the `CleartextLoggingFlow` module instead.
*/
deprecated class Configuration extends TaintTracking::Configuration {
Configuration() { this = "CleartextLogging" }
override predicate isSource(DataFlow::Node source, DataFlow::FlowLabel lbl) {

View File

@@ -19,7 +19,20 @@ import CleartextStorageCustomizations::CleartextStorage
* added either by extending the relevant class, or by subclassing this configuration itself,
* and amending the sources and sinks.
*/
class Configuration extends TaintTracking::Configuration {
module ClearTextStorageConfig implements DataFlow::ConfigSig {
// Sources, sinks and barriers are the nodes of the `Source`, `Sink` and `Sanitizer`
// classes respectively.
predicate isSource(DataFlow::Node source) { source instanceof Source }
predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
predicate isBarrier(DataFlow::Node node) { node instanceof Sanitizer }
}
/**
* Taint tracking flow for clear-text storage, instantiated from `ClearTextStorageConfig`.
*/
module ClearTextStorageFlow = TaintTracking::Global<ClearTextStorageConfig>;
/**
* DEPRECATED. Use the `ClearTextStorageFlow` module instead.
*/
deprecated class Configuration extends TaintTracking::Configuration {
Configuration() { this = "ClearTextStorage" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }

View File

@@ -14,7 +14,34 @@ import RequestForgeryCustomizations::RequestForgery
/**
* A taint tracking configuration for client-side request forgery.
*/
class Configuration extends TaintTracking::Configuration {
module ClientSideRequestForgeryConfig implements DataFlow::ConfigSig {
  /** Holds if `source` is a `Source` for which `isServerSide()` does not hold. */
  predicate isSource(DataFlow::Node source) {
    source instanceof Source and
    not source.(Source).isServerSide()
  }

  /** Holds if `sink` is a `Sink`. */
  predicate isSink(DataFlow::Node sink) { sink instanceof Sink }

  /** Holds if `node` is a `Sanitizer`. */
  predicate isBarrier(DataFlow::Node node) { node instanceof Sanitizer }

  /** Holds if `node` is the first component of some `sanitizingPrefixEdge`. */
  predicate isBarrierOut(DataFlow::Node node) { sanitizingPrefixEdge(node, _) }

  /** Holds if `isAdditionalRequestForgeryStep` provides a step from `node1` to `node2`. */
  predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
    isAdditionalRequestForgeryStep(node1, node2)
  }
}
/**
* Taint tracking for client-side request forgery.
*/
module ClientSideRequestForgeryFlow = TaintTracking::Global<ClientSideRequestForgeryConfig>;
/**
* DEPRECATED. Use the `ClientSideRequestForgeryFlow` module instead.
*/
deprecated class Configuration extends TaintTracking::Configuration {
Configuration() { this = "ClientSideRequestForgery" }
override predicate isSource(DataFlow::Node source) {

View File

@@ -5,14 +5,20 @@
*/
import javascript
private import semmle.javascript.security.TaintedUrlSuffixCustomizations
module ClientSideUrlRedirect {
import semmle.javascript.security.CommonFlowState
/**
* A data flow source for unvalidated URL redirect vulnerabilities.
*/
abstract class Source extends DataFlow::Node {
/** Gets a flow label to associate with this source. */
DataFlow::FlowLabel getAFlowLabel() { result.isTaint() }
/** Gets a flow state to associate with this source. */
FlowState getAFlowState() { result.isTaint() }
/** DEPRECATED. Use `getAFlowState()` instead. */
deprecated DataFlow::FlowLabel getAFlowLabel() { result = this.getAFlowState().toFlowLabel() }
}
/**
@@ -31,12 +37,12 @@ module ClientSideUrlRedirect {
abstract class Sanitizer extends DataFlow::Node { }
/**
* DEPRECATED. Replaced by functionality from the `TaintedUrlSuffix` library.
*
* A flow label for values that represent the URL of the current document, and
* hence are only partially user-controlled.
*/
abstract class DocumentUrl extends DataFlow::FlowLabel {
DocumentUrl() { this = "document.url" }
}
deprecated class DocumentUrl = TaintedUrlSuffix::TaintedUrlSuffixLabel;
/**
* DEPRECATED: Use `ActiveThreatModelSource` from Concepts instead!
@@ -49,18 +55,26 @@ module ClientSideUrlRedirect {
private class ActiveThreatModelSourceAsSource extends Source instanceof ActiveThreatModelSource {
ActiveThreatModelSourceAsSource() { not this.(ClientSideRemoteFlowSource).getKind().isPath() }
override DataFlow::FlowLabel getAFlowLabel() {
if this.(ClientSideRemoteFlowSource).getKind().isUrl()
then result instanceof DocumentUrl
else result.isTaint()
override FlowState getAFlowState() {
if this = TaintedUrlSuffix::source() then result.isTaintedUrlSuffix() else result.isTaint()
}
}
/**
* Holds if `node` extracts a part of a URL that does not contain the suffix.
*
* This matches calls to a method named by `StringOps::substringMethodName()` or
* named `split`, except calls for which `untrustedUrlSubstring(_, node)` holds.
*/
pragma[inline]
deprecated predicate isPrefixExtraction(DataFlow::MethodCallNode node) {
// Block flow through prefix-extraction `substring(0, ...)` and `split("#")[0]`
node.getMethodName() = [StringOps::substringMethodName(), "split"] and
// Calls recognized by `untrustedUrlSubstring` are not prefix extractions.
not untrustedUrlSubstring(_, node)
}
/**
* Holds if `substring` refers to a substring of `base` which is considered untrusted
* when `base` is the current URL.
*/
predicate untrustedUrlSubstring(DataFlow::Node base, DataFlow::Node substring) {
deprecated predicate untrustedUrlSubstring(DataFlow::Node base, DataFlow::Node substring) {
exists(DataFlow::MethodCallNode mcn, string methodName |
mcn = substring and mcn.calls(base, methodName)
|

View File

@@ -10,16 +10,61 @@
import javascript
import UrlConcatenation
import ClientSideUrlRedirectCustomizations::ClientSideUrlRedirect
import semmle.javascript.security.TaintedUrlSuffix
// Materialize flow labels
private class ConcreteDocumentUrl extends DocumentUrl {
deprecated private class ConcreteDocumentUrl extends DocumentUrl {
ConcreteDocumentUrl() { this = this }
}
/**
* A taint-tracking configuration for reasoning about unvalidated URL redirections.
*/
class Configuration extends TaintTracking::Configuration {
module ClientSideUrlRedirectConfig implements DataFlow::StateConfigSig {
import semmle.javascript.security.CommonFlowState
// A source contributes every flow state returned by its `getAFlowState()`.
predicate isSource(DataFlow::Node source, FlowState state) {
source.(Source).getAFlowState() = state
}
// Sinks are only sinks in the state for which `isTaint()` holds.
predicate isSink(DataFlow::Node sink, FlowState state) {
sink instanceof Sink and state.isTaint()
}
// State-independent barriers: `Sanitizer` nodes and barrier nodes of `HostnameSanitizerGuard`.
predicate isBarrier(DataFlow::Node node) {
node instanceof Sanitizer or node = HostnameSanitizerGuard::getABarrierNode()
}
// State-specific barriers are delegated to `TaintedUrlSuffix::isStateBarrier`.
predicate isBarrier(DataFlow::Node node, FlowState state) {
TaintedUrlSuffix::isStateBarrier(node, state)
}
// Nodes that are the first component of a `hostnameSanitizingPrefixEdge`.
predicate isBarrierOut(DataFlow::Node node) { hostnameSanitizingPrefixEdge(node, _) }
// Every sink/state pair is also reported through `isBarrierOut`.
predicate isBarrierOut(DataFlow::Node node, FlowState state) { isSink(node, state) }
predicate isAdditionalFlowStep(
DataFlow::Node node1, FlowState state1, DataFlow::Node node2, FlowState state2
) {
// Steps provided by the `TaintedUrlSuffix` library, which may relate different states.
TaintedUrlSuffix::isAdditionalFlowStep(node1, state1, node2, state2)
or
// A step from the input of an HTML sanitizer call to the call itself, keeping the state.
exists(HtmlSanitizerCall call |
node1 = call.getInput() and
node2 = call and
state1 = state2
)
}
}
/**
* Taint-tracking flow for reasoning about unvalidated URL redirections.
*/
module ClientSideUrlRedirectFlow = TaintTracking::GlobalWithState<ClientSideUrlRedirectConfig>;
/**
* DEPRECATED. Use the `ClientSideUrlRedirectFlow` module instead.
*
* A taint-tracking configuration for reasoning about unvalidated URL redirections.
*/
deprecated class Configuration extends TaintTracking::Configuration {
Configuration() { this = "ClientSideUrlRedirect" }
override predicate isSource(DataFlow::Node source, DataFlow::FlowLabel lbl) {
@@ -36,21 +81,23 @@ class Configuration extends TaintTracking::Configuration {
override predicate isSanitizerOut(DataFlow::Node node) { hostnameSanitizingPrefixEdge(node, _) }
override predicate isAdditionalFlowStep(
DataFlow::Node pred, DataFlow::Node succ, DataFlow::FlowLabel f, DataFlow::FlowLabel g
DataFlow::Node node1, DataFlow::Node node2, DataFlow::FlowLabel state1,
DataFlow::FlowLabel state2
) {
untrustedUrlSubstring(pred, succ) and
f instanceof DocumentUrl and
g.isTaint()
ClientSideUrlRedirectConfig::isAdditionalFlowStep(node1, FlowState::fromFlowLabel(state1),
node2, FlowState::fromFlowLabel(state2))
or
// preserve document.url label in step from `location` to `location.href`
f instanceof DocumentUrl and
g instanceof DocumentUrl and
succ.(DataFlow::PropRead).accesses(pred, "href")
or
exists(HtmlSanitizerCall call |
pred = call.getInput() and
succ = call and
f = g
// Preserve document.url label in step from `location` to `location.href` or `location.toString()`
state1 instanceof DocumentUrl and
state2 instanceof DocumentUrl and
(
node2.(DataFlow::PropRead).accesses(node1, "href")
or
exists(DataFlow::CallNode call |
call.getCalleeName() = "toString" and
node1 = call.getReceiver() and
node2 = call
)
)
}

View File

@@ -13,7 +13,28 @@ import CodeInjectionCustomizations::CodeInjection
/**
* A taint-tracking configuration for reasoning about code injection vulnerabilities.
*/
class Configuration extends TaintTracking::Configuration {
module CodeInjectionConfig implements DataFlow::ConfigSig {
  /** Holds if `source` is a `Source`. */
  predicate isSource(DataFlow::Node source) { source instanceof Source }

  /** Holds if `sink` is a `Sink`. */
  predicate isSink(DataFlow::Node sink) { sink instanceof Sink }

  /** Holds if `node` is a `Sanitizer`. */
  predicate isBarrier(DataFlow::Node node) { node instanceof Sanitizer }

  /** Holds if `node2` is an HTML sanitizer call and `node1` is its input. */
  predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
    // HTML sanitizers are insufficient protection against code injection
    exists(HtmlSanitizerCall sanitizerCall |
      node2 = sanitizerCall and
      node1 = sanitizerCall.getInput()
    )
  }
}
/**
* Taint-tracking for reasoning about code injection vulnerabilities.
*/
module CodeInjectionFlow = TaintTracking::Global<CodeInjectionConfig>;
/**
* DEPRECATED. Use the `CodeInjectionFlow` module instead.
*/
deprecated class Configuration extends TaintTracking::Configuration {
Configuration() { this = "CodeInjection" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
@@ -25,8 +46,7 @@ class Configuration extends TaintTracking::Configuration {
node instanceof Sanitizer
}
override predicate isAdditionalTaintStep(DataFlow::Node src, DataFlow::Node trg) {
// HTML sanitizers are insufficient protection against code injection
src = trg.(HtmlSanitizerCall).getInput()
override predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) {
CodeInjectionConfig::isAdditionalFlowStep(node1, node2)
}
}

View File

@@ -11,25 +11,41 @@ import javascript
import CommandInjectionCustomizations::CommandInjection
import IndirectCommandArgument
/**
 * Holds if `sink` is a data flow sink for command-injection vulnerabilities, and
 * the alert for it should be reported at the node `highlight`.
 */
predicate isSinkWithHighlight(DataFlow::Node sink, DataFlow::Node highlight) {
  // Indirect case: `isIndirectCommandArgument` chooses the node to highlight.
  isIndirectCommandArgument(sink, highlight)
  or
  // Direct case: a `Sink` is highlighted at the sink itself.
  highlight = sink and
  sink instanceof Sink
}
/**
* A taint-tracking configuration for reasoning about command-injection vulnerabilities.
*/
class Configuration extends TaintTracking::Configuration {
module CommandInjectionConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) { source instanceof Source }
// A node is a sink if `isSinkWithHighlight` holds for it with any highlight node.
predicate isSink(DataFlow::Node sink) { isSinkWithHighlight(sink, _) }
predicate isBarrier(DataFlow::Node node) { node instanceof Sanitizer }
}
/**
* Taint-tracking for reasoning about command-injection vulnerabilities.
*/
module CommandInjectionFlow = TaintTracking::Global<CommandInjectionConfig>;
/**
* DEPRECATED. Use the `CommandInjectionFlow` module instead.
*/
deprecated class Configuration extends TaintTracking::Configuration {
Configuration() { this = "CommandInjection" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
override predicate isSource(DataFlow::Node source) { CommandInjectionConfig::isSource(source) }
/**
* Holds if `sink` is a data flow sink for command-injection vulnerabilities, and
* the alert should be placed at the node `highlight`.
*/
predicate isSinkWithHighlight(DataFlow::Node sink, DataFlow::Node highlight) {
sink instanceof Sink and highlight = sink
or
isIndirectCommandArgument(sink, highlight)
}
override predicate isSink(DataFlow::Node sink) { CommandInjectionConfig::isSink(sink) }
override predicate isSink(DataFlow::Node sink) { this.isSinkWithHighlight(sink, _) }
override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer }
override predicate isSanitizer(DataFlow::Node node) { CommandInjectionConfig::isBarrier(node) }
}

View File

@@ -13,7 +13,28 @@ import ConditionalBypassCustomizations::ConditionalBypass
/**
* A taint tracking configuration for bypass of sensitive action guards.
*/
class Configuration extends TaintTracking::Configuration {
module ConditionalBypassConfig implements DataFlow::ConfigSig {
  /** Holds if `source` is a `Source`. */
  predicate isSource(DataFlow::Node source) { source instanceof Source }

  /** Holds if `sink` is a `Sink`. */
  predicate isSink(DataFlow::Node sink) { sink instanceof Sink }

  /** Holds if `node` is a `Sanitizer`. */
  predicate isBarrier(DataFlow::Node node) { node instanceof Sanitizer }

  /**
   * Holds if `node2` is a comparison whose operands are the expression of `node1`
   * and some constant expression.
   */
  predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
    // comparing a tainted expression against a constant gives a tainted result
    exists(Comparison cmp, ConstantExpr constant |
      cmp = node2.asExpr() and
      cmp.hasOperands(node1.asExpr(), constant)
    )
  }
}
/**
* Taint tracking flow for bypass of sensitive action guards.
*/
module ConditionalBypassFlow = TaintTracking::Global<ConditionalBypassConfig>;
/**
* DEPRECATED. Use the `ConditionalBypassFlow` module instead.
*/
deprecated class Configuration extends TaintTracking::Configuration {
Configuration() { this = "ConditionalBypass" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
@@ -26,8 +47,7 @@ class Configuration extends TaintTracking::Configuration {
}
override predicate isAdditionalTaintStep(DataFlow::Node src, DataFlow::Node dst) {
// comparing a tainted expression against a constant gives a tainted result
dst.asExpr().(Comparison).hasOperands(src.asExpr(), any(ConstantExpr c))
ConditionalBypassConfig::isAdditionalFlowStep(src, dst)
}
}
@@ -72,7 +92,67 @@ class SensitiveActionGuardComparisonOperand extends Sink {
* If flow from `source` taints `sink`, then an attacker can
* control if `action` should be executed or not.
*/
predicate isTaintedGuardForSensitiveAction(
predicate isTaintedGuardNodeForSensitiveAction(
ConditionalBypassFlow::PathNode sink, ConditionalBypassFlow::PathNode source,
SensitiveAction action
) {
// `action` is the sensitive action that the sink node reports through `getAction()`.
action = sink.getNode().(Sink).getAction() and
// exclude the intermediary sink
not sink.getNode() instanceof SensitiveActionGuardComparisonOperand and
(
// ordinary taint tracking to a guard
ConditionalBypassFlow::flowPath(source, sink)
or
// taint tracking to both operands of a guard comparison
exists(
SensitiveActionGuardComparison cmp, ConditionalBypassFlow::PathNode lSource,
ConditionalBypassFlow::PathNode rSource, ConditionalBypassFlow::PathNode lSink,
ConditionalBypassFlow::PathNode rSink
|
// `sink` is the guard of the comparison, and each operand is reached by its own flow path
sink.getNode() = cmp.getGuard() and
ConditionalBypassFlow::flowPath(lSource, lSink) and
lSink.getNode() = DataFlow::valueNode(cmp.getLeftOperand()) and
ConditionalBypassFlow::flowPath(rSource, rSink) and
rSink.getNode() = DataFlow::valueNode(cmp.getRightOperand())
|
// the start of either operand's path is reported as `source`
source = lSource or
source = rSource
)
)
}
/**
* Holds if `e` effectively guards access to `action` by returning or throwing early.
*
* Example: `if (e) return; action(x)`.
*
* The node of `e` must be a `Sink` whose expression occurs in the condition of an
* if-statement without an else-branch, and `action` must lie outside that if-statement.
*/
predicate isEarlyAbortGuardNode(ConditionalBypassFlow::PathNode e, SensitiveAction action) {
exists(IfStmt guard |
// `e` is in the condition of an if-statement ...
e.getNode().(Sink).asExpr().getParentExpr*() = guard.getCondition() and
// ... where the then-branch always throws or returns
exists(Stmt abort |
abort instanceof ThrowStmt or
abort instanceof ReturnStmt
|
abort.nestedIn(guard) and
// "always" is expressed as: the basic block of `abort` post-dominates the
// basic block of the then-branch
abort.getBasicBlock().(ReachableBasicBlock).postDominates(guard.getThen().getBasicBlock())
) and
// ... and the else-branch does not exist
not exists(guard.getElse())
|
// ... and `action` is outside the if-statement
not action.asExpr().getEnclosingStmt().nestedIn(guard)
)
}
/**
* Holds if `sink` guards `action`, and `source` taints `sink`.
*
* If flow from `source` taints `sink`, then an attacker can
* control if `action` should be executed or not.
*/
deprecated predicate isTaintedGuardForSensitiveAction(
DataFlow::PathNode sink, DataFlow::PathNode source, SensitiveAction action
) {
action = sink.getNode().(Sink).getAction() and
@@ -104,7 +184,7 @@ predicate isTaintedGuardForSensitiveAction(
*
* Example: `if (e) return; action(x)`.
*/
predicate isEarlyAbortGuard(DataFlow::PathNode e, SensitiveAction action) {
deprecated predicate isEarlyAbortGuard(DataFlow::PathNode e, SensitiveAction action) {
exists(IfStmt guard |
// `e` is in the condition of an if-statement ...
e.getNode().(Sink).asExpr().getParentExpr*() = guard.getCondition() and

View File

@@ -14,7 +14,26 @@ import CorsMisconfigurationForCredentialsCustomizations::CorsMisconfigurationFor
/**
* A data flow configuration for CORS misconfiguration for credentials transfer.
*/
class Configuration extends TaintTracking::Configuration {
module CorsMisconfigurationConfig implements DataFlow::ConfigSig {
  /** Holds if `source` is a `Source`. */
  predicate isSource(DataFlow::Node source) { source instanceof Source }

  /** Holds if `sink` is a `Sink`. */
  predicate isSink(DataFlow::Node sink) { sink instanceof Sink }

  /**
   * Holds if `node` is a barrier node of `TaintTracking::AdHocWhitelistCheckSanitizer`
   * or a `Sanitizer`.
   */
  predicate isBarrier(DataFlow::Node node) {
    node = TaintTracking::AdHocWhitelistCheckSanitizer::getABarrierNode()
    or
    node instanceof Sanitizer
  }
}
/**
* Taint-tracking flow for CORS misconfiguration for credentials transfer.
*/
module CorsMisconfigurationFlow = TaintTracking::Global<CorsMisconfigurationConfig>;
/**
* DEPRECATED. Use the `CorsMisconfigurationFlow` module instead.
*/
deprecated class Configuration extends TaintTracking::Configuration {
Configuration() { this = "CorsMisconfigurationForCredentials" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }

Some files were not shown because too many files have changed in this diff Show More