Files
codeql-info/ql/docs/language/learn-ql/build.html-5f4acb8/codeql-language-guides/using-flow-labels-for-precise-data-flow-analysis.html
2023-11-20 11:57:03 -08:00

573 lines
43 KiB
HTML
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<!DOCTYPE html>
<html lang="en" data-content_root="../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Using flow labels for precise data flow analysis &#8212; CodeQL</title>
<link rel="stylesheet" type="text/css" href="../_static/pygments.css?v=fa44fd50" />
<link rel="stylesheet" type="text/css" href="../_static/alabaster.css?v=93459777" />
<script src="../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../_static/doctools.js?v=888ff710"></script>
<script src="../_static/sphinx_highlight.js?v=dc90522c"></script>
<link rel="icon" href="../_static/favicon.ico"/>
<link rel="index" title="Index" href="../genindex.html" />
<link rel="search" title="Search" href="../search.html" />
<link rel="next" title="Specifying additional remote flow sources for JavaScript" href="specifying-additional-remote-flow-sources-for-javascript.html" />
<link rel="prev" title="Analyzing data flow in JavaScript and TypeScript" href="analyzing-data-flow-in-javascript-and-typescript.html" />
<link rel="stylesheet" href="../_static/custom.css" type="text/css" />
<link rel="stylesheet" href="../_static/primer.css" type="text/css" />
</head><body>
<header class="Header">
<div class="Header-item--full">
<a href="https://codeql.github.com/docs" class="Header-link f2 d-flex flex-items-center">
<!-- <%= octicon "mark-github", class: "mr-2", height: 32 %> -->
<svg height="32" class="octicon octicon-mark-github mr-2" viewBox="0 0 16 16" version="1.1" width="32"
aria-hidden="true">
<path fill-rule="evenodd"
d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.013 8.013 0 0 0 16 8c0-4.42-3.58-8-8-8z">
</path>
</svg>
<span class="hide-sm">CodeQL documentation</span>
</a>
</div>
<div class="Header-item hide-sm hide-md">
<script src="https://addsearch.com/js/?key=93b4d287e2fc079a4089412b669785d5&categories=!0xhelp.semmle.com,0xcodeql.github.com,1xdocs,1xcodeql-standard-libraries,1xcodeql-query-help"></script>
</div>
<div class="Header-item">
<details class="dropdown details-reset details-overlay d-inline-block">
<summary class="btn bg-gray-dark text-white border" aria-haspopup="true">
CodeQL resources
<div class="dropdown-caret"></div>
</summary>
<ul class="dropdown-menu dropdown-menu-se dropdown-menu-dark">
<li><a class="dropdown-item" href="https://codeql.github.com/docs/codeql-overview">CodeQL overview</a></li>
<li class="dropdown-divider" role="separator"></li>
<div class="dropdown-header">
CodeQL tools
</div>
<li><a class="dropdown-item" href="https://codeql.github.com/docs/codeql-for-visual-studio-code">CodeQL for VS Code</a>
<li><a class="dropdown-item" href="https://codeql.github.com/docs/codeql-cli">CodeQL CLI</a>
</li>
<li class="dropdown-divider" role="separator"></li>
<div class="dropdown-header">
CodeQL guides
</div>
<li><a class="dropdown-item" href="https://codeql.github.com/docs/writing-codeql-queries">Writing CodeQL queries</a></li>
<li><a class="dropdown-item" href="https://codeql.github.com/docs/codeql-language-guides">CodeQL language guides</a>
<li class="dropdown-divider" role="separator"></li>
<div class="dropdown-header">
Reference docs
</div>
<li><a class="dropdown-item" href="https://codeql.github.com/docs/ql-language-reference/">QL language
reference</a>
<li><a class="dropdown-item" href="https://codeql.github.com/codeql-standard-libraries">CodeQL
standard-libraries</a>
<li><a class="dropdown-item" href="https://codeql.github.com/codeql-query-help">CodeQL
query help</a>
<li class="dropdown-divider" role="separator"></li>
<div class="dropdown-header">
Source files
</div>
<li><a class="dropdown-item" href="https://github.com/github/codeql">CodeQL repository</a>
</ul>
</details>
</div>
</header>
<main class="bg-gray-light clearfix">
<nav class="SideNav position-sticky top-0 col-lg-3 col-md-3 float-left p-4 hide-sm hide-md overflow-y-auto">
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="../codeql-overview/index.html">CodeQL overview</a></li>
<li class="toctree-l1"><a class="reference internal" href="../codeql-for-visual-studio-code/index.html">CodeQL for Visual Studio Code</a></li>
<li class="toctree-l1"><a class="reference internal" href="../codeql-cli/index.html">CodeQL CLI</a></li>
<li class="toctree-l1"><a class="reference internal" href="../writing-codeql-queries/index.html">Writing CodeQL queries</a></li>
<li class="toctree-l1 current"><a class="reference internal" href="index.html">CodeQL language guides</a><ul class="current">
<li class="toctree-l2"><a class="reference internal" href="codeql-for-cpp.html">CodeQL for C and C++</a></li>
<li class="toctree-l2"><a class="reference internal" href="codeql-for-csharp.html">CodeQL for C#</a></li>
<li class="toctree-l2"><a class="reference internal" href="codeql-for-go.html">CodeQL for Go</a></li>
<li class="toctree-l2"><a class="reference internal" href="codeql-for-java.html">CodeQL for Java</a></li>
<li class="toctree-l2 current"><a class="reference internal" href="codeql-for-javascript.html">CodeQL for JavaScript</a><ul class="current">
<li class="toctree-l3"><a class="reference internal" href="basic-query-for-javascript-code.html">Basic query for JavaScript code</a></li>
<li class="toctree-l3"><a class="reference internal" href="codeql-library-for-javascript.html">CodeQL library for JavaScript</a></li>
<li class="toctree-l3"><a class="reference internal" href="codeql-library-for-typescript.html">CodeQL library for TypeScript</a></li>
<li class="toctree-l3"><a class="reference internal" href="analyzing-data-flow-in-javascript-and-typescript.html">Analyzing data flow in JavaScript and TypeScript</a></li>
<li class="toctree-l3 current"><a class="current reference internal" href="#">Using flow labels for precise data flow analysis</a></li>
<li class="toctree-l3"><a class="reference internal" href="specifying-additional-remote-flow-sources-for-javascript.html">Specifying additional remote flow sources for JavaScript</a></li>
<li class="toctree-l3"><a class="reference internal" href="using-type-tracking-for-api-modeling.html">Using type tracking for API modeling</a></li>
<li class="toctree-l3"><a class="reference internal" href="abstract-syntax-tree-classes-for-working-with-javascript-and-typescript-programs.html">Abstract syntax tree classes for working with JavaScript and TypeScript programs</a></li>
<li class="toctree-l3"><a class="reference internal" href="data-flow-cheat-sheet-for-javascript.html">Data flow cheat sheet for JavaScript</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="codeql-for-python.html">CodeQL for Python</a></li>
<li class="toctree-l2"><a class="reference internal" href="codeql-for-ruby.html">CodeQL for Ruby</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../ql-language-reference/index.html">QL language reference</a></li>
</ul>
</nav>
<div class="body col-sm-12 col-md-9 col-lg-9 float-left border-left">
<div class="hide-lg hide-xl px-4 pt-4">
<div class="related" role="navigation" aria-label="related navigation">
<ul>
<li class="nav-item nav-item-0"><a href="../contents.html">CodeQL</a> &#187;</li>
<li class="nav-item nav-item-1"><a href="index.html"
>CodeQL language guides</a> &#187;</li>
<li class="nav-item nav-item-2"><a href="codeql-for-javascript.html"
accesskey="U">CodeQL for JavaScript</a> &#187;</li>
</ul>
</div>
</div>
<article class="p-4 col-lg-10 col-md-10 col-sm-12">
<section id="using-flow-labels-for-precise-data-flow-analysis">
<span id="id1"></span><h1>Using flow labels for precise data flow analysis<a class="headerlink" href="#using-flow-labels-for-precise-data-flow-analysis" title="Link to this heading"></a></h1>
<p>You can associate flow labels with each value tracked by the flow analysis to determine whether the flow contains potential vulnerabilities.</p>
<section id="overview">
<h2>Overview<a class="headerlink" href="#overview" title="Link to this heading"></a></h2>
<p>You can use basic inter-procedural data-flow analysis and taint tracking as described in
“<a class="reference internal" href="analyzing-data-flow-in-javascript-and-typescript.html"><span class="doc">Analyzing data flow in JavaScript and TypeScript</span></a>” to check whether there is a path in
the data-flow graph from some source node to a sink node that does not pass through any sanitizer
nodes. Another way of thinking about this is that it statically models the flow of data through the
program, and associates a flag with every data value telling us whether it might have come from a
source node.</p>
<p>In some cases, you may want to track more detailed information about data values. This can be done
by associating flow labels with data values, as shown in this tutorial. We will first discuss the
general idea behind flow labels and then show how to use them in practice. Finally, we will give an
overview of the API involved and provide some pointers to standard queries that use flow labels.</p>
</section>
<section id="limitations-of-basic-data-flow-analysis">
<h2>Limitations of basic data-flow analysis<a class="headerlink" href="#limitations-of-basic-data-flow-analysis" title="Link to this heading"></a></h2>
<p>In many applications we are interested in tracking more than just the reachability information provided by inter-procedural data flow analysis.</p>
<p>For example, when tracking object values that originate from untrusted input, we might want to
remember whether the entire object is tainted or whether only part of it is tainted. The former
happens, for example, when parsing a user-controlled string as JSON, meaning that the entire
resulting object is tainted. A typical example of the latter is assigning a tainted value to a
property of an object, which only taints that property but not the rest of the object.</p>
<p>While reading a property of a completely tainted object yields a tainted value, reading a property
of a partially tainted object does not. On the other hand, JSON-encoding even a partially tainted
object and including it in an HTML document is not safe.</p>
<p>Another example where more fine-grained information about tainted values is needed is for tracking
partial sanitization. For example, before interpreting a user-controlled string as a file-system
path, we generally want to make sure that it is neither an absolute path (which could refer to any
file on the file system) nor a relative path containing <code class="docutils literal notranslate"><span class="pre">..</span></code> components (which still could refer
to any file). Usually, checking both of these properties would involve two separate checks. Both
checks taken together should count as a sanitizer, but each individual check is not by itself enough
to make the string safe for use as a path. To handle this case precisely, we want to associate two
bits of information with each tainted value, namely whether it may be absolute, and whether it may
contain <code class="docutils literal notranslate"><span class="pre">..</span></code> components. Untrusted user input has both bits set initially, individual checks turn
off individual bits, and if a value that has at least one bit set is interpreted as a path, a
potential vulnerability is flagged.</p>
</section>
<section id="using-flow-labels">
<h2>Using flow labels<a class="headerlink" href="#using-flow-labels" title="Link to this heading"></a></h2>
<p>You can handle these cases and others like them by associating a set of <cite>flow labels</cite> (sometimes
also referred to as <cite>taint kinds</cite>) with each value being tracked by the analysis. Value-preserving
data-flow steps (such as flow steps from writes to a variable to its reads) preserve the set of flow
labels, but other steps may add or remove flow labels. Sanitizers, in particular, are simply flow
steps that remove some or all flow labels. The initial set of flow labels for a value is determined
by the source node that gives rise to it. Similarly, sink nodes can specify that an incoming value
needs to have a certain flow label (or one of a set of flow labels) in order for the flow to be
flagged as a potential vulnerability.</p>
</section>
<section id="example">
<h2>Example<a class="headerlink" href="#example" title="Link to this heading"></a></h2>
<p>As an example of using flow labels, we will show how to write a query that flags property accesses
on JSON values that come from user-controlled input where we have not checked whether the value is
<code class="docutils literal notranslate"><span class="pre">null</span></code>, so that the property access may cause a runtime exception.</p>
<p>For example, we would like to flag this code:</p>
<div class="highlight-javascript notranslate"><div class="highlight"><pre><span></span><span class="kd">var</span><span class="w"> </span><span class="nx">data</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="nb">JSON</span><span class="p">.</span><span class="nx">parse</span><span class="p">(</span><span class="nx">str</span><span class="p">);</span>
<span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="nx">data</span><span class="p">.</span><span class="nx">length</span><span class="w"> </span><span class="o">&gt;</span><span class="w"> </span><span class="mf">0</span><span class="p">)</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="c1">// problematic: `data` may be `null`</span>
<span class="w"> </span><span class="p">...</span>
<span class="p">}</span>
</pre></div>
</div>
<p>This code, on the other hand, should not be flagged:</p>
<div class="highlight-javascript notranslate"><div class="highlight"><pre><span></span><span class="kd">var</span><span class="w"> </span><span class="nx">data</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="nb">JSON</span><span class="p">.</span><span class="nx">parse</span><span class="p">(</span><span class="nx">str</span><span class="p">);</span>
<span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="nx">data</span><span class="w"> </span><span class="o">&amp;&amp;</span><span class="w"> </span><span class="nx">data</span><span class="p">.</span><span class="nx">length</span><span class="w"> </span><span class="o">&gt;</span><span class="w"> </span><span class="mf">0</span><span class="p">)</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="c1">// unproblematic: `data` is first checked for nullness</span>
<span class="w"> </span><span class="p">...</span>
<span class="p">}</span>
</pre></div>
</div>
<p>We will first try to write a query to find this kind of problem without flow labels, and use the
difficulties we encounter as a motivation for bringing flow labels into play, which will make the
query much easier to implement.</p>
<p>To get started, let’s write a query that simply flags any flow from <code class="docutils literal notranslate"><span class="pre">JSON.parse</span></code> into the base of
a property access:</p>
<div class="highlight-ql notranslate"><div class="highlight"><pre><span></span>import javascript
class JsonTrackingConfig extends DataFlow::Configuration {
JsonTrackingConfig() { this = &quot;JsonTrackingConfig&quot; }
override predicate isSource(DataFlow::Node nd) {
exists(JsonParserCall jpc |
nd = jpc.getOutput()
)
}
override predicate isSink(DataFlow::Node nd) {
exists(DataFlow::PropRef pr |
nd = pr.getBase()
)
}
}
from JsonTrackingConfig cfg, DataFlow::Node source, DataFlow::Node sink
where cfg.hasFlow(source, sink)
select sink, &quot;Property access on JSON value originating $@.&quot;, source, &quot;here&quot;
</pre></div>
</div>
<p>Note that we use the <code class="docutils literal notranslate"><span class="pre">JsonParserCall</span></code> class from the standard library to model various JSON
parsers, including the standard <code class="docutils literal notranslate"><span class="pre">JSON.parse</span></code> API as well as a number of popular npm packages.</p>
<p>Of course, as written this query flags both the good and the bad example above, since we have not
introduced any sanitizers yet.</p>
<p>There are many ways of checking for nullness directly or indirectly. Since this is not the main
focus of this tutorial, we will only show how to model one specific case: if some variable <code class="docutils literal notranslate"><span class="pre">v</span></code> is
known to be truthy, it cannot be <code class="docutils literal notranslate"><span class="pre">null</span></code>. This kind of condition is easily expressed using a
<code class="docutils literal notranslate"><span class="pre">BarrierGuardNode</span></code> (or its counterpart <code class="docutils literal notranslate"><span class="pre">SanitizerGuardNode</span></code> for taint-tracking configurations).
A barrier guard node is a data-flow node <code class="docutils literal notranslate"><span class="pre">b</span></code> that blocks flow through some other node <code class="docutils literal notranslate"><span class="pre">nd</span></code>,
provided that some condition checked at <code class="docutils literal notranslate"><span class="pre">b</span></code> is known to hold, that is, evaluate to a truthy value.</p>
<p>In our case, the barrier guard node is a use of some variable <code class="docutils literal notranslate"><span class="pre">v</span></code>, and the condition is that use
itself: it blocks flow through any use of <code class="docutils literal notranslate"><span class="pre">v</span></code> where the guarding use is known to evaluate to a
truthy value. In our second example above, the use of <code class="docutils literal notranslate"><span class="pre">data</span></code> on the left-hand side of the <code class="docutils literal notranslate"><span class="pre">&amp;&amp;</span></code>
is a barrier guard blocking flow through the use of <code class="docutils literal notranslate"><span class="pre">data</span></code> on the right-hand side of the <code class="docutils literal notranslate"><span class="pre">&amp;&amp;</span></code>.
At this point we know that <code class="docutils literal notranslate"><span class="pre">data</span></code> has evaluated to a truthy value, so it cannot be <code class="docutils literal notranslate"><span class="pre">null</span></code>
anymore.</p>
<p>Implementing this additional condition is easy. We implement a subclass of <code class="docutils literal notranslate"><span class="pre">DataFlow::BarrierGuardNode</span></code>:</p>
<div class="highlight-ql notranslate"><div class="highlight"><pre><span></span>class TruthinessCheck extends DataFlow::BarrierGuardNode, DataFlow::ValueNode {
SsaVariable v;
TruthinessCheck() {
astNode = v.getAUse()
}
override predicate blocks(boolean outcome, Expr e) {
outcome = true and
e = astNode
}
}
</pre></div>
</div>
<p>and then use it to override predicate <code class="docutils literal notranslate"><span class="pre">isBarrierGuard</span></code> in our configuration class:</p>
<div class="highlight-ql notranslate"><div class="highlight"><pre><span></span>override predicate isBarrierGuard(DataFlow::BarrierGuardNode guard) {
guard instanceof TruthinessCheck
}
</pre></div>
</div>
<p>With this change, we now flag the problematic case and don’t flag the unproblematic case above.</p>
<p>However, as it stands our analysis has many false negatives: if we read a property of a JSON object,
our analysis will not continue tracking it, so property accesses on the resulting value will not be
checked for null-guardedness:</p>
<div class="highlight-javascript notranslate"><div class="highlight"><pre><span></span><span class="kd">var</span><span class="w"> </span><span class="nx">root</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="nb">JSON</span><span class="p">.</span><span class="nx">parse</span><span class="p">(</span><span class="nx">str</span><span class="p">);</span>
<span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="nx">root</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="kd">var</span><span class="w"> </span><span class="nx">payload</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="nx">root</span><span class="p">.</span><span class="nx">data</span><span class="p">;</span><span class="w"> </span><span class="c1">// unproblematic: `root` cannot be `null` here</span>
<span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="nx">payload</span><span class="p">.</span><span class="nx">length</span><span class="w"> </span><span class="o">&gt;</span><span class="w"> </span><span class="mf">0</span><span class="p">)</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="c1">// problematic: `payload` may be `null` here</span>
<span class="w"> </span><span class="p">...</span>
<span class="w"> </span><span class="p">}</span>
<span class="p">}</span>
</pre></div>
</div>
<p>We could try to remedy the situation by overriding <code class="docutils literal notranslate"><span class="pre">isAdditionalFlowStep</span></code> in our configuration class to track values through property reads:</p>
<div class="highlight-ql notranslate"><div class="highlight"><pre><span></span>override predicate isAdditionalFlowStep(DataFlow::Node pred, DataFlow::Node succ) {
succ.(DataFlow::PropRead).getBase() = pred
}
</pre></div>
</div>
<p>But this does not actually allow us to flag the problem above, because once we have checked <code class="docutils literal notranslate"><span class="pre">root</span></code> for
truthiness, all further uses are considered to be sanitized. In particular, the reference to
<code class="docutils literal notranslate"><span class="pre">root</span></code> in <code class="docutils literal notranslate"><span class="pre">root.data</span></code> is sanitized, so no flow tracking through the property read happens.</p>
<p>The problem is, of course, that our sanitizer sanitizes too much. It should not stop flow
altogether, it should simply record the fact that <code class="docutils literal notranslate"><span class="pre">root</span></code> itself is known to be non-null.
Any property read from <code class="docutils literal notranslate"><span class="pre">root</span></code>, on the other hand, may well be null and needs to be checked
separately.</p>
<p>We can achieve this by introducing two different flow labels, <code class="docutils literal notranslate"><span class="pre">json</span></code> and <code class="docutils literal notranslate"><span class="pre">maybe-null</span></code>. The former
means that the value we are dealing with comes from a JSON object, the latter that it may be
<code class="docutils literal notranslate"><span class="pre">null</span></code>. The result of any call to <code class="docutils literal notranslate"><span class="pre">JSON.parse</span></code> has both labels. A property read from a value
with label <code class="docutils literal notranslate"><span class="pre">json</span></code> also has both labels. Checking truthiness removes the <code class="docutils literal notranslate"><span class="pre">maybe-null</span></code> label.
Accessing a property on a value that has the <code class="docutils literal notranslate"><span class="pre">maybe-null</span></code> label should be flagged.</p>
<p>To implement this, we start by defining two new subclasses of the class <code class="docutils literal notranslate"><span class="pre">DataFlow::FlowLabel</span></code>:</p>
<div class="highlight-ql notranslate"><div class="highlight"><pre><span></span>class JsonLabel extends DataFlow::FlowLabel {
JsonLabel() {
this = &quot;json&quot;
}
}
class MaybeNullLabel extends DataFlow::FlowLabel {
MaybeNullLabel() {
this = &quot;maybe-null&quot;
}
}
</pre></div>
</div>
<p>Then we extend our <code class="docutils literal notranslate"><span class="pre">isSource</span></code> predicate from above to track flow labels by overriding the two-argument version instead of the one-argument version:</p>
<div class="highlight-ql notranslate"><div class="highlight"><pre><span></span>override predicate isSource(DataFlow::Node nd, DataFlow::FlowLabel lbl) {
exists(JsonParserCall jpc |
nd = jpc.getOutput() and
(lbl instanceof JsonLabel or lbl instanceof MaybeNullLabel)
)
}
</pre></div>
</div>
<p>Similarly, we make <code class="docutils literal notranslate"><span class="pre">isSink</span></code> flow-label aware and require the base of the property read to have the <code class="docutils literal notranslate"><span class="pre">maybe-null</span></code> label:</p>
<div class="highlight-ql notranslate"><div class="highlight"><pre><span></span>override predicate isSink(DataFlow::Node nd, DataFlow::FlowLabel lbl) {
exists(DataFlow::PropRef pr |
nd = pr.getBase() and
lbl instanceof MaybeNullLabel
)
}
</pre></div>
</div>
<p>Our overriding definition of <code class="docutils literal notranslate"><span class="pre">isAdditionalFlowStep</span></code> now needs to specify two flow labels, a
predecessor label <code class="docutils literal notranslate"><span class="pre">predlbl</span></code> and a successor label <code class="docutils literal notranslate"><span class="pre">succlbl</span></code>. In addition to specifying flow from
the predecessor node <code class="docutils literal notranslate"><span class="pre">pred</span></code> to the successor node <code class="docutils literal notranslate"><span class="pre">succ</span></code>, it requires that <code class="docutils literal notranslate"><span class="pre">pred</span></code> has label
<code class="docutils literal notranslate"><span class="pre">predlbl</span></code>, and adds label <code class="docutils literal notranslate"><span class="pre">succlbl</span></code> to <code class="docutils literal notranslate"><span class="pre">succ</span></code>. In our case, we use this to add both the
<code class="docutils literal notranslate"><span class="pre">json</span></code> label and the <code class="docutils literal notranslate"><span class="pre">maybe-null</span></code> label to any property read from a value labeled with <code class="docutils literal notranslate"><span class="pre">json</span></code>
(no matter whether it has the <code class="docutils literal notranslate"><span class="pre">maybe-null</span></code> label):</p>
<div class="highlight-ql notranslate"><div class="highlight"><pre><span></span>override predicate isAdditionalFlowStep(DataFlow::Node pred, DataFlow::Node succ,
DataFlow::FlowLabel predlbl, DataFlow::FlowLabel succlbl) {
succ.(DataFlow::PropRead).getBase() = pred and
predlbl instanceof JsonLabel and
(succlbl instanceof JsonLabel or succlbl instanceof MaybeNullLabel)
}
</pre></div>
</div>
<p>Finally, we turn <code class="docutils literal notranslate"><span class="pre">TruthinessCheck</span></code> from a <code class="docutils literal notranslate"><span class="pre">BarrierGuardNode</span></code> into a <code class="docutils literal notranslate"><span class="pre">LabeledBarrierGuardNode</span></code>,
specifying that it only removes the <code class="docutils literal notranslate"><span class="pre">maybe-null</span></code> label (but not the <code class="docutils literal notranslate"><span class="pre">json</span></code> label) from the
sanitized value:</p>
<div class="highlight-ql notranslate"><div class="highlight"><pre><span></span>class TruthinessCheck extends DataFlow::LabeledBarrierGuardNode, DataFlow::ValueNode {
...
override predicate blocks(boolean outcome, Expr e, DataFlow::FlowLabel lbl) {
outcome = true and
e = astNode and
lbl instanceof MaybeNullLabel
}
}
</pre></div>
</div>
<p>Here is the final query, expressed as a <a class="reference internal" href="../writing-codeql-queries/creating-path-queries.html#creating-path-queries"><span class="std std-ref">path query</span></a> so we can examine paths from sources to sinks
step by step in the UI:</p>
<div class="highlight-ql notranslate"><div class="highlight"><pre><span></span>/** @kind path-problem */
import javascript
import DataFlow::PathGraph
class JsonLabel extends DataFlow::FlowLabel {
JsonLabel() {
this = &quot;json&quot;
}
}
class MaybeNullLabel extends DataFlow::FlowLabel {
MaybeNullLabel() {
this = &quot;maybe-null&quot;
}
}
class TruthinessCheck extends DataFlow::LabeledBarrierGuardNode, DataFlow::ValueNode {
SsaVariable v;
TruthinessCheck() {
astNode = v.getAUse()
}
override predicate blocks(boolean outcome, Expr e, DataFlow::FlowLabel lbl) {
outcome = true and
e = astNode and
lbl instanceof MaybeNullLabel
}
}
class JsonTrackingConfig extends DataFlow::Configuration {
JsonTrackingConfig() { this = &quot;JsonTrackingConfig&quot; }
override predicate isSource(DataFlow::Node nd, DataFlow::FlowLabel lbl) {
exists(JsonParserCall jpc |
nd = jpc.getOutput() and
(lbl instanceof JsonLabel or lbl instanceof MaybeNullLabel)
)
}
override predicate isSink(DataFlow::Node nd, DataFlow::FlowLabel lbl) {
exists(DataFlow::PropRef pr |
nd = pr.getBase() and
lbl instanceof MaybeNullLabel
)
}
override predicate isAdditionalFlowStep(DataFlow::Node pred, DataFlow::Node succ,
DataFlow::FlowLabel predlbl, DataFlow::FlowLabel succlbl) {
succ.(DataFlow::PropRead).getBase() = pred and
predlbl instanceof JsonLabel and
(succlbl instanceof JsonLabel or succlbl instanceof MaybeNullLabel)
}
override predicate isBarrierGuard(DataFlow::BarrierGuardNode guard) {
guard instanceof TruthinessCheck
}
}
from JsonTrackingConfig cfg, DataFlow::PathNode source, DataFlow::PathNode sink
where cfg.hasFlowPath(source, sink)
select sink, source, sink, &quot;Property access on JSON value originating $@.&quot;, source, &quot;here&quot;
</pre></div>
</div>
<p>You can see <a class="reference external" href="https://lgtm.com/query/5347702611074820306">a run of this query</a> on the <a class="reference external" href="https://lgtm.com/projects/g/finos-plexus/plexus-interop/">plexus-interop</a> project on LGTM.com. Many of the 19
results are false positives since we currently do not model many ways in which a value can be
checked for nullness. In particular, after a property reference <code class="docutils literal notranslate"><span class="pre">x.p</span></code> we implicitly know that
<code class="docutils literal notranslate"><span class="pre">x</span></code> cannot be null anymore, since otherwise the reference would have thrown an exception.
Modeling this would allow us to get rid of most of the false positives, but is beyond the scope of
this tutorial.</p>
</section>
<section id="api">
<h2>API<a class="headerlink" href="#api" title="Link to this heading"></a></h2>
<p>Plain data-flow configurations implicitly use a single flow label “data”, which indicates that a
data value originated from a source. You can use the predicate <code class="docutils literal notranslate"><span class="pre">DataFlow::FlowLabel::data()</span></code>,
which returns this flow label, as a symbolic name for it.</p>
<p>Taint-tracking configurations add a second flow label “taint” (<code class="docutils literal notranslate"><span class="pre">DataFlow::FlowLabel::taint()</span></code>),
which is similar to “data”, but includes values that have passed through non-value preserving steps
such as string operations.</p>
<p>Each of the three member predicates <code class="docutils literal notranslate"><span class="pre">isSource</span></code>, <code class="docutils literal notranslate"><span class="pre">isSink</span></code> and
<code class="docutils literal notranslate"><span class="pre">isAdditionalFlowStep</span></code>/<code class="docutils literal notranslate"><span class="pre">isAdditionalTaintStep</span></code> has one version that uses the default flow
labels, and one version that allows specifying custom flow labels through additional arguments.</p>
<p>For <code class="docutils literal notranslate"><span class="pre">isSource</span></code>, there is one additional argument specifying which flow label(s) should be
associated with values originating from this source. If multiple flow labels are specified, each
value is associated with <em>all</em> of them.</p>
<p>For <code class="docutils literal notranslate"><span class="pre">isSink</span></code>, the additional argument specifies which flow label(s) a value that flows into this
sink may be associated with. If multiple flow labels are specified, then any value that is
associated with <em>at least one</em> of them will be considered by the configuration.</p>
<p>For <code class="docutils literal notranslate"><span class="pre">isAdditionalFlowStep</span></code> there are two additional arguments <code class="docutils literal notranslate"><span class="pre">predlbl</span></code> and <code class="docutils literal notranslate"><span class="pre">succlbl</span></code>, which
allow flow steps to act as flow label transformers. If a value associated with <code class="docutils literal notranslate"><span class="pre">predlbl</span></code> arrives
at the start node of the additional step, it is propagated to the end node and associated with
<code class="docutils literal notranslate"><span class="pre">succlbl</span></code>. Of course, <code class="docutils literal notranslate"><span class="pre">predlbl</span></code> and <code class="docutils literal notranslate"><span class="pre">succlbl</span></code> may be the same, indicating that the flow step
preserves this label. There can also be multiple values of <code class="docutils literal notranslate"><span class="pre">succlbl</span></code> for a single <code class="docutils literal notranslate"><span class="pre">predlbl</span></code> or
vice versa.</p>
<p>Note that if you do not restrict <code class="docutils literal notranslate"><span class="pre">succlbl</span></code> then it will be allowed to range over all flow labels.
This may cause labels that were previously blocked on a path to reappear, which is not usually what
you want.</p>
<p>The flow label-aware version of <code class="docutils literal notranslate"><span class="pre">isBarrier</span></code> is called <code class="docutils literal notranslate"><span class="pre">isLabeledBarrier</span></code>: unlike <code class="docutils literal notranslate"><span class="pre">isBarrier</span></code>,
which prevents any flow past the given node, it only blocks flow of values associated with one of
the specified flow labels.</p>
</section>
<section id="standard-queries-using-flow-labels">
<h2>Standard queries using flow labels<a class="headerlink" href="#standard-queries-using-flow-labels" title="Link to this heading"></a></h2>
<p>Some of our standard security queries use flow labels. You can look at their implementation
to get a feeling for how to use flow labels in practice.</p>
<p>In particular, both of the examples mentioned in the section on limitations of basic data flow above
are from standard security queries that use flow labels. The <a class="reference external" href="https://lgtm.com/rules/1508857356317">Prototype pollution</a> query uses two flow labels to distinguish completely
tainted objects from partially tainted objects. The <a class="reference external" href="https://lgtm.com/rules/1971530250">Uncontrolled data used in path expression</a> query uses four flow labels to track whether a user-controlled
string may be an absolute path and whether it may contain <code class="docutils literal notranslate"><span class="pre">..</span></code> components.</p>
</section>
<section id="further-reading">
<h2>Further reading<a class="headerlink" href="#further-reading" title="Link to this heading"></a></h2>
<ul class="simple">
<li><p><a class="reference internal" href="../codeql-for-visual-studio-code/exploring-data-flow-with-path-queries.html#exploring-data-flow-with-path-queries"><span class="std std-ref">Exploring data flow with path queries</span></a></p></li>
</ul>
<ul class="simple">
<li><p><a class="reference external" href="https://github.com/github/codeql/tree/main/javascript/ql/src">CodeQL queries for JavaScript</a></p></li>
<li><p><a class="reference external" href="https://github.com/github/codeql/tree/main/javascript/ql/examples">Example queries for JavaScript</a></p></li>
<li><p><a class="reference external" href="https://codeql.github.com/codeql-standard-libraries/javascript/">CodeQL library reference for JavaScript</a></p></li>
</ul>
<ul class="simple">
<li><p><a class="reference internal" href="../ql-language-reference/index.html#ql-language-reference"><span class="std std-ref">QL language reference</span></a></p></li>
<li><p><a class="reference internal" href="../codeql-overview/codeql-tools.html#codeql-tools"><span class="std std-ref">CodeQL tools</span></a></p></li>
</ul>
</section>
</section>
</article>
<!-- GitHub footer, with links to terms and privacy statement -->
<div class="px-3 px-md-6 f6 py-4 d-sm-flex flex-justify-between flex-row-reverse flex-items-center border-top">
<ul class="list-style-none d-flex flex-items-center mb-3 mb-sm-0 lh-condensed-ultra">
<li class="mr-3">
<a href="https://twitter.com/github" title="GitHub on Twitter" style="color: #959da5;">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 273.5 222.3" class="d-block" height="18">
<path
d="M273.5 26.3a109.77 109.77 0 0 1-32.2 8.8 56.07 56.07 0 0 0 24.7-31 113.39 113.39 0 0 1-35.7 13.6 56.1 56.1 0 0 0-97 38.4 54 54 0 0 0 1.5 12.8A159.68 159.68 0 0 1 19.1 10.3a56.12 56.12 0 0 0 17.4 74.9 56.06 56.06 0 0 1-25.4-7v.7a56.11 56.11 0 0 0 45 55 55.65 55.65 0 0 1-14.8 2 62.39 62.39 0 0 1-10.6-1 56.24 56.24 0 0 0 52.4 39 112.87 112.87 0 0 1-69.7 24 119 119 0 0 1-13.4-.8 158.83 158.83 0 0 0 86 25.2c103.2 0 159.6-85.5 159.6-159.6 0-2.4-.1-4.9-.2-7.3a114.25 114.25 0 0 0 28.1-29.1"
fill="currentColor"></path>
</svg>
</a>
</li>
<li class="mr-3">
<a href="https://www.facebook.com/GitHub" title="GitHub on Facebook" style="color: #959da5;">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 15.3 15.4" class="d-block" height="18">
<path
d="M14.5 0H.8a.88.88 0 0 0-.8.9v13.6a.88.88 0 0 0 .8.9h7.3v-6h-2V7.1h2V5.4a2.87 2.87 0 0 1 2.5-3.1h.5a10.87 10.87 0 0 1 1.8.1v2.1h-1.3c-1 0-1.1.5-1.1 1.1v1.5h2.3l-.3 2.3h-2v5.9h3.9a.88.88 0 0 0 .9-.8V.8a.86.86 0 0 0-.8-.8z"
fill="currentColor"></path>
</svg>
</a>
</li>
<li class="mr-3">
<a href="https://www.youtube.com/github" title="GitHub on YouTube" style="color: #959da5;">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 19.17 13.6" class="d-block" height="16">
<path
d="M18.77 2.13A2.4 2.4 0 0 0 17.09.42C15.59 0 9.58 0 9.58 0a57.55 57.55 0 0 0-7.5.4A2.49 2.49 0 0 0 .39 2.13 26.27 26.27 0 0 0 0 6.8a26.15 26.15 0 0 0 .39 4.67 2.43 2.43 0 0 0 1.69 1.71c1.52.42 7.5.42 7.5.42a57.69 57.69 0 0 0 7.51-.4 2.4 2.4 0 0 0 1.68-1.71 25.63 25.63 0 0 0 .4-4.67 24 24 0 0 0-.4-4.69zM7.67 9.71V3.89l5 2.91z"
fill="currentColor"></path>
</svg>
</a>
</li>
<li class="mr-3 flex-self-start">
<a href="https://www.linkedin.com/company/github" title="GitHub on LinkedIn" style="color: #959da5;">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 19 18" class="d-block" height="18">
<path
d="M3.94 2A2 2 0 1 1 2 0a2 2 0 0 1 1.94 2zM4 5.48H0V18h4zm6.32 0H6.34V18h3.94v-6.57c0-3.66 4.77-4 4.77 0V18H19v-7.93c0-6.17-7.06-5.94-8.72-2.91z"
fill="currentColor"></path>
</svg>
</a>
</li>
<li>
<a href="https://github.com/github" title="GitHub's organization" style="color: #959da5;">
<svg version="1.1" width="20" height="20" viewBox="0 0 16 16" class="octicon octicon-mark-github"
aria-hidden="true">
<path fill-rule="evenodd"
d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.013 8.013 0 0016 8c0-4.42-3.58-8-8-8z">
</path>
</svg>
</a>
</li>
</ul>
<ul class="list-style-none d-flex text-gray">
<li class="mr-3">&copy;
<script type="text/javascript">document.write(new Date().getFullYear());</script> GitHub, Inc.</li>
<li class="mr-3"><a
href="https://docs.github.com/github/site-policy/github-terms-of-service"
class="link-gray">Terms </a></li>
<li><a href="https://docs.github.com/github/site-policy/github-privacy-statement"
class="link-gray">Privacy </a></li>
</ul>
</div>
</div>
</main>
<script type="text/javascript">
// Collapsible ".toggle" blocks: every direct child of a ".toggle" element
// starts hidden except its ".name" header, and clicking a header shows or
// hides the other children of the header's parent and flips the "open"
// class on the header(s).
//
// Written against the plain DOM API: the page head does not load jQuery, so
// a call to $(document).ready(...) would throw a ReferenceError and leave
// the blocks uninitialised.
(function () {
  // Hide an element with an inline "display: none", or clear that override.
  function setVisible(element, visible) {
    element.style.display = visible ? "" : "none";
  }

  function isHidden(element) {
    return window.getComputedStyle(element).display === "none";
  }

  function initToggles() {
    document.querySelectorAll(".toggle > *").forEach(function (child) {
      setVisible(child, false);
    });

    document.querySelectorAll(".toggle .name").forEach(function (header) {
      // Headers were hidden by the pass above when they are direct children.
      setVisible(header, true);

      header.addEventListener("click", function () {
        var siblings = header.parentElement.children;
        Array.prototype.forEach.call(siblings, function (sibling) {
          if (sibling.classList.contains("name")) {
            sibling.classList.toggle("open");
          } else {
            setVisible(sibling, isHidden(sibling));
          }
        });
      });
    });
  }

  // Run once the DOM is available, whether or not parsing has finished yet.
  if (document.readyState === "loading") {
    document.addEventListener("DOMContentLoaded", initToggles);
  } else {
    initToggles();
  }
})();
</script>
</body>
</html>