mirror of
https://github.com/hohn/codeql-info.git
synced 2025-12-16 20:53:04 +01:00
573 lines
43 KiB
HTML
573 lines
43 KiB
HTML
<!DOCTYPE html>
|
||
|
||
<html lang="en" data-content_root="../">
|
||
<head>
|
||
<meta charset="utf-8" />
|
||
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||
|
||
<title>Using flow labels for precise data flow analysis — CodeQL</title>
|
||
<link rel="stylesheet" type="text/css" href="../_static/pygments.css?v=fa44fd50" />
|
||
<link rel="stylesheet" type="text/css" href="../_static/alabaster.css?v=93459777" />
|
||
<script src="../_static/documentation_options.js?v=5929fcd5"></script>
|
||
<script src="../_static/doctools.js?v=888ff710"></script>
|
||
<script src="../_static/sphinx_highlight.js?v=dc90522c"></script>
|
||
<link rel="icon" href="../_static/favicon.ico"/>
|
||
<link rel="index" title="Index" href="../genindex.html" />
|
||
<link rel="search" title="Search" href="../search.html" />
|
||
<link rel="next" title="Specifying additional remote flow sources for JavaScript" href="specifying-additional-remote-flow-sources-for-javascript.html" />
|
||
<link rel="prev" title="Analyzing data flow in JavaScript and TypeScript" href="analyzing-data-flow-in-javascript-and-typescript.html" />
|
||
|
||
<title>CodeQL docs</title>
|
||
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||
<link rel="stylesheet" href="../_static/custom.css" type="text/css" />
|
||
<link rel="stylesheet" href="../_static/primer.css" type="text/css" />
|
||
|
||
|
||
</head><body>
|
||
<header class="Header">
|
||
<div class="Header-item--full">
|
||
<a href="https://codeql.github.com/docs" class="Header-link f2 d-flex flex-items-center">
|
||
<!-- <%= octicon "mark-github", class: "mr-2", height: 32 %> -->
|
||
<svg height="32" class="octicon octicon-mark-github mr-2" viewBox="0 0 16 16" version="1.1" width="32"
|
||
aria-hidden="true">
|
||
<path fill-rule="evenodd"
|
||
d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.013 8.013 0 0 0 16 8c0-4.42-3.58-8-8-8z">
|
||
</path>
|
||
</svg>
|
||
<span class="hide-sm">CodeQL documentation</span>
|
||
</a>
|
||
</div>
|
||
<div class="Header-item hide-sm hide-md">
|
||
<script src="https://addsearch.com/js/?key=93b4d287e2fc079a4089412b669785d5&amp;categories=!0xhelp.semmle.com,0xcodeql.github.com,1xdocs,1xcodeql-standard-libraries,1xcodeql-query-help"></script>
|
||
</div>
|
||
<div class="Header-item">
|
||
|
||
<details class="dropdown details-reset details-overlay d-inline-block">
|
||
<summary class="btn bg-gray-dark text-white border" aria-haspopup="true">
|
||
CodeQL resources
|
||
<div class="dropdown-caret"></div>
|
||
</summary>
|
||
|
||
<ul class="dropdown-menu dropdown-menu-se dropdown-menu-dark">
|
||
<li><a class="dropdown-item" href="https://codeql.github.com/docs/codeql-overview">CodeQL overview</a></li>
|
||
<li class="dropdown-divider" role="separator"></li>
|
||
<div class="dropdown-header">
|
||
CodeQL tools
|
||
</div>
|
||
<li><a class="dropdown-item" href="https://codeql.github.com/docs/codeql-for-visual-studio-code">CodeQL for VS Code</a></li>
|
||
<li><a class="dropdown-item" href="https://codeql.github.com/docs/codeql-cli">CodeQL CLI</a>
|
||
</li>
|
||
<li class="dropdown-divider" role="separator"></li>
|
||
<div class="dropdown-header">
|
||
CodeQL guides
|
||
</div>
|
||
<li><a class="dropdown-item" href="https://codeql.github.com/docs/writing-codeql-queries">Writing CodeQL queries</a></li>
|
||
<li><a class="dropdown-item" href="https://codeql.github.com/docs/codeql-language-guides">CodeQL language guides</a></li>
|
||
<li class="dropdown-divider" role="separator"></li>
|
||
<div class="dropdown-header">
|
||
Reference docs
|
||
</div>
|
||
<li><a class="dropdown-item" href="https://codeql.github.com/docs/ql-language-reference/">QL language
|
||
reference</a></li>
|
||
<li><a class="dropdown-item" href="https://codeql.github.com/codeql-standard-libraries">CodeQL
|
||
standard-libraries</a></li>
|
||
<li><a class="dropdown-item" href="https://codeql.github.com/codeql-query-help">CodeQL
|
||
query help</a></li>
|
||
<li class="dropdown-divider" role="separator"></li>
|
||
<div class="dropdown-header">
|
||
Source files
|
||
</div>
|
||
<li><a class="dropdown-item" href="https://github.com/github/codeql">CodeQL repository</a></li>
|
||
</ul>
|
||
</details>
|
||
|
||
</div>
|
||
|
||
</header>
|
||
<main class="bg-gray-light clearfix">
|
||
<nav class="SideNav position-sticky top-0 col-lg-3 col-md-3 float-left p-4 hide-sm hide-md overflow-y-auto">
|
||
|
||
<ul class="current">
|
||
<li class="toctree-l1"><a class="reference internal" href="../codeql-overview/index.html">CodeQL overview</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../codeql-for-visual-studio-code/index.html">CodeQL for Visual Studio Code</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../codeql-cli/index.html">CodeQL CLI</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../writing-codeql-queries/index.html">Writing CodeQL queries</a></li>
|
||
<li class="toctree-l1 current"><a class="reference internal" href="index.html">CodeQL language guides</a><ul class="current">
|
||
<li class="toctree-l2"><a class="reference internal" href="codeql-for-cpp.html">CodeQL for C and C++</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="codeql-for-csharp.html">CodeQL for C#</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="codeql-for-go.html">CodeQL for Go</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="codeql-for-java.html">CodeQL for Java</a></li>
|
||
<li class="toctree-l2 current"><a class="reference internal" href="codeql-for-javascript.html">CodeQL for JavaScript</a><ul class="current">
|
||
<li class="toctree-l3"><a class="reference internal" href="basic-query-for-javascript-code.html">Basic query for JavaScript code</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="codeql-library-for-javascript.html">CodeQL library for JavaScript</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="codeql-library-for-typescript.html">CodeQL library for TypeScript</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="analyzing-data-flow-in-javascript-and-typescript.html">Analyzing data flow in JavaScript and TypeScript</a></li>
|
||
<li class="toctree-l3 current"><a class="current reference internal" href="#">Using flow labels for precise data flow analysis</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="specifying-additional-remote-flow-sources-for-javascript.html">Specifying additional remote flow sources for JavaScript</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="using-type-tracking-for-api-modeling.html">Using type tracking for API modeling</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="abstract-syntax-tree-classes-for-working-with-javascript-and-typescript-programs.html">Abstract syntax tree classes for working with JavaScript and TypeScript programs</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="data-flow-cheat-sheet-for-javascript.html">Data flow cheat sheet for JavaScript</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l2"><a class="reference internal" href="codeql-for-python.html">CodeQL for Python</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="codeql-for-ruby.html">CodeQL for Ruby</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../ql-language-reference/index.html">QL language reference</a></li>
|
||
</ul>
|
||
|
||
|
||
</nav>
|
||
|
||
|
||
<div class="body col-sm-12 col-md-9 col-lg-9 float-left border-left">
|
||
|
||
<div class="hide-lg hide-xl px-4 pt-4">
|
||
|
||
<div class="related" role="navigation" aria-label="related navigation">
|
||
<ul>
|
||
<li class="nav-item nav-item-0"><a href="../contents.html">CodeQL</a> »</li>
|
||
<li class="nav-item nav-item-1"><a href="index.html"
|
||
>CodeQL language guides</a> »</li>
|
||
<li class="nav-item nav-item-2"><a href="codeql-for-javascript.html"
|
||
accesskey="U">CodeQL for JavaScript</a> »</li>
|
||
</ul>
|
||
</div>
|
||
</div>
|
||
|
||
<article class="p-4 col-lg-10 col-md-10 col-sm-12">
|
||
|
||
<section id="using-flow-labels-for-precise-data-flow-analysis">
|
||
<span id="id1"></span><h1>Using flow labels for precise data flow analysis<a class="headerlink" href="#using-flow-labels-for-precise-data-flow-analysis" title="Link to this heading">¶</a></h1>
|
||
<p>You can associate flow labels with each value tracked by the flow analysis to determine whether the flow contains potential vulnerabilities.</p>
|
||
<section id="overview">
|
||
<h2>Overview<a class="headerlink" href="#overview" title="Link to this heading">¶</a></h2>
|
||
<p>You can use basic inter-procedural data-flow analysis and taint tracking as described in
|
||
“<a class="reference internal" href="analyzing-data-flow-in-javascript-and-typescript.html"><span class="doc">Analyzing data flow in JavaScript and TypeScript</span></a>” to check whether there is a path in
|
||
the data-flow graph from some source node to a sink node that does not pass through any sanitizer
|
||
nodes. Another way of thinking about this is that it statically models the flow of data through the
|
||
program, and associates a flag with every data value telling us whether it might have come from a
|
||
source node.</p>
|
||
<p>In some cases, you may want to track more detailed information about data values. This can be done
|
||
by associating flow labels with data values, as shown in this tutorial. We will first discuss the
|
||
general idea behind flow labels and then show how to use them in practice. Finally, we will give an
|
||
overview of the API involved and provide some pointers to standard queries that use flow labels.</p>
|
||
</section>
|
||
<section id="limitations-of-basic-data-flow-analysis">
|
||
<h2>Limitations of basic data-flow analysis<a class="headerlink" href="#limitations-of-basic-data-flow-analysis" title="Link to this heading">¶</a></h2>
|
||
<p>In many applications we are interested in tracking more than just the reachability information provided by inter-procedural data-flow analysis.</p>
|
||
<p>For example, when tracking object values that originate from untrusted input, we might want to
|
||
remember whether the entire object is tainted or whether only part of it is tainted. The former
|
||
happens, for example, when parsing a user-controlled string as JSON, meaning that the entire
|
||
resulting object is tainted. A typical example of the latter is assigning a tainted value to a
|
||
property of an object, which only taints that property but not the rest of the object.</p>
|
||
<p>While reading a property of a completely tainted object yields a tainted value, reading a property
|
||
of a partially tainted object does not. On the other hand, JSON-encoding even a partially tainted
|
||
object and including it in an HTML document is not safe.</p>
|
||
<p>Another example where more fine-grained information about tainted values is needed is for tracking
|
||
partial sanitization. For example, before interpreting a user-controlled string as a file-system
|
||
path, we generally want to make sure that it is neither an absolute path (which could refer to any
|
||
file on the file system) nor a relative path containing <code class="docutils literal notranslate"><span class="pre">..</span></code> components (which still could refer
|
||
to any file). Usually, checking both of these properties would involve two separate checks. Both
|
||
checks taken together should count as a sanitizer, but each individual check is not by itself enough
|
||
to make the string safe for use as a path. To handle this case precisely, we want to associate two
|
||
bits of information with each tainted value, namely whether it may be absolute, and whether it may
|
||
contain <code class="docutils literal notranslate"><span class="pre">..</span></code> components. Untrusted user input has both bits set initially, individual checks turn
|
||
off individual bits, and if a value that has at least one bit set is interpreted as a path, a
|
||
potential vulnerability is flagged.</p>
|
||
</section>
|
||
<section id="using-flow-labels">
|
||
<h2>Using flow labels<a class="headerlink" href="#using-flow-labels" title="Link to this heading">¶</a></h2>
|
||
<p>You can handle these cases and others like them by associating a set of <cite>flow labels</cite> (sometimes
|
||
also referred to as <cite>taint kinds</cite>) with each value being tracked by the analysis. Value-preserving
|
||
data-flow steps (such as flow steps from writes to a variable to its reads) preserve the set of flow
|
||
labels, but other steps may add or remove flow labels. Sanitizers, in particular, are simply flow
|
||
steps that remove some or all flow labels. The initial set of flow labels for a value is determined
|
||
by the source node that gives rise to it. Similarly, sink nodes can specify that an incoming value
|
||
needs to have a certain flow label (or one of a set of flow labels) in order for the flow to be
|
||
flagged as a potential vulnerability.</p>
|
||
</section>
|
||
<section id="example">
|
||
<h2>Example<a class="headerlink" href="#example" title="Link to this heading">¶</a></h2>
|
||
<p>As an example of using flow labels, we will show how to write a query that flags property accesses
|
||
on JSON values that come from user-controlled input where we have not checked whether the value is
|
||
<code class="docutils literal notranslate"><span class="pre">null</span></code>, so that the property access may cause a runtime exception.</p>
|
||
<p>For example, we would like to flag this code:</p>
|
||
<div class="highlight-javascript notranslate"><div class="highlight"><pre><span></span><span class="kd">var</span><span class="w"> </span><span class="nx">data</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="nb">JSON</span><span class="p">.</span><span class="nx">parse</span><span class="p">(</span><span class="nx">str</span><span class="p">);</span>
|
||
<span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="nx">data</span><span class="p">.</span><span class="nx">length</span><span class="w"> </span><span class="o">></span><span class="w"> </span><span class="mf">0</span><span class="p">)</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="c1">// problematic: `data` may be `null`</span>
|
||
<span class="w"> </span><span class="p">...</span>
|
||
<span class="p">}</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>This code, on the other hand, should not be flagged:</p>
|
||
<div class="highlight-javascript notranslate"><div class="highlight"><pre><span></span><span class="kd">var</span><span class="w"> </span><span class="nx">data</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="nb">JSON</span><span class="p">.</span><span class="nx">parse</span><span class="p">(</span><span class="nx">str</span><span class="p">);</span>
|
||
<span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="nx">data</span><span class="w"> </span><span class="o">&amp;&amp;</span><span class="w"> </span><span class="nx">data</span><span class="p">.</span><span class="nx">length</span><span class="w"> </span><span class="o">></span><span class="w"> </span><span class="mf">0</span><span class="p">)</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="c1">// unproblematic: `data` is first checked for nullness</span>
|
||
<span class="w"> </span><span class="p">...</span>
|
||
<span class="p">}</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>We will first try to write a query to find this kind of problem without flow labels, and use the
|
||
difficulties we encounter as a motivation for bringing flow labels into play, which will make the
|
||
query much easier to implement.</p>
|
||
<p>To get started, let’s write a query that simply flags any flow from <code class="docutils literal notranslate"><span class="pre">JSON.parse</span></code> into the base of
|
||
a property access:</p>
|
||
<div class="highlight-ql notranslate"><div class="highlight"><pre><span></span>import javascript
|
||
|
||
class JsonTrackingConfig extends DataFlow::Configuration {
|
||
JsonTrackingConfig() { this = "JsonTrackingConfig" }
|
||
|
||
override predicate isSource(DataFlow::Node nd) {
|
||
exists(JsonParserCall jpc |
|
||
nd = jpc.getOutput()
|
||
)
|
||
}
|
||
|
||
override predicate isSink(DataFlow::Node nd) {
|
||
exists(DataFlow::PropRef pr |
|
||
nd = pr.getBase()
|
||
)
|
||
}
|
||
}
|
||
|
||
from JsonTrackingConfig cfg, DataFlow::Node source, DataFlow::Node sink
|
||
where cfg.hasFlow(source, sink)
|
||
select sink, "Property access on JSON value originating $@.", source, "here"
|
||
</pre></div>
|
||
</div>
|
||
<p>Note that we use the <code class="docutils literal notranslate"><span class="pre">JsonParserCall</span></code> class from the standard library to model various JSON
|
||
parsers, including the standard <code class="docutils literal notranslate"><span class="pre">JSON.parse</span></code> API as well as a number of popular npm packages.</p>
|
||
<p>Of course, as written this query flags both the good and the bad example above, since we have not
|
||
introduced any sanitizers yet.</p>
|
||
<p>There are many ways of checking for nullness directly or indirectly. Since this is not the main
|
||
focus of this tutorial, we will only show how to model one specific case: if some variable <code class="docutils literal notranslate"><span class="pre">v</span></code> is
|
||
known to be truthy, it cannot be <code class="docutils literal notranslate"><span class="pre">null</span></code>. This kind of condition is easily expressed using a
|
||
<code class="docutils literal notranslate"><span class="pre">BarrierGuardNode</span></code> (or its counterpart <code class="docutils literal notranslate"><span class="pre">SanitizerGuardNode</span></code> for taint-tracking configurations).
|
||
A barrier guard node is a data-flow node <code class="docutils literal notranslate"><span class="pre">b</span></code> that blocks flow through some other node <code class="docutils literal notranslate"><span class="pre">nd</span></code>,
|
||
provided that some condition checked at <code class="docutils literal notranslate"><span class="pre">b</span></code> is known to hold, that is, evaluate to a truthy value.</p>
|
||
<p>In our case, the barrier guard node is a use of some variable <code class="docutils literal notranslate"><span class="pre">v</span></code>, and the condition is that use
|
||
itself: it blocks flow through any use of <code class="docutils literal notranslate"><span class="pre">v</span></code> where the guarding use is known to evaluate to a
|
||
truthy value. In our second example above, the use of <code class="docutils literal notranslate"><span class="pre">data</span></code> on the left-hand side of the <code class="docutils literal notranslate"><span class="pre">&amp;&amp;</span></code>
|
||
is a barrier guard blocking flow through the use of <code class="docutils literal notranslate"><span class="pre">data</span></code> on the right-hand side of the <code class="docutils literal notranslate"><span class="pre">&amp;&amp;</span></code>.
|
||
At this point we know that <code class="docutils literal notranslate"><span class="pre">data</span></code> has evaluated to a truthy value, so it cannot be <code class="docutils literal notranslate"><span class="pre">null</span></code>
|
||
anymore.</p>
|
||
<p>Implementing this additional condition is easy. We implement a subclass of <code class="docutils literal notranslate"><span class="pre">DataFlow::BarrierGuardNode</span></code>:</p>
|
||
<div class="highlight-ql notranslate"><div class="highlight"><pre><span></span>class TruthinessCheck extends DataFlow::BarrierGuardNode, DataFlow::ValueNode {
|
||
SsaVariable v;
|
||
|
||
TruthinessCheck() {
|
||
astNode = v.getAUse()
|
||
}
|
||
|
||
override predicate blocks(boolean outcome, Expr e) {
|
||
outcome = true and
|
||
e = astNode
|
||
}
|
||
}
|
||
</pre></div>
|
||
</div>
|
||
<p>and then use it to override predicate <code class="docutils literal notranslate"><span class="pre">isBarrierGuard</span></code> in our configuration class:</p>
|
||
<div class="highlight-ql notranslate"><div class="highlight"><pre><span></span>override predicate isBarrierGuard(DataFlow::BarrierGuardNode guard) {
|
||
guard instanceof TruthinessCheck
|
||
}
|
||
</pre></div>
|
||
</div>
|
||
<p>With this change, we now flag the problematic case and don’t flag the unproblematic case above.</p>
|
||
<p>However, as it stands our analysis has many false negatives: if we read a property of a JSON object,
|
||
our analysis will not continue tracking it, so property accesses on the resulting value will not be
|
||
checked for null-guardedness:</p>
|
||
<div class="highlight-javascript notranslate"><div class="highlight"><pre><span></span><span class="kd">var</span><span class="w"> </span><span class="nx">root</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="nb">JSON</span><span class="p">.</span><span class="nx">parse</span><span class="p">(</span><span class="nx">str</span><span class="p">);</span>
|
||
<span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="nx">root</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
|
||
<span class="w"> </span><span class="kd">var</span><span class="w"> </span><span class="nx">payload</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="nx">root</span><span class="p">.</span><span class="nx">data</span><span class="p">;</span><span class="w"> </span><span class="c1">// unproblematic: `root` cannot be `null` here</span>
|
||
<span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="nx">payload</span><span class="p">.</span><span class="nx">length</span><span class="w"> </span><span class="o">></span><span class="w"> </span><span class="mf">0</span><span class="p">)</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="c1">// problematic: `payload` may be `null` here</span>
|
||
<span class="w"> </span><span class="p">...</span>
|
||
<span class="w"> </span><span class="p">}</span>
|
||
<span class="p">}</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>We could try to remedy the situation by overriding <code class="docutils literal notranslate"><span class="pre">isAdditionalFlowStep</span></code> in our configuration class to track values through property reads:</p>
|
||
<div class="highlight-ql notranslate"><div class="highlight"><pre><span></span>override predicate isAdditionalFlowStep(DataFlow::Node pred, DataFlow::Node succ) {
|
||
succ.(DataFlow::PropRead).getBase() = pred
|
||
}
|
||
</pre></div>
|
||
</div>
|
||
<p>But this does not actually allow us to flag the problem above, because once we have checked <code class="docutils literal notranslate"><span class="pre">root</span></code> for
|
||
truthiness, all further uses are considered to be sanitized. In particular, the reference to
|
||
<code class="docutils literal notranslate"><span class="pre">root</span></code> in <code class="docutils literal notranslate"><span class="pre">root.data</span></code> is sanitized, so no flow tracking through the property read happens.</p>
|
||
<p>The problem is, of course, that our sanitizer sanitizes too much. It should not stop flow
|
||
altogether; it should simply record the fact that <code class="docutils literal notranslate"><span class="pre">root</span></code> itself is known to be non-null.
|
||
Any property read from <code class="docutils literal notranslate"><span class="pre">root</span></code>, on the other hand, may well be null and needs to be checked
|
||
separately.</p>
|
||
<p>We can achieve this by introducing two different flow labels, <code class="docutils literal notranslate"><span class="pre">json</span></code> and <code class="docutils literal notranslate"><span class="pre">maybe-null</span></code>. The former
|
||
means that the value we are dealing with comes from a JSON object, the latter that it may be
|
||
<code class="docutils literal notranslate"><span class="pre">null</span></code>. The result of any call to <code class="docutils literal notranslate"><span class="pre">JSON.parse</span></code> has both labels. A property read from a value
|
||
with label <code class="docutils literal notranslate"><span class="pre">json</span></code> also has both labels. Checking truthiness removes the <code class="docutils literal notranslate"><span class="pre">maybe-null</span></code> label.
|
||
Accessing a property on a value that has the <code class="docutils literal notranslate"><span class="pre">maybe-null</span></code> label should be flagged.</p>
|
||
<p>To implement this, we start by defining two new subclasses of the class <code class="docutils literal notranslate"><span class="pre">DataFlow::FlowLabel</span></code>:</p>
|
||
<div class="highlight-ql notranslate"><div class="highlight"><pre><span></span>class JsonLabel extends DataFlow::FlowLabel {
|
||
JsonLabel() {
|
||
this = "json"
|
||
}
|
||
}
|
||
|
||
class MaybeNullLabel extends DataFlow::FlowLabel {
|
||
MaybeNullLabel() {
|
||
this = "maybe-null"
|
||
}
|
||
}
|
||
</pre></div>
|
||
</div>
|
||
<p>Then we extend our <code class="docutils literal notranslate"><span class="pre">isSource</span></code> predicate from above to track flow labels by overriding the two-argument version instead of the one-argument version:</p>
|
||
<div class="highlight-ql notranslate"><div class="highlight"><pre><span></span>override predicate isSource(DataFlow::Node nd, DataFlow::FlowLabel lbl) {
|
||
exists(JsonParserCall jpc |
|
||
nd = jpc.getOutput() and
|
||
(lbl instanceof JsonLabel or lbl instanceof MaybeNullLabel)
|
||
)
|
||
}
|
||
</pre></div>
|
||
</div>
|
||
<p>Similarly, we make <code class="docutils literal notranslate"><span class="pre">isSink</span></code> flow-label aware and require the base of the property access to have the <code class="docutils literal notranslate"><span class="pre">maybe-null</span></code> label:</p>
|
||
<div class="highlight-ql notranslate"><div class="highlight"><pre><span></span>override predicate isSink(DataFlow::Node nd, DataFlow::FlowLabel lbl) {
|
||
exists(DataFlow::PropRef pr |
|
||
nd = pr.getBase() and
|
||
lbl instanceof MaybeNullLabel
|
||
)
|
||
}
|
||
</pre></div>
|
||
</div>
|
||
<p>Our overriding definition of <code class="docutils literal notranslate"><span class="pre">isAdditionalFlowStep</span></code> now needs to specify two flow labels, a
|
||
predecessor label <code class="docutils literal notranslate"><span class="pre">predlbl</span></code> and a successor label <code class="docutils literal notranslate"><span class="pre">succlbl</span></code>. In addition to specifying flow from
|
||
the predecessor node <code class="docutils literal notranslate"><span class="pre">pred</span></code> to the successor node <code class="docutils literal notranslate"><span class="pre">succ</span></code>, it requires that <code class="docutils literal notranslate"><span class="pre">pred</span></code> has label
|
||
<code class="docutils literal notranslate"><span class="pre">predlbl</span></code>, and adds label <code class="docutils literal notranslate"><span class="pre">succlbl</span></code> to <code class="docutils literal notranslate"><span class="pre">succ</span></code>. In our case, we use this to add both the
|
||
<code class="docutils literal notranslate"><span class="pre">json</span></code> label and the <code class="docutils literal notranslate"><span class="pre">maybe-null</span></code> label to any property read from a value labeled with <code class="docutils literal notranslate"><span class="pre">json</span></code>
|
||
(no matter whether it has the <code class="docutils literal notranslate"><span class="pre">maybe-null</span></code> label):</p>
|
||
<div class="highlight-ql notranslate"><div class="highlight"><pre><span></span>override predicate isAdditionalFlowStep(DataFlow::Node pred, DataFlow::Node succ,
|
||
DataFlow::FlowLabel predlbl, DataFlow::FlowLabel succlbl) {
|
||
succ.(DataFlow::PropRead).getBase() = pred and
|
||
predlbl instanceof JsonLabel and
|
||
(succlbl instanceof JsonLabel or succlbl instanceof MaybeNullLabel)
|
||
}
|
||
</pre></div>
|
||
</div>
|
||
<p>Finally, we turn <code class="docutils literal notranslate"><span class="pre">TruthinessCheck</span></code> from a <code class="docutils literal notranslate"><span class="pre">BarrierGuardNode</span></code> into a <code class="docutils literal notranslate"><span class="pre">LabeledBarrierGuardNode</span></code>,
|
||
specifying that it only removes the <code class="docutils literal notranslate"><span class="pre">maybe-null</span></code> label (but not the <code class="docutils literal notranslate"><span class="pre">json</span></code> label) from the
|
||
sanitized value:</p>
|
||
<div class="highlight-ql notranslate"><div class="highlight"><pre><span></span>class TruthinessCheck extends DataFlow::LabeledBarrierGuardNode, DataFlow::ValueNode {
|
||
...
|
||
|
||
override predicate blocks(boolean outcome, Expr e, DataFlow::FlowLabel lbl) {
|
||
outcome = true and
|
||
e = astNode and
|
||
lbl instanceof MaybeNullLabel
|
||
}
|
||
}
|
||
</pre></div>
|
||
</div>
|
||
<p>Here is the final query, expressed as a <a class="reference internal" href="../writing-codeql-queries/creating-path-queries.html#creating-path-queries"><span class="std std-ref">path query</span></a> so we can examine paths from sources to sinks
|
||
step by step in the UI:</p>
|
||
<div class="highlight-ql notranslate"><div class="highlight"><pre><span></span>/** @kind path-problem */
|
||
|
||
import javascript
|
||
import DataFlow::PathGraph
|
||
|
||
class JsonLabel extends DataFlow::FlowLabel {
|
||
JsonLabel() {
|
||
this = "json"
|
||
}
|
||
}
|
||
|
||
class MaybeNullLabel extends DataFlow::FlowLabel {
|
||
MaybeNullLabel() {
|
||
this = "maybe-null"
|
||
}
|
||
}
|
||
|
||
class TruthinessCheck extends DataFlow::LabeledBarrierGuardNode, DataFlow::ValueNode {
|
||
SsaVariable v;
|
||
|
||
TruthinessCheck() {
|
||
astNode = v.getAUse()
|
||
}
|
||
|
||
override predicate blocks(boolean outcome, Expr e, DataFlow::FlowLabel lbl) {
|
||
outcome = true and
|
||
e = astNode and
|
||
lbl instanceof MaybeNullLabel
|
||
}
|
||
}
|
||
|
||
class JsonTrackingConfig extends DataFlow::Configuration {
|
||
JsonTrackingConfig() { this = "JsonTrackingConfig" }
|
||
|
||
override predicate isSource(DataFlow::Node nd, DataFlow::FlowLabel lbl) {
|
||
exists(JsonParserCall jpc |
|
||
nd = jpc.getOutput() and
|
||
(lbl instanceof JsonLabel or lbl instanceof MaybeNullLabel)
|
||
)
|
||
}
|
||
|
||
override predicate isSink(DataFlow::Node nd, DataFlow::FlowLabel lbl) {
|
||
exists(DataFlow::PropRef pr |
|
||
nd = pr.getBase() and
|
||
lbl instanceof MaybeNullLabel
|
||
)
|
||
}
|
||
|
||
override predicate isAdditionalFlowStep(DataFlow::Node pred, DataFlow::Node succ,
|
||
DataFlow::FlowLabel predlbl, DataFlow::FlowLabel succlbl) {
|
||
succ.(DataFlow::PropRead).getBase() = pred and
|
||
predlbl instanceof JsonLabel and
|
||
(succlbl instanceof JsonLabel or succlbl instanceof MaybeNullLabel)
|
||
}
|
||
|
||
override predicate isBarrierGuard(DataFlow::BarrierGuardNode guard) {
|
||
guard instanceof TruthinessCheck
|
||
}
|
||
}
|
||
|
||
from JsonTrackingConfig cfg, DataFlow::PathNode source, DataFlow::PathNode sink
|
||
where cfg.hasFlowPath(source, sink)
|
||
select sink, source, sink, "Property access on JSON value originating $@.", source, "here"
|
||
</pre></div>
|
||
</div>
|
||
<p><a class="reference external" href="https://lgtm.com/query/5347702611074820306">A run of this query</a> on the <a class="reference external" href="https://lgtm.com/projects/g/finos-plexus/plexus-interop/">plexus-interop</a> project is available on LGTM.com. Many of the 19
|
||
results are false positives since we currently do not model many ways in which a value can be
|
||
checked for nullness. In particular, after a property reference <code class="docutils literal notranslate"><span class="pre">x.p</span></code> we implicitly know that
|
||
<code class="docutils literal notranslate"><span class="pre">x</span></code> cannot be null anymore, since otherwise the reference would have thrown an exception.
|
||
Modeling this would allow us to get rid of most of the false positives, but is beyond the scope of
|
||
this tutorial.</p>
|
||
</section>
|
||
<section id="api">
|
||
<h2>API<a class="headerlink" href="#api" title="Link to this heading">¶</a></h2>
|
||
<p>Plain data-flow configurations implicitly use a single flow label “data”, which indicates that a
|
||
data value originated from a source. You can use the predicate <code class="docutils literal notranslate"><span class="pre">DataFlow::FlowLabel::data()</span></code>,
|
||
which returns this flow label, as a symbolic name for it.</p>
|
||
<p>Taint-tracking configurations add a second flow label “taint” (<code class="docutils literal notranslate"><span class="pre">DataFlow::FlowLabel::taint()</span></code>),
|
||
which is similar to “data”, but includes values that have passed through non-value preserving steps
|
||
such as string operations.</p>
|
||
<p>Each of the three member predicates <code class="docutils literal notranslate"><span class="pre">isSource</span></code>, <code class="docutils literal notranslate"><span class="pre">isSink</span></code> and
|
||
<code class="docutils literal notranslate"><span class="pre">isAdditionalFlowStep</span></code>/<code class="docutils literal notranslate"><span class="pre">isAdditionalTaintStep</span></code> has one version that uses the default flow
|
||
labels, and one version that allows specifying custom flow labels through additional arguments.</p>
|
||
<p>For <code class="docutils literal notranslate"><span class="pre">isSource</span></code>, there is one additional argument specifying which flow label(s) should be
|
||
associated with values originating from this source. If multiple flow labels are specified, each
|
||
value is associated with <cite>all</cite> of them.</p>
|
||
<p>For <code class="docutils literal notranslate"><span class="pre">isSink</span></code>, the additional argument specifies which flow label(s) a value that flows into this
|
||
sink may be associated with. If multiple flow labels are specified, then any value that is
|
||
associated with <cite>at least one</cite> of them will be considered by the configuration.</p>
|
||
<p>For <code class="docutils literal notranslate"><span class="pre">isAdditionalFlowStep</span></code> there are two additional arguments <code class="docutils literal notranslate"><span class="pre">predlbl</span></code> and <code class="docutils literal notranslate"><span class="pre">succlbl</span></code>, which
|
||
allow flow steps to act as flow label transformers. If a value associated with <code class="docutils literal notranslate"><span class="pre">predlbl</span></code> arrives
|
||
at the start node of the additional step, it is propagated to the end node and associated with
|
||
<code class="docutils literal notranslate"><span class="pre">succlbl</span></code>. Of course, <code class="docutils literal notranslate"><span class="pre">predlbl</span></code> and <code class="docutils literal notranslate"><span class="pre">succlbl</span></code> may be the same, indicating that the flow step
|
||
preserves this label. There can also be multiple values of <code class="docutils literal notranslate"><span class="pre">succlbl</span></code> for a single <code class="docutils literal notranslate"><span class="pre">predlbl</span></code> or
|
||
vice versa.</p>
|
||
<p>Note that if you do not restrict <code class="docutils literal notranslate"><span class="pre">succlbl</span></code> then it will be allowed to range over all flow labels.
|
||
This may cause labels that were previously blocked on a path to reappear, which is not usually what
|
||
you want.</p>
|
||
<p>The flow label-aware version of <code class="docutils literal notranslate"><span class="pre">isBarrier</span></code> is called <code class="docutils literal notranslate"><span class="pre">isLabeledBarrier</span></code>: unlike <code class="docutils literal notranslate"><span class="pre">isBarrier</span></code>,
|
||
which prevents any flow past the given node, it only blocks flow of values associated with one of
|
||
the specified flow labels.</p>
|
||
</section>
|
||
<section id="standard-queries-using-flow-labels">
|
||
<h2>Standard queries using flow labels<a class="headerlink" href="#standard-queries-using-flow-labels" title="Link to this heading">¶</a></h2>
|
||
<p>Some of our standard security queries use flow labels. You can look at their implementation
|
||
to get a feeling for how to use flow labels in practice.</p>
|
||
<p>In particular, both of the examples mentioned in the section on limitations of basic data flow above
|
||
are from standard security queries that use flow labels. The <a class="reference external" href="https://lgtm.com/rules/1508857356317">Prototype pollution</a> query uses two flow labels to distinguish completely
|
||
tainted objects from partially tainted objects. The <a class="reference external" href="https://lgtm.com/rules/1971530250">Uncontrolled data used in path expression</a> query uses four flow labels to track whether a user-controlled
|
||
string may be an absolute path and whether it may contain <code class="docutils literal notranslate"><span class="pre">..</span></code> components.</p>
|
||
</section>
|
||
<section id="further-reading">
|
||
<h2>Further reading<a class="headerlink" href="#further-reading" title="Link to this heading">¶</a></h2>
|
||
<ul class="simple">
|
||
<li><p>“<a class="reference internal" href="../codeql-for-visual-studio-code/exploring-data-flow-with-path-queries.html#exploring-data-flow-with-path-queries"><span class="std std-ref">Exploring data flow with path queries</span></a>”</p></li>
|
||
</ul>
|
||
<ul class="simple">
|
||
<li><p><a class="reference external" href="https://github.com/github/codeql/tree/main/javascript/ql/src">CodeQL queries for JavaScript</a></p></li>
|
||
<li><p><a class="reference external" href="https://github.com/github/codeql/tree/main/javascript/ql/examples">Example queries for JavaScript</a></p></li>
|
||
<li><p><a class="reference external" href="https://codeql.github.com/codeql-standard-libraries/javascript/">CodeQL library reference for JavaScript</a></p></li>
|
||
</ul>
|
||
<ul class="simple">
|
||
<li><p>“<a class="reference internal" href="../ql-language-reference/index.html#ql-language-reference"><span class="std std-ref">QL language reference</span></a>”</p></li>
|
||
<li><p>“<a class="reference internal" href="../codeql-overview/codeql-tools.html#codeql-tools"><span class="std std-ref">CodeQL tools</span></a>”</p></li>
|
||
</ul>
|
||
</section>
|
||
</section>
|
||
|
||
|
||
</article>
|
||
|
||
<!-- GitHub footer, with links to terms and privacy statement -->
|
||
<div class="px-3 px-md-6 f6 py-4 d-sm-flex flex-justify-between flex-row-reverse flex-items-center border-top">
|
||
<ul class="list-style-none d-flex flex-items-center mb-3 mb-sm-0 lh-condensed-ultra">
|
||
<li class="mr-3">
|
||
<a href="https://twitter.com/github" title="GitHub on Twitter" style="color: #959da5;">
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 273.5 222.3" class="d-block" height="18">
|
||
<path
|
||
d="M273.5 26.3a109.77 109.77 0 0 1-32.2 8.8 56.07 56.07 0 0 0 24.7-31 113.39 113.39 0 0 1-35.7 13.6 56.1 56.1 0 0 0-97 38.4 54 54 0 0 0 1.5 12.8A159.68 159.68 0 0 1 19.1 10.3a56.12 56.12 0 0 0 17.4 74.9 56.06 56.06 0 0 1-25.4-7v.7a56.11 56.11 0 0 0 45 55 55.65 55.65 0 0 1-14.8 2 62.39 62.39 0 0 1-10.6-1 56.24 56.24 0 0 0 52.4 39 112.87 112.87 0 0 1-69.7 24 119 119 0 0 1-13.4-.8 158.83 158.83 0 0 0 86 25.2c103.2 0 159.6-85.5 159.6-159.6 0-2.4-.1-4.9-.2-7.3a114.25 114.25 0 0 0 28.1-29.1"
|
||
fill="currentColor"></path>
|
||
</svg>
|
||
</a>
|
||
</li>
|
||
<li class="mr-3">
|
||
<a href="https://www.facebook.com/GitHub" title="GitHub on Facebook" style="color: #959da5;">
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 15.3 15.4" class="d-block" height="18">
|
||
<path
|
||
d="M14.5 0H.8a.88.88 0 0 0-.8.9v13.6a.88.88 0 0 0 .8.9h7.3v-6h-2V7.1h2V5.4a2.87 2.87 0 0 1 2.5-3.1h.5a10.87 10.87 0 0 1 1.8.1v2.1h-1.3c-1 0-1.1.5-1.1 1.1v1.5h2.3l-.3 2.3h-2v5.9h3.9a.88.88 0 0 0 .9-.8V.8a.86.86 0 0 0-.8-.8z"
|
||
fill="currentColor"></path>
|
||
</svg>
|
||
</a>
|
||
</li>
|
||
<li class="mr-3">
|
||
<a href="https://www.youtube.com/github" title="GitHub on YouTube" style="color: #959da5;">
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 19.17 13.6" class="d-block" height="16">
|
||
<path
|
||
d="M18.77 2.13A2.4 2.4 0 0 0 17.09.42C15.59 0 9.58 0 9.58 0a57.55 57.55 0 0 0-7.5.4A2.49 2.49 0 0 0 .39 2.13 26.27 26.27 0 0 0 0 6.8a26.15 26.15 0 0 0 .39 4.67 2.43 2.43 0 0 0 1.69 1.71c1.52.42 7.5.42 7.5.42a57.69 57.69 0 0 0 7.51-.4 2.4 2.4 0 0 0 1.68-1.71 25.63 25.63 0 0 0 .4-4.67 24 24 0 0 0-.4-4.69zM7.67 9.71V3.89l5 2.91z"
|
||
fill="currentColor"></path>
|
||
</svg>
|
||
</a>
|
||
</li>
|
||
<li class="mr-3 flex-self-start">
|
||
<a href="https://www.linkedin.com/company/github" title="GitHub on LinkedIn" style="color: #959da5;">
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 19 18" class="d-block" height="18">
|
||
<path
|
||
d="M3.94 2A2 2 0 1 1 2 0a2 2 0 0 1 1.94 2zM4 5.48H0V18h4zm6.32 0H6.34V18h3.94v-6.57c0-3.66 4.77-4 4.77 0V18H19v-7.93c0-6.17-7.06-5.94-8.72-2.91z"
|
||
fill="currentColor"></path>
|
||
</svg>
|
||
</a>
|
||
</li>
|
||
<li>
|
||
<a href="https://github.com/github" title="GitHub's organization" style="color: #959da5;">
|
||
<svg version="1.1" width="20" height="20" viewBox="0 0 16 16" class="octicon octicon-mark-github"
|
||
aria-hidden="true">
|
||
<path fill-rule="evenodd"
|
||
d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.013 8.013 0 0016 8c0-4.42-3.58-8-8-8z">
|
||
</path>
|
||
</svg>
|
||
</a>
|
||
</li>
|
||
</ul>
|
||
<ul class="list-style-none d-flex text-gray">
|
||
<li class="mr-3">©
|
||
<!-- Inserts the current year as text immediately before this script element; used instead of document.write, which the HTML standard strongly discourages. -->
<script>document.currentScript.insertAdjacentText("beforebegin", String(new Date().getFullYear()));</script> GitHub, Inc.</li>
|
||
<li class="mr-3"><a
|
||
href="https://docs.github.com/github/site-policy/github-terms-of-service"
|
||
class="link-gray">Terms </a></li>
|
||
<li><a href="https://docs.github.com/github/site-policy/github-privacy-statement"
|
||
class="link-gray">Privacy </a></li>
|
||
</ul>
|
||
</div>
|
||
</div>
|
||
</main>
|
||
|
||
<script type="text/javascript">
  // Collapsible ".toggle" sections: once the DOM is ready, hide every direct
  // child of each section, re-show the ".name" headers, and let a click on a
  // header show or hide the rest of its section.
  $(function () {
    var $headers = $(".toggle .name");

    $(".toggle > *").hide();

    $headers.show().click(function () {
      var $siblings = $(this).parent().children();

      // Animated (400 ms) show/hide of everything except the header itself.
      $siblings.not(".name").toggle(400);
      // Flip the "open" marker class on the header.
      $siblings.filter(".name").toggleClass("open");
    });
  });
</script>
|
||
|
||
</body>
|
||
</html> |