Merge branch 'main' of github.com:github/codeql into 'main'

Conflicts:
	docs/codeql/query-help/codeql-cwe-coverage.rst
This commit is contained in:
Arthur Baars
2021-10-22 21:51:25 +02:00
51 changed files with 2761 additions and 150 deletions

View File

@@ -5,5 +5,7 @@
"cpp/ql/test/query-tests/Security/CWE/CWE-190/semmle/tainted/qlpack.yml",
"*/ql/examples/qlpack.yml",
"*/upgrades/qlpack.yml",
"javascript/ql/experimental/adaptivethreatmodeling/lib/qlpack.yml",
"javascript/ql/experimental/adaptivethreatmodeling/src/qlpack.yml",
"misc/legacy-support/*/qlpack.yml",
"misc/suite-helpers/qlpack.yml" ] }

View File

@@ -126,13 +126,7 @@ class MallocSizeExpr extends BufferAccess, FunctionCall {
}
class NetworkFunctionCall extends FunctionCall {
NetworkFunctionCall() {
getTarget().hasName("ntohd") or
getTarget().hasName("ntohf") or
getTarget().hasName("ntohl") or
getTarget().hasName("ntohll") or
getTarget().hasName("ntohs")
}
NetworkFunctionCall() { getTarget().hasName(["ntohd", "ntohf", "ntohl", "ntohll", "ntohs"]) }
}
class NetworkToBufferSizeConfiguration extends DataFlow::Configuration {

View File

@@ -103,12 +103,7 @@ private predicate posixSystemInfo(FunctionCall source, Element use) {
// - various filesystem parameters
// int uname(struct utsname *buf)
// - OS name and version
(
source.getTarget().hasName("confstr") or
source.getTarget().hasName("statvfs") or
source.getTarget().hasName("fstatvfs") or
source.getTarget().hasName("uname")
) and
source.getTarget().hasName(["confstr", "statvfs", "fstatvfs", "uname"]) and
use = source.getArgument(1)
}
@@ -128,14 +123,9 @@ private predicate posixPWInfo(FunctionCall source, Element use) {
// struct group *getgrnam(const char *name);
// struct group *getgrgid(gid_t);
// struct group *getgrent(void);
(
source.getTarget().hasName("getpwnam") or
source.getTarget().hasName("getpwuid") or
source.getTarget().hasName("getpwent") or
source.getTarget().hasName("getgrnam") or
source.getTarget().hasName("getgrgid") or
source.getTarget().hasName("getgrent")
) and
source
.getTarget()
.hasName(["getpwnam", "getpwuid", "getpwent", "getgrnam", "getgrgid", "getgrent"]) and
use = source
or
// int getpwnam_r(const char *name, struct passwd *pwd,
@@ -146,31 +136,15 @@ private predicate posixPWInfo(FunctionCall source, Element use) {
// char *buf, size_t buflen, struct group **result);
// int getgrnam_r(const char *name, struct group *grp,
// char *buf, size_t buflen, struct group **result);
(
source.getTarget().hasName("getpwnam_r") or
source.getTarget().hasName("getpwuid_r") or
source.getTarget().hasName("getgrgid_r") or
source.getTarget().hasName("getgrnam_r")
) and
(
use = source.getArgument(1) or
use = source.getArgument(2) or
use = source.getArgument(4)
)
source.getTarget().hasName(["getpwnam_r", "getpwuid_r", "getgrgid_r", "getgrnam_r"]) and
use = source.getArgument([1, 2, 4])
or
// int getpwent_r(struct passwd *pwd, char *buffer, size_t bufsize,
// struct passwd **result);
// int getgrent_r(struct group *gbuf, char *buf,
// size_t buflen, struct group **gbufp);
(
source.getTarget().hasName("getpwent_r") or
source.getTarget().hasName("getgrent_r")
) and
(
use = source.getArgument(0) or
use = source.getArgument(1) or
use = source.getArgument(3)
)
source.getTarget().hasName(["getpwent_r", "getgrent_r"]) and
use = source.getArgument([0, 1, 3])
}
/**
@@ -190,13 +164,11 @@ private predicate windowsSystemInfo(FunctionCall source, Element use) {
// BOOL WINAPI GetVersionEx(_Inout_ LPOSVERSIONINFO lpVersionInfo);
// void WINAPI GetSystemInfo(_Out_ LPSYSTEM_INFO lpSystemInfo);
// void WINAPI GetNativeSystemInfo(_Out_ LPSYSTEM_INFO lpSystemInfo);
(
source.getTarget().hasGlobalName("GetVersionEx") or
source.getTarget().hasGlobalName("GetVersionExA") or
source.getTarget().hasGlobalName("GetVersionExW") or
source.getTarget().hasGlobalName("GetSystemInfo") or
source.getTarget().hasGlobalName("GetNativeSystemInfo")
) and
source
.getTarget()
.hasGlobalName([
"GetVersionEx", "GetVersionExA", "GetVersionExW", "GetSystemInfo", "GetNativeSystemInfo"
]) and
use = source.getArgument(0)
}
@@ -216,11 +188,11 @@ private predicate windowsFolderPath(FunctionCall source, Element use) {
// _In_ int csidl,
// _In_ BOOL fCreate
// );
(
source.getTarget().hasGlobalName("SHGetSpecialFolderPath") or
source.getTarget().hasGlobalName("SHGetSpecialFolderPathA") or
source.getTarget().hasGlobalName("SHGetSpecialFolderPathW")
) and
source
.getTarget()
.hasGlobalName([
"SHGetSpecialFolderPath", "SHGetSpecialFolderPathA", "SHGetSpecialFolderPathW"
]) and
use = source.getArgument(1)
or
// HRESULT SHGetKnownFolderPath(
@@ -239,11 +211,7 @@ private predicate windowsFolderPath(FunctionCall source, Element use) {
// _In_ DWORD dwFlags,
// _Out_ LPTSTR pszPath
// );
(
source.getTarget().hasGlobalName("SHGetFolderPath") or
source.getTarget().hasGlobalName("SHGetFolderPathA") or
source.getTarget().hasGlobalName("SHGetFolderPathW")
) and
source.getTarget().hasGlobalName(["SHGetFolderPath", "SHGetFolderPathA", "SHGetFolderPathW"]) and
use = source.getArgument(4)
or
// HRESULT SHGetFolderPathAndSubDir(
@@ -254,11 +222,11 @@ private predicate windowsFolderPath(FunctionCall source, Element use) {
// _In_ LPCTSTR pszSubDir,
// _Out_ LPTSTR pszPath
// );
(
source.getTarget().hasGlobalName("SHGetFolderPathAndSubDir") or
source.getTarget().hasGlobalName("SHGetFolderPathAndSubDirA") or
source.getTarget().hasGlobalName("SHGetFolderPathAndSubDirW")
) and
source
.getTarget()
.hasGlobalName([
"SHGetFolderPathAndSubDir", "SHGetFolderPathAndSubDirA", "SHGetFolderPathAndSubDirW"
]) and
use = source.getArgument(5)
}
@@ -273,11 +241,7 @@ class WindowsFolderPath extends SystemData {
}
private predicate logonUser(FunctionCall source, VariableAccess use) {
(
source.getTarget().hasGlobalName("LogonUser") or
source.getTarget().hasGlobalName("LogonUserW") or
source.getTarget().hasGlobalName("LogonUserA")
) and
source.getTarget().hasGlobalName(["LogonUser", "LogonUserW", "LogonUserA"]) and
use = source.getAnArgument()
}
@@ -297,11 +261,7 @@ private predicate regQuery(FunctionCall source, VariableAccess use) {
// _Out_opt_ LPTSTR lpValue,
// _Inout_opt_ PLONG lpcbValue
// );
(
source.getTarget().hasGlobalName("RegQueryValue") or
source.getTarget().hasGlobalName("RegQueryValueA") or
source.getTarget().hasGlobalName("RegQueryValueW")
) and
source.getTarget().hasGlobalName(["RegQueryValue", "RegQueryValueA", "RegQueryValueW"]) and
use = source.getArgument(2)
or
// LONG WINAPI RegQueryMultipleValues(
@@ -311,11 +271,11 @@ private predicate regQuery(FunctionCall source, VariableAccess use) {
// _Out_opt_ LPTSTR lpValueBuf,
// _Inout_opt_ LPDWORD ldwTotsize
// );
(
source.getTarget().hasGlobalName("RegQueryMultipleValues") or
source.getTarget().hasGlobalName("RegQueryMultipleValuesA") or
source.getTarget().hasGlobalName("RegQueryMultipleValuesW")
) and
source
.getTarget()
.hasGlobalName([
"RegQueryMultipleValues", "RegQueryMultipleValuesA", "RegQueryMultipleValuesW"
]) and
use = source.getArgument(3)
or
// LONG WINAPI RegQueryValueEx(
@@ -326,11 +286,7 @@ private predicate regQuery(FunctionCall source, VariableAccess use) {
// _Out_opt_ LPBYTE lpData,
// _Inout_opt_ LPDWORD lpcbData
// );
(
source.getTarget().hasGlobalName("RegQueryValueEx") or
source.getTarget().hasGlobalName("RegQueryValueExA") or
source.getTarget().hasGlobalName("RegQueryValueExW")
) and
source.getTarget().hasGlobalName(["RegQueryValueEx", "RegQueryValueExA", "RegQueryValueExW"]) and
use = source.getArgument(4)
or
// LONG WINAPI RegGetValue(
@@ -342,11 +298,7 @@ private predicate regQuery(FunctionCall source, VariableAccess use) {
// _Out_opt_ PVOID pvData,
// _Inout_opt_ LPDWORD pcbData
// );
(
source.getTarget().hasGlobalName("RegGetValue") or
source.getTarget().hasGlobalName("RegGetValueA") or
source.getTarget().hasGlobalName("RegGetValueW")
) and
source.getTarget().hasGlobalName(["RegGetValue", "RegGetValueA", "RegGetValueW"]) and
use = source.getArgument(5)
}
@@ -408,12 +360,7 @@ private predicate socketOutput(FunctionCall call, Expr data) {
// const struct sockaddr *dest_addr, socklen_t addrlen);
// ssize_t sendmsg(int sockfd, const struct msghdr *msg, int flags);
// int write(int handle, void *buffer, int nbyte);
(
call.getTarget().hasGlobalName("send") or
call.getTarget().hasGlobalName("sendto") or
call.getTarget().hasGlobalName("sendmsg") or
call.getTarget().hasGlobalName("write")
) and
call.getTarget().hasGlobalName(["send", "sendto", "sendmsg", "write"]) and
data = call.getArgument(1) and
socketFileDescriptor(call.getArgument(0))
)

View File

@@ -44,14 +44,13 @@ class SetuidLikeWrapperCall extends FunctionCall {
class CallBeforeSetuidFunctionCall extends FunctionCall {
CallBeforeSetuidFunctionCall() {
(
getTarget().hasGlobalName("setgid") or
getTarget().hasGlobalName("setresgid") or
// Compatibility may require skipping initgroups and setgroups return checks.
// A stricter best practice is to check the result and errno for EPERM.
getTarget().hasGlobalName("initgroups") or
getTarget().hasGlobalName("setgroups")
) and
getTarget()
.hasGlobalName([
"setgid", "setresgid",
// Compatibility may require skipping initgroups and setgroups return checks.
// A stricter best practice is to check the result and errno for EPERM.
"initgroups", "setgroups"
]) and
// setgid/setresgid/etc with the root group are false positives.
not argumentMayBeRoot(getArgument(0))
}

View File

@@ -15,13 +15,7 @@ import cpp
from Element u, ArithmeticType at
where
(
at.hasName("int") or
at.hasName("short") or
at.hasName("long") or
at.hasName("float") or
at.hasName("double")
) and
at.hasName(["int", "short", "long", "float", "double"]) and
u = at.getATypeNameUse() and
not at instanceof WideCharType
select u, "AV Rule 209: The basic types of int, short, long, float and double shall not be used."

View File

@@ -552,11 +552,16 @@ private predicate defaultDynamicConversion(Type fromType, Type toType) {
fromType instanceof RefType and toType instanceof DynamicType
}
pragma[noinline]
private predicate systemDelegateBaseType(RefType t) {
t = any(SystemDelegateClass c).getABaseType*()
}
// This is a deliberate, small cartesian product, so we have manually lifted it to force the
// evaluator to evaluate it in its entirety, rather than trying to optimize it in context.
pragma[noinline]
private predicate defaultDelegateConversion(RefType fromType, RefType toType) {
fromType instanceof DelegateType and toType = any(SystemDelegateClass c).getABaseType*()
fromType instanceof DelegateType and systemDelegateBaseType(toType)
}
private predicate convRefTypeRefType(RefType fromType, RefType toType) {

View File

@@ -7,7 +7,7 @@ QL packs are used to organize the files used in CodeQL analysis. They
contain queries, library files, query suites, and important metadata.
The `CodeQL repository <https://github.com/github/codeql>`__ contains QL packs for
C/C++, C#, Java, JavaScript, and Python. The `CodeQL for Go
C/C++, C#, Java, JavaScript, Python, and Ruby. The `CodeQL for Go
<https://github.com/github/codeql-go/>`__ repository contains a QL pack for Go
analysis. You can also make custom QL packs to contain your own queries and
libraries.

View File

@@ -88,15 +88,15 @@ Creating databases for non-compiled languages
---------------------------------------------
The CodeQL CLI includes extractors to create databases for non-compiled
languages---specifically, JavaScript (and TypeScript) and Python. These
extractors are automatically invoked when you specify JavaScript or Python as
languages---specifically, JavaScript (and TypeScript), Python, and Ruby. These
extractors are automatically invoked when you specify JavaScript, Python, or Ruby as
the ``--language`` option when executing ``database create``. When creating
databases for these languages you must ensure that all additional dependencies
are available.
.. pull-quote:: Important
When you run ``database create`` for JavaScript, TypeScript, and Python, you should not
When you run ``database create`` for JavaScript, TypeScript, Python, and Ruby, you should not
specify a ``--command`` option. Otherwise this overrides the normal
extractor invocation, which will create an empty database. If you create
databases for multiple languages and one of them is a compiled language,
@@ -116,6 +116,8 @@ Here, we have specified a ``--source-root`` path, which is the location where
database creation is executed, but is not necessarily the checkout root of the
codebase.
By default, files in ``node_modules`` and ``bower_components`` directories are not extracted.
Python
~~~~~~
@@ -127,14 +129,25 @@ When creating databases for Python you must ensure:
packages that the codebase depends on.
- You have installed the `virtualenv <https://pypi.org/project/virtualenv/>`__ pip module.
In the command line you must specify ``--language=python``. For example
In the command line you must specify ``--language=python``. For example::
::
codeql database create --language=python <output-folder>/python-database
executes the ``database create`` subcommand from the code's checkout root,
This executes the ``database create`` subcommand from the code's checkout root,
generating a new Python database at ``<output-folder>/python-database``.
Ruby
~~~~
Creating databases for Ruby requires no additional dependencies.
In the command line you must specify ``--language=ruby``. For example::
codeql database create --language=ruby --source-root <folder-to-extract> <output-folder>/ruby-database
Here, we have specified a ``--source-root`` path, which is the location where
database creation is executed, but is not necessarily the checkout root of the
codebase.
Creating databases for compiled languages
-----------------------------------------

View File

@@ -100,7 +100,7 @@ further options on the command line.
The `CodeQL repository <https://github.com/github/codeql>`__ contains
the queries and libraries required for CodeQL analysis of C/C++, C#, Java,
JavaScript/TypeScript, and Python.
JavaScript/TypeScript, Python, and Ruby.
Clone a copy of this repository into ``codeql-home``.
By default, the root of the cloned repository will be called ``codeql``.

View File

@@ -78,7 +78,7 @@ Using the starter workspace
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The starter workspace is a Git repository. It contains:
* The `repository of CodeQL libraries and queries <https://github.com/github/codeql>`__ for C/C++, C#, Java, JavaScript, and Python. This is included as a submodule, so it can be updated without affecting your custom queries.
* The `repository of CodeQL libraries and queries <https://github.com/github/codeql>`__ for C/C++, C#, Java, JavaScript, Python, and Ruby. This is included as a submodule, so it can be updated without affecting your custom queries.
* The `repository of CodeQL libraries and queries <https://github.com/github/codeql-go>`__ for Go. This is also included as a submodule.
* A series of folders named ``codeql-custom-queries-<language>``. These are ready for you to start developing your own custom queries for each language, using the standard libraries. There are some example queries to get you started.

View File

@@ -14,3 +14,5 @@ Experiment and learn how to write effective and efficient queries for CodeQL dat
- :doc:`Basic query for Ruby code <basic-query-for-ruby-code>`: Learn to write and run a simple CodeQL query using LGTM.
- :doc:`CodeQL library for Ruby <codeql-library-for-ruby>`: When you're analyzing a Ruby program, you can make use of the large collection of classes in the CodeQL library for Ruby.
.. include:: ../reusables/ruby-beta-note.rst

View File

@@ -1333,7 +1333,7 @@ The values of a set literal expression are all the values of all the contained e
Set literals are supported from release 2.1.0 of the CodeQL CLI, and release 1.24 of LGTM Enterprise.
Since release 2.6.3 of the CodeQL CLI, and release 1.28 of LGTM Enterprise, a trailing comma is allowed in a set literal.
Since release 2.7.0 of the CodeQL CLI, and release 1.28 of LGTM Enterprise, a trailing comma is allowed in a set literal.
Disambiguation of expressions
-----------------------------

View File

@@ -34,3 +34,5 @@ Note that the CWE coverage includes both "`supported queries <https://github.com
javascript-cwe
python-cwe
ruby-cwe
.. include:: ../reusables/ruby-beta-note.rst

View File

@@ -11,7 +11,6 @@ View the query help for the queries included in the ``code-scanning``, ``securit
- :doc:`CodeQL query help for Python <python>`
- :doc:`CodeQL query help for Ruby <ruby>`
.. pull-quote:: Information
Each query help article includes:
@@ -24,6 +23,8 @@ View the query help for the queries included in the ``code-scanning``, ``securit
For a full list of the CWEs covered by these queries, see ":doc:`CodeQL CWE coverage <codeql-cwe-coverage>`."
.. include:: ../reusables/ruby-beta-note.rst
.. toctree::
:hidden:
:titlesonly:

View File

@@ -15,4 +15,6 @@
* - JavaScript/TypeScript
- ``javascript``
* - Python
- ``python``
- ``python``
* - Ruby
- ``ruby``

View File

@@ -0,0 +1,4 @@
.. pull-quote:: Note
CodeQL analysis for Ruby is currently in beta. During the beta, analysis of Ruby code,
and the accompanying documentation, will not be as comprehensive as for other languages.

View File

@@ -22,7 +22,8 @@
Eclipse compiler for Java (ECJ) [5]_",``.java``
JavaScript,ECMAScript 2021 or lower,Not applicable,"``.js``, ``.jsx``, ``.mjs``, ``.es``, ``.es6``, ``.htm``, ``.html``, ``.xhm``, ``.xhtml``, ``.vue``, ``.json``, ``.yaml``, ``.yml``, ``.raml``, ``.xml`` [6]_"
Python,"2.7, 3.5, 3.6, 3.7, 3.8, 3.9",Not applicable,``.py``
TypeScript [7]_,"2.6-4.4",Standard TypeScript compiler,"``.ts``, ``.tsx``"
Ruby [7]_,"up to 3.0.2",Not applicable,"``.rb``, ``.erb``, ``.gemspec``, ``Gemfile``"
TypeScript [8]_,"2.6-4.4",Standard TypeScript compiler,"``.ts``, ``.tsx``"
.. container:: footnote-group
@@ -32,4 +33,5 @@
.. [4] Builds that execute on Java 7 to 16 can be analyzed. The analysis understands Java 16 standard language features.
.. [5] ECJ is supported when the build invokes it via the Maven Compiler plugin or the Takari Lifecycle plugin.
.. [6] JSX and Flow code, YAML, JSON, HTML, and XML files may also be analyzed with JavaScript files.
.. [7] TypeScript analysis is performed by running the JavaScript extractor with TypeScript enabled. This is the default for LGTM.
.. [7] Requires glibc 2.17.
.. [8] TypeScript analysis is performed by running the JavaScript extractor with TypeScript enabled. This is the default for LGTM.

View File

@@ -116,7 +116,7 @@ Declaring sources and sinks
You must provide information about the ``source`` and ``sink`` in your path query. These are objects that correspond to the nodes of the paths that you are exploring.
The name and the type of the ``source`` and the ``sink`` must be declared in the ``from`` statement of the query, and the types must be compatible with the nodes of the graph computed by the ``edges`` predicate.
If you are querying C/C++, C#, Java, or JavaScript code (and you have used ``import DataFlow::PathGraph`` in your query), the definitions of the ``source`` and ``sink`` are accessed via the ``Configuration`` class in the data flow library. You should declare all three of these objects in the ``from`` statement.
If you are querying C/C++, C#, Java, JavaScript, Python, or Ruby code (and you have used ``import DataFlow::PathGraph`` in your query), the definitions of the ``source`` and ``sink`` are accessed via the ``Configuration`` class in the data flow library. You should declare all three of these objects in the ``from`` statement.
For example:
.. code-block:: ql

View File

@@ -104,7 +104,9 @@ private class ContainerFlowSummaries extends SummaryModelCsv {
"java.util;Map$Entry;true;setValue;;;Argument[0];MapValue of Argument[-1];value",
"java.lang;Iterable;true;iterator;();;Element of Argument[-1];Element of ReturnValue;value",
"java.lang;Iterable;true;spliterator;();;Element of Argument[-1];Element of ReturnValue;value",
"java.lang;Iterable;true;forEach;(Consumer);;Element of Argument[-1];Parameter[0] of Argument[0];value",
"java.util;Iterator;true;next;;;Element of Argument[-1];ReturnValue;value",
"java.util;Iterator;true;forEachRemaining;(Consumer);;Element of Argument[-1];Parameter[0] of Argument[0];value",
"java.util;ListIterator;true;previous;;;Element of Argument[-1];ReturnValue;value",
"java.util;ListIterator;true;add;(Object);;Argument[0];Element of Argument[-1];value",
"java.util;ListIterator;true;set;(Object);;Argument[0];Element of Argument[-1];value",
@@ -135,6 +137,8 @@ private class ContainerFlowSummaries extends SummaryModelCsv {
"java.util;Map;true;merge;(Object,Object,BiFunction);;Argument[1];MapValue of Argument[-1];value",
"java.util;Map;true;putAll;(Map);;MapKey of Argument[0];MapKey of Argument[-1];value",
"java.util;Map;true;putAll;(Map);;MapValue of Argument[0];MapValue of Argument[-1];value",
"java.util;Map;true;forEach;(BiConsumer);;MapKey of Argument[-1];Parameter[0] of Argument[0];value",
"java.util;Map;true;forEach;(BiConsumer);;MapValue of Argument[-1];Parameter[1] of Argument[0];value",
"java.util;Collection;true;parallelStream;();;Element of Argument[-1];Element of ReturnValue;value",
"java.util;Collection;true;stream;();;Element of Argument[-1];Element of ReturnValue;value",
"java.util;Collection;true;toArray;;;Element of Argument[-1];ArrayElement of ReturnValue;value",

View File

@@ -25,5 +25,57 @@ public class Test {
Iterator<String> it = m.values().iterator();
String x5 = it.next();
sink(x5); // Flow
it.forEachRemaining(x6 -> {
sink(x6); // Flow
});
m.forEach((x7_k, x8_v) -> {
sink(x7_k); // No flow
sink(x8_v); // Flow
});
m.entrySet().forEach(entry -> {
String x9 = entry.getKey();
String x10 = entry.getValue();
sink(x9); // No flow
sink(x10); // Flow
});
}
public void run2() { // Map whose key and value are both tainted (comments are trailing-only so no line or column moves).
HashMap<String, String> m = new HashMap<>();
m.put(tainted, tainted); // first argument taints the key, second taints the value
m.forEach((x11_k, x12_v) -> { // BiConsumer: first parameter is the key, second is the value
sink(x11_k); // Flow
sink(x12_v); // Flow
});
m.entrySet().forEach(entry -> { // same check via the entry set
String x13 = entry.getKey();
String x14 = entry.getValue();
sink(x13); // Flow
sink(x14); // Flow
});
}
public void run3() { // Set checked before and after a tainted element is added (comments are trailing-only so no line or column moves).
Set<String> s = new HashSet<>(); // nothing has been added yet
String x15 = s.iterator().next();
sink(x15); // No flow
s.forEach(x16 -> {
sink(x16); // No flow
});
s.add(tainted); // from here on the set holds a tainted element
String x17 = s.iterator().next();
sink(x17); // Flow
s.forEach(x18 -> {
sink(x18); // Flow
});
}
}

View File

@@ -2,3 +2,12 @@
| Test.java:13:18:13:24 | tainted | Test.java:18:10:18:11 | x3 |
| Test.java:13:18:13:24 | tainted | Test.java:22:12:22:13 | x4 |
| Test.java:13:18:13:24 | tainted | Test.java:27:10:27:11 | x5 |
| Test.java:13:18:13:24 | tainted | Test.java:30:12:30:13 | x6 |
| Test.java:13:18:13:24 | tainted | Test.java:35:12:35:15 | x8_v |
| Test.java:13:18:13:24 | tainted | Test.java:42:12:42:14 | x10 |
| Test.java:49:11:49:17 | tainted | Test.java:52:12:52:16 | x11_k |
| Test.java:49:11:49:17 | tainted | Test.java:59:12:59:14 | x13 |
| Test.java:49:20:49:26 | tainted | Test.java:53:12:53:16 | x12_v |
| Test.java:49:20:49:26 | tainted | Test.java:60:12:60:14 | x14 |
| Test.java:73:11:73:17 | tainted | Test.java:75:10:75:12 | x17 |
| Test.java:73:11:73:17 | tainted | Test.java:78:12:78:14 | x18 |

View File

@@ -0,0 +1,6 @@
# [Internal only] Adaptive Threat Modeling for JavaScript
This directory contains CodeQL libraries and queries that power adaptive threat modeling for JavaScript.
All APIs are experimental and may change in the future.
These queries can only be run by internal users; for external users they will return no results.

View File

@@ -0,0 +1,112 @@
/*
* For internal use only.
*
* Configures boosting for adaptive threat modeling (ATM).
*/
private import javascript as raw
import EndpointTypes
/**
* EXPERIMENTAL. This API may change in the future.
*
* A configuration class for defining known endpoints and endpoint filters for adaptive threat
* modeling (ATM). Each boosted query must define its own extension of this abstract class.
*
* A configuration defines a set of known sources (`isKnownSource`) and sinks (`isKnownSink`).
* It must also define a sink endpoint filter (`isEffectiveSink`) that filters candidate sinks
* predicted by the machine learning model to a set of effective sinks.
*
* Every member predicate below has an empty default (`none()`), and `getScoreCutoff` defaults
* to `0.0`, so an extension only needs to override the members it cares about.
*
* To get started with ATM, you can copy-paste an implementation of the relevant predicates from a
* `DataFlow::Configuration` or `TaintTracking::Configuration` class for a standard security query.
* For example, for SQL injection you can start by defining the `isKnownSource` and `isKnownSink`
* predicates in the ATM configuration by copying and pasting the implementations of `isSource` and
* `isSink` from `SqlInjection::Configuration`.
*
* Note that if the security query configuration defines additional edges beyond the standard data
* flow edges, such as `NosqlInjection::Configuration`, you may need to replace the definition of
* `isAdditionalFlowStep` with a more generalised definition of additional edges. See
* `NosqlInjectionATM.qll` for an example of doing this.
*/
abstract class ATMConfig extends string {
// The characteristic predicate places no constraint on the string value (`any()`).
// `bindingset[this]` declares that `this` must be bound by the context, which is needed
// because `string` is an infinite type.
bindingset[this]
ATMConfig() { any() }
/**
* EXPERIMENTAL. This API may change in the future.
*
* Holds if `source` is a known source of flow.
*
* The default implementation holds for no node.
*/
predicate isKnownSource(raw::DataFlow::Node source) { none() }
/**
* EXPERIMENTAL. This API may change in the future.
*
* Holds if `sink` is a known sink of flow.
*
* The default implementation holds for no node.
*/
predicate isKnownSink(raw::DataFlow::Node sink) { none() }
/**
* EXPERIMENTAL. This API may change in the future.
*
* Holds if the candidate source `candidateSource` predicted by the machine learning model should be
* an effective source, i.e. one considered as a possible source of flow in the boosted query.
*
* The default implementation holds for no node.
*/
predicate isEffectiveSource(raw::DataFlow::Node candidateSource) { none() }
/**
* EXPERIMENTAL. This API may change in the future.
*
* Holds if the candidate sink `candidateSink` predicted by the machine learning model should be
* an effective sink, i.e. one considered as a possible sink of flow in the boosted query.
*
* The default implementation holds for no node.
*/
predicate isEffectiveSink(raw::DataFlow::Node candidateSink) { none() }
/**
* EXPERIMENTAL. This API may change in the future.
*
* Holds if the candidate sink `candidateSink` predicted by the machine learning model should be
* an effective sink that overrides the score provided by the machine learning model with the
* score `score` for reason `why`. The effective sinks identified by this predicate MUST be a
* subset of those identified by the `isEffectiveSink` predicate.
*
* For example, in the ATM external API query, we use this predicate to ensure the ATM external
* API query produces the same results as the standard external API query, but assigns flows
* involving sinks that are filtered out by the endpoint filters a score of 0.
*
* This predicate can be phased out once we no longer need to rely on predicates like
* `paddedScore` in the ATM CodeQL libraries to add scores to alert messages in a way that works
* with lexical sort orders.
*
* The default implementation holds for no tuple.
*/
predicate isEffectiveSinkWithOverridingScore(
raw::DataFlow::Node candidateSink, float score, string why
) {
none()
}
/**
* EXPERIMENTAL. This API may change in the future.
*
* Gets an endpoint type for the sources of this query. A query may have multiple applicable
* endpoint types for its sources.
*
* The default implementation has no result.
*/
EndpointType getASourceEndpointType() { none() }
/**
* EXPERIMENTAL. This API may change in the future.
*
* Gets an endpoint type for the sinks of this query. A query may have multiple applicable
* endpoint types for its sinks.
*
* The default implementation has no result.
*/
EndpointType getASinkEndpointType() { none() }
/**
* EXPERIMENTAL. This API may change in the future.
*
* Gets the default cut-off value that controls how many alerts are produced.
* The cut-off value must be in the range [0,1].
* A cut-off value of 0 only produces alerts that are likely true-positives.
* A cut-off value of 1 produces all alerts including those that are likely false-positives.
*
* The default implementation returns 0.0.
*/
float getScoreCutoff() { result = 0.0 }
}

View File

@@ -0,0 +1,125 @@
/*
* For internal use only.
*
* Provides information about the results of boosted queries for use in adaptive threat modeling (ATM).
*/
private import javascript as raw
private import raw::DataFlow as DataFlow
import ATMConfig
private import BaseScoring
private import EndpointScoring as EndpointScoring
/** Provides the public, informational ATM API; currently it only wraps `ResultsInfo`. */
module ATM {
/**
* EXPERIMENTAL. This API may change in the future.
*
* This module contains informational predicates about the results returned by adaptive threat
* modeling (ATM).
*/
module ResultsInfo {
/**
* Holds if the flow from `source` to `sink` represents a result with
* sufficiently high likelihood of being a true-positive.
*
* This delegates to `shouldResultBeIncluded` on any `ScoringResults` instance.
*/
pragma[inline]
private predicate shouldResultBeIncluded(DataFlow::Node source, DataFlow::Node sink) {
any(ScoringResults results).shouldResultBeIncluded(source, sink)
}
/**
* EXPERIMENTAL. This API may change in the future.
*
* Gets the score for the flow from `source` to `sink` in the boosted query.
*
* There is a result only if some `DataFlow::Configuration` has flow from `source` to `sink`,
* the result should be included, and the scoring results yield exactly one score for the
* pair (enforced by the `unique` aggregate).
*/
pragma[inline]
float getScoreForFlow(DataFlow::Node source, DataFlow::Node sink) {
any(DataFlow::Configuration cfg).hasFlow(source, sink) and
shouldResultBeIncluded(source, sink) and
result = unique(float s | s = any(ScoringResults results).getScoreForFlow(source, sink))
}
/**
* Gets the string form of a score returned from `getScoreForFlow` (or of the score 0), padded
* to `length` characters by adding a decimal point if one does not already exist, and "0"s
* after that decimal point.
*
* The unpadded string is a result for its own length; each further result appends one "0".
*
* Note that this predicate must itself define an upper bound on `length`, so that it has a
* finite number of results. Currently this is defined as 12.
*/
private string paddedScore(float score, int length) {
// In this definition, we must restrict the values that `length` and `score` can take on so
// that the predicate has a finite number of results.
(score = getScoreForFlow(_, _) or score = 0) and
length = result.length() and
(
// We need to make sure the padded score contains a "." so lexically sorting the padded
// scores is equivalent to numerically sorting the scores.
score.toString().charAt(_) = "." and
result = score.toString()
or
not score.toString().charAt(_) = "." and
result = score.toString() + "."
)
or
// Recursive case: extend a shorter padded score by one trailing "0", up to 12 characters.
result = paddedScore(score, length - 1) + "0" and
length <= 12
}
/**
* EXPERIMENTAL. This API may change in the future.
*
* Gets a string representing the score of the flow between `source` and `sink` in the
* boosted query.
*
* The returned string is a fixed length, such that lexically sorting the strings returned by
* this predicate gives the same sort order as numerically sorting the scores of the flows.
*
* NOTE(review): the sort-order claim presumably relies on `score.toString()` being plain,
* non-negative decimal notation (no sign, no exponent) - confirm.
*/
pragma[inline]
string getScoreStringForFlow(DataFlow::Node source, DataFlow::Node sink) {
exists(float score |
score = getScoreForFlow(source, sink) and
(
// A length of 12 is equivalent to 10 decimal places (one leading digit, the ".", then
// ten digits). Longer strings are truncated; shorter ones are zero-padded.
score.toString().length() >= 12 and
result = score.toString().substring(0, 12)
or
score.toString().length() < 12 and
result = paddedScore(score, 12)
)
)
}
/**
* EXPERIMENTAL. This API may change in the future.
*
* Holds if the flow from `source` to `sink` is likely to be reported by the base security
* query, i.e. `source` is a known source and `sink` is a known sink of an ATM configuration.
*
* Currently this is a heuristic: it ignores potential differences in the definitions of
* additional flow steps.
*
* NOTE(review): the two `getCfg()` calls are evaluated independently, so if more than one
* `ATMConfig` exists the source and the sink may be matched by different configurations -
* confirm this is intended.
*/
pragma[inline]
predicate isFlowLikelyInBaseQuery(DataFlow::Node source, DataFlow::Node sink) {
getCfg().isKnownSource(source) and getCfg().isKnownSink(sink)
}
/**
* EXPERIMENTAL. This API may change in the future.
*
* Gets additional information about why ATM included the flow from `source` to `sink` as an
* alert.
*
* The result has the form `[Source origins: <list>; sink origins: <list>]`, where each list is
* the ", "-separated concatenation of the origins reported by the scoring results, or
* `unknown` when that concatenation is empty.
*/
pragma[inline]
string getAdditionalAlertInfo(DataFlow::Node source, DataFlow::Node sink) {
exists(string sourceOrigins, string sinkOrigins |
sourceOrigins = concat(any(ScoringResults results).getASourceOrigin(source), ", ") and
sinkOrigins = concat(any(ScoringResults results).getASinkOrigin(sink), ", ") and
result =
"[Source origins: " +
any(string s | if sourceOrigins != "" then s = sourceOrigins else s = "unknown") +
"; sink origins: " +
any(string s | if sinkOrigins != "" then s = sinkOrigins else s = "unknown") + "]"
)
}
}
}

View File

@@ -0,0 +1,121 @@
/*
* For internal use only.
*
* Provides shared scoring functionality for use in adaptive threat modeling (ATM).
*/
private import javascript
private import ATMConfig
/**
 * An external predicate: its tuples are not defined in QL but are supplied to the
 * evaluator from outside this library.
 *
 * Each tuple has four string columns. Judging by the parameter names they presumably
 * hold a model's checksum, language, name and type -- TODO confirm against the producer
 * of this data.
 */
external predicate adaptiveThreatModelingModels(
string modelChecksum, string modelLanguage, string modelName, string modelType
);
/**
 * Get the ATM configuration.
 *
 * The body places no constraint on `result`, so this yields every value of type
 * `ATMConfig`.
 */
ATMConfig getCfg() { any() }
/**
 * Maps an endpoint to the entities that enclose it, so that the endpoint can take part in
 * similarity analysis.
 */
module EndpointToEntity {
  private import CodeToFeatures

  /**
   * Gets an extracted entity whose defining function is the container of `endpoint` or one
   * of that container's (transitively) enclosing containers.
   *
   * A single endpoint can therefore be associated with several entities.
   */
  DatabaseFeatures::Entity getAnEntityForEndpoint(DataFlow::Node endpoint) {
    exists(StmtContainer enclosing |
      enclosing = endpoint.getContainer().getEnclosingContainer*() and
      enclosing = result.getDefinedFunction()
    ) and
    // Only entities that appear in the `entities` relation are considered.
    DatabaseFeatures::entities(result, _, _, _, _, _, _, _, _)
  }
}
/**
 * Maps an entity to the effective endpoints found inside it.
 *
 * Terminology:
 *
 * - A *candidate* endpoint is a data flow node that is handed to the relevant endpoint
 *   filter. Once a model outperforms the baseline, candidates will likely be chosen from
 *   the model's most confident predictions.
 * - An *effective* endpoint is a candidate that survives the endpoint filter, that is, one
 *   for which `isEffectiveSink` (or `isEffectiveSource`) of the `ATMConfig` instance in
 *   scope holds.
 */
module EntityToEffectiveEndpoint {
  private import CodeToFeatures

  /**
   * Gets a candidate endpoint for `entity`.
   *
   * In this baseline implementation every data flow node whose container is the function
   * defined by `entity`, or is nested inside that function, is a candidate.
   */
  private DataFlow::Node getABaselineEndpointCandidate(DatabaseFeatures::Entity entity) {
    entity.getDefinedFunction() = result.getContainer().getEnclosingContainer*()
  }

  /**
   * Gets an effective source located inside `entity`.
   *
   * N.B. This is _not_ the inverse of `EndpointToEntity::getAnEntityForEndpoint`: the source
   * may sit in a function that is itself defined inside `entity`.
   */
  DataFlow::Node getAnEffectiveSource(DatabaseFeatures::Entity entity) {
    getCfg().isEffectiveSource(result) and
    result = getABaselineEndpointCandidate(entity)
  }

  /**
   * Gets an effective sink located inside `entity`.
   *
   * N.B. This is _not_ the inverse of `EndpointToEntity::getAnEntityForEndpoint`: the sink
   * may sit in a function that is itself defined inside `entity`.
   */
  DataFlow::Node getAnEffectiveSink(DatabaseFeatures::Entity entity) {
    getCfg().isEffectiveSink(result) and
    result = getABaselineEndpointCandidate(entity)
  }
}
/**
 * Scoring information produced by a scoring model.
 *
 * Scoring models include embedding models and endpoint scoring models.
 *
 * This class is abstract and declares only abstract member predicates; concrete scoring
 * models are expected to extend it and implement each of them.
 */
abstract class ScoringResults extends string {
// The characteristic predicate does not restrict `this`; `bindingset[this]` states that
// `this` must already be bound wherever the predicate is used.
bindingset[this]
ScoringResults() { any() }
/**
 * Get ATM's confidence that a path between `source` and `sink` represents a security
 * vulnerability. This will be a number between 0.0 and 1.0.
 */
abstract float getScoreForFlow(DataFlow::Node source, DataFlow::Node sink);
/**
 * Get a string representing why ATM included the given source in the dataflow analysis.
 *
 * In general, there may be multiple reasons why ATM included the given source, in which case
 * this predicate should have multiple results.
 */
abstract string getASourceOrigin(DataFlow::Node source);
/**
 * Get a string representing why ATM included the given sink in the dataflow analysis.
 *
 * In general, there may be multiple reasons why ATM included the given sink, in which case this
 * predicate should have multiple results.
 */
abstract string getASinkOrigin(DataFlow::Node sink);
/**
 * Holds if the flow from `source` to `sink` represents a result with a
 * sufficiently high likelihood of being a true positive.
 */
pragma[inline]
abstract predicate shouldResultBeIncluded(DataFlow::Node source, DataFlow::Node sink);
}

View File

@@ -0,0 +1,444 @@
/*
* For internal use only.
*
* Extracts data about the functions in the database for use in adaptive threat modeling (ATM).
*/
module Raw {
private import javascript as raw
/** Alias for the JavaScript library's `ASTNode` class. */
class RawAstNode = raw::ASTNode;
/** Alias: in this module an "entity" is a JavaScript `Function`. */
class Entity = raw::Function;
/** Alias for the JavaScript library's `Location` class. */
class Location = raw::Location;
/**
 * Alias for the JavaScript library's `Function` class.
 *
 * Exposed as a tool for defining anchors for semantic search.
 */
class UnderlyingFunction = raw::Function;
/**
 * Holds if `entity` should be left out of ATM.
 *
 * Two kinds of entity are ignored: those with no body at all, and those whose body is
 * empty and which return no expression.
 */
predicate isEntityIgnored(Entity entity) {
  // Empty bodies, for example the JavaScript function () => {}.
  entity.getNumBodyStmt() = 0 and not exists(entity.getAReturnedExpr())
  or
  // No definition at all, for example declarations in TypeScript declaration files.
  not exists(entity.getBody())
}
/** Wraps every raw JavaScript AST node in an algebraic datatype branch. */
newtype WrappedAstNode = TAstNode(RawAstNode rawNode)

/**
 * A node of the AST, wrapping the underlying JavaScript `ASTNode`.
 */
class AstNode extends TAstNode {
  RawAstNode rawNode;

  AstNode() { TAstNode(rawNode) = this }

  /** Gets the wrapper of a child of the underlying node. */
  AstNode getAChildNode() { TAstNode(rawNode.getAChild()) = result }

  /** Gets the wrapper of the parent of the underlying node. */
  AstNode getParentNode() { TAstNode(rawNode.getParent()) = result }

  /**
   * Gets the `index`th attribute of this node.
   *
   * The index carries no meaning and exists only for disambiguation; no guarantee is made
   * about which indices are used. This implementation only ever uses index 0.
   */
  string astNodeAttribute(int index) {
    index = 0 and
    (
      // NB: Unary and binary operator expressions e.g. -a, a + b and compound
      // assignments e.g. a += b can be identified by the expression type.
      exists(raw::Identifier id | id = rawNode | result = id.getName())
      or
      // Computed property accesses for which we can predetermine the property being accessed.
      // NB: May alias with operators e.g. could have '+' as a property name.
      exists(raw::IndexExpr access | access = rawNode | result = access.getPropertyName())
      or
      // `getRawValue` keeps `0xa`, `0xA`, and `10` distinct.
      exists(raw::NumberLiteral num | num = rawNode | result = num.getRawValue())
      or
      // `getValue` (not `getRawValue`) so that `"a"` and `'a'` share one representation.
      exists(raw::Literal lit | lit = rawNode |
        result = lit.getValue() and
        not rawNode instanceof raw::NumberLiteral
      )
      or
      exists(raw::TemplateElement elem | elem = rawNode | result = elem.getRawValue())
    )
  }

  /**
   * Gets a string naming the "type" of this node: a table-specific prefix followed, where
   * the table has one, by the node's integer kind.
   */
  string astNodeType() {
    // This definition should stay in step with the `@ast_node` entry in the dbscheme.
    exists(int kind | exprs(rawNode, kind, _, _, _) | result = "js_exprs." + kind)
    or
    exists(int kind | properties(rawNode, _, _, kind, _) | result = "js_properties." + kind)
    or
    exists(int kind | stmts(rawNode, kind, _, _, _) | result = "js_stmts." + kind)
    or
    rawNode instanceof raw::TopLevel and result = "js_toplevel"
    or
    exists(int kind | typeexprs(rawNode, kind, _, _, _) | result = "js_typeexprs." + kind)
  }

  /**
   * Gets the `index`th child of this node, for some arbitrary indexing.
   * A node without a parent is its own child at index 0.
   *
   * Neither the visiting order nor any particular index is meant to be meaningful, but
   * `(parent, index)` should be a keyset. Children are ranked by location and then by
   * node type.
   */
  pragma[nomagic]
  AstNode astNodeChild(int index) {
    // Root case: a parentless node is reported as its own child.
    index = 0 and result = this and not exists(result.getParentNode())
    or
    result =
      rank[index - 1](AstNode kid, raw::Location loc |
        kid = this.getAChildNode() and loc = kid.getLocation()
      |
        kid
        order by
          loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn(),
          kid.astNodeType()
      )
  }

  /** Gets the location of the underlying node. */
  raw::Location getLocation() { rawNode.getLocation() = result }

  /** Gets the textual representation of the underlying node. */
  string toString() { rawNode.toString() = result }

  /**
   * Holds if this node is the name node of `entity`: it is the lowest-indexed child of
   * `entity` and is a variable declaration whose name is a name of `entity`.
   */
  predicate isEntityNameNode(Entity entity) {
    this.getParentNode() = TAstNode(entity) and
    rawNode.(raw::VarDecl).getName() = entity.getName() and
    exists(int position |
      // An entity name node must be the first child of the entity.
      position = min(int candidate | exists(this.getParentNode().astNodeChild(candidate))) and
      this.getParentNode().astNodeChild(position) = this
    )
  }
}
/**
 * Gets the `index`th child of the entity node `parent`. Such a child is a root of an AST
 * associated with that entity.
 */
AstNode entityChild(AstNode parent, int index) {
  // JavaScript entities are ordinary AST nodes, so the AST child relation is reused as is.
  parent.astNodeChild(index) = result
}
/**
 * Holds if `node` lies inside `entity`, meaning `entity` is `node` itself or one of its
 * transitive parents, and `node` is not the name node of `entity`.
 *
 * Because entities can nest, one node may be contained in several entities.
 */
predicate entityContains(Entity entity, AstNode node) {
  not node.isEntityNameNode(entity) and
  TAstNode(entity) = node.getParentNode*()
}
/**
 * Gets the name used for `entity`.
 *
 * - A named entity gets its lexically smallest name.
 * - An unnamed entity gets an approximate name when one can be derived (see
 *   `getApproximateNameForEntity`), for instance when it is passed to a likely external
 *   library function.
 * - Otherwise the name is `""`, so that the entity is still included in
 *   `AdaptiveThreatModeling.qll`.
 */
string getEntityName(Entity entity) {
  // https://github.com/github/ml-ql-adaptive-threat-modeling/issues/244 discusses making use
  // of all the names during training.
  // `min` over no names has no result, so this case only applies to named entities.
  result = min(entity.getName())
  or
  not exists(entity.getName()) and
  (
    result = getApproximateNameForEntity(entity)
    or
    not exists(getApproximateNameForEntity(entity)) and result = ""
  )
}
/**
 * Holds if the call `call` has `entity` as its `argumentIndex`th argument.
 *
 * The relation is established through zero or more local flow steps between the argument
 * node and the data flow node of `entity`.
 *
 * NOTE(review): `localFlowStep*` is applied with the call argument first and
 * `entity.flow()` second. If the first parameter is the flow predecessor, this matches
 * flow from the argument to the entity (plus the reflexive case where both are the same
 * node) rather than from the entity to the argument -- confirm the direction is intended.
 */
private predicate entityUsedAsArgumentToCall(
Entity entity, raw::DataFlow::CallNode call, int argumentIndex
) {
raw::DataFlow::localFlowStep*(call.getArgument(argumentIndex), entity.flow())
}
/**
 * Gets a synthesised name for `entity`, chosen so that entities sharing a name tend to
 * behave alike.
 *
 * A name is only produced when `entity` is used as an argument in exactly one
 * (call, argument index) pair. It has the form `<receiver>.<callee>#functionalargument`;
 * the `<receiver>.` prefix is dropped unless the call has exactly one receiver name.
 */
private string getApproximateNameForEntity(Entity entity) {
  exists(raw::DataFlow::CallNode use, string prefix |
    entityUsedAsArgumentToCall(entity, use, _) and
    1 = count(raw::DataFlow::CallNode c, int i | entityUsedAsArgumentToCall(entity, c, i)) and
    (
      count(getReceiverName(use)) = 1 and prefix = getReceiverName(use) + "."
      or
      count(getReceiverName(use)) != 1 and prefix = ""
    ) and
    result = prefix + use.getCalleeName() + "#functionalargument"
  )
}
/** Gets the variable name of the receiver of `call`, if the receiver is a variable access. */
private string getReceiverName(raw::DataFlow::CallNode call) {
  exists(raw::VarAccess receiver |
    receiver = call.getReceiver().asExpr() and
    result = receiver.getName()
  )
}
/** Consistency checks: each of these predicates is expected to have no results. */
module Consistency {
  /** Reports entities to which `getEntityName` assigns more than one name. */
  query predicate entityWithManyNames(Entity entity, string name) {
    1 < count(getEntityName(entity)) and
    name = getEntityName(entity)
  }

  /** Reports nodes that have no node type. */
  query predicate nodeWithNoType(AstNode node) {
    not exists(string type | type = node.astNodeType())
  }

  /** Reports nodes that have more than one node type. */
  query predicate nodeWithManyTypes(AstNode node, string type) {
    1 < count(node.astNodeType()) and
    type = node.astNodeType()
  }

  /** Reports nodes, other than module nodes, that are not the child of any node. */
  query predicate nodeWithNoParent(AstNode node, string type) {
    type = node.astNodeType() and
    not exists(AstNode parent | node = parent.astNodeChild(_)) and
    not exists(RawAstNode underlying |
      underlying instanceof raw::Module and node = TAstNode(underlying)
    )
  }

  /** Reports (parent, index) pairs that identify more than one child. */
  query predicate duplicateChildIndex(AstNode parent, int index, AstNode child) {
    1 < count(parent.astNodeChild(index)) and
    child = parent.astNodeChild(index)
  }

  /** Reports (node, index) pairs that identify more than one attribute. */
  query predicate duplicateAttributeIndex(AstNode node, int index) {
    1 < count(node.astNodeAttribute(index)) and
    exists(node.astNodeAttribute(index))
  }
}
}
module Wrapped {
/*
 * Every node that carries attributes is required to be a leaf. When a non-leaf node
 * has an attribute, a synthetic leaf node is created to carry that attribute instead.
 */

/**
 * Holds if the AST node `e` is a leaf, i.e. it has no child at any index.
 */
private predicate isLeaf(Raw::AstNode e) { not exists(int i | exists(e.astNodeChild(i))) }
/**
 * Wraps raw entities that contain at least one AST node and whose file has a relative
 * path.
 */
newtype WrappedEntity =
  TEntity(Raw::Entity entity) {
    Raw::entityContains(entity, _) and
    exists(entity.getLocation().getFile().getRelativePath())
  }

/**
 * An entity for which embeddings are considered: a wrapped raw entity that is not
 * ignored by `Raw::isEntityIgnored`.
 */
class Entity extends WrappedEntity {
  Raw::Entity rawEntity;

  Entity() {
    not Raw::isEntityIgnored(rawEntity) and
    this = TEntity(rawEntity)
  }

  /** Gets the name assigned to this entity by `Raw::getEntityName`. */
  string getName() { Raw::getEntityName(rawEntity) = result }

  /** Gets the `index`th AST root of this entity, wrapped together with the entity. */
  AstNode getAstRoot(int index) {
    exists(Raw::AstNode rawRoot |
      rawRoot = Raw::entityChild(Raw::TAstNode(rawEntity), index) and
      result = TAstNode(rawEntity, rawRoot)
    )
  }

  /** Gets the textual representation of the underlying entity. */
  string toString() { rawEntity.toString() = result }

  /** Gets the location of the underlying entity. */
  Raw::Location getLocation() { rawEntity.getLocation() = result }

  /** Gets the function that this entity wraps. */
  Raw::UnderlyingFunction getDefinedFunction() { rawEntity = result }
}
/**
 * The AST nodes exposed by this module.
 *
 * - `TAstNode(enclosingEntity, node)`: a raw AST node together with an entity that
 *   contains it.
 * - `TSyntheticNode(enclosingEntity, node, syntheticChildIndex, attrIndex)`: a synthetic
 *   leaf created for attribute `attrIndex` of the non-leaf node `node`. It is exposed as
 *   an extra child of `node` at index `syntheticChildIndex`.
 */
newtype WrappedAstNode =
  TAstNode(Raw::Entity enclosingEntity, Raw::AstNode node) {
    Raw::entityContains(enclosingEntity, node)
  } or
  TSyntheticNode(
    Raw::Entity enclosingEntity, Raw::AstNode node, int syntheticChildIndex, int attrIndex
  ) {
    Raw::entityContains(enclosingEntity, node) and
    exists(node.astNodeAttribute(attrIndex)) and
    // Synthetic nodes are only needed for non-leaf nodes. `not isLeaf(node)` means `node`
    // has at least one child, so the `max` aggregate below always has a value.
    not isLeaf(node) and
    // Place synthetic children directly after the largest real child index, keeping the
    // relative order of the attribute indices.
    syntheticChildIndex =
      attrIndex - min(int other | exists(node.astNodeAttribute(other))) +
        max(int other | exists(node.astNodeChild(other))) + 1
  }
/**
 * Gets the child at `index` of `parent` within `enclosingEntity`: either a wrapped real
 * child of `parent`, or a synthetic node created for `parent` at that child index.
 */
pragma[nomagic]
private AstNode injectedChild(Raw::Entity enclosingEntity, Raw::AstNode parent, int index) {
  result = TSyntheticNode(enclosingEntity, parent, index, _)
  or
  exists(Raw::AstNode rawChild |
    rawChild = parent.astNodeChild(index) and
    result = TAstNode(enclosingEntity, rawChild)
  )
}
/**
 * An AST node, real or synthetic, paired with an entity that contains it. Nodes whose
 * enclosing entity is ignored by `Raw::isEntityIgnored` are excluded.
 */
class AstNode extends WrappedAstNode {
  Raw::Entity enclosingEntity;
  Raw::AstNode rawNode;

  AstNode() {
    not Raw::isEntityIgnored(enclosingEntity) and
    (
      this = TSyntheticNode(enclosingEntity, rawNode, _, _)
      or
      this = TAstNode(enclosingEntity, rawNode)
    )
  }

  /**
   * Gets the `index`th attribute of this node, unless that attribute has been moved onto
   * a synthetic node.
   */
  string getAttribute(int index) {
    not exists(TSyntheticNode(enclosingEntity, rawNode, _, index)) and
    result = rawNode.astNodeAttribute(index)
  }

  /** Gets the node type of the underlying raw node. */
  string getType() { rawNode.astNodeType() = result }

  /** Gets the child at `index`, which may be a real or a synthetic node. */
  AstNode getChild(int index) { injectedChild(enclosingEntity, rawNode, index) = result }

  /** Gets a textual representation of this node, which is its type. */
  string toString() { result = this.getType() }

  /** Gets the location of the underlying raw node. */
  Raw::Location getLocation() { rawNode.getLocation() = result }
}
/**
 * A synthetic leaf node that carries one attribute of a non-leaf AST node.
 */
class SyntheticAstNode extends AstNode, TSyntheticNode {
  int childIndex;
  int attributeIndex;

  SyntheticAstNode() {
    TSyntheticNode(enclosingEntity, rawNode, childIndex, attributeIndex) = this
  }

  /** Gets the single attribute carried by this node, at its original attribute index. */
  override string getAttribute(int index) {
    index = attributeIndex and
    result = rawNode.astNodeAttribute(index)
  }

  /** Gets the type of the underlying node, suffixed with a marker naming the child index. */
  override string getType() {
    exists(string baseType | baseType = rawNode.astNodeType() |
      result = baseType + "::<synthetic " + childIndex + ">"
    )
  }

  /** Synthetic nodes are leaves and have no children. */
  override AstNode getChild(int index) { none() }
}
}
module DatabaseFeatures {
/**
 * Alias for `Raw::UnderlyingFunction`.
 *
 * Exposed as a tool for defining anchors for semantic search.
 */
class UnderlyingFunction = Raw::UnderlyingFunction;
/** Module-private alias for `Raw::Location`. */
private class Location = Raw::Location;
/** The union of wrapped entities and wrapped AST nodes. */
private newtype TEntityOrAstNode =
TEntity(Wrapped::Entity entity) or
TAstNode(Wrapped::AstNode astNode)
/**
 * Either an entity or an AST node.
 *
 * This is the common supertype of `Entity` and `AstNode` below, which implement the
 * abstract member predicates declared here.
 */
class EntityOrAstNode extends TEntityOrAstNode {
/** Gets a string describing the type of this element. */
abstract string getType();
/** Gets a textual representation of this element. */
abstract string toString();
/** Gets the location of this element. */
abstract Location getLocation();
}
/** An entity (a JavaScript function) exposed as a database feature. */
class Entity extends EntityOrAstNode, TEntity {
  Wrapped::Entity entity;

  Entity() { TEntity(entity) = this }

  /** Gets the name of the wrapped entity. */
  string getName() { entity.getName() = result }

  /** Gets the `index`th AST root of this entity. */
  AstNode getAstRoot(int index) {
    exists(Wrapped::AstNode root |
      root = entity.getAstRoot(index) and
      result = TAstNode(root)
    )
  }

  /** Gets the entity type, which is always `javascript function`. */
  override string getType() { result = "javascript function" }

  /** Gets a textual representation made of `Entity: ` followed by the entity name. */
  override string toString() { result = "Entity: " + this.getName() }

  /** Gets the location of the wrapped entity. */
  override Location getLocation() { entity.getLocation() = result }

  /** Gets the function that this entity wraps. */
  UnderlyingFunction getDefinedFunction() { entity.getDefinedFunction() = result }
}
/** An AST node exposed as a database feature. */
class AstNode extends EntityOrAstNode, TAstNode {
  Wrapped::AstNode rawNode;

  AstNode() { TAstNode(rawNode) = this }

  /** Gets the child of this node at `index`. */
  AstNode getChild(int index) {
    exists(Wrapped::AstNode wrappedChild |
      wrappedChild = rawNode.getChild(index) and
      result = TAstNode(wrappedChild)
    )
  }

  /** Gets the `index`th attribute of this node. */
  string getAttribute(int index) { rawNode.getAttribute(index) = result }

  /** Gets the type of the wrapped node. */
  override string getType() { rawNode.getType() = result }

  /** Gets a textual representation of this node, which is its type. */
  override string toString() { this.getType() = result }

  /** Gets the location of the wrapped node. */
  override Location getLocation() { rawNode.getLocation() = result }
}
/** Consistency checks: each of these predicates is expected to have no results. */
module Consistency {
  /** Reports attributes that are attached to a node which also has a child. */
  query predicate nonLeafAttribute(AstNode node, int index, string attribute) {
    exists(int childIndex | exists(node.getChild(childIndex))) and
    node.getAttribute(index) = attribute
  }
}
/**
 * Holds if `entity` has name `entity_name` and type `entity_type`, lies in the file with
 * relative path `path` and absolute path `absolutePath`, and spans the given start and
 * end line/column positions.
 */
query predicate entities(
  Entity entity, string entity_name, string entity_type, string path, int startLine,
  int startColumn, int endLine, int endColumn, string absolutePath
) {
  exists(Location loc |
    loc = entity.getLocation() and
    loc.hasLocationInfo(_, startLine, startColumn, endLine, endColumn) and
    loc.getFile().getRelativePath() = path and
    loc.getFile().getAbsolutePath() = absolutePath
  ) and
  entity.getType() = entity_type and
  entity.getName() = entity_name
}
/**
 * Holds if `node`, of type `node_type`, is the `index`th child of `parent` within
 * `enclosingEntity`.
 *
 * `parent` is either the entity itself (when `node` is one of its AST roots) or an AST
 * node that is already part of this relation for the same entity.
 */
query predicate astNodes(
  Entity enclosingEntity, EntityOrAstNode parent, int index, AstNode node, string node_type
) {
  node_type = node.getType() and
  (
    // Base case: the AST roots of the entity.
    parent = enclosingEntity and
    node = enclosingEntity.getAstRoot(index)
    or
    // Recursive case: children of nodes that have already been reached.
    astNodes(enclosingEntity, _, _, parent, _) and
    node = parent.(AstNode).getChild(index)
  )
}
/** Holds if `attr` is an attribute, at any index, of the extracted AST node `node`. */
query predicate nodeAttributes(AstNode node, string attr) {
  exists(int attrIndex | attr = node.getAttribute(attrIndex)) and
  // Restrict to AST nodes that are actually extracted. This leaves out nodes in
  // standard libraries, whose files are located outside the source root.
  astNodes(_, _, _, node, _)
}
}

View File

@@ -0,0 +1,208 @@
/*
* For internal use only.
*
* Provides predicates that expose the knowledge of models
* in the core CodeQL JavaScript libraries.
*/
private import javascript
private import semmle.javascript.security.dataflow.XxeCustomizations
private import semmle.javascript.security.dataflow.RemotePropertyInjectionCustomizations
private import semmle.javascript.security.dataflow.TypeConfusionThroughParameterTamperingCustomizations
private import semmle.javascript.security.dataflow.ZipSlipCustomizations
private import semmle.javascript.security.dataflow.TaintedPathCustomizations
private import semmle.javascript.security.dataflow.CleartextLoggingCustomizations
private import semmle.javascript.security.dataflow.XpathInjectionCustomizations
private import semmle.javascript.security.dataflow.Xss::Shared as Xss
private import semmle.javascript.security.dataflow.StackTraceExposureCustomizations
private import semmle.javascript.security.dataflow.ClientSideUrlRedirectCustomizations
private import semmle.javascript.security.dataflow.CodeInjectionCustomizations
private import semmle.javascript.security.dataflow.RequestForgeryCustomizations
private import semmle.javascript.security.dataflow.CorsMisconfigurationForCredentialsCustomizations
private import semmle.javascript.security.dataflow.ShellCommandInjectionFromEnvironmentCustomizations
private import semmle.javascript.security.dataflow.DifferentKindsComparisonBypassCustomizations
private import semmle.javascript.security.dataflow.CommandInjectionCustomizations
private import semmle.javascript.security.dataflow.PrototypePollutionCustomizations
private import semmle.javascript.security.dataflow.UnvalidatedDynamicMethodCallCustomizations
private import semmle.javascript.security.dataflow.TaintedFormatStringCustomizations
private import semmle.javascript.security.dataflow.NosqlInjectionCustomizations
private import semmle.javascript.security.dataflow.PostMessageStarCustomizations
private import semmle.javascript.security.dataflow.RegExpInjectionCustomizations
private import semmle.javascript.security.dataflow.SqlInjectionCustomizations
private import semmle.javascript.security.dataflow.InsecureRandomnessCustomizations
private import semmle.javascript.security.dataflow.XmlBombCustomizations
private import semmle.javascript.security.dataflow.InsufficientPasswordHashCustomizations
private import semmle.javascript.security.dataflow.HardcodedCredentialsCustomizations
private import semmle.javascript.security.dataflow.FileAccessToHttpCustomizations
private import semmle.javascript.security.dataflow.UnsafeDynamicMethodAccessCustomizations
private import semmle.javascript.security.dataflow.UnsafeDeserializationCustomizations
private import semmle.javascript.security.dataflow.HardcodedDataInterpretedAsCodeCustomizations
private import semmle.javascript.security.dataflow.ServerSideUrlRedirectCustomizations
private import semmle.javascript.security.dataflow.IndirectCommandInjectionCustomizations
private import semmle.javascript.security.dataflow.ConditionalBypassCustomizations
private import semmle.javascript.security.dataflow.HttpToFileAccessCustomizations
private import semmle.javascript.security.dataflow.BrokenCryptoAlgorithmCustomizations
private import semmle.javascript.security.dataflow.LoopBoundInjectionCustomizations
private import semmle.javascript.security.dataflow.CleartextStorageCustomizations
import FilteringReasons
/**
 * Holds if `n` is an argument of an invocation that has a known library sink among its
 * arguments. `n` may be that sink itself or one of its sibling arguments.
 */
predicate isArgumentToKnownLibrarySinkFunction(DataFlow::Node n) {
  exists(DataFlow::InvokeNode invocation |
    n = invocation.getAnArgument() and
    isKnownLibrarySink(invocation.getAnArgument())
  )
}
/**
 * Holds if `n` is a known sink for the external API security query.
 *
 * These are the known sinks of the security queries whose sources include remote flow
 * and DOM-based sources. The cases below are listed alphabetically.
 */
predicate isKnownExternalAPIQuerySink(DataFlow::Node n) {
  n instanceof CleartextStorage::Sink
  or
  n instanceof ClientSideUrlRedirect::Sink
  or
  n instanceof CodeInjection::Sink
  or
  n instanceof CommandInjection::Sink
  or
  n instanceof CorsMisconfigurationForCredentials::Sink
  or
  n instanceof HttpToFileAccess::Sink
  or
  n instanceof NosqlInjection::Sink
  or
  n instanceof PostMessageStar::Sink
  or
  n instanceof PrototypePollution::Sink
  or
  n instanceof RegExpInjection::Sink
  or
  n instanceof RequestForgery::Sink
  or
  n instanceof ServerSideUrlRedirect::Sink
  or
  n instanceof SqlInjection::Sink
  or
  n instanceof TaintedFormatString::Sink
  or
  n instanceof TaintedPath::Sink
  or
  n instanceof UnsafeDeserialization::Sink
  or
  n instanceof UnvalidatedDynamicMethodCall::Sink
  or
  n instanceof XmlBomb::Sink
  or
  n instanceof XpathInjection::Sink
  or
  n instanceof Xss::Sink
  or
  n instanceof Xxe::Sink
  or
  n instanceof ZipSlip::Sink
}
/**
 * Holds if `n` is a known sink in a modeled library: either a known sink for the external
 * API query, or a sink of one of the additional security queries listed below.
 */
predicate isKnownLibrarySink(DataFlow::Node n) {
  n instanceof CleartextLogging::Sink
  or
  n instanceof FileAccessToHttp::Sink
  or
  n instanceof IndirectCommandInjection::Sink
  or
  n instanceof InsecureRandomness::Sink
  or
  n instanceof ShellCommandInjectionFromEnvironment::Sink
  or
  n instanceof StackTraceExposure::Sink
  or
  isKnownExternalAPIQuerySink(n)
}
/**
 * Holds if `n` is the predecessor of some modeled flow step: an additional taint step, or
 * an additional data flow step in either its two-argument or four-argument form.
 */
predicate isKnownStepSrc(DataFlow::Node n) {
  exists(TaintTracking::AdditionalTaintStep taintStep | taintStep.step(n, _))
  or
  exists(DataFlow::AdditionalFlowStep flowStep |
    flowStep.step(n, _)
    or
    flowStep.step(n, _, _, _)
  )
}
/**
 * Holds if `n` is an argument to a function of a builtin object, and `reason` identifies
 * which of the cases below matched.
 *
 * Three cases are covered:
 *
 * 1. `n` is an argument to an invocation on an array creation or on one of a fixed list
 *    of builtin globals.
 * 2. `n` is an argument to a method call whose receiver is a constant string, number
 *    literal or boolean literal.
 * 3. `n` is an argument to a call whose callee name is in a fixed list of common builtin
 *    method names.
 */
private predicate isArgumentToBuiltinFunction(DataFlow::Node n, FilteringReason reason) {
// Case 1: the receiver is derived from an array creation or a builtin global.
exists(DataFlow::SourceNode builtin, DataFlow::SourceNode receiver, DataFlow::InvokeNode invk |
(
builtin instanceof DataFlow::ArrayCreationNode and
reason instanceof ArgumentToArrayReason
or
builtin =
DataFlow::globalVarRef([
"Map", "Set", "WeakMap", "WeakSet", "Number", "Object", "String", "Array", "Error",
"Math", "Boolean"
]) and
reason instanceof ArgumentToBuiltinGlobalVarRefReason
)
|
// The receiver is the builtin itself or an invocation of it ...
receiver = [builtin.getAnInvocation(), builtin] and
// ... and the call is an invocation of the receiver or of a property read on it.
invk = [receiver, receiver.getAPropertyRead()].getAnInvocation() and
invk.getAnArgument() = n
)
or
// Case 2: a method call on a primitive constant.
exists(Expr primitive, MethodCallExpr c |
primitive instanceof ConstantString or
primitive instanceof NumberLiteral or
primitive instanceof BooleanLiteral
|
c.calls(primitive, _) and
c.getAnArgument() = n.asExpr() and
reason instanceof ConstantReceiverReason
)
or
// Case 3: the callee name alone matches a common builtin method name.
exists(DataFlow::CallNode call |
call.getAnArgument() = n and
call.getCalleeName() =
[
"indexOf", "hasOwnProperty", "substring", "isDecimal", "decode", "encode", "keys", "shift",
"values", "forEach", "toString", "slice", "splice", "push", "isArray", "sort"
] and
reason instanceof BuiltinCallNameReason
)
}
/**
 * Holds if `n` matches one of the modeled cases below, and `reason` is the
 * `FilteringReason` associated with the case that matched.
 *
 * The cases are: arguments to builtin functions, arguments to lodash/underscore member
 * calls, nodes belonging to client requests, arguments to promise resolve/reject calls,
 * cryptographic keys and cryptographic operation inputs, and arguments to a range of
 * recognised calls (logging, timeouts, web storage, string tests, event registration and
 * dispatch, membership tests, file system and database accesses, DOM value references,
 * and calls to a last parameter named `next`).
 *
 * NOTE(review): judging by the `FilteringReason` parameter this is presumably used to
 * filter such nodes out of the endpoint candidates -- confirm against the callers.
 */
predicate isOtherModeledArgument(DataFlow::Node n, FilteringReason reason) {
isArgumentToBuiltinFunction(n, reason)
or
any(LodashUnderscore::Member m).getACall().getAnArgument() = n and
reason instanceof LodashUnderscoreArgumentReason
or
// Any argument of a client request, or its URL, host, or a data node.
exists(ClientRequest r |
r.getAnArgument() = n or n = r.getUrl() or n = r.getHost() or n = r.getADataNode()
) and
reason instanceof ClientRequestReason
or
// An argument to a call of a promise's resolve or reject parameter.
exists(PromiseDefinition p |
n = [p.getResolveParameter(), p.getRejectParameter()].getACall().getAnArgument()
) and
reason instanceof PromiseDefinitionReason
or
n instanceof CryptographicKey and reason instanceof CryptographicKeyReason
or
any(CryptographicOperation op).getInput().flow() = n and
reason instanceof CryptographicOperationFlowReason
or
// The remaining cases all require `n` to be an argument of the call `call`.
exists(DataFlow::CallNode call | n = call.getAnArgument() |
call.getCalleeName() = getAStandardLoggerMethodName() and
reason instanceof LoggerMethodReason
or
call.getCalleeName() = ["setTimeout", "clearTimeout"] and
reason instanceof TimeoutReason
or
call.getReceiver() = DataFlow::globalVarRef(["localStorage", "sessionStorage"]) and
reason instanceof ReceiverStorageReason
or
call instanceof StringOps::StartsWith and reason instanceof StringStartsWithReason
or
call instanceof StringOps::EndsWith and reason instanceof StringEndsWithReason
or
call instanceof StringOps::RegExpTest and reason instanceof StringRegExpTestReason
or
call instanceof EventRegistration and reason instanceof EventRegistrationReason
or
call instanceof EventDispatch and reason instanceof EventDispatchReason
or
call = any(MembershipCandidate c).getTest() and
reason instanceof MembershipCandidateTestReason
or
call instanceof FileSystemAccess and reason instanceof FileSystemAccessReason
or
call instanceof DatabaseAccess and reason instanceof DatabaseAccessReason
or
call = DOM::domValueRef() and reason instanceof DOMReason
or
// A call named `next` made through the last parameter of some function.
call.getCalleeName() = "next" and
exists(DataFlow::FunctionNode f | call = f.getLastParameter().getACall()) and
reason instanceof NextFunctionCallReason
)
}

View File

@@ -0,0 +1,290 @@
/*
* For internal use only.
*
* Extracts data about the database for use in adaptive threat modeling (ATM).
*/
import javascript
import CodeToFeatures
import EndpointScoring
/**
 * Gets the value of the token-based feature named `featureName` for the endpoint `endpoint`.
 *
 * This is a single string containing a space-separated list of tokens.
 *
 * Three groups of features are produced:
 *
 * - `enclosingFunctionName` and `enclosingFunctionBody`, taken from the representative
 *   entity of the endpoint;
 * - the call-based features from `getACallBasedTokenFeatureComponent`, with all components
 *   for the same feature name joined by spaces;
 * - `calleeAccessPath` and `calleeAccessPathWithStructuralInfo`, built from the access
 *   paths of API nodes whose inducing call has the endpoint as an argument.
 */
private string getTokenFeature(DataFlow::Node endpoint, string featureName) {
// Features for endpoints that are contained within a function.
exists(DatabaseFeatures::Entity entity | entity = getRepresentativeEntityForEndpoint(endpoint) |
// The name of the function that encloses the endpoint.
featureName = "enclosingFunctionName" and result = entity.getName()
or
// A feature containing natural language tokens from the function that encloses the endpoint in
// the order that they appear in the source code.
featureName = "enclosingFunctionBody" and
result = unique(string x | x = FunctionBodies::getBodyTokenFeatureForEntity(entity))
)
or
// Call-based features: only produced when at least one component exists, and then all
// components for this feature name are concatenated into one string.
exists(getACallBasedTokenFeatureComponent(endpoint, _, featureName)) and
result =
concat(DataFlow::CallNode call, string component |
component = getACallBasedTokenFeatureComponent(endpoint, call, featureName)
|
component, " "
)
or
// The access path of the function being called, both with and without structural info, if the
// function being called originates from an external API. For example, the endpoint here:
//
// ```js
// const mongoose = require('mongoose'),
// User = mongoose.model('User', null);
// User.findOne(ENDPOINT);
// ```
//
// would have a callee access path with structural info of
// `mongoose member model instanceorreturn member findOne instanceorreturn`, and a callee access
// path without structural info of `mongoose model findOne`.
//
// These features indicate that the callee comes from (reading the access path backwards) an
// instance of the `findOne` member of an instance of the `model` member of the `mongoose`
// external library.
exists(AccessPaths::Boolean includeStructuralInfo |
featureName =
"calleeAccessPath" +
any(string x | if includeStructuralInfo = true then x = "WithStructuralInfo" else x = "") and
result =
concat(API::Node node, string accessPath |
node.getInducingNode().(DataFlow::CallNode).getAnArgument() = endpoint and
accessPath = AccessPaths::getAccessPath(node, includeStructuralInfo)
|
accessPath, " "
)
)
}
/**
 * Gets one value of the call-related token feature `featureName` for the endpoint
 * `endpoint`, where `endpoint` is an argument of the call `call`.
 *
 * Several strings may be reported, each a space-separated list of tokens.
 *
 * **Technical details:** Because this predicate can have several values per endpoint and
 * feature name, its results have to be concatenated by the caller. Concatenating other
 * features, such as the function body tokens, is expensive, so this predicate is kept
 * apart from predicates like `FunctionBodies::getBodyTokenFeatureForEntity` to avoid
 * running that concatenation over them as well.
 */
private string getACallBasedTokenFeatureComponent(
  DataFlow::Node endpoint, DataFlow::CallNode call, string featureName
) {
  // All features here apply to endpoints that are an argument to a function call.
  call.getAnArgument() = endpoint and
  (
    // The name of the called function: `findOne` in `Artist.findOne(...)`.
    featureName = "calleeName" and
    result = call.getCalleeName()
    or
    // The name of the call's receiver: `Artist` in `Artist.findOne(...)`.
    featureName = "receiverName" and
    exists(VarRef receiver |
      receiver = call.getReceiver().asExpr() and
      result = receiver.getName()
    )
    or
    // The position of the endpoint among the arguments: 1 in `f(a, endpoint, b)`.
    featureName = "argumentIndex" and
    exists(int position |
      endpoint = call.getArgument(position) and
      result = position.toString()
    )
    or
    // The name of the external API that the called function originates from, if any.
    // For example, the endpoint here:
    //
    // ```js
    // const mongoose = require('mongoose'),
    //   User = mongoose.model('User', null);
    // User.findOne(ENDPOINT);
    // ```
    //
    // has a callee API name of `mongoose`.
    featureName = "calleeApiName" and
    result = getAnApiName(call)
  )
}
/** This module provides functionality for getting the function body feature associated with a particular entity. */
module FunctionBodies {
/**
 * Holds if `node` is an AST node within the entity `entity` and `token` is a node attribute
 * associated with `node`.
 *
 * `unique` only yields a value when `node` has exactly one attribute, so nodes with
 * several distinct attributes contribute no token.
 */
private predicate bodyTokens(
DatabaseFeatures::Entity entity, DatabaseFeatures::AstNode node, string token
) {
DatabaseFeatures::astNodes(entity, _, _, node, _) and
token = unique(string t | DatabaseFeatures::nodeAttributes(node, t))
}
/**
 * Gets the body token feature for the specified entity.
 *
 * This is a string containing natural language tokens in the order that they appear in the
 * source code for the entity, separated by single spaces. The result is the empty string
 * when the entity has more than 256 body tokens.
 */
string getBodyTokenFeatureForEntity(DatabaseFeatures::Entity entity) {
// If a function has more than 256 body subtokens, then featurize it as absent. This
// approximates the behavior of the classifier on non-generic body features where large body
// features are replaced by the absent token.
if count(DatabaseFeatures::AstNode node, string token | bodyTokens(entity, node, token)) > 256
then result = ""
else
result =
// Rank the tokens by the location of their node (then by the token text), and join
// them in rank order.
concat(int i, string rankedToken |
rankedToken =
rank[i](DatabaseFeatures::AstNode node, string token, Location l |
bodyTokens(entity, node, token) and l = node.getLocation()
|
token
order by
l.getFile().getAbsolutePath(), l.getStartLine(), l.getStartColumn(), l.getEndLine(),
l.getEndColumn(), token
)
|
rankedToken, " " order by i
)
}
}
/**
 * Gets the name of a module import from which `node` can be reached in the API graph,
 * i.e. `node` is the inducing node of the import's API node or of one of its transitive
 * successors.
 *
 * There may be several results when `node` corresponds to several nodes in the API graph
 * forest.
 */
pragma[inline]
private string getAnApiName(DataFlow::Node node) {
  exists(API::Node root | root = API::moduleImport(result) |
    node = root.getASuccessor*().getInducingNode()
  )
}
/**
* This module provides functionality for getting a representation of the access path of nodes
* within the program.
*
* For example, it gives the `User.find` callee here:
*
* ```js
* const mongoose = require('mongoose'),
* User = mongoose.model('User', null);
* User.find({ 'isAdmin': true })
* ```
* the access path `mongoose member model instanceorreturn member find instanceorreturn`.
*
* This access path is based on the simplified access path that the untrusted data flowing to
* external API query associates to each of its sinks, with modifications to optionally include
* explicit structural information and to improve how well the path tokenizes.
*/
private module AccessPaths {
/** Holds if `str` can be converted to an integer, that is, `str.toInt()` has a result. */
bindingset[str]
private predicate isNumericString(string str) { exists(str.toInt()) }
/**
* Gets a parameter of `base` with name `name`, or a property named `name` of a destructuring parameter.
*
* The receiver of `base` is never returned as a named parameter.
*/
private API::Node getNamedParameter(API::Node base, string name) {
exists(API::Node param |
param = base.getAParameter() and
not param = base.getReceiver()
|
// An ordinary parameter: use its name as written in the source code.
result = param and
name = param.getAnImmediateUse().asExpr().(Parameter).getName()
or
// A destructuring parameter: use the member of the parameter named `name`.
param.getAnImmediateUse().asExpr() instanceof DestructuringPattern and
result = param.getMember(name)
)
}
/**
* A utility class that is equivalent to `boolean` but does not require type joining.
*/
class Boolean extends boolean {
Boolean() { this = true or this = false }
}
/**
* Gets the access path for the node. This includes structural information like `member`, `param`, and `functionalarg` if `includeStructuralInfo` is true.
*
* The path is a space-separated string that starts with the name of a module import and is
* extended by one step for each edge from a shallower API node `base` to `node`.
*/
string getAccessPath(API::Node node, Boolean includeStructuralInfo) {
// Base case: the access path of a module import is the name of the module.
node = API::moduleImport(result)
or
// Recursive case: extend the access path of a strictly shallower node `base`.
exists(API::Node base, string baseName |
base.getDepth() < node.getDepth() and baseName = getAccessPath(base, includeStructuralInfo)
|
// e.g. `new X`, `X()`
node = [base.getInstance(), base.getReturn()] and
if includeStructuralInfo = true
then result = baseName + " instanceorreturn"
else result = baseName
or
// e.g. `x.y`, `x[y]`, `const { y } = x`, where `y` is non-numeric and is known at analysis
// time.
exists(string member |
node = base.getMember(member) and
not node = base.getUnknownMember() and
not isNumericString(member) and
not (member = "default" and base = API::moduleImport(_)) and
not member = "then" // use the 'promised' edges for .then callbacks
|
if includeStructuralInfo = true
then result = baseName + " member " + member
else result = baseName + " " + member
)
or
// e.g. `x.y`, `x[y]`, `const { y } = x`, where `y` is numeric or not known at analysis time.
(
node = base.getUnknownMember() or
node = base.getMember(any(string s | isNumericString(s)))
) and
if includeStructuralInfo = true then result = baseName + " member" else result = baseName
or
// e.g. `x.then(y => ...)`
// Promised values do not add anything to the access path.
node = base.getPromised() and
result = baseName
or
// e.g. `x.y((a, b) => ...)`
// Name callback parameters after their name in the source code.
// For example, the `res` parameter in `express.get('/foo', (req, res) => {...})` will be
// named `express member get functionalarg param res`.
exists(string paramName |
node = getNamedParameter(base.getAParameter(), paramName) and
(
if includeStructuralInfo = true
then result = baseName + " functionalarg param " + paramName
else result = baseName + " " + paramName
)
or
// A callback that is a member `callbackName` of the argument at position `index` of `base`:
// the access path additionally records the argument index and the member name.
exists(string callbackName, string index |
node =
getNamedParameter(base.getASuccessor("param " + index).getMember(callbackName),
paramName) and
index != "-1" and // ignore receiver
if includeStructuralInfo = true
then
result =
baseName + " functionalarg " + index + " " + callbackName + " param " + paramName
else result = baseName + " " + index + " " + callbackName + " " + paramName
)
)
)
}
}
/** Gets the name of a supported generic token-based feature. */
private string getASupportedFeatureName() {
  // The names are grouped by prefix purely for readability; the set of names is unchanged.
  result = ["enclosingFunctionName", "enclosingFunctionBody"]
  or
  result = ["calleeName", "calleeApiName", "calleeAccessPath", "calleeAccessPathWithStructuralInfo"]
  or
  result = ["receiverName", "argumentIndex"]
}
/**
 * Generic token-based features for ATM.
 *
 * Holds if the generic token-based feature named `featureName` has the value `featureValue` for
 * the endpoint `endpoint`. Every supported feature name gets exactly one value per endpoint: the
 * value computed by `getTokenFeature` when that value is unique, and the empty string otherwise
 * (that is, when there is no value or more than one value).
 */
predicate tokenFeatures(DataFlow::Node endpoint, string featureName, string featureValue) {
  featureName = getASupportedFeatureName() and
  (
    if exists(unique(string v | v = getTokenFeature(endpoint, featureName)))
    then featureValue = unique(string v | v = getTokenFeature(endpoint, featureName))
    else featureValue = ""
  )
}

View File

@@ -0,0 +1,223 @@
/*
* For internal use only.
*
* Provides an implementation of scoring alerts for use in adaptive threat modeling (ATM).
*/
private import javascript
import BaseScoring
import CodeToFeatures
import EndpointFeatures as EndpointFeatures
import EndpointTypes
/**
 * Gets the first column of a row of `adaptiveThreatModelingModels` whose second column is
 * `"javascript"` and whose fourth column is `"atm-endpoint-scoring"`.
 *
 * NOTE(review): presumably the columns are (checksum, language, name, model type) - confirm
 * against the declaration of `adaptiveThreatModelingModels`.
 */
private string getACompatibleModelChecksum() {
  exists(string language, string kind |
    language = "javascript" and
    kind = "atm-endpoint-scoring"
  |
    adaptiveThreatModelingModels(result, language, _, kind)
  )
}
/**
* Gets the maximum number of AST nodes an entity containing an endpoint should have before we
* should choose a smaller entity to represent the endpoint.
*
* This is intended to represent a balance in terms of the amount of context we provide to the
* model: we don't want the function to be too small, because then it doesn't contain very much
* context and misses useful information, but also we don't want it to be too large, because then
* there's likely to be a lot of irrelevant or very loosely related context.
*/
private int getMaxNumAstNodes() { result = 1024 }
/**
 * Gets the number of AST nodes contained within the specified entity.
 *
 * This is only defined for entities that contain at least one endpoint.
 */
private int getNumAstNodesInEntity(DatabaseFeatures::Entity entity) {
  result =
    count(DatabaseFeatures::AstNode contained |
      DatabaseFeatures::astNodes(entity, _, _, contained, _)
    |
      contained
    ) and
  // Restrict the values `entity` can take on
  entity = EndpointToEntity::getAnEntityForEndpoint(_)
}
/**
* Gets a single entity to use as the representative entity for the endpoint.
*
* We try to use the largest entity containing the endpoint whose number of AST nodes is at most
* the limit defined in `getMaxNumAstNodes`. In the event of a tie, we use the entity that appears
* first within the source archive, comparing start line, start column, end line and end column of
* the entity's location in that order.
*
* If no entity is within the AST node limit, then we use the smallest entity containing
* the endpoint, with the same tie-breaking rule.
*/
DatabaseFeatures::Entity getRepresentativeEntityForEndpoint(DataFlow::Node endpoint) {
// Check whether there's an entity containing the endpoint that's within the AST node limit.
if
getNumAstNodesInEntity(EndpointToEntity::getAnEntityForEndpoint(endpoint)) <=
getMaxNumAstNodes()
then
// Use the largest entity within the AST node limit, resolving ties using the entity that
// appears first in the source archive.
result =
rank[1](DatabaseFeatures::Entity entity, int numAstNodes, Location l |
entity = EndpointToEntity::getAnEntityForEndpoint(endpoint) and
numAstNodes = getNumAstNodesInEntity(entity) and
numAstNodes <= getMaxNumAstNodes() and
l = entity.getLocation()
|
entity
order by
numAstNodes desc, l.getStartLine(), l.getStartColumn(), l.getEndLine(), l.getEndColumn()
)
else
// Use the smallest entity, resolving ties using the entity that
// appears first in the source archive.
result =
rank[1](DatabaseFeatures::Entity entity, int numAstNodes, Location l |
entity = EndpointToEntity::getAnEntityForEndpoint(endpoint) and
numAstNodes = getNumAstNodesInEntity(entity) and
l = entity.getLocation()
|
entity
order by
numAstNodes, l.getStartLine(), l.getStartColumn(), l.getEndLine(), l.getEndColumn()
)
}
/**
* Provides the predicates that are passed to the `scoreEndpoints` higher-order predicate, and the
* resulting `endpointScores` relation.
*/
module ModelScoring {
/** Holds if `endpoint` is an effective source or an effective sink of the configuration `getCfg()`. */
predicate endpoints(DataFlow::Node endpoint) {
getCfg().isEffectiveSource(endpoint) or
getCfg().isEffectiveSink(endpoint)
}
/** Gets the encoding of any `EndpointType`. */
private int requestedEndpointTypes() { result = any(EndpointType type).getEncoding() }
/** Holds if `tokenFeatures(endpoint, featureName, featureValue)` holds and `endpoint` is one of the `endpoints`. */
private predicate relevantTokenFeatures(
DataFlow::Node endpoint, string featureName, string featureValue
) {
endpoints(endpoint) and
EndpointFeatures::tokenFeatures(endpoint, featureName, featureValue)
}
/**
* Holds if `scoreEndpoints`, instantiated with the predicates of this module and
* `getACompatibleModelChecksum`, relates `endpoint`, `encodedEndpointType` and `score`.
*
* NOTE(review): presumably `score` is the model's score for `endpoint` being of the endpoint type
* encoded by `encodedEndpointType` - confirm against the definition of `scoreEndpoints`.
*/
predicate endpointScores(DataFlow::Node endpoint, int encodedEndpointType, float score) =
scoreEndpoints(endpoints/1, requestedEndpointTypes/0, relevantTokenFeatures/3,
getACompatibleModelChecksum/0)(endpoint, encodedEndpointType, score)
}
/**
 * Gets ATM's confidence that `source` is a source for the given security query. This will be a
 * number between 0.0 and 1.0.
 *
 * Known sources always score 1.0. Any other source gets the score that the model assigns to it
 * for a source endpoint type of the configuration.
 */
private float getScoreForSource(DataFlow::Node source) {
  getCfg().isKnownSource(source) and
  result = 1.0
  or
  not getCfg().isKnownSource(source) and
  // This restriction on `source` has no semantic effect but improves performance.
  getCfg().isEffectiveSource(source) and
  ModelScoring::endpointScores(source, getCfg().getASourceEndpointType().getEncoding(), result)
}
/**
 * Gets ATM's confidence that `sink` is a sink for the given security query. This will be a
 * number between 0.0 and 1.0.
 *
 * The score is determined by the first of the following cases that applies:
 * 1. known sinks score 1.0;
 * 2. sinks with an overriding score use that overriding score;
 * 3. all other effective sinks use the score that the model assigns to them for a sink endpoint
 *    type of the configuration.
 */
private float getScoreForSink(DataFlow::Node sink) {
  if getCfg().isKnownSink(sink)
  then result = 1.0
  else
    // The condition must not mention `result`: `if A then B else C` is shorthand for
    // `(A and B) or (not A and C)`, so binding `result` in the condition would let the `else`
    // branch also hold for every model score that differs from the overriding score.
    if getCfg().isEffectiveSinkWithOverridingScore(sink, _, _)
    then getCfg().isEffectiveSinkWithOverridingScore(sink, result, _)
    else (
      // This restriction on `sink` has no semantic effect but improves performance.
      getCfg().isEffectiveSink(sink) and
      ModelScoring::endpointScores(sink, getCfg().getASinkEndpointType().getEncoding(), result)
    )
}
/**
* The scoring results that are based on endpoint scoring.
*
* This class only has a value if there is at least one compatible model checksum.
*/
class EndpointScoringResults extends ScoringResults {
EndpointScoringResults() {
this = "EndpointScoringResults" and exists(getACompatibleModelChecksum())
}
/**
* Get ATM's confidence that a path between `source` and `sink` represents a security
* vulnerability. This will be a number between 0.0 and 1.0.
*
* This is the product of the source score and the sink score.
*/
override float getScoreForFlow(DataFlow::Node source, DataFlow::Node sink) {
result = getScoreForSource(source) * getScoreForSink(sink)
}
/**
* Get a string representing why ATM included the given source in the dataflow analysis.
*
* In general, there may be multiple reasons why ATM included the given source, in which case
* this predicate should have multiple results.
*/
pragma[inline]
override string getASourceOrigin(DataFlow::Node source) {
result = "known" and getCfg().isKnownSource(source)
or
result = "predicted" and getCfg().isEffectiveSource(source)
}
/**
* Get a string representing why ATM included the given sink in the dataflow analysis.
*
* In general, there may be multiple reasons why ATM included the given sink, in which case
* this predicate should have multiple results.
*/
pragma[inline]
override string getASinkOrigin(DataFlow::Node sink) {
result = "known" and getCfg().isKnownSink(sink)
or
// A sink that is not known but has an overriding score: report the origin supplied alongside
// the overriding score.
not getCfg().isKnownSink(sink) and
getCfg().isEffectiveSinkWithOverridingScore(sink, _, result)
or
// Any other effective sink: report the scores for each endpoint type, ordered by encoding.
not getCfg().isKnownSink(sink) and
not getCfg().isEffectiveSinkWithOverridingScore(sink, _, _) and
result =
"predicted (scores: " +
concat(EndpointType type, float score |
ModelScoring::endpointScores(sink, type.getEncoding(), score)
|
type.getDescription() + "=" + score.toString(), ", " order by type.getEncoding()
) + ")" and
getCfg().isEffectiveSink(sink)
}
/**
* Holds if the flow from `source` to `sink` should be included in the results.
*
* Only `sink` is inspected: known sinks are always included, sinks with an overriding score are
* included if that score is at least the score cutoff of the configuration, and other effective
* sinks are included based on the endpoint scores, as described in the body.
*/
pragma[inline]
override predicate shouldResultBeIncluded(DataFlow::Node source, DataFlow::Node sink) {
if getCfg().isKnownSink(sink)
then any()
else
if getCfg().isEffectiveSinkWithOverridingScore(sink, _, _)
then
exists(float score |
getCfg().isEffectiveSinkWithOverridingScore(sink, score, _) and
score >= getCfg().getScoreCutoff()
)
else (
// This restriction on `sink` has no semantic effect but improves performance.
getCfg().isEffectiveSink(sink) and
exists(float sinkScore |
ModelScoring::endpointScores(sink, getCfg().getASinkEndpointType().getEncoding(),
sinkScore) and
// Include the endpoint if (a) the query endpoint type scores at least as high as all other
// endpoint types, or (b) the query endpoint type scores at least
// 0.5 - (getCfg().getScoreCutoff() / 2).
sinkScore >=
[
max(float s | ModelScoring::endpointScores(sink, _, s)),
0.5 - getCfg().getScoreCutoff() / 2
]
)
)
}
}
/** Provides query predicates that expose intermediate results of endpoint scoring. */
module Debugging {
/** An alias of `ModelScoring::endpoints`: the endpoints that are passed to `scoreEndpoints`. */
query predicate hopInputEndpoints = ModelScoring::endpoints/1;
/** An alias of `ModelScoring::endpointScores`. */
query predicate endpointScores = ModelScoring::endpointScores/3;
/**
* Holds if some `ScoringResults` says that the result from `source` to `sink` should be included
* and some data flow configuration has flow from `source` to `sink`.
*/
query predicate shouldResultBeIncluded(DataFlow::Node source, DataFlow::Node sink) {
any(ScoringResults scoringResults).shouldResultBeIncluded(source, sink) and
any(DataFlow::Configuration cfg).hasFlow(source, sink)
}
}

View File

@@ -0,0 +1,57 @@
/**
* For internal use only.
*
* Defines the set of classes that endpoint scoring models can predict. Endpoint scoring models must
* only predict classes defined within this file. This file is the source of truth for the integer
* representation of each of these classes.
*/
/** The algebraic datatype underlying `EndpointType`, with one branch per predictable class. */
newtype TEndpointType =
TNotASinkType() or
TXssSinkType() or
TNosqlInjectionSinkType() or
TSqlInjectionSinkType() or
TTaintedPathSinkType()
/** A class that can be predicted by endpoint scoring models. */
abstract class EndpointType extends TEndpointType {
  /** Gets the integer encoding of this endpoint type. */
  abstract int getEncoding();

  /** Gets the name of this endpoint type. */
  abstract string getDescription();

  /** Gets a textual representation of this endpoint type, which is its description. */
  string toString() { result = this.getDescription() }
}
/** The `NotASink` class (encoding 0) that can be predicted by endpoint scoring models. */
class NotASinkType extends EndpointType, TNotASinkType {
  override int getEncoding() { result = 0 }

  override string getDescription() { result = "NotASink" }
}
/** The `XssSink` class (encoding 1) that can be predicted by endpoint scoring models. */
class XssSinkType extends EndpointType, TXssSinkType {
  override int getEncoding() { result = 1 }

  override string getDescription() { result = "XssSink" }
}
/** The `NosqlInjectionSink` class (encoding 2) that can be predicted by endpoint scoring models. */
class NosqlInjectionSinkType extends EndpointType, TNosqlInjectionSinkType {
  override int getEncoding() { result = 2 }

  override string getDescription() { result = "NosqlInjectionSink" }
}
/** The `SqlInjectionSink` class (encoding 3) that can be predicted by endpoint scoring models. */
class SqlInjectionSinkType extends EndpointType, TSqlInjectionSinkType {
  override int getEncoding() { result = 3 }

  override string getDescription() { result = "SqlInjectionSink" }
}
/** The `TaintedPathSink` class (encoding 4) that can be predicted by endpoint scoring models. */
class TaintedPathSinkType extends EndpointType, TTaintedPathSinkType {
  override int getEncoding() { result = 4 }

  override string getDescription() { result = "TaintedPathSink" }
}

View File

@@ -0,0 +1,196 @@
/**
* For internal use only.
*
* Defines a set of reasons why a particular endpoint was filtered out. This set of reasons
* contains both reasons why an endpoint could be `NotASink` and reasons why an endpoint could be
* `LikelyNotASink`. The `NotASinkReason`s defined here are exhaustive, but the
* `LikelyNotASinkReason`s are not exhaustive.
*/
/** The algebraic datatype underlying `FilteringReason`, with one branch per filtering reason. */
newtype TFilteringReason =
TIsArgumentToBuiltinFunctionReason() or
TLodashUnderscoreArgumentReason() or
TClientRequestReason() or
TPromiseDefinitionReason() or
TCryptographicKeyReason() or
TCryptographicOperationFlowReason() or
TLoggerMethodReason() or
TTimeoutReason() or
TReceiverStorageReason() or
TStringStartsWithReason() or
TStringEndsWithReason() or
TStringRegExpTestReason() or
TEventRegistrationReason() or
TEventDispatchReason() or
TMembershipCandidateTestReason() or
TFileSystemAccessReason() or
TDatabaseAccessReason() or
TDOMReason() or
TNextFunctionCallReason() or
TArgumentToArrayReason() or
TArgumentToBuiltinGlobalVarRefReason() or
TConstantReceiverReason() or
TBuiltinCallNameReason()
/** A reason why a particular endpoint was filtered out by the endpoint filters. */
abstract class FilteringReason extends TFilteringReason {
  /** Gets the integer encoding of this filtering reason. */
  abstract int getEncoding();

  /** Gets the name of this filtering reason. */
  abstract string getDescription();

  /** Gets a textual representation of this filtering reason, which is its description. */
  string toString() { result = this.getDescription() }
}
/**
* A reason why a particular endpoint might be considered to be `NotASink`.
*
* An endpoint is `NotASink` if it has at least one `NotASinkReason`, it does not have any
* `LikelyNotASinkReason`s, and it is not a known sink.
*
* This class adds no members of its own; it only marks which filtering reasons are of this kind.
*/
abstract class NotASinkReason extends FilteringReason { }
/**
* A reason why a particular endpoint might be considered to be `LikelyNotASink`.
*
* An endpoint is `LikelyNotASink` if it has at least one `LikelyNotASinkReason` and it is not a
* known sink.
*
* This class adds no members of its own; it only marks which filtering reasons are of this kind.
*/
abstract class LikelyNotASinkReason extends FilteringReason { }
/** The `NotASinkReason` named `IsArgumentToBuiltinFunction`, with encoding 5. */
class IsArgumentToBuiltinFunctionReason extends NotASinkReason, TIsArgumentToBuiltinFunctionReason {
  override int getEncoding() { result = 5 }

  override string getDescription() { result = "IsArgumentToBuiltinFunction" }
}
/** The `NotASinkReason` named `LodashUnderscoreArgument`, with encoding 6. */
class LodashUnderscoreArgumentReason extends NotASinkReason, TLodashUnderscoreArgumentReason {
  override int getEncoding() { result = 6 }

  override string getDescription() { result = "LodashUnderscoreArgument" }
}
/** The `NotASinkReason` named `ClientRequest`, with encoding 7. */
class ClientRequestReason extends NotASinkReason, TClientRequestReason {
  override int getEncoding() { result = 7 }

  override string getDescription() { result = "ClientRequest" }
}
/** The `NotASinkReason` named `PromiseDefinition`, with encoding 8. */
class PromiseDefinitionReason extends NotASinkReason, TPromiseDefinitionReason {
  override int getEncoding() { result = 8 }

  override string getDescription() { result = "PromiseDefinition" }
}
/** The `NotASinkReason` named `CryptographicKey`, with encoding 9. */
class CryptographicKeyReason extends NotASinkReason, TCryptographicKeyReason {
  override int getEncoding() { result = 9 }

  override string getDescription() { result = "CryptographicKey" }
}
/** The `NotASinkReason` named `CryptographicOperationFlow`, with encoding 10. */
class CryptographicOperationFlowReason extends NotASinkReason, TCryptographicOperationFlowReason {
  override int getEncoding() { result = 10 }

  override string getDescription() { result = "CryptographicOperationFlow" }
}
/** The `NotASinkReason` named `LoggerMethod`, with encoding 11. */
class LoggerMethodReason extends NotASinkReason, TLoggerMethodReason {
  override int getEncoding() { result = 11 }

  override string getDescription() { result = "LoggerMethod" }
}
/** The `NotASinkReason` named `Timeout`, with encoding 12. */
class TimeoutReason extends NotASinkReason, TTimeoutReason {
  override int getEncoding() { result = 12 }

  override string getDescription() { result = "Timeout" }
}
/** The `NotASinkReason` named `ReceiverStorage`, with encoding 13. */
class ReceiverStorageReason extends NotASinkReason, TReceiverStorageReason {
  override int getEncoding() { result = 13 }

  override string getDescription() { result = "ReceiverStorage" }
}
/** The `NotASinkReason` named `StringStartsWith`, with encoding 14. */
class StringStartsWithReason extends NotASinkReason, TStringStartsWithReason {
  override int getEncoding() { result = 14 }

  override string getDescription() { result = "StringStartsWith" }
}
/** The `NotASinkReason` named `StringEndsWith`, with encoding 15. */
class StringEndsWithReason extends NotASinkReason, TStringEndsWithReason {
  override int getEncoding() { result = 15 }

  override string getDescription() { result = "StringEndsWith" }
}
/** The `NotASinkReason` named `StringRegExpTest`, with encoding 16. */
class StringRegExpTestReason extends NotASinkReason, TStringRegExpTestReason {
  override int getEncoding() { result = 16 }

  override string getDescription() { result = "StringRegExpTest" }
}
/** The `NotASinkReason` named `EventRegistration`, with encoding 17. */
class EventRegistrationReason extends NotASinkReason, TEventRegistrationReason {
  override int getEncoding() { result = 17 }

  override string getDescription() { result = "EventRegistration" }
}
/** The `NotASinkReason` named `EventDispatch`, with encoding 18. */
class EventDispatchReason extends NotASinkReason, TEventDispatchReason {
  override int getEncoding() { result = 18 }

  override string getDescription() { result = "EventDispatch" }
}
/** The `NotASinkReason` named `MembershipCandidateTest`, with encoding 19. */
class MembershipCandidateTestReason extends NotASinkReason, TMembershipCandidateTestReason {
  override int getEncoding() { result = 19 }

  override string getDescription() { result = "MembershipCandidateTest" }
}
/** The `NotASinkReason` named `FileSystemAccess`, with encoding 20. */
class FileSystemAccessReason extends NotASinkReason, TFileSystemAccessReason {
  override int getEncoding() { result = 20 }

  override string getDescription() { result = "FileSystemAccess" }
}
/** The `NotASinkReason` named `DatabaseAccess`, with encoding 21. */
class DatabaseAccessReason extends NotASinkReason, TDatabaseAccessReason {
  override int getEncoding() { result = 21 }

  override string getDescription() { result = "DatabaseAccess" }
}
/** The `NotASinkReason` named `DOM`, with encoding 22. */
class DOMReason extends NotASinkReason, TDOMReason {
  override int getEncoding() { result = 22 }

  override string getDescription() { result = "DOM" }
}
/** The `NotASinkReason` named `NextFunctionCall`, with encoding 23. */
class NextFunctionCallReason extends NotASinkReason, TNextFunctionCallReason {
  override int getEncoding() { result = 23 }

  override string getDescription() { result = "NextFunctionCall" }
}
/** The `LikelyNotASinkReason` named `ArgumentToArray`, with encoding 24. */
class ArgumentToArrayReason extends LikelyNotASinkReason, TArgumentToArrayReason {
  override int getEncoding() { result = 24 }

  override string getDescription() { result = "ArgumentToArray" }
}
/** The `LikelyNotASinkReason` named `ArgumentToBuiltinGlobalVarRef`, with encoding 25. */
class ArgumentToBuiltinGlobalVarRefReason extends LikelyNotASinkReason,
  TArgumentToBuiltinGlobalVarRefReason {
  override int getEncoding() { result = 25 }

  override string getDescription() { result = "ArgumentToBuiltinGlobalVarRef" }
}
/** The `NotASinkReason` named `ConstantReceiver`, with encoding 26. */
class ConstantReceiverReason extends NotASinkReason, TConstantReceiverReason {
  override int getEncoding() { result = 26 }

  override string getDescription() { result = "ConstantReceiver" }
}
/** The `NotASinkReason` named `BuiltinCallName`, with encoding 27. */
class BuiltinCallNameReason extends NotASinkReason, TBuiltinCallNameReason {
  override int getEncoding() { result = 27 }

  override string getDescription() { result = "BuiltinCallName" }
}

View File

@@ -0,0 +1,178 @@
/**
* For internal use only.
*
* Defines shared code used by the NoSQL injection boosted query.
*/
import javascript
private import semmle.javascript.heuristics.SyntacticHeuristics
private import semmle.javascript.security.dataflow.NosqlInjectionCustomizations
private import semmle.javascript.security.TaintedObject
import AdaptiveThreatModeling
private import CoreKnowledge as CoreKnowledge
private import StandardEndpointFilters as StandardEndpointFilters
/**
* This module provides logic to filter candidate sinks to those which are likely NoSQL injection
* sinks.
*/
module SinkEndpointFilter {
/**
* Provides a set of reasons why a given data flow node should be excluded as a sink candidate.
*
* If this predicate has no results for a sink candidate `n`, then we should treat `n` as an
* effective sink.
*
* Sink candidates that match one of the explicitly allowed heuristics at the end of the body
* never have a result.
*/
string getAReasonSinkExcluded(DataFlow::Node sinkCandidate) {
(
result = StandardEndpointFilters::getAReasonSinkExcluded(sinkCandidate)
or
// Require NoSQL injection sink candidates to be direct arguments to external library calls.
//
// The standard endpoint filters allow sink candidates which are within object literals or
// array literals, for example `req.sendFile(_, { path: ENDPOINT })`.
//
// However, the NoSQL injection query deals differently with these types of sinks compared to
// other security queries. Other security queries such as SQL injection tend to treat
// `ENDPOINT` as the ground truth sink, but the NoSQL injection query instead treats
// `{ path: ENDPOINT }` as the ground truth sink and defines an additional flow step to ensure
// data flows from `ENDPOINT` to the ground truth sink `{ path: ENDPOINT }`.
//
// Therefore for the NoSQL injection boosted query, we must explicitly ignore sink candidates
// within object literals or array literals, to avoid having multiple alerts for the same
// security vulnerability (one FP where the sink is `ENDPOINT` and one TP where the sink is
// `{ path: ENDPOINT }`).
//
// We use the same reason as in the standard endpoint filters to avoid duplicate reasons for
// endpoints that are neither direct nor indirect arguments to a likely external library call.
not sinkCandidate = StandardEndpointFilters::getALikelyExternalLibraryCall().getAnArgument() and
result = "not an argument to a likely external library call"
or
exists(DataFlow::CallNode call | sinkCandidate = call.getAnArgument() |
// additional database accesses that aren't modeled yet
call.(DataFlow::MethodCallNode).getMethodName() =
["create", "createCollection", "createIndexes"] and
result = "matches database access call heuristic"
or
// Remove modeled sinks
CoreKnowledge::isArgumentToKnownLibrarySinkFunction(sinkCandidate) and
result = "modeled sink"
or
// Remove common kinds of unlikely sinks
CoreKnowledge::isKnownStepSrc(sinkCandidate) and
result = "predecessor in a modeled flow step"
or
// Remove modeled database calls. Arguments to modeled calls are very likely to be modeled
// as sinks if they are true positives. Therefore arguments that are not modeled as sinks
// are unlikely to be true positives.
call instanceof DatabaseAccess and
result = "modeled database access"
or
// Remove calls to APIs that aren't relevant to NoSQL injection
call.getReceiver().asExpr() instanceof HTTP::RequestExpr and
result = "receiver is a HTTP request expression"
or
call.getReceiver().asExpr() instanceof HTTP::ResponseExpr and
result = "receiver is a HTTP response expression"
)
) and
not (
// Explicitly allow the following heuristic sinks.
//
// These are copied from the `HeuristicNosqlInjectionSink` class defined within
// `codeql/javascript/ql/src/semmle/javascript/heuristics/AdditionalSinks.qll`.
// We can't reuse the class because importing that file would cause us to treat these
// heuristic sinks as known sinks.
isAssignedToOrConcatenatedWith(sinkCandidate, "(?i)(nosql|query)") or
isArgTo(sinkCandidate, "(?i)(query)")
)
}
}
/** The ATM configuration used by the NoSQL injection boosted query. */
class NosqlInjectionATMConfig extends ATMConfig {
  NosqlInjectionATMConfig() { this = "NosqlInjectionATMConfig" }

  /** Holds if `source` is a NoSQL injection source or a tainted object source. */
  override predicate isKnownSource(DataFlow::Node source) {
    TaintedObject::isSource(source, _)
    or
    source instanceof NosqlInjection::Source
  }

  /** Holds if `sink` is a NoSQL injection sink. */
  override predicate isKnownSink(DataFlow::Node sink) { sink instanceof NosqlInjection::Sink }

  /** Holds if the sink endpoint filter gives no reason to exclude `sinkCandidate`. */
  override predicate isEffectiveSink(DataFlow::Node sinkCandidate) {
    not exists(string reason | reason = SinkEndpointFilter::getAReasonSinkExcluded(sinkCandidate))
  }

  /** Gets the endpoint type of the sinks of this query, namely `NosqlInjectionSinkType`. */
  override EndpointType getASinkEndpointType() { result instanceof NosqlInjectionSinkType }
}
/**
 * Holds if `src` -> `trg` is an additional flow step in the non-boosted NoSQL injection security
 * query, transforming the flow label `inlbl` into `outlbl`.
 */
predicate isBaseAdditionalFlowStep(
  DataFlow::Node src, DataFlow::Node trg, DataFlow::FlowLabel inlbl, DataFlow::FlowLabel outlbl
) {
  // additional flow step to track taint through NoSQL query objects
  exists(DataFlow::SourceNode queryObj, NoSQL::Query query |
    queryObj.flowsToExpr(query) and
    queryObj.flowsTo(trg) and
    src = queryObj.getAPropertyWrite().getRhs()
  ) and
  inlbl = TaintedObject::label() and
  outlbl = TaintedObject::label()
  or
  // the flow steps of tainted objects
  TaintedObject::step(src, trg, inlbl, outlbl)
}
/**
 * Gets a node that is the right-hand side of a property write on, or an element of, a source node
 * that flows to a local source of `query` or to a node this predicate reaches from that local
 * source in one or more steps.
 *
 * This predicate allows us to propagate data flow through property writes and array constructors
 * within a query object, enabling the security query to pick up NoSQL injection vulnerabilities
 * involving more complex queries.
 */
DataFlow::Node getASubexpressionWithinQuery(DataFlow::Node query) {
  exists(DataFlow::SourceNode receiver |
    receiver.flowsTo(getASubexpressionWithinQuery*(query.getALocalSource()))
  |
    // the value written to a property of `receiver`
    result = receiver.getAPropertyWrite().getRhs()
    or
    // an element of `receiver`, if it is an array creation
    result = receiver.(DataFlow::ArrayCreationNode).getAnElement()
  )
}
/**
 * A taint-tracking configuration for reasoning about NoSQL injection vulnerabilities.
 *
 * This is largely a copy of the taint tracking configuration for the standard NoSQL injection
 * query, except additional ATM sinks have been added and the additional flow step has been
 * generalised to cover the sinks predicted by ATM.
 */
class Configuration extends TaintTracking::Configuration {
  Configuration() { this = "NosqlInjectionATM" }

  override predicate isSource(DataFlow::Node source) { source instanceof NosqlInjection::Source }

  override predicate isSource(DataFlow::Node source, DataFlow::FlowLabel label) {
    TaintedObject::isSource(source, label)
  }

  override predicate isSink(DataFlow::Node sink, DataFlow::FlowLabel label) {
    // Allow effective sinks to have any taint label
    exists(NosqlInjectionATMConfig cfg | cfg.isEffectiveSink(sink))
    or
    label = sink.(NosqlInjection::Sink).getAFlowLabel()
  }

  override predicate isSanitizer(DataFlow::Node node) {
    node instanceof NosqlInjection::Sanitizer
    or
    super.isSanitizer(node)
  }

  override predicate isSanitizerGuard(TaintTracking::SanitizerGuardNode guard) {
    guard instanceof TaintedObject::SanitizerGuard
  }

  override predicate isAdditionalFlowStep(
    DataFlow::Node src, DataFlow::Node trg, DataFlow::FlowLabel inlbl, DataFlow::FlowLabel outlbl
  ) {
    // relaxed version of the base step, to track taint through unmodeled NoSQL query objects
    src = getASubexpressionWithinQuery(trg) and
    exists(NosqlInjectionATMConfig cfg | cfg.isEffectiveSink(trg))
    or
    // additional flow steps from the base (non-boosted) security query
    isBaseAdditionalFlowStep(src, trg, inlbl, outlbl)
  }
}

View File

@@ -0,0 +1,94 @@
/**
* For internal use only.
*
* Defines shared code used by the SQL injection boosted query.
*/
import semmle.javascript.heuristics.SyntacticHeuristics
import semmle.javascript.security.dataflow.SqlInjectionCustomizations
import AdaptiveThreatModeling
import CoreKnowledge as CoreKnowledge
import StandardEndpointFilters as StandardEndpointFilters
/**
* This module provides logic to filter candidate sinks to those which are likely SQL injection
* sinks.
*/
module SinkEndpointFilter {
private import javascript
private import SQL
/**
* Provides a set of reasons why a given data flow node should be excluded as a sink candidate.
*
* If this predicate has no results for a sink candidate `n`, then we should treat `n` as an
* effective sink.
*
* Sink candidates that match one of the explicitly allowed heuristics at the end of the body
* never have a result.
*/
string getAReasonSinkExcluded(DataFlow::Node sinkCandidate) {
(
result = StandardEndpointFilters::getAReasonSinkExcluded(sinkCandidate)
or
exists(DataFlow::CallNode call | sinkCandidate = call.getAnArgument() |
// prepared statements for SQL
// (an argument to a `run` method call on the result of a call whose callee is named `prepare`)
any(DataFlow::CallNode cn | cn.getCalleeName() = "prepare")
.getAMethodCall("run")
.getAnArgument() = sinkCandidate and
result = "prepared SQL statement"
or
sinkCandidate instanceof DataFlow::ArrayCreationNode and
result = "array creation"
or
// UI is unrelated to SQL
call.getCalleeName().regexpMatch("(?i).*(render|html).*") and
result = "HTML / rendering"
)
) and
not (
// Explicitly allow the following heuristic sinks.
//
// These are copied from the `HeuristicSqlInjectionSink` class defined within
// `codeql/javascript/ql/src/semmle/javascript/heuristics/AdditionalSinks.qll`.
// We can't reuse the class because importing that file would cause us to treat these
// heuristic sinks as known sinks.
isAssignedToOrConcatenatedWith(sinkCandidate, "(?i)(sql|query)") or
isArgTo(sinkCandidate, "(?i)(query)") or
isConcatenatedWithString(sinkCandidate,
"(?s).*(ALTER|COUNT|CREATE|DATABASE|DELETE|DISTINCT|DROP|FROM|GROUP|INSERT|INTO|LIMIT|ORDER|SELECT|TABLE|UPDATE|WHERE).*")
)
}
}
/** The ATM configuration used by the SQL injection boosted query. */
class SqlInjectionATMConfig extends ATMConfig {
  SqlInjectionATMConfig() { this = "SqlInjectionATMConfig" }

  /** Holds if `source` is a SQL injection source. */
  override predicate isKnownSource(DataFlow::Node source) { source instanceof SqlInjection::Source }

  /** Holds if `sink` is a SQL injection sink. */
  override predicate isKnownSink(DataFlow::Node sink) { sink instanceof SqlInjection::Sink }

  /** Holds if the sink endpoint filter gives no reason to exclude `sinkCandidate`. */
  override predicate isEffectiveSink(DataFlow::Node sinkCandidate) {
    not exists(string reason | reason = SinkEndpointFilter::getAReasonSinkExcluded(sinkCandidate))
  }

  /** Gets the endpoint type of the sinks of this query, namely `SqlInjectionSinkType`. */
  override EndpointType getASinkEndpointType() { result instanceof SqlInjectionSinkType }
}
/**
 * A taint-tracking configuration for reasoning about SQL injection vulnerabilities.
 *
 * This is largely a copy of the taint tracking configuration for the standard SQL injection
 * query, except additional sinks have been added using the sink endpoint filter.
 */
class Configuration extends TaintTracking::Configuration {
  Configuration() { this = "SqlInjectionATM" }

  override predicate isSource(DataFlow::Node source) { source instanceof SqlInjection::Source }

  override predicate isSink(DataFlow::Node sink) {
    // sinks that the endpoint filter did not exclude
    exists(SqlInjectionATMConfig cfg | cfg.isEffectiveSink(sink))
    or
    // sinks of the standard query
    sink instanceof SqlInjection::Sink
  }

  override predicate isSanitizer(DataFlow::Node node) {
    node instanceof SqlInjection::Sanitizer
    or
    super.isSanitizer(node)
  }
}

View File

@@ -0,0 +1,137 @@
/**
* For internal use only.
*
* Provides classes and predicates that are useful for endpoint filters.
*
* The standard use of this library is to make use of `isPotentialEffectiveSink/1`
*/
private import javascript
private import semmle.javascript.filters.ClassifyFiles as ClassifyFiles
private import semmle.javascript.heuristics.SyntacticHeuristics
private import CoreKnowledge as CoreKnowledge
/**
 * Gets a reason why the data flow node `n` should be excluded as a sink candidate.
 *
 * There is one result for each exclusion rule that applies to `n`, and no result if no rule
 * applies.
 */
string getAReasonSinkExcluded(DataFlow::Node n) {
  result = "not an argument to a likely external library call" and
  not flowsToArgumentOfLikelyExternalLibraryCall(n)
  or
  result = "argument to modeled function" and isArgumentToModeledFunction(n)
  or
  result = "argument to sinkless library" and isArgumentToSinklessLibrary(n)
  or
  result = "sanitizer" and isSanitizer(n)
  or
  result = "predicate" and isPredicate(n)
  or
  result = "hash" and isHash(n)
  or
  result = "numeric" and isNumeric(n)
  or
  // Ignore candidate sinks within externs, generated, library, and test code
  exists(string category |
    category = ["externs", "generated", "library", "test"] and
    ClassifyFiles::classify(n.getFile(), category)
  |
    result = "in " + category + " file"
  )
}
/**
 * Holds if `n` is an argument of an invocation that has at least one argument (possibly `n`
 * itself) for which a manual model exists, as determined by `isSomeModeledArgument`.
 */
predicate isArgumentToModeledFunction(DataFlow::Node n) {
  exists(DataFlow::InvokeNode invocation |
    isSomeModeledArgument(invocation.getAnArgument()) and
    n = invocation.getAnArgument()
  )
}
/**
 * Holds if `n` has a manual model according to `CoreKnowledge`: it is a known library sink, a
 * known step source, or some other modeled argument.
 */
predicate isSomeModeledArgument(DataFlow::Node n) {
  CoreKnowledge::isOtherModeledArgument(n, _)
  or
  CoreKnowledge::isKnownStepSrc(n)
  or
  CoreKnowledge::isKnownLibrarySink(n)
}
/**
 * Holds if `n` appears to be a numeric value.
 *
 * The heuristic is `isReadFrom(n, ".*index.*")`; presumably this matches reads from something
 * whose name contains "index" (confirm against `SyntacticHeuristics`).
 */
predicate isNumeric(DataFlow::Node n) {
  isReadFrom(n, ".*index.*")
}
/**
 * Holds if `n` is an argument to an invocation of one of the libraries `slugify`, `striptags`,
 * or `marked`, which are treated as having no sinks.
 *
 * Both invocations of the imported module itself and invocations of a property read from it
 * are covered.
 */
predicate isArgumentToSinklessLibrary(DataFlow::Node n) {
  exists(DataFlow::SourceNode safeModule, DataFlow::InvokeNode invocation |
    safeModule = DataFlow::moduleImport(["slugify", "striptags", "marked"]) and
    invocation = [safeModule, safeModule.getAPropertyRead()].getAnInvocation() and
    invocation.getAnArgument() = n
  )
}
/**
 * Holds if `n` is an argument to a call whose callee name contains, case-insensitively, one of
 * `escape`, `valid`, `validate`, `sanitize`, or `purify`.
 */
predicate isSanitizer(DataFlow::Node n) {
  exists(DataFlow::CallNode sanitizerCall |
    sanitizerCall.getCalleeName().regexpMatch("(?i).*(escape|valid(ate)?|sanitize|purify).*") and
    sanitizerCall.getAnArgument() = n
  )
}
/**
 * Holds if `n` is an argument to a call whose callee name looks like that of a predicate
 * function: it starts with `equals`, or with an optional `is`/`has`/`can` prefix followed by
 * `_` or an upper-case letter.
 *
 * NOTE(review): because the prefix alternative may be empty, any callee name that starts with
 * `_` or an upper-case letter also matches. Confirm that this is intended.
 */
predicate isPredicate(DataFlow::Node n) {
  exists(DataFlow::CallNode predicateCall |
    predicateCall.getCalleeName().regexpMatch("(equals|(|is|has|can)(_|[A-Z])).*") and
    predicateCall.getAnArgument() = n
  )
}
/**
 * Holds if `n` is an argument to a call whose callee name is, case-insensitively, `md5`,
 * `hash`, or `sha` optionally followed by digits.
 */
predicate isHash(DataFlow::Node n) {
  exists(DataFlow::CallNode hashCall |
    hashCall.getCalleeName().regexpMatch("(?i)^(sha\\d*|md5|hash)$") and
    hashCall.getAnArgument() = n
  )
}
/**
 * Holds if `n` is an argument of a likely external library call, or is nested inside such an
 * argument.
 *
 * Nesting is followed, recursively, through the right-hand sides of property writes on a
 * qualifying source node and through the elements of a qualifying array creation.
 */
predicate flowsToArgumentOfLikelyExternalLibraryCall(DataFlow::Node n) {
  // Base case: a direct argument of a call for which no callee is known.
  exists(DataFlow::CallNode externalCall |
    externalCall = getACallWithoutCallee() and
    n = externalCall.getAnArgument()
  )
  or
  // A value written to a property of a node that itself qualifies.
  exists(DataFlow::SourceNode container |
    flowsToArgumentOfLikelyExternalLibraryCall(container) and
    n = container.getAPropertyWrite().getRhs()
  )
  or
  // An element of an array creation that itself qualifies.
  exists(DataFlow::ArrayCreationNode array |
    flowsToArgumentOfLikelyExternalLibraryCall(array) and
    n = array.getAnElement()
  )
}
/**
 * Gets a call that is likely to target an external, non-built-in library.
 *
 * This is the public view of the `getACallWithoutCallee` heuristic.
 */
DataFlow::CallNode getALikelyExternalLibraryCall() {
  result = getACallWithoutCallee()
}
/**
* Gets a node that flows to callback-parameter `p`.
*
* A parameter `p` only qualifies if it is the last parameter of some function and is itself
* called somewhere. `t` is the type back-tracker describing the current state of the backwards
* search from `p`; callers obtain the final answer by passing `DataFlow::TypeBackTracker::end()`.
*/
private DataFlow::SourceNode getACallback(DataFlow::ParameterNode p, DataFlow::TypeBackTracker t) {
// Base case: start the backwards search at `p` itself.
t.start() and
result = p and
// `p` must be the last parameter of some function ...
any(DataFlow::FunctionNode f).getLastParameter() = p and
// ... and there must be at least one call of `p`.
exists(p.getACall())
or
// Recursive case: take one step backwards from a node already known to reach `p`.
exists(DataFlow::TypeBackTracker t2 | result = getACallback(p, t2).backtrack(t2, t))
}
/**
 * Gets a call for which we do not have the callee, that is, the definition of the called
 * function. This acts as a heuristic for identifying calls to external library functions.
 *
 * A call qualifies if every callee it resolves to (if any) is defined in an externs file, and
 * its callee node is not reached from a parameter that a function flows to as a callback.
 */
private DataFlow::CallNode getACallWithoutCallee() {
  // No resolved callee may live outside an externs file.
  not exists(Function callee |
    callee = result.getACallee() and
    not callee.getTopLevel().isExterns()
  ) and
  // Exclude invocations of a callback parameter that some function flows to.
  not exists(DataFlow::ParameterNode param, DataFlow::FunctionNode callback |
    callback = getACallback(param, DataFlow::TypeBackTracker::end()) and
    param.flowsTo(result.getCalleeNode())
  )
}

View File

@@ -0,0 +1,123 @@
/**
* For internal use only.
*
* Defines shared code used by the path injection boosted query.
*/
import semmle.javascript.heuristics.SyntacticHeuristics
import semmle.javascript.security.dataflow.TaintedPathCustomizations
import AdaptiveThreatModeling
import CoreKnowledge as CoreKnowledge
import StandardEndpointFilters as StandardEndpointFilters
/**
 * Filtering logic that narrows candidate sinks down to those which are likely path injection
 * sinks.
 */
module SinkEndpointFilter {
  private import javascript
  private import TaintedPath

  /**
   * Gets a reason why `sinkCandidate` should be excluded as a sink candidate.
   *
   * If this predicate has no results for `sinkCandidate`, then `sinkCandidate` should be
   * treated as an effective sink.
   */
  string getAReasonSinkExcluded(DataFlow::Node sinkCandidate) {
    result = StandardEndpointFilters::getAReasonSinkExcluded(sinkCandidate) and
    // Explicitly allow the heuristic sinks below: a candidate that matches any of them is
    // never excluded.
    //
    // These are mostly copied from the `HeuristicTaintedPathSink` class defined within
    // `codeql/javascript/ql/src/semmle/javascript/heuristics/AdditionalSinks.qll`.
    // We can't reuse the class because importing that file would cause us to treat these
    // heuristic sinks as known sinks.
    not isAssignedToOrConcatenatedWith(sinkCandidate, "(?i)(file|folder|dir|absolute)") and
    not isArgTo(sinkCandidate, "(?i)(get|read)file") and
    not exists(string pathPattern |
      // paths with at least two parts, and either a trailing or leading slash
      pathPattern = ["(?i)([a-z0-9_.-]+/){2,}", "(?i)(/[a-z0-9_.-]+){2,}"]
    |
      isConcatenatedWithString(sinkCandidate, pathPattern)
    ) and
    not isConcatenatedWithStrings(".*/", sinkCandidate, "/.*") and
    // In addition to the names from `HeuristicTaintedPathSink` in the first
    // `isAssignedToOrConcatenatedWith` call above, we also allow the noisier "path" name.
    not isAssignedToOrConcatenatedWith(sinkCandidate, "(?i)path")
  }
}
/**
 * The ATM configuration for the boosted path injection query.
 *
 * Known sources and sinks are those of the `TaintedPath` module. A sink candidate is effective
 * when `SinkEndpointFilter` gives no reason to exclude it.
 */
class TaintedPathATMConfig extends ATMConfig {
  TaintedPathATMConfig() { this = "TaintedPathATMConfig" }

  override predicate isKnownSource(DataFlow::Node source) { source instanceof TaintedPath::Source }

  override predicate isKnownSink(DataFlow::Node sink) { sink instanceof TaintedPath::Sink }

  override predicate isEffectiveSink(DataFlow::Node sinkCandidate) {
    not exists(string reason |
      reason = SinkEndpointFilter::getAReasonSinkExcluded(sinkCandidate)
    )
  }

  override EndpointType getASinkEndpointType() { result instanceof TaintedPathSinkType }
}
/**
 * The taint-tracking configuration used by the boosted path injection query.
 *
 * Sources, sanitizers, and additional flow steps come from the `TaintedPath` module. Sinks are
 * the `TaintedPath` sinks (with their own flow labels) plus every effective sink of
 * `TaintedPathATMConfig`.
 */
class Configuration extends TaintTracking::Configuration {
  Configuration() { this = "TaintedPathATM" }

  override predicate isSource(DataFlow::Node source) { source instanceof TaintedPath::Source }

  override predicate isSink(DataFlow::Node sink, DataFlow::FlowLabel label) {
    // Allow effective sinks to have any taint label
    any(TaintedPathATMConfig atmConfig).isEffectiveSink(sink)
    or
    sink.(TaintedPath::Sink).getAFlowLabel() = label
  }

  override predicate isSanitizer(DataFlow::Node node) { node instanceof TaintedPath::Sanitizer }

  override predicate isSanitizerGuard(TaintTracking::SanitizerGuardNode node) {
    node instanceof BarrierGuardNodeAsSanitizerGuardNode
  }

  override predicate isAdditionalFlowStep(
    DataFlow::Node src, DataFlow::Node dst, DataFlow::FlowLabel srclabel,
    DataFlow::FlowLabel dstlabel
  ) {
    TaintedPath::isAdditionalTaintedPathFlowStep(src, dst, srclabel, dstlabel)
  }
}
/**
 * A `TaintedPath` barrier guard, exposed as a sanitizer guard for path injection.
 *
 * The standard library path injection query uses a data flow configuration and therefore
 * defines barrier nodes. The boosted query uses a taint tracking configuration instead, to find
 * new kinds of less certain results, and taint tracking configurations use sanitizer guards
 * rather than barrier guards. This class ports the standard library's barrier guards to
 * sanitizer guards.
 */
class BarrierGuardNodeAsSanitizerGuardNode extends TaintTracking::LabeledSanitizerGuardNode {
  BarrierGuardNodeAsSanitizerGuardNode() { this instanceof TaintedPath::BarrierGuardNode }

  override predicate sanitizes(boolean outcome, Expr e) {
    // Sanitize whenever the guard blocks `e`, with or without a specific label.
    this.blocks(outcome, e, _)
    or
    this.blocks(outcome, e)
  }

  override predicate sanitizes(boolean outcome, Expr e, DataFlow::FlowLabel label) {
    // The label is deliberately unconstrained: the guard sanitizes every label.
    this.sanitizes(outcome, e)
  }
}

View File

@@ -0,0 +1,103 @@
/**
* For internal use only.
*
* Defines shared code used by the XSS boosted query.
*/
private import semmle.javascript.heuristics.SyntacticHeuristics
private import semmle.javascript.security.dataflow.DomBasedXssCustomizations
import AdaptiveThreatModeling
import CoreKnowledge as CoreKnowledge
import StandardEndpointFilters as StandardEndpointFilters
/**
 * Filtering logic that narrows candidate sinks down to those which are likely XSS sinks.
 */
module SinkEndpointFilter {
  private import javascript
  private import DomBasedXss

  /**
   * Gets a reason why `sinkCandidate` should be excluded as a sink candidate.
   *
   * If this predicate has no results for `sinkCandidate`, then `sinkCandidate` should be
   * treated as an effective sink.
   */
  string getAReasonSinkExcluded(DataFlow::Node sinkCandidate) {
    (
      result = StandardEndpointFilters::getAReasonSinkExcluded(sinkCandidate)
      or
      result = "setState calls ought to be safe in react applications" and
      exists(DataFlow::CallNode setStateCall |
        setStateCall.getCalleeName() = "setState" and
        sinkCandidate = setStateCall.getAnArgument()
      )
    ) and
    // Explicitly allow the heuristic sinks below: a candidate that matches any of them is
    // never excluded.
    //
    // These are copied from the `HeuristicDomBasedXssSink` class defined within
    // `codeql/javascript/ql/src/semmle/javascript/heuristics/AdditionalSinks.qll`.
    // We can't reuse the class because importing that file would cause us to treat these
    // heuristic sinks as known sinks.
    not isAssignedToOrConcatenatedWith(sinkCandidate, "(?i)(html|innerhtml)") and
    not isArgTo(sinkCandidate, "(?i)(html|render)") and
    not sinkCandidate instanceof StringOps::HtmlConcatenationLeaf and
    not isConcatenatedWithStrings("(?is).*<[a-z ]+.*", sinkCandidate, "(?s).*>.*") and
    // In addition to the heuristic sinks from `HeuristicDomBasedXssSink`, explicitly allow
    // property writes like `elem.innerHTML = <TAINT>` that may not be picked up as HTML
    // concatenation leaves.
    //
    // NOTE(review): the pattern ends in `html*` (i.e. `htm` followed by any number of `l`),
    // so it only matches property names ending that way; `.*html.*` may have been intended.
    not exists(DataFlow::PropWrite htmlWrite |
      htmlWrite.getRhs() = sinkCandidate and
      htmlWrite.getPropertyName().regexpMatch("(?i).*html*")
    )
  }
}
/**
 * The ATM configuration for the boosted XSS query.
 *
 * Known sources and sinks are those of the `DomBasedXss` module. A sink candidate is effective
 * when `SinkEndpointFilter` gives no reason to exclude it.
 */
class DomBasedXssATMConfig extends ATMConfig {
  DomBasedXssATMConfig() { this = "DomBasedXssATMConfig" }

  override predicate isKnownSource(DataFlow::Node source) { source instanceof DomBasedXss::Source }

  override predicate isKnownSink(DataFlow::Node sink) { sink instanceof DomBasedXss::Sink }

  override predicate isEffectiveSink(DataFlow::Node sinkCandidate) {
    not exists(string reason |
      reason = SinkEndpointFilter::getAReasonSinkExcluded(sinkCandidate)
    )
  }

  override EndpointType getASinkEndpointType() { result instanceof XssSinkType }
}
/**
 * The taint-tracking configuration used by the boosted XSS query.
 *
 * Sources, sanitizers, sanitizer guards, and sanitizer edges come from the `DomBasedXss`
 * module. Sinks are the `DomBasedXss` sinks plus every effective sink of
 * `DomBasedXssATMConfig`.
 */
class Configuration extends TaintTracking::Configuration {
  Configuration() { this = "DomBasedXssATMConfiguration" }

  override predicate isSource(DataFlow::Node source) { source instanceof DomBasedXss::Source }

  override predicate isSink(DataFlow::Node sink) {
    // Sinks contributed by the endpoint filter.
    any(DomBasedXssATMConfig atmConfig).isEffectiveSink(sink)
    or
    // Sinks already known to the `DomBasedXss` module.
    sink instanceof DomBasedXss::Sink
  }

  override predicate isSanitizer(DataFlow::Node node) {
    node instanceof DomBasedXss::Sanitizer
    or
    super.isSanitizer(node)
  }

  override predicate isSanitizerGuard(TaintTracking::SanitizerGuardNode guard) {
    guard instanceof DomBasedXss::SanitizerGuard
  }

  override predicate isSanitizerEdge(DataFlow::Node pred, DataFlow::Node succ) {
    DomBasedXss::isOptionallySanitizedEdge(pred, succ)
  }
}

View File

@@ -0,0 +1,6 @@
name: codeql/javascript-experimental-atm-lib
version: 0.0.0
extractor: javascript
library: true
dependencies:
codeql/javascript-all: "*"

View File

@@ -0,0 +1,30 @@
/**
* For internal use only.
*
* @name NoSQL database query built from user-controlled sources (boosted)
* @description Building a database query from user-controlled sources is vulnerable to insertion of
* malicious code by the user.
* @kind path-problem
* @scored
* @problem.severity error
* @security-severity 8.8
* @id adaptive-threat-modeling/js/nosql-injection
* @tags experimental experimental/atm security
*/
import ATM::ResultsInfo
import DataFlow::PathGraph
import experimental.adaptivethreatmodeling.NosqlInjectionATM
// Reports each flow path found by a data flow configuration in scope, together with its score.
from
  DataFlow::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink,
  float score, string scoreString
where
  config.hasFlowPath(source, sink) and
  // Numeric score (last result column) and its string form (shown in the message).
  scoreString = getScoreStringForFlow(source.getNode(), sink.getNode()) and
  score = getScoreForFlow(source.getNode(), sink.getNode()) and
  // Presumably drops flows the non-boosted query already reports; see `ATM::ResultsInfo`.
  not isFlowLikelyInBaseQuery(source.getNode(), sink.getNode())
select sink.getNode(), source, sink,
  "[Score = " + scoreString + "] This may be a NoSQL query depending on $@ " +
    getAdditionalAlertInfo(source.getNode(), sink.getNode()), source.getNode(),
  "a user-provided value", score

View File

@@ -0,0 +1,30 @@
/**
* For internal use only.
*
* @name SQL database query built from user-controlled sources (boosted)
* @description Building a database query from user-controlled sources is vulnerable to insertion of
* malicious code by the user.
* @kind path-problem
* @scored
* @problem.severity error
* @security-severity 8.8
* @id adaptive-threat-modeling/js/sql-injection
* @tags experimental experimental/atm security
*/
import experimental.adaptivethreatmodeling.SqlInjectionATM
import ATM::ResultsInfo
import DataFlow::PathGraph
// Reports each flow path found by a data flow configuration in scope, together with its score.
from
  DataFlow::Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink, float score,
  string scoreString
where
  cfg.hasFlowPath(source, sink) and
  // Presumably drops flows the non-boosted query already reports; see `ATM::ResultsInfo`.
  not isFlowLikelyInBaseQuery(source.getNode(), sink.getNode()) and
  // Numeric score (last result column) and its string form (shown in the message).
  score = getScoreForFlow(source.getNode(), sink.getNode()) and
  scoreString = getScoreStringForFlow(source.getNode(), sink.getNode())
// The message names the full id of the base query (`js/sql-injection`), consistent with the
// boosted path injection and XSS queries, which name `js/path-injection` and `js/xss`.
select sink.getNode(), source, sink,
  "[Score = " + scoreString + "] This may be a js/sql-injection result depending on $@ " +
    getAdditionalAlertInfo(source.getNode(), sink.getNode()), source.getNode(),
  "a user-provided value", score

View File

@@ -0,0 +1,30 @@
/**
* For internal use only.
*
* @name Uncontrolled data used in path expression (boosted)
* @description Accessing paths influenced by users can allow an attacker to access
* unexpected resources.
* @kind path-problem
* @scored
* @problem.severity error
* @security-severity 7.5
* @id adaptive-threat-modeling/js/path-injection
* @tags experimental experimental/atm security
*/
import ATM::ResultsInfo
import DataFlow::PathGraph
import experimental.adaptivethreatmodeling.TaintedPathATM
// Reports each flow path found by a data flow configuration in scope, together with its score.
from
  DataFlow::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink,
  float score, string scoreString
where
  config.hasFlowPath(source, sink) and
  // Numeric score (last result column) and its string form (shown in the message).
  scoreString = getScoreStringForFlow(source.getNode(), sink.getNode()) and
  score = getScoreForFlow(source.getNode(), sink.getNode()) and
  // Presumably drops flows the non-boosted query already reports; see `ATM::ResultsInfo`.
  not isFlowLikelyInBaseQuery(source.getNode(), sink.getNode())
select sink.getNode(), source, sink,
  "[Score = " + scoreString + "] This may be a js/path-injection result depending on $@ " +
    getAdditionalAlertInfo(source.getNode(), sink.getNode()), source.getNode(),
  "a user-provided value", score

View File

@@ -0,0 +1,31 @@
/**
* For internal use only.
*
* @name Client-side cross-site scripting (boosted)
* @description Writing user input directly to the DOM allows for
* a cross-site scripting vulnerability.
* @kind path-problem
* @scored
* @problem.severity error
* @security-severity 6.1
* @id adaptive-threat-modeling/js/xss
* @tags experimental experimental/atm security
*/
import javascript
import ATM::ResultsInfo
import DataFlow::PathGraph
import experimental.adaptivethreatmodeling.XssATM
// Reports each flow path found by a data flow configuration in scope, together with its score.
from
  DataFlow::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink,
  float score, string scoreString
where
  config.hasFlowPath(source, sink) and
  // Numeric score (last result column) and its string form (shown in the message).
  scoreString = getScoreStringForFlow(source.getNode(), sink.getNode()) and
  score = getScoreForFlow(source.getNode(), sink.getNode()) and
  // Presumably drops flows the non-boosted query already reports; see `ATM::ResultsInfo`.
  not isFlowLikelyInBaseQuery(source.getNode(), sink.getNode())
select sink.getNode(), source, sink,
  "[Score = " + scoreString + "] This may be a js/xss result depending on $@ " +
    getAdditionalAlertInfo(source.getNode(), sink.getNode()), source.getNode(),
  "a user-provided value", score

View File

@@ -0,0 +1,8 @@
- description: ATM boosted Code Scanning queries for JavaScript
- queries: .
- include:
id:
- adaptive-threat-modeling/js/nosql-injection
- adaptive-threat-modeling/js/sql-injection
- adaptive-threat-modeling/js/path-injection
- adaptive-threat-modeling/js/xss

View File

@@ -0,0 +1,4 @@
---
dependencies: {}
compiled: false
lockVersion: 1.0.0

View File

@@ -0,0 +1,7 @@
name: codeql/javascript-experimental-atm-src
language: javascript
version: 0.0.0
suites: codeql-suites
defaultSuiteFile: codeql-suites/javascript-atm-code-scanning.qls
dependencies:
codeql/javascript-experimental-atm-lib: "*"

View File

@@ -0,0 +1 @@
<queries language="javascript"/>

View File

@@ -0,0 +1,2 @@
codescanning
* Problems with extraction that in most cases won't completely break the analysis are now reported as warnings rather than errors.

View File

@@ -1,23 +0,0 @@
/**
* @name Python extraction errors
* @description List all extraction errors for Python files in the source code directory.
* @kind diagnostic
* @id py/diagnostics/extraction-errors
*/
import python
/**
 * Gets the SARIF severity used for errors, which is `2`.
 *
 * For the meaning of "error", see point 3.27.10 in
 * https://docs.oasis-open.org/sarif/sarif/v2.0/sarif-v2.0.html.
 */
int getErrorSeverity() {
  result = 2
}
// One row per syntax error, reported at error severity.
from SyntaxError error, File file
where
  // Only files that have a relative path are reported.
  exists(file.getRelativePath()) and
  error.getFile() = file
select error, "Extraction failed in " + file + " with error " + error.getMessage(),
  getErrorSeverity()

View File

@@ -0,0 +1,36 @@
/**
* @name Python extraction warnings
* @description List all extraction warnings for Python files in the source code directory.
* @kind diagnostic
* @id py/diagnostics/extraction-warnings
*/
import python
/**
 * Gets the SARIF severity used for warnings, which is `1`.
 *
 * See https://docs.oasis-open.org/sarif/sarif/v2.1.0/csprd01/sarif-v2.1.0-csprd01.html#_Toc10541338
 */
int getWarningSeverity() {
  result = 1
}
// The spec
// https://docs.oasis-open.org/sarif/sarif/v2.1.0/csprd01/sarif-v2.1.0-csprd01.html#_Toc10541338
// defines error and warning as:
//
// "error": A serious problem was found. The condition encountered by the tool resulted
// in the analysis being halted or caused the results to be incorrect or incomplete.
//
// "warning": A problem that is not considered serious was found. The condition
// encountered by the tool is such that it is uncertain whether a problem occurred, or
// is such that the analysis might be incomplete but the results that were generated are
// probably valid.
//
// So SyntaxErrors are reported at the warning level, since analysis might be incomplete
// but the results that were generated are probably valid.
// One row per syntax error, reported at warning severity.
from SyntaxError error, File file
where
  // Only files that have a relative path are reported.
  exists(file.getRelativePath()) and
  error.getFile() = file
select error, "Extraction failed in " + file + " with error " + error.getMessage(),
  getWarningSeverity()

View File

@@ -1 +0,0 @@
Diagnostics/ExtractionErrors.ql

View File

@@ -1,2 +1,2 @@
| bad_encoding.py:2:11:2:11 | Encoding Error | Extraction failed in bad_encoding.py with error 'utf-8' codec can't decode byte 0x9d in position 87: invalid start byte | 2 |
| syntax_error.py:1:31:1:31 | Syntax Error | Extraction failed in syntax_error.py with error Syntax Error | 2 |
| bad_encoding.py:2:11:2:11 | Encoding Error | Extraction failed in bad_encoding.py with error 'utf-8' codec can't decode byte 0x9d in position 87: invalid start byte | 1 |
| syntax_error.py:1:31:1:31 | Syntax Error | Extraction failed in syntax_error.py with error Syntax Error | 1 |

View File

@@ -0,0 +1 @@
Diagnostics/ExtractionWarnings.ql