Mirror of https://github.com/github/codeql.git (synced 2025-12-16 16:53:25 +01:00)

Merge branch 'main' of github.com:github/codeql into 'main'

Conflicts: docs/codeql/query-help/codeql-cwe-coverage.rst
@@ -5,5 +5,7 @@
    "cpp/ql/test/query-tests/Security/CWE/CWE-190/semmle/tainted/qlpack.yml",
    "*/ql/examples/qlpack.yml",
    "*/upgrades/qlpack.yml",
    "javascript/ql/experimental/adaptivethreatmodeling/lib/qlpack.yml",
    "javascript/ql/experimental/adaptivethreatmodeling/src/qlpack.yml",
    "misc/legacy-support/*/qlpack.yml",
    "misc/suite-helpers/qlpack.yml" ] }

@@ -126,13 +126,7 @@ class MallocSizeExpr extends BufferAccess, FunctionCall {
}

class NetworkFunctionCall extends FunctionCall {
  NetworkFunctionCall() {
    getTarget().hasName("ntohd") or
    getTarget().hasName("ntohf") or
    getTarget().hasName("ntohl") or
    getTarget().hasName("ntohll") or
    getTarget().hasName("ntohs")
  }
  NetworkFunctionCall() { getTarget().hasName(["ntohd", "ntohf", "ntohl", "ntohll", "ntohs"]) }
}

class NetworkToBufferSizeConfiguration extends DataFlow::Configuration {

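The hunks in this file all apply the same refactoring: a chain of hasName(...) or hasGlobalName(...) disjuncts is collapsed into a single call whose argument is a set literal. A minimal sketch of the pattern (the predicate names below are illustrative, not taken from this commit):

    import cpp

    // Before: one disjunct per function name.
    predicate isByteOrderConversionVerbose(FunctionCall call) {
      call.getTarget().hasName("ntohl") or
      call.getTarget().hasName("ntohs")
    }

    // After: a single call whose argument ranges over a set literal of names.
    predicate isByteOrderConversion(FunctionCall call) {
      call.getTarget().hasName(["ntohl", "ntohs"])
    }
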
@@ -103,12 +103,7 @@ private predicate posixSystemInfo(FunctionCall source, Element use) {
  // - various filesystem parameters
  // int uname(struct utsname *buf)
  // - OS name and version
  (
    source.getTarget().hasName("confstr") or
    source.getTarget().hasName("statvfs") or
    source.getTarget().hasName("fstatvfs") or
    source.getTarget().hasName("uname")
  ) and
  source.getTarget().hasName(["confstr", "statvfs", "fstatvfs", "uname"]) and
  use = source.getArgument(1)
}

@@ -128,14 +123,9 @@ private predicate posixPWInfo(FunctionCall source, Element use) {
  // struct group *getgrnam(const char *name);
  // struct group *getgrgid(gid_t);
  // struct group *getgrent(void);
  (
    source.getTarget().hasName("getpwnam") or
    source.getTarget().hasName("getpwuid") or
    source.getTarget().hasName("getpwent") or
    source.getTarget().hasName("getgrnam") or
    source.getTarget().hasName("getgrgid") or
    source.getTarget().hasName("getgrent")
  ) and
  source
      .getTarget()
      .hasName(["getpwnam", "getpwuid", "getpwent", "getgrnam", "getgrgid", "getgrent"]) and
  use = source
  or
  // int getpwnam_r(const char *name, struct passwd *pwd,
@@ -146,31 +136,15 @@ private predicate posixPWInfo(FunctionCall source, Element use) {
  //   char *buf, size_t buflen, struct group **result);
  // int getgrnam_r(const char *name, struct group *grp,
  //   char *buf, size_t buflen, struct group **result);
  (
    source.getTarget().hasName("getpwnam_r") or
    source.getTarget().hasName("getpwuid_r") or
    source.getTarget().hasName("getgrgid_r") or
    source.getTarget().hasName("getgrnam_r")
  ) and
  (
    use = source.getArgument(1) or
    use = source.getArgument(2) or
    use = source.getArgument(4)
  )
  source.getTarget().hasName(["getpwnam_r", "getpwuid_r", "getgrgid_r", "getgrnam_r"]) and
  use = source.getArgument([1, 2, 4])
  or
  // int getpwent_r(struct passwd *pwd, char *buffer, size_t bufsize,
  //   struct passwd **result);
  // int getgrent_r(struct group *gbuf, char *buf,
  //   size_t buflen, struct group **gbufp);
  (
    source.getTarget().hasName("getpwent_r") or
    source.getTarget().hasName("getgrent_r")
  ) and
  (
    use = source.getArgument(0) or
    use = source.getArgument(1) or
    use = source.getArgument(3)
  )
  source.getTarget().hasName(["getpwent_r", "getgrent_r"]) and
  use = source.getArgument([0, 1, 3])
}

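The same refactoring also applies to argument positions: a disjunction of getArgument(i) equalities becomes one getArgument call over a set literal of indices. A hedged sketch, with an illustrative predicate name:

    import cpp

    // `use` ranges over arguments 1, 2 and 4 of any call to the listed functions.
    predicate passwordBufferUse(FunctionCall source, Expr use) {
      source.getTarget().hasName(["getpwnam_r", "getpwuid_r"]) and
      use = source.getArgument([1, 2, 4])
    }
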
/**
@@ -190,13 +164,11 @@ private predicate windowsSystemInfo(FunctionCall source, Element use) {
  // BOOL WINAPI GetVersionEx(_Inout_ LPOSVERSIONINFO lpVersionInfo);
  // void WINAPI GetSystemInfo(_Out_ LPSYSTEM_INFO lpSystemInfo);
  // void WINAPI GetNativeSystemInfo(_Out_ LPSYSTEM_INFO lpSystemInfo);
  (
    source.getTarget().hasGlobalName("GetVersionEx") or
    source.getTarget().hasGlobalName("GetVersionExA") or
    source.getTarget().hasGlobalName("GetVersionExW") or
    source.getTarget().hasGlobalName("GetSystemInfo") or
    source.getTarget().hasGlobalName("GetNativeSystemInfo")
  ) and
  source
      .getTarget()
      .hasGlobalName([
          "GetVersionEx", "GetVersionExA", "GetVersionExW", "GetSystemInfo", "GetNativeSystemInfo"
        ]) and
  use = source.getArgument(0)
}

@@ -216,11 +188,11 @@ private predicate windowsFolderPath(FunctionCall source, Element use) {
  // _In_ int csidl,
  // _In_ BOOL fCreate
  // );
  (
    source.getTarget().hasGlobalName("SHGetSpecialFolderPath") or
    source.getTarget().hasGlobalName("SHGetSpecialFolderPathA") or
    source.getTarget().hasGlobalName("SHGetSpecialFolderPathW")
  ) and
  source
      .getTarget()
      .hasGlobalName([
          "SHGetSpecialFolderPath", "SHGetSpecialFolderPathA", "SHGetSpecialFolderPathW"
        ]) and
  use = source.getArgument(1)
  or
  // HRESULT SHGetKnownFolderPath(
@@ -239,11 +211,7 @@ private predicate windowsFolderPath(FunctionCall source, Element use) {
  // _In_ DWORD dwFlags,
  // _Out_ LPTSTR pszPath
  // );
  (
    source.getTarget().hasGlobalName("SHGetFolderPath") or
    source.getTarget().hasGlobalName("SHGetFolderPathA") or
    source.getTarget().hasGlobalName("SHGetFolderPathW")
  ) and
  source.getTarget().hasGlobalName(["SHGetFolderPath", "SHGetFolderPathA", "SHGetFolderPathW"]) and
  use = source.getArgument(4)
  or
  // HRESULT SHGetFolderPathAndSubDir(
@@ -254,11 +222,11 @@ private predicate windowsFolderPath(FunctionCall source, Element use) {
  // _In_ LPCTSTR pszSubDir,
  // _Out_ LPTSTR pszPath
  // );
  (
    source.getTarget().hasGlobalName("SHGetFolderPathAndSubDir") or
    source.getTarget().hasGlobalName("SHGetFolderPathAndSubDirA") or
    source.getTarget().hasGlobalName("SHGetFolderPathAndSubDirW")
  ) and
  source
      .getTarget()
      .hasGlobalName([
          "SHGetFolderPathAndSubDir", "SHGetFolderPathAndSubDirA", "SHGetFolderPathAndSubDirW"
        ]) and
  use = source.getArgument(5)
}

@@ -273,11 +241,7 @@ class WindowsFolderPath extends SystemData {
}

private predicate logonUser(FunctionCall source, VariableAccess use) {
  (
    source.getTarget().hasGlobalName("LogonUser") or
    source.getTarget().hasGlobalName("LogonUserW") or
    source.getTarget().hasGlobalName("LogonUserA")
  ) and
  source.getTarget().hasGlobalName(["LogonUser", "LogonUserW", "LogonUserA"]) and
  use = source.getAnArgument()
}

@@ -297,11 +261,7 @@ private predicate regQuery(FunctionCall source, VariableAccess use) {
  // _Out_opt_ LPTSTR lpValue,
  // _Inout_opt_ PLONG lpcbValue
  // );
  (
    source.getTarget().hasGlobalName("RegQueryValue") or
    source.getTarget().hasGlobalName("RegQueryValueA") or
    source.getTarget().hasGlobalName("RegQueryValueW")
  ) and
  source.getTarget().hasGlobalName(["RegQueryValue", "RegQueryValueA", "RegQueryValueW"]) and
  use = source.getArgument(2)
  or
  // LONG WINAPI RegQueryMultipleValues(
@@ -311,11 +271,11 @@ private predicate regQuery(FunctionCall source, VariableAccess use) {
  // _Out_opt_ LPTSTR lpValueBuf,
  // _Inout_opt_ LPDWORD ldwTotsize
  // );
  (
    source.getTarget().hasGlobalName("RegQueryMultipleValues") or
    source.getTarget().hasGlobalName("RegQueryMultipleValuesA") or
    source.getTarget().hasGlobalName("RegQueryMultipleValuesW")
  ) and
  source
      .getTarget()
      .hasGlobalName([
          "RegQueryMultipleValues", "RegQueryMultipleValuesA", "RegQueryMultipleValuesW"
        ]) and
  use = source.getArgument(3)
  or
  // LONG WINAPI RegQueryValueEx(
@@ -326,11 +286,7 @@ private predicate regQuery(FunctionCall source, VariableAccess use) {
  // _Out_opt_ LPBYTE lpData,
  // _Inout_opt_ LPDWORD lpcbData
  // );
  (
    source.getTarget().hasGlobalName("RegQueryValueEx") or
    source.getTarget().hasGlobalName("RegQueryValueExA") or
    source.getTarget().hasGlobalName("RegQueryValueExW")
  ) and
  source.getTarget().hasGlobalName(["RegQueryValueEx", "RegQueryValueExA", "RegQueryValueExW"]) and
  use = source.getArgument(4)
  or
  // LONG WINAPI RegGetValue(
@@ -342,11 +298,7 @@ private predicate regQuery(FunctionCall source, VariableAccess use) {
  // _Out_opt_ PVOID pvData,
  // _Inout_opt_ LPDWORD pcbData
  // );
  (
    source.getTarget().hasGlobalName("RegGetValue") or
    source.getTarget().hasGlobalName("RegGetValueA") or
    source.getTarget().hasGlobalName("RegGetValueW")
  ) and
  source.getTarget().hasGlobalName(["RegGetValue", "RegGetValueA", "RegGetValueW"]) and
  use = source.getArgument(5)
}

@@ -408,12 +360,7 @@ private predicate socketOutput(FunctionCall call, Expr data) {
  // const struct sockaddr *dest_addr, socklen_t addrlen);
  // ssize_t sendmsg(int sockfd, const struct msghdr *msg, int flags);
  // int write(int handle, void *buffer, int nbyte);
  (
    call.getTarget().hasGlobalName("send") or
    call.getTarget().hasGlobalName("sendto") or
    call.getTarget().hasGlobalName("sendmsg") or
    call.getTarget().hasGlobalName("write")
  ) and
  call.getTarget().hasGlobalName(["send", "sendto", "sendmsg", "write"]) and
  data = call.getArgument(1) and
  socketFileDescriptor(call.getArgument(0))
)

@@ -44,14 +44,13 @@ class SetuidLikeWrapperCall extends FunctionCall {

class CallBeforeSetuidFunctionCall extends FunctionCall {
  CallBeforeSetuidFunctionCall() {
    (
      getTarget().hasGlobalName("setgid") or
      getTarget().hasGlobalName("setresgid") or
      // Compatibility may require skipping initgroups and setgroups return checks.
      // A stricter best practice is to check the result and errno for EPERM.
      getTarget().hasGlobalName("initgroups") or
      getTarget().hasGlobalName("setgroups")
    ) and
    getTarget()
        .hasGlobalName([
            "setgid", "setresgid",
            // Compatibility may require skipping initgroups and setgroups return checks.
            // A stricter best practice is to check the result and errno for EPERM.
            "initgroups", "setgroups"
          ]) and
    // setgid/setresgid/etc with the root group are false positives.
    not argumentMayBeRoot(getArgument(0))
  }

@@ -15,13 +15,7 @@ import cpp

from Element u, ArithmeticType at
where
  (
    at.hasName("int") or
    at.hasName("short") or
    at.hasName("long") or
    at.hasName("float") or
    at.hasName("double")
  ) and
  at.hasName(["int", "short", "long", "float", "double"]) and
  u = at.getATypeNameUse() and
  not at instanceof WideCharType
select u, "AV Rule 209: The basic types of int, short, long, float and double shall not be used."

@@ -552,11 +552,16 @@ private predicate defaultDynamicConversion(Type fromType, Type toType) {
  fromType instanceof RefType and toType instanceof DynamicType
}

pragma[noinline]
private predicate systemDelegateBaseType(RefType t) {
  t = any(SystemDelegateClass c).getABaseType*()
}

// This is a deliberate, small cartesian product, so we have manually lifted it to force the
// evaluator to evaluate it in its entirety, rather than trying to optimize it in context.
pragma[noinline]
private predicate defaultDelegateConversion(RefType fromType, RefType toType) {
  fromType instanceof DelegateType and toType = any(SystemDelegateClass c).getABaseType*()
  fromType instanceof DelegateType and systemDelegateBaseType(toType)
}

private predicate convRefTypeRefType(RefType fromType, RefType toType) {

@@ -7,7 +7,7 @@ QL packs are used to organize the files used in CodeQL analysis. They
contain queries, library files, query suites, and important metadata.

The `CodeQL repository <https://github.com/github/codeql>`__ contains QL packs for
C/C++, C#, Java, JavaScript, and Python. The `CodeQL for Go
C/C++, C#, Java, JavaScript, Python, and Ruby. The `CodeQL for Go
<https://github.com/github/codeql-go/>`__ repository contains a QL pack for Go
analysis. You can also make custom QL packs to contain your own queries and
libraries.

@@ -88,15 +88,15 @@ Creating databases for non-compiled languages
---------------------------------------------

The CodeQL CLI includes extractors to create databases for non-compiled
languages---specifically, JavaScript (and TypeScript) and Python. These
extractors are automatically invoked when you specify JavaScript or Python as
languages---specifically, JavaScript (and TypeScript), Python, and Ruby. These
extractors are automatically invoked when you specify JavaScript, Python, or Ruby as
the ``--language`` option when executing ``database create``. When creating
databases for these languages you must ensure that all additional dependencies
are available.

.. pull-quote:: Important

   When you run ``database create`` for JavaScript, TypeScript, and Python, you should not
   When you run ``database create`` for JavaScript, TypeScript, Python, and Ruby, you should not
   specify a ``--command`` option. Otherwise this overrides the normal
   extractor invocation, which will create an empty database. If you create
   databases for multiple languages and one of them is a compiled language,
@@ -116,6 +116,8 @@ Here, we have specified a ``--source-root`` path, which is the location where
database creation is executed, but is not necessarily the checkout root of the
codebase.

By default, files in ``node_modules`` and ``bower_components`` directories are not extracted.

Python
~~~~~~

@@ -127,14 +129,25 @@ When creating databases for Python you must ensure:
  packages that the codebase depends on.
- You have installed the `virtualenv <https://pypi.org/project/virtualenv/>`__ pip module.

In the command line you must specify ``--language=python``. For example
In the command line you must specify ``--language=python``. For example::

::

   codeql database create --language=python <output-folder>/python-database

executes the ``database create`` subcommand from the code's checkout root,
This executes the ``database create`` subcommand from the code's checkout root,
generating a new Python database at ``<output-folder>/python-database``.

Ruby
~~~~

Creating databases for Ruby requires no additional dependencies.
In the command line you must specify ``--language=ruby``. For example::

   codeql database create --language=ruby --source-root <folder-to-extract> <output-folder>/ruby-database

Here, we have specified a ``--source-root`` path, which is the location where
database creation is executed, but is not necessarily the checkout root of the
codebase.

Creating databases for compiled languages
-----------------------------------------

@@ -100,7 +100,7 @@ further options on the command line.

The `CodeQL repository <https://github.com/github/codeql>`__ contains
the queries and libraries required for CodeQL analysis of C/C++, C#, Java,
JavaScript/TypeScript, and Python.
JavaScript/TypeScript, Python, and Ruby.
Clone a copy of this repository into ``codeql-home``.

By default, the root of the cloned repository will be called ``codeql``.

@@ -78,7 +78,7 @@ Using the starter workspace
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The starter workspace is a Git repository. It contains:

* The `repository of CodeQL libraries and queries <https://github.com/github/codeql>`__ for C/C++, C#, Java, JavaScript, and Python. This is included as a submodule, so it can be updated without affecting your custom queries.
* The `repository of CodeQL libraries and queries <https://github.com/github/codeql>`__ for C/C++, C#, Java, JavaScript, Python, and Ruby. This is included as a submodule, so it can be updated without affecting your custom queries.
* The `repository of CodeQL libraries and queries <https://github.com/github/codeql-go>`__ for Go. This is also included as a submodule.
* A series of folders named ``codeql-custom-queries-<language>``. These are ready for you to start developing your own custom queries for each language, using the standard libraries. There are some example queries to get you started.

@@ -14,3 +14,5 @@ Experiment and learn how to write effective and efficient queries for CodeQL dat
- :doc:`Basic query for Ruby code <basic-query-for-ruby-code>`: Learn to write and run a simple CodeQL query using LGTM.

- :doc:`CodeQL library for Ruby <codeql-library-for-ruby>`: When you're analyzing a Ruby program, you can make use of the large collection of classes in the CodeQL library for Ruby.

.. include:: ../reusables/ruby-beta-note.rst

@@ -1333,7 +1333,7 @@ The values of a set literal expression are all the values of all the contained e

Set literals are supported from release 2.1.0 of the CodeQL CLI, and release 1.24 of LGTM Enterprise.

Since release 2.6.3 of the CodeQL CLI, and release 1.28 of LGTM Enterprise, a trailing comma is allowed in a set literal.
Since release 2.7.0 of the CodeQL CLI, and release 1.28 of LGTM Enterprise, a trailing comma is allowed in a set literal.

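For example, a small illustrative predicate (not taken from the original page) using a set literal with a trailing comma:

   int getAnAllowedPort() {
     result = [80, 443, 8080,]
   }
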
Disambiguation of expressions
-----------------------------

@@ -34,3 +34,5 @@ Note that the CWE coverage includes both "`supported queries <https://github.com
   javascript-cwe
   python-cwe
   ruby-cwe

.. include:: ../reusables/ruby-beta-note.rst

@@ -11,7 +11,6 @@ View the query help for the queries included in the ``code-scanning``, ``securit
- :doc:`CodeQL query help for Python <python>`
- :doc:`CodeQL query help for Ruby <ruby>`

.. pull-quote:: Information

   Each query help article includes:

@@ -24,6 +23,8 @@ View the query help for the queries included in the ``code-scanning``, ``securit

For a full list of the CWEs covered by these queries, see ":doc:`CodeQL CWE coverage <codeql-cwe-coverage>`."

.. include:: ../reusables/ruby-beta-note.rst

.. toctree::
   :hidden:
   :titlesonly:

@@ -15,4 +15,6 @@
   * - JavaScript/TypeScript
     - ``javascript``
   * - Python
     - ``python``
     - ``python``
   * - Ruby
     - ``ruby``
docs/codeql/reusables/ruby-beta-note.rst (new file, 4 lines)
@@ -0,0 +1,4 @@
.. pull-quote:: Note

   CodeQL analysis for Ruby is currently in beta. During the beta, analysis of Ruby code,
   and the accompanying documentation, will not be as comprehensive as for other languages.

@@ -22,7 +22,8 @@
   Eclipse compiler for Java (ECJ) [5]_",``.java``
   JavaScript,ECMAScript 2021 or lower,Not applicable,"``.js``, ``.jsx``, ``.mjs``, ``.es``, ``.es6``, ``.htm``, ``.html``, ``.xhm``, ``.xhtml``, ``.vue``, ``.json``, ``.yaml``, ``.yml``, ``.raml``, ``.xml`` [6]_"
   Python,"2.7, 3.5, 3.6, 3.7, 3.8, 3.9",Not applicable,``.py``
   TypeScript [7]_,"2.6-4.4",Standard TypeScript compiler,"``.ts``, ``.tsx``"
   Ruby [7]_,"up to 3.0.2",Not applicable,"``.rb``, ``.erb``, ``.gemspec``, ``Gemfile``"
   TypeScript [8]_,"2.6-4.4",Standard TypeScript compiler,"``.ts``, ``.tsx``"

.. container:: footnote-group

@@ -32,4 +33,5 @@

.. [4] Builds that execute on Java 7 to 16 can be analyzed. The analysis understands Java 16 standard language features.
.. [5] ECJ is supported when the build invokes it via the Maven Compiler plugin or the Takari Lifecycle plugin.
.. [6] JSX and Flow code, YAML, JSON, HTML, and XML files may also be analyzed with JavaScript files.
.. [7] TypeScript analysis is performed by running the JavaScript extractor with TypeScript enabled. This is the default for LGTM.
.. [7] Requires glibc 2.17.
.. [8] TypeScript analysis is performed by running the JavaScript extractor with TypeScript enabled. This is the default for LGTM.

@@ -116,7 +116,7 @@ Declaring sources and sinks

You must provide information about the ``source`` and ``sink`` in your path query. These are objects that correspond to the nodes of the paths that you are exploring.
The name and the type of the ``source`` and the ``sink`` must be declared in the ``from`` statement of the query, and the types must be compatible with the nodes of the graph computed by the ``edges`` predicate.

If you are querying C/C++, C#, Java, or JavaScript code (and you have used ``import DataFlow::PathGraph`` in your query), the definitions of the ``source`` and ``sink`` are accessed via the ``Configuration`` class in the data flow library. You should declare all three of these objects in the ``from`` statement.
If you are querying C/C++, C#, Java, JavaScript, Python, or Ruby code (and you have used ``import DataFlow::PathGraph`` in your query), the definitions of the ``source`` and ``sink`` are accessed via the ``Configuration`` class in the data flow library. You should declare all three of these objects in the ``from`` statement.
For example:

.. code-block:: ql

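   // A representative sketch of the declaration (the page's own snippet lies
   // outside this hunk); the alert message string is illustrative.
   from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
   where config.hasFlowPath(source, sink)
   select sink.getNode(), source, sink, "Data flows from a user-controlled source to a sink."
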
@@ -104,7 +104,9 @@ private class ContainerFlowSummaries extends SummaryModelCsv {
        "java.util;Map$Entry;true;setValue;;;Argument[0];MapValue of Argument[-1];value",
        "java.lang;Iterable;true;iterator;();;Element of Argument[-1];Element of ReturnValue;value",
        "java.lang;Iterable;true;spliterator;();;Element of Argument[-1];Element of ReturnValue;value",
        "java.lang;Iterable;true;forEach;(Consumer);;Element of Argument[-1];Parameter[0] of Argument[0];value",
        "java.util;Iterator;true;next;;;Element of Argument[-1];ReturnValue;value",
        "java.util;Iterator;true;forEachRemaining;(Consumer);;Element of Argument[-1];Parameter[0] of Argument[0];value",
        "java.util;ListIterator;true;previous;;;Element of Argument[-1];ReturnValue;value",
        "java.util;ListIterator;true;add;(Object);;Argument[0];Element of Argument[-1];value",
        "java.util;ListIterator;true;set;(Object);;Argument[0];Element of Argument[-1];value",
@@ -135,6 +137,8 @@ private class ContainerFlowSummaries extends SummaryModelCsv {
        "java.util;Map;true;merge;(Object,Object,BiFunction);;Argument[1];MapValue of Argument[-1];value",
        "java.util;Map;true;putAll;(Map);;MapKey of Argument[0];MapKey of Argument[-1];value",
        "java.util;Map;true;putAll;(Map);;MapValue of Argument[0];MapValue of Argument[-1];value",
        "java.util;Map;true;forEach;(BiConsumer);;MapKey of Argument[-1];Parameter[0] of Argument[0];value",
        "java.util;Map;true;forEach;(BiConsumer);;MapValue of Argument[-1];Parameter[1] of Argument[0];value",
        "java.util;Collection;true;parallelStream;();;Element of Argument[-1];Element of ReturnValue;value",
        "java.util;Collection;true;stream;();;Element of Argument[-1];Element of ReturnValue;value",
        "java.util;Collection;true;toArray;;;Element of Argument[-1];ArrayElement of ReturnValue;value",

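Each row above declares one flow summary in the CSV format used by the Java models-as-data support: namespace; type; subtypes; name; signature; ext; input; output; kind. A hedged sketch of how such rows are contributed, with a hypothetical class name:

    import java
    import semmle.code.java.dataflow.ExternalFlow

    // Each string returned by `row` declares one summary, e.g. "an element of the
    // Iterable flows into parameter 0 of the Consumer passed to forEach".
    private class ExampleContainerSummaries extends SummaryModelCsv {
      override predicate row(string r) {
        r = "java.lang;Iterable;true;forEach;(Consumer);;Element of Argument[-1];Parameter[0] of Argument[0];value"
      }
    }
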
@@ -25,5 +25,57 @@ public class Test {
    Iterator<String> it = m.values().iterator();
    String x5 = it.next();
    sink(x5); // Flow

    it.forEachRemaining(x6 -> {
      sink(x6); // Flow
    });

    m.forEach((x7_k, x8_v) -> {
      sink(x7_k); // No flow
      sink(x8_v); // Flow
    });

    m.entrySet().forEach(entry -> {
      String x9 = entry.getKey();
      String x10 = entry.getValue();
      sink(x9); // No flow
      sink(x10); // Flow
    });
  }

  public void run2() {
    HashMap<String, String> m = new HashMap<>();

    m.put(tainted, tainted);

    m.forEach((x11_k, x12_v) -> {
      sink(x11_k); // Flow
      sink(x12_v); // Flow
    });

    m.entrySet().forEach(entry -> {
      String x13 = entry.getKey();
      String x14 = entry.getValue();
      sink(x13); // Flow
      sink(x14); // Flow
    });
  }

  public void run3() {
    Set<String> s = new HashSet<>();
    String x15 = s.iterator().next();
    sink(x15); // No flow

    s.forEach(x16 -> {
      sink(x16); // No flow
    });

    s.add(tainted);
    String x17 = s.iterator().next();
    sink(x17); // Flow

    s.forEach(x18 -> {
      sink(x18); // Flow
    });
  }
}

@@ -2,3 +2,12 @@
| Test.java:13:18:13:24 | tainted | Test.java:18:10:18:11 | x3 |
| Test.java:13:18:13:24 | tainted | Test.java:22:12:22:13 | x4 |
| Test.java:13:18:13:24 | tainted | Test.java:27:10:27:11 | x5 |
| Test.java:13:18:13:24 | tainted | Test.java:30:12:30:13 | x6 |
| Test.java:13:18:13:24 | tainted | Test.java:35:12:35:15 | x8_v |
| Test.java:13:18:13:24 | tainted | Test.java:42:12:42:14 | x10 |
| Test.java:49:11:49:17 | tainted | Test.java:52:12:52:16 | x11_k |
| Test.java:49:11:49:17 | tainted | Test.java:59:12:59:14 | x13 |
| Test.java:49:20:49:26 | tainted | Test.java:53:12:53:16 | x12_v |
| Test.java:49:20:49:26 | tainted | Test.java:60:12:60:14 | x14 |
| Test.java:73:11:73:17 | tainted | Test.java:75:10:75:12 | x17 |
| Test.java:73:11:73:17 | tainted | Test.java:78:12:78:14 | x18 |

@@ -0,0 +1,6 @@
# [Internal only] Adaptive Threat Modeling for JavaScript

This directory contains CodeQL libraries and queries that power adaptive threat modeling for JavaScript.
All APIs are experimental and may change in the future.

These queries can only be run by internal users; for external users they will return no results.

@@ -0,0 +1,112 @@
|
||||
/*
|
||||
* For internal use only.
|
||||
*
|
||||
* Configures boosting for adaptive threat modeling (ATM).
|
||||
*/
|
||||
|
||||
private import javascript as raw
|
||||
import EndpointTypes
|
||||
|
||||
/**
|
||||
* EXPERIMENTAL. This API may change in the future.
|
||||
*
|
||||
* A configuration class for defining known endpoints and endpoint filters for adaptive threat
|
||||
* modeling (ATM). Each boosted query must define its own extension of this abstract class.
|
||||
*
|
||||
* A configuration defines a set of known sources (`isKnownSource`) and sinks (`isKnownSink`).
|
||||
* It must also define a sink endpoint filter (`isEffectiveSink`) that filters candidate sinks
|
||||
* predicted by the machine learning model to a set of effective sinks.
|
||||
*
|
||||
* To get started with ATM, you can copy-paste an implementation of the relevant predicates from a
|
||||
* `DataFlow::Configuration` or `TaintTracking::Configuration` class for a standard security query.
|
||||
* For example, for SQL injection you can start by defining the `isKnownSource` and `isKnownSink`
|
||||
* predicates in the ATM configuration by copying and pasting the implementations of `isSource` and
|
||||
* `isSink` from `SqlInjection::Configuration`.
|
||||
*
|
||||
* Note that if the security query configuration defines additional edges beyond the standard data
|
||||
* flow edges, such as `NosqlInjection::Configuration`, you may need to replace the definition of
|
||||
* `isAdditionalFlowStep` with a more generalised definition of additional edges. See
|
||||
* `NosqlInjectionATM.qll` for an example of doing this.
|
||||
*/
|
||||
abstract class ATMConfig extends string {
|
||||
bindingset[this]
|
||||
ATMConfig() { any() }
|
||||
|
||||
/**
|
||||
* EXPERIMENTAL. This API may change in the future.
|
||||
*
|
||||
* Holds if `source` is a known source of flow.
|
||||
*/
|
||||
predicate isKnownSource(raw::DataFlow::Node source) { none() }
|
||||
|
||||
/**
|
||||
* EXPERIMENTAL. This API may change in the future.
|
||||
*
|
||||
* Holds if `sink` is a known sink of flow.
|
||||
*/
|
||||
predicate isKnownSink(raw::DataFlow::Node sink) { none() }
|
||||
|
||||
/**
|
||||
* EXPERIMENTAL. This API may change in the future.
|
||||
*
|
||||
* Holds if the candidate source `candidateSource` predicted by the machine learning model should be
|
||||
* an effective source, i.e. one considered as a possible source of flow in the boosted query.
|
||||
*/
|
||||
predicate isEffectiveSource(raw::DataFlow::Node candidateSource) { none() }
|
||||
|
||||
/**
|
||||
* EXPERIMENTAL. This API may change in the future.
|
||||
*
|
||||
* Holds if the candidate sink `candidateSink` predicted by the machine learning model should be
|
||||
* an effective sink, i.e. one considered as a possible sink of flow in the boosted query.
|
||||
*/
|
||||
predicate isEffectiveSink(raw::DataFlow::Node candidateSink) { none() }
|
||||
|
||||
/**
|
||||
* EXPERIMENTAL. This API may change in the future.
|
||||
*
|
||||
* Holds if the candidate sink `candidateSink` predicted by the machine learning model should be
|
||||
* an effective sink that overrides the score provided by the machine learning model with the
|
||||
* score `score` for reason `why`. The effective sinks identified by this predicate MUST be a
|
||||
* subset of those identified by the `isEffectiveSink` predicate.
|
||||
*
|
||||
* For example, in the ATM external API query, we use this method to ensure the ATM external API
|
||||
* query produces the same results as the standard external API query, but assigns flows
|
||||
* involving sinks that are filtered out by the endpoint filters a score of 0.
|
||||
*
|
||||
* This predicate can be phased out once we no longer need to rely on predicates like
|
||||
* `paddedScore` in the ATM CodeQL libraries to add scores to alert messages in a way that works
|
||||
* with lexical sort orders.
|
||||
*/
|
||||
predicate isEffectiveSinkWithOverridingScore(
|
||||
raw::DataFlow::Node candidateSink, float score, string why
|
||||
) {
|
||||
none()
|
||||
}
|
||||
|
||||
/**
|
||||
* EXPERIMENTAL. This API may change in the future.
|
||||
*
|
||||
* Get an endpoint type for the sources of this query. A query may have multiple applicable
|
||||
* endpoint types for its sources.
|
||||
*/
|
||||
EndpointType getASourceEndpointType() { none() }
|
||||
|
||||
/**
|
||||
* EXPERIMENTAL. This API may change in the future.
|
||||
*
|
||||
* Get an endpoint type for the sinks of this query. A query may have multiple applicable
|
||||
* endpoint types for its sinks.
|
||||
*/
|
||||
EndpointType getASinkEndpointType() { none() }
|
||||
|
||||
/**
|
||||
* EXPERIMENTAL. This API may change in the future.
|
||||
*
|
||||
* Specifies the default cut-off value that controls how many alerts are produced.
|
||||
* The cut-off value must be in the range [0,1].
|
||||
* A cut-off value of 0 only produces alerts that are likely true-positives.
|
||||
* A cut-off value of 1 produces all alerts including those that are likely false-positives.
|
||||
*/
|
||||
float getScoreCutoff() { result = 0.0 }
|
||||
}
|
||||
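As a concrete illustration of the copy-paste approach described in the class documentation above, a boosted SQL injection query might extend ATMConfig roughly as follows. This is a hedged sketch, not part of this commit: the class name, the import paths, and the reuse of the standard SqlInjection customizations are assumptions.

    // Assuming this lives alongside ATMConfig.qll, so that `ATMConfig`,
    // `DataFlow`, and the SqlInjection customizations are importable.
    private import javascript
    private import ATMConfig
    private import semmle.javascript.security.dataflow.SqlInjectionCustomizations

    class SqlInjectionAtmConfig extends ATMConfig {
      SqlInjectionAtmConfig() { this = "SqlInjectionAtmConfig" }

      // Known endpoints are copied from the standard query's isSource/isSink.
      override predicate isKnownSource(DataFlow::Node source) { source instanceof SqlInjection::Source }

      override predicate isKnownSink(DataFlow::Node sink) { sink instanceof SqlInjection::Sink }
    }
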
@@ -0,0 +1,125 @@
|
||||
/*
|
||||
* For internal use only.
|
||||
*
|
||||
* Provides information about the results of boosted queries for use in adaptive threat modeling (ATM).
|
||||
*/
|
||||
|
||||
private import javascript as raw
|
||||
private import raw::DataFlow as DataFlow
|
||||
import ATMConfig
|
||||
private import BaseScoring
|
||||
private import EndpointScoring as EndpointScoring
|
||||
|
||||
module ATM {
|
||||
/**
|
||||
* EXPERIMENTAL. This API may change in the future.
|
||||
*
|
||||
* This module contains informational predicates about the results returned by adaptive threat
|
||||
* modeling (ATM).
|
||||
*/
|
||||
module ResultsInfo {
|
||||
/**
|
||||
* Indicates whether the flow from source to sink represents a result with
|
||||
* sufficiently high likelihood of being a true-positive.
|
||||
*/
|
||||
pragma[inline]
|
||||
private predicate shouldResultBeIncluded(DataFlow::Node source, DataFlow::Node sink) {
|
||||
any(ScoringResults results).shouldResultBeIncluded(source, sink)
|
||||
}
|
||||
|
||||
/**
|
||||
* EXPERIMENTAL. This API may change in the future.
|
||||
*
|
||||
* Returns the score for the flow between the source `source` and the `sink` sink in the
|
||||
* boosted query.
|
||||
*/
|
||||
pragma[inline]
|
||||
float getScoreForFlow(DataFlow::Node source, DataFlow::Node sink) {
|
||||
any(DataFlow::Configuration cfg).hasFlow(source, sink) and
|
||||
shouldResultBeIncluded(source, sink) and
|
||||
result = unique(float s | s = any(ScoringResults results).getScoreForFlow(source, sink))
|
||||
}
|
||||
|
||||
/**
|
||||
* Pad a score returned from `getKnownScoreForFlow` to a particular length by adding a decimal
|
||||
* point if one does not already exist, and "0"s after that decimal point.
|
||||
*
|
||||
* Note that this predicate must itself define an upper bound on `length`, so that it has a
|
||||
* finite number of results. Currently this is defined as 12.
|
||||
*/
|
||||
private string paddedScore(float score, int length) {
|
||||
// In this definition, we must restrict the values that `length` and `score` can take on so
|
||||
// that the predicate has a finite number of results.
|
||||
(score = getScoreForFlow(_, _) or score = 0) and
|
||||
length = result.length() and
|
||||
(
|
||||
// We need to make sure the padded score contains a "." so lexically sorting the padded
|
||||
// scores is equivalent to numerically sorting the scores.
|
||||
score.toString().charAt(_) = "." and
|
||||
result = score.toString()
|
||||
or
|
||||
not score.toString().charAt(_) = "." and
|
||||
result = score.toString() + "."
|
||||
)
|
||||
or
|
||||
result = paddedScore(score, length - 1) + "0" and
|
||||
length <= 12
|
||||
}
|
||||
|
||||
/**
|
||||
* EXPERIMENTAL. This API may change in the future.
|
||||
*
|
||||
* Return a string representing the score of the flow between `source` and `sink` in the
|
||||
* boosted query.
|
||||
*
|
||||
* The returned string is a fixed length, such that lexically sorting the strings returned by
|
||||
* this predicate gives the same sort order as numerically sorting the scores of the flows.
|
||||
*/
|
||||
pragma[inline]
|
||||
string getScoreStringForFlow(DataFlow::Node source, DataFlow::Node sink) {
|
||||
exists(float score |
|
||||
score = getScoreForFlow(source, sink) and
|
||||
(
|
||||
// A length of 12 is equivalent to 10 decimal places.
|
||||
score.toString().length() >= 12 and
|
||||
result = score.toString().substring(0, 12)
|
||||
or
|
||||
score.toString().length() < 12 and
|
||||
result = paddedScore(score, 12)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* EXPERIMENTAL. This API may change in the future.
|
||||
*
|
||||
* Indicates whether the flow from source to sink is likely to be reported by the base security
|
||||
* query.
|
||||
*
|
||||
* Currently this is a heuristic: it ignores potential differences in the definitions of
|
||||
* additional flow steps.
|
||||
*/
|
||||
pragma[inline]
|
||||
predicate isFlowLikelyInBaseQuery(DataFlow::Node source, DataFlow::Node sink) {
|
||||
getCfg().isKnownSource(source) and getCfg().isKnownSink(sink)
|
||||
}
|
||||
|
||||
/**
|
||||
* EXPERIMENTAL. This API may change in the future.
|
||||
*
|
||||
* Get additional information about why ATM included the flow from source to sink as an alert.
|
||||
*/
|
||||
pragma[inline]
|
||||
string getAdditionalAlertInfo(DataFlow::Node source, DataFlow::Node sink) {
|
||||
exists(string sourceOrigins, string sinkOrigins |
|
||||
sourceOrigins = concat(any(ScoringResults results).getASourceOrigin(source), ", ") and
|
||||
sinkOrigins = concat(any(ScoringResults results).getASinkOrigin(sink), ", ") and
|
||||
result =
|
||||
"[Source origins: " +
|
||||
any(string s | if sourceOrigins != "" then s = sourceOrigins else s = "unknown") +
|
||||
"; sink origins: " +
|
||||
any(string s | if sinkOrigins != "" then s = sinkOrigins else s = "unknown") + "]"
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,121 @@
|
||||
/*
|
||||
* For internal use only.
|
||||
*
|
||||
* Provides shared scoring functionality for use in adaptive threat modeling (ATM).
|
||||
*/
|
||||
|
||||
private import javascript
|
||||
private import ATMConfig
|
||||
|
||||
external predicate adaptiveThreatModelingModels(
|
||||
string modelChecksum, string modelLanguage, string modelName, string modelType
|
||||
);
|
||||
|
||||
/** Get the ATM configuration. */
|
||||
ATMConfig getCfg() { any() }
|
||||
|
||||
/**
|
||||
* This module provides functionality that takes an endpoint and provides an entity that encloses that
|
||||
* endpoint and is suitable for similarity analysis.
|
||||
*/
|
||||
module EndpointToEntity {
|
||||
private import CodeToFeatures
|
||||
|
||||
/**
|
||||
* Get an entity enclosing the endpoint that is suitable for similarity analysis. In general,
|
||||
* this may associate multiple entities to a single endpoint.
|
||||
*/
|
||||
DatabaseFeatures::Entity getAnEntityForEndpoint(DataFlow::Node endpoint) {
|
||||
DatabaseFeatures::entities(result, _, _, _, _, _, _, _, _) and
|
||||
result.getDefinedFunction() = endpoint.getContainer().getEnclosingContainer*()
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This module provides functionality that takes an entity and provides effective endpoints within
|
||||
* that entity.
|
||||
*
|
||||
* We use the following terminology to describe endpoints:
|
||||
*
|
||||
* - The *candidate* endpoints are the set of data flow nodes that should be passed to the
|
||||
* appropriate endpoint filter to produce the set of effective endpoints.
|
||||
* When we have a model that beats the performance of the baseline, we will likely define the
|
||||
* candidate endpoints based on the most confident predictions of the model.
|
||||
* - An *effective* endpoint is a candidate endpoint which passes through the endpoint filter.
|
||||
* In other words, it is a candidate endpoint for which the `isEffectiveSink` (or
|
||||
* `isEffectiveSource`) predicate defined in the `ATMConfig` instance in scope holds.
|
||||
*/
|
||||
module EntityToEffectiveEndpoint {
|
||||
private import CodeToFeatures
|
||||
|
||||
/**
|
||||
* Returns endpoint candidates within the specified entities.
|
||||
*
|
||||
* The baseline implementation of this is that a candidate endpoint is any data flow node that is
|
||||
* enclosed within the specified entity.
|
||||
*/
|
||||
private DataFlow::Node getABaselineEndpointCandidate(DatabaseFeatures::Entity entity) {
|
||||
result.getContainer().getEnclosingContainer*() = entity.getDefinedFunction()
|
||||
}
|
||||
|
||||
/**
|
||||
* Get an effective source enclosed by the specified entity.
|
||||
*
|
||||
* N.B. This is _not_ an inverse of `EndpointToEntity::getAnEntityForEndpoint`: the effective
|
||||
* source may occur in a function defined within the specified entity.
|
||||
*/
|
||||
DataFlow::Node getAnEffectiveSource(DatabaseFeatures::Entity entity) {
|
||||
result = getABaselineEndpointCandidate(entity) and
|
||||
getCfg().isEffectiveSource(result)
|
||||
}
|
||||
|
||||
/**
|
||||
* Get an effective sink enclosed by the specified entity.
|
||||
*
|
||||
* N.B. This is _not_ an inverse of `EndpointToEntity::getAnEntityForEndpoint`: the effective
|
||||
* sink may occur in a function defined within the specified entity.
|
||||
*/
|
||||
DataFlow::Node getAnEffectiveSink(DatabaseFeatures::Entity entity) {
|
||||
result = getABaselineEndpointCandidate(entity) and
|
||||
getCfg().isEffectiveSink(result)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Scoring information produced by a scoring model.
|
||||
*
|
||||
* Scoring models include embedding models and endpoint scoring models.
|
||||
*/
|
||||
abstract class ScoringResults extends string {
|
||||
bindingset[this]
|
||||
ScoringResults() { any() }
|
||||
|
||||
/**
|
||||
* Get ATM's confidence that a path between `source` and `sink` represents a security
|
||||
* vulnerability. This will be a number between 0.0 and 1.0.
|
||||
*/
|
||||
abstract float getScoreForFlow(DataFlow::Node source, DataFlow::Node sink);
|
||||
|
||||
/**
|
||||
* Get a string representing why ATM included the given source in the dataflow analysis.
|
||||
*
|
||||
* In general, there may be multiple reasons why ATM included the given source, in which case
|
||||
* this predicate should have multiple results.
|
||||
*/
|
||||
abstract string getASourceOrigin(DataFlow::Node source);
|
||||
|
||||
/**
|
||||
* Get a string representing why ATM included the given sink in the dataflow analysis.
|
||||
*
|
||||
* In general, there may be multiple reasons why ATM included the given sink, in which case this
|
||||
* predicate should have multiple results.
|
||||
*/
|
||||
abstract string getASinkOrigin(DataFlow::Node sink);
|
||||
|
||||
/**
|
||||
* Indicates whether the flow from source to sink represents a result with
|
||||
* sufficiently high likelihood of being a true-positive.
|
||||
*/
|
||||
pragma[inline]
|
||||
abstract predicate shouldResultBeIncluded(DataFlow::Node source, DataFlow::Node sink);
|
||||
}
|
||||
@@ -0,0 +1,444 @@
|
||||
/*
|
||||
* For internal use only.
|
||||
*
|
||||
* Extracts data about the functions in the database for use in adaptive threat modeling (ATM).
|
||||
*/
|
||||
|
||||
module Raw {
|
||||
private import javascript as raw
|
||||
|
||||
class RawAstNode = raw::ASTNode;
|
||||
|
||||
class Entity = raw::Function;
|
||||
|
||||
class Location = raw::Location;
|
||||
|
||||
/**
|
||||
* Exposed as a tool for defining anchors for semantic search.
|
||||
*/
|
||||
class UnderlyingFunction = raw::Function;
|
||||
|
||||
/**
|
||||
* Determines whether an entity should be omitted from ATM.
|
||||
*/
|
||||
predicate isEntityIgnored(Entity entity) {
|
||||
// Ignore entities which don't have definitions, for example those in TypeScript
|
||||
// declaration files.
|
||||
not exists(entity.getBody())
|
||||
or
|
||||
// Ignore entities with an empty body, for example the JavaScript function () => {}.
|
||||
entity.getNumBodyStmt() = 0 and not exists(entity.getAReturnedExpr())
|
||||
}
|
||||
|
||||
newtype WrappedAstNode = TAstNode(RawAstNode rawNode)
|
||||
|
||||
/**
|
||||
* This class represents nodes in the AST.
|
||||
*/
|
||||
class AstNode extends TAstNode {
|
||||
RawAstNode rawNode;
|
||||
|
||||
AstNode() { this = TAstNode(rawNode) }
|
||||
|
||||
AstNode getAChildNode() { result = TAstNode(rawNode.getAChild()) }
|
||||
|
||||
AstNode getParentNode() { result = TAstNode(rawNode.getParent()) }
|
||||
|
||||
/**
|
||||
* Holds if the AST node has `result` as its `index`th attribute.
|
||||
*
|
||||
* The index is not intended to mean anything, and is only here for disambiguation.
|
||||
* There are no guarantees about any particular index being used (or not being used).
|
||||
*/
|
||||
string astNodeAttribute(int index) {
|
||||
(
|
||||
// NB: Unary and binary operator expressions e.g. -a, a + b and compound
|
||||
// assignments e.g. a += b can be identified by the expression type.
|
||||
result = rawNode.(raw::Identifier).getName()
|
||||
or
|
||||
// Computed property accesses for which we can predetermine the property being accessed.
|
||||
// NB: May alias with operators e.g. could have '+' as a property name.
|
||||
result = rawNode.(raw::IndexExpr).getPropertyName()
|
||||
or
|
||||
// We use `getRawValue` to give us distinct representations for `0xa`, `0xA`, and `10`.
|
||||
result = rawNode.(raw::NumberLiteral).getRawValue()
|
||||
or
|
||||
// We use `getValue` rather than `getRawValue` so we assign `"a"` and `'a'` the same representation.
|
||||
not rawNode instanceof raw::NumberLiteral and
|
||||
result = rawNode.(raw::Literal).getValue()
|
||||
or
|
||||
result = rawNode.(raw::TemplateElement).getRawValue()
|
||||
) and
|
||||
index = 0
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a string indicating the "type" of the AST node.
|
||||
*/
|
||||
string astNodeType() {
|
||||
// The definition of this method should correspond with that of the `@ast_node` entry in the
|
||||
// dbscheme.
|
||||
result = "js_exprs." + any(int kind | exprs(rawNode, kind, _, _, _))
|
||||
or
|
||||
result = "js_properties." + any(int kind | properties(rawNode, _, _, kind, _))
|
||||
or
|
||||
result = "js_stmts." + any(int kind | stmts(rawNode, kind, _, _, _))
|
||||
or
|
||||
result = "js_toplevel" and rawNode instanceof raw::TopLevel
|
||||
or
|
||||
result = "js_typeexprs." + any(int kind | typeexprs(rawNode, kind, _, _, _))
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `result` is the `index`'th child of the AST node, for some arbitrary indexing.
|
||||
* A root of the AST should be its own child, with an arbitrary (though conventionally
|
||||
* 0) index.
|
||||
*
|
||||
* Notably, the order in which child nodes are visited is not required to be meaningful,
|
||||
* and no particular index is required to be meaningful. However, `(parent, index)`
|
||||
* should be a keyset.
|
||||
*/
|
||||
pragma[nomagic]
|
||||
AstNode astNodeChild(int index) {
|
||||
result =
|
||||
rank[index - 1](AstNode child, raw::Location l |
|
||||
child = this.getAChildNode() and l = child.getLocation()
|
||||
|
|
||||
child
|
||||
order by
|
||||
l.getStartLine(), l.getStartColumn(), l.getEndLine(), l.getEndColumn(),
|
||||
child.astNodeType()
|
||||
)
|
||||
or
|
||||
not exists(result.getParentNode()) and this = result and index = 0
|
||||
}
|
||||
|
||||
raw::Location getLocation() { result = rawNode.getLocation() }
|
||||
|
||||
string toString() { result = rawNode.toString() }
|
||||
|
||||
predicate isEntityNameNode(Entity entity) {
|
||||
exists(int index |
|
||||
TAstNode(entity) = getParentNode() and
|
||||
this = getParentNode().astNodeChild(index) and
|
||||
// An entity name node must be the first child of the entity.
|
||||
index = min(int otherIndex | exists(getParentNode().astNodeChild(otherIndex))) and
|
||||
entity.getName() = rawNode.(raw::VarDecl).getName()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `result` is the `index`'th child of the `parent` entity. Such
|
||||
* a node is a root of an AST associated with this entity.
|
||||
*/
|
||||
AstNode entityChild(AstNode parent, int index) {
|
||||
// In JavaScript, entities appear in the AST parent/child relationship.
|
||||
result = parent.astNodeChild(index)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `node` is contained in `entity`. Note that a single node may be contained
|
||||
* in multiple entities, if they are nested. An entity, in particular, should be
|
||||
* reported as contained within itself.
|
||||
*/
|
||||
predicate entityContains(Entity entity, AstNode node) {
|
||||
node.getParentNode*() = TAstNode(entity) and not node.isEntityNameNode(entity)
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the name of the entity.
|
||||
*
|
||||
* We attempt to assign unnamed entities approximate names if they are passed to a likely
|
||||
* external library function. If we can't assign them an approximate name, we give them the name
|
||||
* `""`, so that these entities are included in `AdaptiveThreatModeling.qll`.
|
||||
*
|
||||
* For entities which have multiple names, we choose the lexically smallest name.
|
||||
*/
|
||||
string getEntityName(Entity entity) {
|
||||
if exists(entity.getName())
|
||||
then
|
||||
// https://github.com/github/ml-ql-adaptive-threat-modeling/issues/244 discusses making use
|
||||
// of all the names during training.
|
||||
result = min(entity.getName())
|
||||
else
|
||||
if exists(getApproximateNameForEntity(entity))
|
||||
then result = getApproximateNameForEntity(entity)
|
||||
else result = ""
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if the call `call` has `entity` is its `argumentIndex`th argument.
|
||||
*/
|
||||
private predicate entityUsedAsArgumentToCall(
|
||||
Entity entity, raw::DataFlow::CallNode call, int argumentIndex
|
||||
) {
|
||||
raw::DataFlow::localFlowStep*(call.getArgument(argumentIndex), entity.flow())
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a generated name for the entity. This name is generated such that
|
||||
* entities with the same names have similar behaviour.
|
||||
*/
|
||||
private string getApproximateNameForEntity(Entity entity) {
|
||||
count(raw::DataFlow::CallNode call, int index | entityUsedAsArgumentToCall(entity, call, index)) =
|
||||
1 and
|
||||
exists(raw::DataFlow::CallNode call, int index, string basePart |
|
||||
entityUsedAsArgumentToCall(entity, call, index) and
|
||||
(
|
||||
if count(getReceiverName(call)) = 1
|
||||
then basePart = getReceiverName(call) + "."
|
||||
else basePart = ""
|
||||
) and
|
||||
result = basePart + call.getCalleeName() + "#functionalargument"
|
||||
)
|
||||
}
|
||||
|
||||
private string getReceiverName(raw::DataFlow::CallNode call) {
|
||||
result = call.getReceiver().asExpr().(raw::VarAccess).getName()
|
||||
}
|
||||
|
||||
/** Consistency checks: these predicates should each have no results */
|
||||
module Consistency {
|
||||
/** `getEntityName` should assign each entity a single name. */
|
||||
query predicate entityWithManyNames(Entity entity, string name) {
|
||||
name = getEntityName(entity) and
|
||||
count(getEntityName(entity)) > 1
|
||||
}
|
||||
|
||||
query predicate nodeWithNoType(AstNode node) { not exists(node.astNodeType()) }
|
||||
|
||||
query predicate nodeWithManyTypes(AstNode node, string type) {
|
||||
type = node.astNodeType() and
|
||||
count(node.astNodeType()) > 1
|
||||
}
|
||||
|
||||
query predicate nodeWithNoParent(AstNode node, string type) {
|
||||
not node = any(AstNode parent).astNodeChild(_) and
|
||||
type = node.astNodeType() and
|
||||
not exists(RawAstNode rawNode | node = TAstNode(rawNode) and rawNode instanceof raw::Module)
|
||||
}
|
||||
|
||||
query predicate duplicateChildIndex(AstNode parent, int index, AstNode child) {
|
||||
child = parent.astNodeChild(index) and
|
||||
count(parent.astNodeChild(index)) > 1
|
||||
}
|
||||
|
||||
query predicate duplicateAttributeIndex(AstNode node, int index) {
|
||||
exists(node.astNodeAttribute(index)) and
|
||||
count(node.astNodeAttribute(index)) > 1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
module Wrapped {
|
||||
/*
|
||||
* We require any node with attributes to be a leaf. Where a non-leaf node
|
||||
* has an attribute, we instead create a synthetic leaf node that has that
|
||||
* attribute.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Holds if the AST node `e` is a leaf node.
|
||||
*/
|
||||
private predicate isLeaf(Raw::AstNode e) { not exists(e.astNodeChild(_)) }
|
||||
|
||||
newtype WrappedEntity =
|
||||
TEntity(Raw::Entity entity) {
|
||||
exists(entity.getLocation().getFile().getRelativePath()) and
|
||||
Raw::entityContains(entity, _)
|
||||
}
|
||||
|
||||
/**
|
||||
* A type ranging over the kinds of entities for which we want to consider embeddings.
|
||||
*/
|
||||
class Entity extends WrappedEntity {
|
||||
Raw::Entity rawEntity;
|
||||
|
||||
Entity() { this = TEntity(rawEntity) and not Raw::isEntityIgnored(rawEntity) }
|
||||
|
||||
string getName() { result = Raw::getEntityName(rawEntity) }
|
||||
|
||||
AstNode getAstRoot(int index) {
|
||||
result = TAstNode(rawEntity, Raw::entityChild(Raw::TAstNode(rawEntity), index))
|
||||
}
|
||||
|
||||
string toString() { result = rawEntity.toString() }
|
||||
|
||||
Raw::Location getLocation() { result = rawEntity.getLocation() }
|
||||
|
||||
Raw::UnderlyingFunction getDefinedFunction() { result = rawEntity }
|
||||
}
|
||||
|
||||
newtype WrappedAstNode =
|
||||
TAstNode(Raw::Entity enclosingEntity, Raw::AstNode node) {
|
||||
Raw::entityContains(enclosingEntity, node)
|
||||
} or
|
||||
TSyntheticNode(
|
||||
Raw::Entity enclosingEntity, Raw::AstNode node, int syntheticChildIndex, int attrIndex
|
||||
) {
|
||||
Raw::entityContains(enclosingEntity, node) and
|
||||
exists(node.astNodeAttribute(attrIndex)) and
|
||||
not isLeaf(node) and
|
||||
if exists(node.astNodeChild(_))
|
||||
then
|
||||
syntheticChildIndex =
|
||||
attrIndex - min(int other | exists(node.astNodeAttribute(other))) +
|
||||
max(int other | exists(node.astNodeChild(other))) + 1
|
||||
else syntheticChildIndex = attrIndex
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private AstNode injectedChild(Raw::Entity enclosingEntity, Raw::AstNode parent, int index) {
|
||||
result = TAstNode(enclosingEntity, parent.astNodeChild(index)) or
|
||||
result = TSyntheticNode(enclosingEntity, parent, index, _)
|
||||
}
|
||||
|
||||
/**
|
||||
* A type ranging over AST nodes. Ultimately, only nodes contained in entities will
|
||||
* be considered.
|
||||
*/
|
||||
class AstNode extends WrappedAstNode {
|
||||
Raw::Entity enclosingEntity;
|
||||
Raw::AstNode rawNode;
|
||||
|
||||
AstNode() {
|
||||
(
|
||||
this = TAstNode(enclosingEntity, rawNode) or
|
||||
this = TSyntheticNode(enclosingEntity, rawNode, _, _)
|
||||
) and
|
||||
not Raw::isEntityIgnored(enclosingEntity)
|
||||
}
|
||||
|
||||
string getAttribute(int index) {
|
||||
result = rawNode.astNodeAttribute(index) and
|
||||
not exists(TSyntheticNode(enclosingEntity, rawNode, _, index))
|
||||
}
|
||||
|
||||
string getType() { result = rawNode.astNodeType() }
|
||||
|
||||
AstNode getChild(int index) { result = injectedChild(enclosingEntity, rawNode, index) }
|
||||
|
||||
string toString() { result = getType() }
|
||||
|
||||
Raw::Location getLocation() { result = rawNode.getLocation() }
|
||||
}
|
||||
|
||||
/**
|
||||
* A synthetic AST node, created to be a leaf for an otherwise non-leaf attribute.
|
||||
*/
|
||||
class SyntheticAstNode extends AstNode, TSyntheticNode {
|
||||
int childIndex;
|
||||
int attributeIndex;
|
||||
|
||||
SyntheticAstNode() {
|
||||
this = TSyntheticNode(enclosingEntity, rawNode, childIndex, attributeIndex)
|
||||
}
|
||||
|
||||
override string getAttribute(int index) {
|
||||
result = rawNode.astNodeAttribute(attributeIndex) and index = attributeIndex
|
||||
}
|
||||
|
||||
override string getType() {
|
||||
result = rawNode.astNodeType() + "::<synthetic " + childIndex + ">"
|
||||
}
|
||||
|
||||
override AstNode getChild(int index) { none() }
|
||||
}
|
||||
}
|
||||
|
||||
module DatabaseFeatures {
|
||||
/**
|
||||
* Exposed as a tool for defining anchors for semantic search.
|
||||
*/
|
||||
class UnderlyingFunction = Raw::UnderlyingFunction;
|
||||
|
||||
private class Location = Raw::Location;
|
||||
|
||||
private newtype TEntityOrAstNode =
|
||||
TEntity(Wrapped::Entity entity) or
|
||||
TAstNode(Wrapped::AstNode astNode)
|
||||
|
||||
class EntityOrAstNode extends TEntityOrAstNode {
|
||||
abstract string getType();
|
||||
|
||||
abstract string toString();
|
||||
|
||||
abstract Location getLocation();
|
||||
}
|
||||
|
||||
class Entity extends EntityOrAstNode, TEntity {
|
||||
Wrapped::Entity entity;
|
||||
|
||||
Entity() { this = TEntity(entity) }
|
||||
|
||||
string getName() { result = entity.getName() }
|
||||
|
||||
AstNode getAstRoot(int index) { result = TAstNode(entity.getAstRoot(index)) }
|
||||
|
||||
override string getType() { result = "javascript function" }
|
||||
|
||||
override string toString() { result = "Entity: " + getName() }
|
||||
|
||||
override Location getLocation() { result = entity.getLocation() }
|
||||
|
||||
UnderlyingFunction getDefinedFunction() { result = entity.getDefinedFunction() }
|
||||
}
|
||||
|
||||
class AstNode extends EntityOrAstNode, TAstNode {
|
||||
Wrapped::AstNode rawNode;
|
||||
|
||||
AstNode() { this = TAstNode(rawNode) }
|
||||
|
||||
AstNode getChild(int index) { result = TAstNode(rawNode.getChild(index)) }
|
||||
|
||||
string getAttribute(int index) { result = rawNode.getAttribute(index) }
|
||||
|
||||
override string getType() { result = rawNode.getType() }
|
||||
|
||||
override string toString() { result = this.getType() }
|
||||
|
||||
override Location getLocation() { result = rawNode.getLocation() }
|
||||
}
|
||||
|
||||
/** Consistency checks: these predicates should each have no results */
|
||||
module Consistency {
|
||||
query predicate nonLeafAttribute(AstNode node, int index, string attribute) {
|
||||
attribute = node.getAttribute(index) and
|
||||
exists(node.getChild(_))
|
||||
}
|
||||
}
|
||||
|
||||
query predicate entities(
|
||||
Entity entity, string entity_name, string entity_type, string path, int startLine,
|
||||
int startColumn, int endLine, int endColumn, string absolutePath
|
||||
) {
|
||||
entity_name = entity.getName() and
|
||||
entity_type = entity.getType() and
|
||||
exists(Location l | l = entity.getLocation() |
|
||||
path = l.getFile().getRelativePath() and
|
||||
absolutePath = l.getFile().getAbsolutePath() and
|
||||
l.hasLocationInfo(_, startLine, startColumn, endLine, endColumn)
|
||||
)
|
||||
}
|
||||
|
||||
query predicate astNodes(
|
||||
Entity enclosingEntity, EntityOrAstNode parent, int index, AstNode node, string node_type
|
||||
) {
|
||||
node = enclosingEntity.getAstRoot(index) and
|
||||
parent = enclosingEntity and
|
||||
node_type = node.getType()
|
||||
or
|
||||
astNodes(enclosingEntity, _, _, parent, _) and
|
||||
node = parent.(AstNode).getChild(index) and
|
||||
node_type = node.getType()
|
||||
}
|
||||
|
||||
query predicate nodeAttributes(AstNode node, string attr) {
|
||||
// Only get attributes of AST nodes we extract.
|
||||
// This excludes nodes in standard libraries since the standard library files
|
||||
// are located outside the source root.
|
||||
astNodes(_, _, _, node, _) and
|
||||
attr = node.getAttribute(_)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,208 @@
|
||||
/*
|
||||
* For internal use only.
|
||||
*
|
||||
* Provides predicates that expose the knowledge of models
|
||||
* in the core CodeQL JavaScript libraries.
|
||||
*/
|
||||
|
||||
private import javascript
|
||||
private import semmle.javascript.security.dataflow.XxeCustomizations
|
||||
private import semmle.javascript.security.dataflow.RemotePropertyInjectionCustomizations
|
||||
private import semmle.javascript.security.dataflow.TypeConfusionThroughParameterTamperingCustomizations
|
||||
private import semmle.javascript.security.dataflow.ZipSlipCustomizations
|
||||
private import semmle.javascript.security.dataflow.TaintedPathCustomizations
|
||||
private import semmle.javascript.security.dataflow.CleartextLoggingCustomizations
|
||||
private import semmle.javascript.security.dataflow.XpathInjectionCustomizations
|
||||
private import semmle.javascript.security.dataflow.Xss::Shared as Xss
|
||||
private import semmle.javascript.security.dataflow.StackTraceExposureCustomizations
|
||||
private import semmle.javascript.security.dataflow.ClientSideUrlRedirectCustomizations
|
||||
private import semmle.javascript.security.dataflow.CodeInjectionCustomizations
|
||||
private import semmle.javascript.security.dataflow.RequestForgeryCustomizations
|
||||
private import semmle.javascript.security.dataflow.CorsMisconfigurationForCredentialsCustomizations
|
||||
private import semmle.javascript.security.dataflow.ShellCommandInjectionFromEnvironmentCustomizations
|
||||
private import semmle.javascript.security.dataflow.DifferentKindsComparisonBypassCustomizations
|
||||
private import semmle.javascript.security.dataflow.CommandInjectionCustomizations
|
||||
private import semmle.javascript.security.dataflow.PrototypePollutionCustomizations
|
||||
private import semmle.javascript.security.dataflow.UnvalidatedDynamicMethodCallCustomizations
|
||||
private import semmle.javascript.security.dataflow.TaintedFormatStringCustomizations
|
||||
private import semmle.javascript.security.dataflow.NosqlInjectionCustomizations
|
||||
private import semmle.javascript.security.dataflow.PostMessageStarCustomizations
|
||||
private import semmle.javascript.security.dataflow.RegExpInjectionCustomizations
|
||||
private import semmle.javascript.security.dataflow.SqlInjectionCustomizations
|
||||
private import semmle.javascript.security.dataflow.InsecureRandomnessCustomizations
|
||||
private import semmle.javascript.security.dataflow.XmlBombCustomizations
|
||||
private import semmle.javascript.security.dataflow.InsufficientPasswordHashCustomizations
|
||||
private import semmle.javascript.security.dataflow.HardcodedCredentialsCustomizations
|
||||
private import semmle.javascript.security.dataflow.FileAccessToHttpCustomizations
|
||||
private import semmle.javascript.security.dataflow.UnsafeDynamicMethodAccessCustomizations
|
||||
private import semmle.javascript.security.dataflow.UnsafeDeserializationCustomizations
|
||||
private import semmle.javascript.security.dataflow.HardcodedDataInterpretedAsCodeCustomizations
|
||||
private import semmle.javascript.security.dataflow.ServerSideUrlRedirectCustomizations
|
||||
private import semmle.javascript.security.dataflow.IndirectCommandInjectionCustomizations
|
||||
private import semmle.javascript.security.dataflow.ConditionalBypassCustomizations
|
||||
private import semmle.javascript.security.dataflow.HttpToFileAccessCustomizations
|
||||
private import semmle.javascript.security.dataflow.BrokenCryptoAlgorithmCustomizations
|
||||
private import semmle.javascript.security.dataflow.LoopBoundInjectionCustomizations
|
||||
private import semmle.javascript.security.dataflow.CleartextStorageCustomizations
|
||||
import FilteringReasons
|
||||
|
||||
/**
 * Holds if the node `n` is a known sink in a modeled library, or a sibling-argument of such a sink.
 */
predicate isArgumentToKnownLibrarySinkFunction(DataFlow::Node n) {
  exists(DataFlow::InvokeNode invk, DataFlow::Node known |
    invk.getAnArgument() = n and invk.getAnArgument() = known and isKnownLibrarySink(known)
  )
}
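// For illustration, a hypothetical snippet (not taken from the sources): if the first argument
// of `fs.writeFile` below is modeled as a tainted-path sink, then `data` is a sibling argument
// of that known sink and is therefore also covered by this predicate.
//
// ```js
// const fs = require('fs');
// fs.writeFile(userControlledPath, data, err => {});
// ```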
|
||||
/**
|
||||
* Holds if the node `n` is a known sink for the external API security query.
|
||||
*
|
||||
* This corresponds to known sinks from security queries whose sources include remote flow and
|
||||
* DOM-based sources.
|
||||
*/
|
||||
predicate isKnownExternalAPIQuerySink(DataFlow::Node n) {
|
||||
n instanceof Xxe::Sink or
|
||||
n instanceof TaintedPath::Sink or
|
||||
n instanceof XpathInjection::Sink or
|
||||
n instanceof Xss::Sink or
|
||||
n instanceof ClientSideUrlRedirect::Sink or
|
||||
n instanceof CodeInjection::Sink or
|
||||
n instanceof RequestForgery::Sink or
|
||||
n instanceof CorsMisconfigurationForCredentials::Sink or
|
||||
n instanceof CommandInjection::Sink or
|
||||
n instanceof PrototypePollution::Sink or
|
||||
n instanceof UnvalidatedDynamicMethodCall::Sink or
|
||||
n instanceof TaintedFormatString::Sink or
|
||||
n instanceof NosqlInjection::Sink or
|
||||
n instanceof PostMessageStar::Sink or
|
||||
n instanceof RegExpInjection::Sink or
|
||||
n instanceof SqlInjection::Sink or
|
||||
n instanceof XmlBomb::Sink or
|
||||
n instanceof ZipSlip::Sink or
|
||||
n instanceof UnsafeDeserialization::Sink or
|
||||
n instanceof ServerSideUrlRedirect::Sink or
|
||||
n instanceof CleartextStorage::Sink or
|
||||
n instanceof HttpToFileAccess::Sink
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if the node `n` is a known sink in a modeled library.
|
||||
*/
|
||||
predicate isKnownLibrarySink(DataFlow::Node n) {
|
||||
isKnownExternalAPIQuerySink(n) or
|
||||
n instanceof CleartextLogging::Sink or
|
||||
n instanceof StackTraceExposure::Sink or
|
||||
n instanceof ShellCommandInjectionFromEnvironment::Sink or
|
||||
n instanceof InsecureRandomness::Sink or
|
||||
n instanceof FileAccessToHttp::Sink or
|
||||
n instanceof IndirectCommandInjection::Sink
|
||||
}
|
||||
|
||||
/**
 * Holds if the node `n` is known as the predecessor in a modeled flow step.
 */
predicate isKnownStepSrc(DataFlow::Node n) {
  any(TaintTracking::AdditionalTaintStep s).step(n, _) or
  any(DataFlow::AdditionalFlowStep s).step(n, _) or
  any(DataFlow::AdditionalFlowStep s).step(n, _, _, _)
}
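// For illustration (hypothetical example): if the JavaScript model defines a flow or taint step
// from `raw` into the result of `JSON.parse(raw)`, then the node for `raw` is a step
// predecessor and is captured by this predicate.
//
// ```js
// const parsed = JSON.parse(raw);
// ```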
|
||||
/**
|
||||
* Holds if `n` is an argument to a function of a builtin object.
|
||||
*/
|
||||
private predicate isArgumentToBuiltinFunction(DataFlow::Node n, FilteringReason reason) {
|
||||
exists(DataFlow::SourceNode builtin, DataFlow::SourceNode receiver, DataFlow::InvokeNode invk |
|
||||
(
|
||||
builtin instanceof DataFlow::ArrayCreationNode and
|
||||
reason instanceof ArgumentToArrayReason
|
||||
or
|
||||
builtin =
|
||||
DataFlow::globalVarRef([
|
||||
"Map", "Set", "WeakMap", "WeakSet", "Number", "Object", "String", "Array", "Error",
|
||||
"Math", "Boolean"
|
||||
]) and
|
||||
reason instanceof ArgumentToBuiltinGlobalVarRefReason
|
||||
)
|
||||
|
|
||||
receiver = [builtin.getAnInvocation(), builtin] and
|
||||
invk = [receiver, receiver.getAPropertyRead()].getAnInvocation() and
|
||||
invk.getAnArgument() = n
|
||||
)
|
||||
or
|
||||
exists(Expr primitive, MethodCallExpr c |
|
||||
primitive instanceof ConstantString or
|
||||
primitive instanceof NumberLiteral or
|
||||
primitive instanceof BooleanLiteral
|
||||
|
|
||||
c.calls(primitive, _) and
|
||||
c.getAnArgument() = n.asExpr() and
|
||||
reason instanceof ConstantReceiverReason
|
||||
)
|
||||
or
|
||||
exists(DataFlow::CallNode call |
|
||||
call.getAnArgument() = n and
|
||||
call.getCalleeName() =
|
||||
[
|
||||
"indexOf", "hasOwnProperty", "substring", "isDecimal", "decode", "encode", "keys", "shift",
|
||||
"values", "forEach", "toString", "slice", "splice", "push", "isArray", "sort"
|
||||
] and
|
||||
reason instanceof BuiltinCallNameReason
|
||||
)
|
||||
}
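// For illustration, hypothetical arguments matched by this predicate, with their reasons:
//
// ```js
// new Map().set(key, value);       // ArgumentToBuiltinGlobalVarRefReason (builtin global `Map`)
// "prefix".includes(candidate);    // ConstantReceiverReason (constant string receiver)
// items.indexOf(needle);           // BuiltinCallNameReason (builtin-sounding callee name)
// ```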
|
||||
predicate isOtherModeledArgument(DataFlow::Node n, FilteringReason reason) {
|
||||
isArgumentToBuiltinFunction(n, reason)
|
||||
or
|
||||
any(LodashUnderscore::Member m).getACall().getAnArgument() = n and
|
||||
reason instanceof LodashUnderscoreArgumentReason
|
||||
or
|
||||
exists(ClientRequest r |
|
||||
r.getAnArgument() = n or n = r.getUrl() or n = r.getHost() or n = r.getADataNode()
|
||||
) and
|
||||
reason instanceof ClientRequestReason
|
||||
or
|
||||
exists(PromiseDefinition p |
|
||||
n = [p.getResolveParameter(), p.getRejectParameter()].getACall().getAnArgument()
|
||||
) and
|
||||
reason instanceof PromiseDefinitionReason
|
||||
or
|
||||
n instanceof CryptographicKey and reason instanceof CryptographicKeyReason
|
||||
or
|
||||
any(CryptographicOperation op).getInput().flow() = n and
|
||||
reason instanceof CryptographicOperationFlowReason
|
||||
or
|
||||
exists(DataFlow::CallNode call | n = call.getAnArgument() |
|
||||
call.getCalleeName() = getAStandardLoggerMethodName() and
|
||||
reason instanceof LoggerMethodReason
|
||||
or
|
||||
call.getCalleeName() = ["setTimeout", "clearTimeout"] and
|
||||
reason instanceof TimeoutReason
|
||||
or
|
||||
call.getReceiver() = DataFlow::globalVarRef(["localStorage", "sessionStorage"]) and
|
||||
reason instanceof ReceiverStorageReason
|
||||
or
|
||||
call instanceof StringOps::StartsWith and reason instanceof StringStartsWithReason
|
||||
or
|
||||
call instanceof StringOps::EndsWith and reason instanceof StringEndsWithReason
|
||||
or
|
||||
call instanceof StringOps::RegExpTest and reason instanceof StringRegExpTestReason
|
||||
or
|
||||
call instanceof EventRegistration and reason instanceof EventRegistrationReason
|
||||
or
|
||||
call instanceof EventDispatch and reason instanceof EventDispatchReason
|
||||
or
|
||||
call = any(MembershipCandidate c).getTest() and
|
||||
reason instanceof MembershipCandidateTestReason
|
||||
or
|
||||
call instanceof FileSystemAccess and reason instanceof FileSystemAccessReason
|
||||
or
|
||||
call instanceof DatabaseAccess and reason instanceof DatabaseAccessReason
|
||||
or
|
||||
call = DOM::domValueRef() and reason instanceof DOMReason
|
||||
or
|
||||
call.getCalleeName() = "next" and
|
||||
exists(DataFlow::FunctionNode f | call = f.getLastParameter().getACall()) and
|
||||
reason instanceof NextFunctionCallReason
|
||||
)
|
||||
}
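// For illustration, hypothetical arguments matched by this predicate, with their reasons:
//
// ```js
// setTimeout(callback, 500);             // TimeoutReason
// localStorage.setItem("key", value);    // ReceiverStorageReason
// _.map(items, transform);               // LodashUnderscoreArgumentReason
// ```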
@@ -0,0 +1,290 @@
|
||||
/*
|
||||
* For internal use only.
|
||||
*
|
||||
* Extracts data about the database for use in adaptive threat modeling (ATM).
|
||||
*/
|
||||
|
||||
import javascript
|
||||
import CodeToFeatures
|
||||
import EndpointScoring
|
||||
|
||||
/**
|
||||
* Gets the value of the token-based feature named `featureName` for the endpoint `endpoint`.
|
||||
*
|
||||
* This is a single string containing a space-separated list of tokens.
|
||||
*/
|
||||
private string getTokenFeature(DataFlow::Node endpoint, string featureName) {
|
||||
// Features for endpoints that are contained within a function.
|
||||
exists(DatabaseFeatures::Entity entity | entity = getRepresentativeEntityForEndpoint(endpoint) |
|
||||
// The name of the function that encloses the endpoint.
|
||||
featureName = "enclosingFunctionName" and result = entity.getName()
|
||||
or
|
||||
// A feature containing natural language tokens from the function that encloses the endpoint in
|
||||
// the order that they appear in the source code.
|
||||
featureName = "enclosingFunctionBody" and
|
||||
result = unique(string x | x = FunctionBodies::getBodyTokenFeatureForEntity(entity))
|
||||
)
|
||||
or
|
||||
exists(getACallBasedTokenFeatureComponent(endpoint, _, featureName)) and
|
||||
result =
|
||||
concat(DataFlow::CallNode call, string component |
|
||||
component = getACallBasedTokenFeatureComponent(endpoint, call, featureName)
|
||||
|
|
||||
component, " "
|
||||
)
|
||||
or
|
||||
// The access path of the function being called, both with and without structural info, if the
|
||||
// function being called originates from an external API. For example, the endpoint here:
|
||||
//
|
||||
// ```js
|
||||
// const mongoose = require('mongoose'),
|
||||
// User = mongoose.model('User', null);
|
||||
// User.findOne(ENDPOINT);
|
||||
// ```
|
||||
//
|
||||
// would have a callee access path with structural info of
|
||||
// `mongoose member model instanceorreturn member findOne instanceorreturn`, and a callee access
|
||||
// path without structural info of `mongoose model findOne`.
|
||||
//
|
||||
// These features indicate that the callee comes from (reading the access path backwards) an
|
||||
// instance of the `findOne` member of an instance of the `model` member of the `mongoose`
|
||||
// external library.
|
||||
exists(AccessPaths::Boolean includeStructuralInfo |
|
||||
featureName =
|
||||
"calleeAccessPath" +
|
||||
any(string x | if includeStructuralInfo = true then x = "WithStructuralInfo" else x = "") and
|
||||
result =
|
||||
concat(API::Node node, string accessPath |
|
||||
node.getInducingNode().(DataFlow::CallNode).getAnArgument() = endpoint and
|
||||
accessPath = AccessPaths::getAccessPath(node, includeStructuralInfo)
|
||||
|
|
||||
accessPath, " "
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
/**
 * Gets a value of the function-call-related token-based feature named `featureName` associated
 * with the function call `call` and the endpoint `endpoint`.
 *
 * This may in general report multiple strings, each containing a space-separated list of tokens.
 *
 * **Technical details:** This predicate can have multiple values per endpoint and feature name,
 * so its results must be concatenated together. Because that concatenation is expensive for
 * large features such as the function body tokens, we keep this predicate separate from ones
 * like `FunctionBodies::getBodyTokenFeatureForEntity`, so the concatenation is only performed
 * where it is actually needed.
 */
|
||||
private string getACallBasedTokenFeatureComponent(
|
||||
DataFlow::Node endpoint, DataFlow::CallNode call, string featureName
|
||||
) {
|
||||
// Features for endpoints that are an argument to a function call.
|
||||
endpoint = call.getAnArgument() and
|
||||
(
|
||||
// The name of the function being called, e.g. in a call `Artist.findOne(...)`, this is `findOne`.
|
||||
featureName = "calleeName" and result = call.getCalleeName()
|
||||
or
|
||||
// The name of the receiver of the call, e.g. in a call `Artist.findOne(...)`, this is `Artist`.
|
||||
featureName = "receiverName" and result = call.getReceiver().asExpr().(VarRef).getName()
|
||||
or
|
||||
// The argument index of the endpoint, e.g. in `f(a, endpoint, b)`, this is 1.
|
||||
featureName = "argumentIndex" and
|
||||
result = any(int argIndex | call.getArgument(argIndex) = endpoint).toString()
|
||||
or
|
||||
// The name of the API that the function being called originates from, if the function being
|
||||
// called originates from an external API. For example, the endpoint here:
|
||||
//
|
||||
// ```js
|
||||
// const mongoose = require('mongoose'),
|
||||
// User = mongoose.model('User', null);
|
||||
// User.findOne(ENDPOINT);
|
||||
// ```
|
||||
//
|
||||
// would have a callee API name of `mongoose`.
|
||||
featureName = "calleeApiName" and
|
||||
result = getAnApiName(call)
|
||||
)
|
||||
}
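// For illustration: for a hypothetical endpoint `ENDPOINT` in
//
// ```js
// Artist.findOne(ENDPOINT);
// ```
//
// this predicate would contribute roughly `calleeName` = "findOne", `receiverName` = "Artist",
// and `argumentIndex` = "0"; the exact values depend on the surrounding code.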
|
||||
/** This module provides functionality for getting the function body feature associated with a particular entity. */
|
||||
module FunctionBodies {
|
||||
/** Holds if `node` is an AST node within the entity `entity` and `token` is a node attribute associated with `node`. */
|
||||
private predicate bodyTokens(
|
||||
DatabaseFeatures::Entity entity, DatabaseFeatures::AstNode node, string token
|
||||
) {
|
||||
DatabaseFeatures::astNodes(entity, _, _, node, _) and
|
||||
token = unique(string t | DatabaseFeatures::nodeAttributes(node, t))
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the body token feature for the specified entity.
|
||||
*
|
||||
* This is a string containing natural language tokens in the order that they appear in the source code for the entity.
|
||||
*/
|
||||
string getBodyTokenFeatureForEntity(DatabaseFeatures::Entity entity) {
|
||||
    // If a function has more than 256 body subtokens, then featurize it as absent. This
    // approximates the behavior of the classifier on non-generic body features, where large body
    // features are replaced by the absent token.
|
||||
if count(DatabaseFeatures::AstNode node, string token | bodyTokens(entity, node, token)) > 256
|
||||
then result = ""
|
||||
else
|
||||
result =
|
||||
concat(int i, string rankedToken |
|
||||
rankedToken =
|
||||
rank[i](DatabaseFeatures::AstNode node, string token, Location l |
|
||||
bodyTokens(entity, node, token) and l = node.getLocation()
|
||||
|
|
||||
token
|
||||
order by
|
||||
l.getFile().getAbsolutePath(), l.getStartLine(), l.getStartColumn(), l.getEndLine(),
|
||||
l.getEndColumn(), token
|
||||
)
|
||||
|
|
||||
rankedToken, " " order by i
|
||||
)
|
||||
}
|
||||
}
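// For illustration, for a small hypothetical function such as
//
// ```js
// function add(a, b) {
//   return a + b;
// }
// ```
//
// the body token feature is the source-ordered, space-separated concatenation of the node
// attributes extracted for the function body (for example identifier names such as `a` and `b`);
// the exact token set depends on which attributes the extractor records.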
|
||||
/**
|
||||
* Returns a name of the API that a node originates from, if the node originates from an API.
|
||||
*
|
||||
* This predicate may have multiple results if the node corresponds to multiple nodes in the API graph forest.
|
||||
*/
|
||||
pragma[inline]
|
||||
private string getAnApiName(DataFlow::Node node) {
|
||||
API::moduleImport(result).getASuccessor*().getInducingNode() = node
|
||||
}
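// For illustration (hypothetical code): for the call below, the reported API name would be
// "axios", since the call node is reachable in the API graph from `API::moduleImport("axios")`.
//
// ```js
// const axios = require('axios');
// axios.get(url);
// ```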
|
||||
/**
|
||||
* This module provides functionality for getting a representation of the access path of nodes
|
||||
* within the program.
|
||||
*
|
||||
* For example, it gives the `User.find` callee here:
|
||||
*
|
||||
* ```js
|
||||
* const mongoose = require('mongoose'),
|
||||
* User = mongoose.model('User', null);
|
||||
* User.find({ 'isAdmin': true })
|
||||
* ```
|
||||
* the access path `mongoose member model instanceorreturn member find instanceorreturn`.
|
||||
*
|
||||
* This access path is based on the simplified access path that the untrusted data flowing to
|
||||
* external API query associates to each of its sinks, with modifications to optionally include
|
||||
* explicit structural information and to improve how well the path tokenizes.
|
||||
*/
|
||||
private module AccessPaths {
|
||||
bindingset[str]
|
||||
private predicate isNumericString(string str) { exists(str.toInt()) }
|
||||
|
||||
/**
|
||||
* Gets a parameter of `base` with name `name`, or a property named `name` of a destructuring parameter.
|
||||
*/
|
||||
private API::Node getNamedParameter(API::Node base, string name) {
|
||||
exists(API::Node param |
|
||||
param = base.getAParameter() and
|
||||
not param = base.getReceiver()
|
||||
|
|
||||
result = param and
|
||||
name = param.getAnImmediateUse().asExpr().(Parameter).getName()
|
||||
or
|
||||
param.getAnImmediateUse().asExpr() instanceof DestructuringPattern and
|
||||
result = param.getMember(name)
|
||||
)
|
||||
}
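  // For illustration: for a callback tracked as an API node, `getNamedParameter(callback, "name")`
  // matches the `name` parameter in either of these hypothetical forms:
  //
  // ```js
  // api.register(function (name) {});      // plain parameter
  // api.register(function ({ name }) {});  // property of a destructuring parameter
  // ```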
|
||||
/**
|
||||
* A utility class that is equivalent to `boolean` but does not require type joining.
|
||||
*/
|
||||
class Boolean extends boolean {
|
||||
Boolean() { this = true or this = false }
|
||||
}
|
||||
|
||||
/** Get the access path for the node. This includes structural information like `member`, `param`, and `functionalarg` if `includeStructuralInfo` is true. */
|
||||
string getAccessPath(API::Node node, Boolean includeStructuralInfo) {
|
||||
node = API::moduleImport(result)
|
||||
or
|
||||
exists(API::Node base, string baseName |
|
||||
base.getDepth() < node.getDepth() and baseName = getAccessPath(base, includeStructuralInfo)
|
||||
|
|
||||
// e.g. `new X`, `X()`
|
||||
node = [base.getInstance(), base.getReturn()] and
|
||||
if includeStructuralInfo = true
|
||||
then result = baseName + " instanceorreturn"
|
||||
else result = baseName
|
||||
or
|
||||
// e.g. `x.y`, `x[y]`, `const { y } = x`, where `y` is non-numeric and is known at analysis
|
||||
// time.
|
||||
exists(string member |
|
||||
node = base.getMember(member) and
|
||||
not node = base.getUnknownMember() and
|
||||
not isNumericString(member) and
|
||||
not (member = "default" and base = API::moduleImport(_)) and
|
||||
not member = "then" // use the 'promised' edges for .then callbacks
|
||||
|
|
||||
if includeStructuralInfo = true
|
||||
then result = baseName + " member " + member
|
||||
else result = baseName + " " + member
|
||||
)
|
||||
or
|
||||
// e.g. `x.y`, `x[y]`, `const { y } = x`, where `y` is numeric or not known at analysis time.
|
||||
(
|
||||
node = base.getUnknownMember() or
|
||||
node = base.getMember(any(string s | isNumericString(s)))
|
||||
) and
|
||||
if includeStructuralInfo = true then result = baseName + " member" else result = baseName
|
||||
or
|
||||
// e.g. `x.then(y => ...)`
|
||||
node = base.getPromised() and
|
||||
result = baseName
|
||||
or
|
||||
// e.g. `x.y((a, b) => ...)`
|
||||
// Name callback parameters after their name in the source code.
|
||||
// For example, the `res` parameter in `express.get('/foo', (req, res) => {...})` will be
|
||||
// named `express member get functionalarg param res`.
|
||||
exists(string paramName |
|
||||
node = getNamedParameter(base.getAParameter(), paramName) and
|
||||
(
|
||||
if includeStructuralInfo = true
|
||||
then result = baseName + " functionalarg param " + paramName
|
||||
else result = baseName + " " + paramName
|
||||
)
|
||||
or
|
||||
exists(string callbackName, string index |
|
||||
node =
|
||||
getNamedParameter(base.getASuccessor("param " + index).getMember(callbackName),
|
||||
paramName) and
|
||||
index != "-1" and // ignore receiver
|
||||
if includeStructuralInfo = true
|
||||
then
|
||||
result =
|
||||
baseName + " functionalarg " + index + " " + callbackName + " param " + paramName
|
||||
else result = baseName + " " + index + " " + callbackName + " " + paramName
|
||||
)
|
||||
)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/** Get a name of a supported generic token-based feature. */
|
||||
private string getASupportedFeatureName() {
|
||||
result =
|
||||
[
|
||||
"enclosingFunctionName", "calleeName", "receiverName", "argumentIndex", "calleeApiName",
|
||||
"calleeAccessPath", "calleeAccessPathWithStructuralInfo", "enclosingFunctionBody"
|
||||
]
|
||||
}
|
||||
|
||||
/**
|
||||
* Generic token-based features for ATM.
|
||||
*
|
||||
* This predicate holds if the generic token-based feature named `featureName` has the value
|
||||
* `featureValue` for the endpoint `endpoint`.
|
||||
*/
|
||||
predicate tokenFeatures(DataFlow::Node endpoint, string featureName, string featureValue) {
|
||||
featureName = getASupportedFeatureName() and
|
||||
(
|
||||
featureValue = unique(string x | x = getTokenFeature(endpoint, featureName))
|
||||
or
|
||||
not exists(unique(string x | x = getTokenFeature(endpoint, featureName))) and featureValue = ""
|
||||
)
|
||||
}
|
||||
@@ -0,0 +1,223 @@
|
||||
/*
|
||||
* For internal use only.
|
||||
*
|
||||
* Provides an implementation of scoring alerts for use in adaptive threat modeling (ATM).
|
||||
*/
|
||||
|
||||
private import javascript
|
||||
import BaseScoring
|
||||
import CodeToFeatures
|
||||
import EndpointFeatures as EndpointFeatures
|
||||
import EndpointTypes
|
||||
|
||||
private string getACompatibleModelChecksum() {
|
||||
adaptiveThreatModelingModels(result, "javascript", _, "atm-endpoint-scoring")
|
||||
}
|
||||
|
||||
/**
|
||||
* The maximum number of AST nodes an entity containing an endpoint should have before we should
|
||||
* choose a smaller entity to represent the endpoint.
|
||||
*
|
||||
 * This is intended to represent a balance in terms of the amount of context we provide to the
 * model: we don't want the function to be too small, because then it doesn't contain very much
 * context and misses useful information, but we also don't want it to be too large, because then
 * there's likely to be a lot of irrelevant or very loosely related context.
|
||||
*/
|
||||
private int getMaxNumAstNodes() { result = 1024 }
|
||||
|
||||
/**
|
||||
* Returns the number of AST nodes contained within the specified entity.
|
||||
*/
|
||||
private int getNumAstNodesInEntity(DatabaseFeatures::Entity entity) {
|
||||
// Restrict the values `entity` can take on
|
||||
entity = EndpointToEntity::getAnEntityForEndpoint(_) and
|
||||
result =
|
||||
count(DatabaseFeatures::AstNode astNode | DatabaseFeatures::astNodes(entity, _, _, astNode, _))
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a single entity to use as the representative entity for the endpoint.
|
||||
*
|
||||
* We try to use the largest entity containing the endpoint that's below the AST node limit defined
|
||||
* in `getMaxNumAstNodes`. In the event of a tie, we use the entity that appears first within the
|
||||
* source archive.
|
||||
*
|
||||
* If no entities are smaller than the AST node limit, then we use the smallest entity containing
|
||||
* the endpoint.
|
||||
*/
|
||||
DatabaseFeatures::Entity getRepresentativeEntityForEndpoint(DataFlow::Node endpoint) {
|
||||
// Check whether there's an entity containing the endpoint that's smaller than the AST node limit.
|
||||
if
|
||||
getNumAstNodesInEntity(EndpointToEntity::getAnEntityForEndpoint(endpoint)) <=
|
||||
getMaxNumAstNodes()
|
||||
then
|
||||
// Use the largest entity smaller than the AST node limit, resolving ties using the entity that
|
||||
// appears first in the source archive.
|
||||
result =
|
||||
rank[1](DatabaseFeatures::Entity entity, int numAstNodes, Location l |
|
||||
entity = EndpointToEntity::getAnEntityForEndpoint(endpoint) and
|
||||
numAstNodes = getNumAstNodesInEntity(entity) and
|
||||
numAstNodes <= getMaxNumAstNodes() and
|
||||
l = entity.getLocation()
|
||||
|
|
||||
entity
|
||||
order by
|
||||
numAstNodes desc, l.getStartLine(), l.getStartColumn(), l.getEndLine(), l.getEndColumn()
|
||||
)
|
||||
else
|
||||
// Use the smallest entity, resolving ties using the entity that
|
||||
// appears first in the source archive.
|
||||
result =
|
||||
rank[1](DatabaseFeatures::Entity entity, int numAstNodes, Location l |
|
||||
entity = EndpointToEntity::getAnEntityForEndpoint(endpoint) and
|
||||
numAstNodes = getNumAstNodesInEntity(entity) and
|
||||
l = entity.getLocation()
|
||||
|
|
||||
entity
|
||||
order by
|
||||
numAstNodes, l.getStartLine(), l.getStartColumn(), l.getEndLine(), l.getEndColumn()
|
||||
)
|
||||
}
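// For illustration (hypothetical code): for an endpoint inside `inner` below, both `inner` and
// `outer` are typically candidate entities. If `outer` has at most 1024 AST nodes it is chosen,
// being the largest candidate under the limit; otherwise the smallest candidate containing the
// endpoint is used.
//
// ```js
// function outer() {
//   function inner() {
//     db.query(ENDPOINT);
//   }
// }
// ```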
|
||||
module ModelScoring {
|
||||
predicate endpoints(DataFlow::Node endpoint) {
|
||||
getCfg().isEffectiveSource(endpoint) or
|
||||
getCfg().isEffectiveSink(endpoint)
|
||||
}
|
||||
|
||||
private int requestedEndpointTypes() { result = any(EndpointType type).getEncoding() }
|
||||
|
||||
private predicate relevantTokenFeatures(
|
||||
DataFlow::Node endpoint, string featureName, string featureValue
|
||||
) {
|
||||
endpoints(endpoint) and
|
||||
EndpointFeatures::tokenFeatures(endpoint, featureName, featureValue)
|
||||
}
|
||||
|
||||
predicate endpointScores(DataFlow::Node endpoint, int encodedEndpointType, float score) =
|
||||
scoreEndpoints(endpoints/1, requestedEndpointTypes/0, relevantTokenFeatures/3,
|
||||
getACompatibleModelChecksum/0)(endpoint, encodedEndpointType, score)
|
||||
}
|
||||
|
||||
/**
|
||||
* Return ATM's confidence that `source` is a source for the given security query. This will be a
|
||||
* number between 0.0 and 1.0.
|
||||
*/
|
||||
private float getScoreForSource(DataFlow::Node source) {
|
||||
if getCfg().isKnownSource(source)
|
||||
then result = 1.0
|
||||
else (
|
||||
// This restriction on `source` has no semantic effect but improves performance.
|
||||
getCfg().isEffectiveSource(source) and
|
||||
ModelScoring::endpointScores(source, getCfg().getASourceEndpointType().getEncoding(), result)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Return ATM's confidence that `sink` is a sink for the given security query. This will be a
|
||||
* number between 0.0 and 1.0.
|
||||
*/
|
||||
private float getScoreForSink(DataFlow::Node sink) {
|
||||
if getCfg().isKnownSink(sink)
|
||||
then result = 1.0
|
||||
else
|
||||
if getCfg().isEffectiveSinkWithOverridingScore(sink, result, _)
|
||||
then any()
|
||||
else (
|
||||
// This restriction on `sink` has no semantic effect but improves performance.
|
||||
getCfg().isEffectiveSink(sink) and
|
||||
ModelScoring::endpointScores(sink, getCfg().getASinkEndpointType().getEncoding(), result)
|
||||
)
|
||||
}
|
||||
|
||||
class EndpointScoringResults extends ScoringResults {
|
||||
EndpointScoringResults() {
|
||||
this = "EndpointScoringResults" and exists(getACompatibleModelChecksum())
|
||||
}
|
||||
|
||||
/**
|
||||
* Get ATM's confidence that a path between `source` and `sink` represents a security
|
||||
* vulnerability. This will be a number between 0.0 and 1.0.
|
||||
*/
|
||||
override float getScoreForFlow(DataFlow::Node source, DataFlow::Node sink) {
|
||||
result = getScoreForSource(source) * getScoreForSink(sink)
|
||||
}
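  // For illustration: if ATM is 0.8 confident that `source` is a source and 0.75 confident that
  // `sink` is a sink, the flow is scored 0.8 * 0.75 = 0.6.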
|
||||
/**
|
||||
* Get a string representing why ATM included the given source in the dataflow analysis.
|
||||
*
|
||||
* In general, there may be multiple reasons why ATM included the given source, in which case
|
||||
* this predicate should have multiple results.
|
||||
*/
|
||||
pragma[inline]
|
||||
override string getASourceOrigin(DataFlow::Node source) {
|
||||
result = "known" and getCfg().isKnownSource(source)
|
||||
or
|
||||
result = "predicted" and getCfg().isEffectiveSource(source)
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a string representing why ATM included the given sink in the dataflow analysis.
|
||||
*
|
||||
* In general, there may be multiple reasons why ATM included the given sink, in which case
|
||||
* this predicate should have multiple results.
|
||||
*/
|
||||
pragma[inline]
|
||||
override string getASinkOrigin(DataFlow::Node sink) {
|
||||
result = "known" and getCfg().isKnownSink(sink)
|
||||
or
|
||||
not getCfg().isKnownSink(sink) and
|
||||
getCfg().isEffectiveSinkWithOverridingScore(sink, _, result)
|
||||
or
|
||||
not getCfg().isKnownSink(sink) and
|
||||
not getCfg().isEffectiveSinkWithOverridingScore(sink, _, _) and
|
||||
result =
|
||||
"predicted (scores: " +
|
||||
concat(EndpointType type, float score |
|
||||
ModelScoring::endpointScores(sink, type.getEncoding(), score)
|
||||
|
|
||||
type.getDescription() + "=" + score.toString(), ", " order by type.getEncoding()
|
||||
) + ")" and
|
||||
getCfg().isEffectiveSink(sink)
|
||||
}
|
||||
|
||||
pragma[inline]
|
||||
override predicate shouldResultBeIncluded(DataFlow::Node source, DataFlow::Node sink) {
|
||||
if getCfg().isKnownSink(sink)
|
||||
then any()
|
||||
else
|
||||
if getCfg().isEffectiveSinkWithOverridingScore(sink, _, _)
|
||||
then
|
||||
exists(float score |
|
||||
getCfg().isEffectiveSinkWithOverridingScore(sink, score, _) and
|
||||
score >= getCfg().getScoreCutoff()
|
||||
)
|
||||
else (
|
||||
// This restriction on `sink` has no semantic effect but improves performance.
|
||||
getCfg().isEffectiveSink(sink) and
|
||||
exists(float sinkScore |
|
||||
ModelScoring::endpointScores(sink, getCfg().getASinkEndpointType().getEncoding(),
|
||||
sinkScore) and
|
||||
// Include the endpoint if (a) the query endpoint type scores higher than all other
|
||||
// endpoint types, or (b) the query endpoint type scores at least
|
||||
// 0.5 - (getCfg().getScoreCutoff() / 2).
|
||||
sinkScore >=
|
||||
[
|
||||
max(float s | ModelScoring::endpointScores(sink, _, s)),
|
||||
0.5 - getCfg().getScoreCutoff() / 2
|
||||
]
|
||||
)
|
||||
)
|
||||
}
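  // For illustration: with a hypothetical score cutoff of 0.2, a predicted sink is included when
  // its score for the query's sink type is either the highest among all its endpoint-type scores
  // or at least 0.5 - 0.2 / 2 = 0.4.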
}
|
||||
|
||||
module Debugging {
|
||||
query predicate hopInputEndpoints = ModelScoring::endpoints/1;
|
||||
|
||||
query predicate endpointScores = ModelScoring::endpointScores/3;
|
||||
|
||||
query predicate shouldResultBeIncluded(DataFlow::Node source, DataFlow::Node sink) {
|
||||
any(ScoringResults scoringResults).shouldResultBeIncluded(source, sink) and
|
||||
any(DataFlow::Configuration cfg).hasFlow(source, sink)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,57 @@
|
||||
/**
|
||||
* For internal use only.
|
||||
*
|
||||
* Defines the set of classes that endpoint scoring models can predict. Endpoint scoring models must
|
||||
* only predict classes defined within this file. This file is the source of truth for the integer
|
||||
* representation of each of these classes.
|
||||
*/
|
||||
newtype TEndpointType =
|
||||
TNotASinkType() or
|
||||
TXssSinkType() or
|
||||
TNosqlInjectionSinkType() or
|
||||
TSqlInjectionSinkType() or
|
||||
TTaintedPathSinkType()
|
||||
|
||||
/** A class that can be predicted by endpoint scoring models. */
|
||||
abstract class EndpointType extends TEndpointType {
|
||||
abstract string getDescription();
|
||||
|
||||
abstract int getEncoding();
|
||||
|
||||
string toString() { result = getDescription() }
|
||||
}
|
||||
|
||||
/** The `NotASink` class that can be predicted by endpoint scoring models. */
|
||||
class NotASinkType extends EndpointType, TNotASinkType {
|
||||
override string getDescription() { result = "NotASink" }
|
||||
|
||||
override int getEncoding() { result = 0 }
|
||||
}
|
||||
|
||||
/** The `XssSink` class that can be predicted by endpoint scoring models. */
|
||||
class XssSinkType extends EndpointType, TXssSinkType {
|
||||
override string getDescription() { result = "XssSink" }
|
||||
|
||||
override int getEncoding() { result = 1 }
|
||||
}
|
||||
|
||||
/** The `NosqlInjectionSink` class that can be predicted by endpoint scoring models. */
|
||||
class NosqlInjectionSinkType extends EndpointType, TNosqlInjectionSinkType {
|
||||
override string getDescription() { result = "NosqlInjectionSink" }
|
||||
|
||||
override int getEncoding() { result = 2 }
|
||||
}
|
||||
|
||||
/** The `SqlInjectionSink` class that can be predicted by endpoint scoring models. */
|
||||
class SqlInjectionSinkType extends EndpointType, TSqlInjectionSinkType {
|
||||
override string getDescription() { result = "SqlInjectionSink" }
|
||||
|
||||
override int getEncoding() { result = 3 }
|
||||
}
|
||||
|
||||
/** The `TaintedPathSink` class that can be predicted by endpoint scoring models. */
|
||||
class TaintedPathSinkType extends EndpointType, TTaintedPathSinkType {
|
||||
override string getDescription() { result = "TaintedPathSink" }
|
||||
|
||||
override int getEncoding() { result = 4 }
|
||||
}
|
||||
@@ -0,0 +1,196 @@
|
||||
/**
|
||||
* For internal use only.
|
||||
*
|
||||
* Defines a set of reasons why a particular endpoint was filtered out. This set of reasons
|
||||
* contains both reasons why an endpoint could be `NotASink` and reasons why an endpoint could be
|
||||
* `LikelyNotASink`. The `NotASinkReason`s defined here are exhaustive, but the
|
||||
* `LikelyNotASinkReason`s are not exhaustive.
|
||||
*/
|
||||
newtype TFilteringReason =
|
||||
TIsArgumentToBuiltinFunctionReason() or
|
||||
TLodashUnderscoreArgumentReason() or
|
||||
TClientRequestReason() or
|
||||
TPromiseDefinitionReason() or
|
||||
TCryptographicKeyReason() or
|
||||
TCryptographicOperationFlowReason() or
|
||||
TLoggerMethodReason() or
|
||||
TTimeoutReason() or
|
||||
TReceiverStorageReason() or
|
||||
TStringStartsWithReason() or
|
||||
TStringEndsWithReason() or
|
||||
TStringRegExpTestReason() or
|
||||
TEventRegistrationReason() or
|
||||
TEventDispatchReason() or
|
||||
TMembershipCandidateTestReason() or
|
||||
TFileSystemAccessReason() or
|
||||
TDatabaseAccessReason() or
|
||||
TDOMReason() or
|
||||
TNextFunctionCallReason() or
|
||||
TArgumentToArrayReason() or
|
||||
TArgumentToBuiltinGlobalVarRefReason() or
|
||||
TConstantReceiverReason() or
|
||||
TBuiltinCallNameReason()
|
||||
|
||||
/** A reason why a particular endpoint was filtered out by the endpoint filters. */
|
||||
abstract class FilteringReason extends TFilteringReason {
|
||||
abstract string getDescription();
|
||||
|
||||
abstract int getEncoding();
|
||||
|
||||
string toString() { result = getDescription() }
|
||||
}
|
||||
|
||||
/**
|
||||
* A reason why a particular endpoint might be considered to be `NotASink`.
|
||||
*
|
||||
* An endpoint is `NotASink` if it has at least one `NotASinkReason`, it does not have any
|
||||
* `LikelyNotASinkReason`s, and it is not a known sink.
|
||||
*/
|
||||
abstract class NotASinkReason extends FilteringReason { }
|
||||
|
||||
/**
|
||||
* A reason why a particular endpoint might be considered to be `LikelyNotASink`.
|
||||
*
|
||||
* An endpoint is `LikelyNotASink` if it has at least one `LikelyNotASinkReason` and it is not a
|
||||
* known sink.
|
||||
*/
|
||||
abstract class LikelyNotASinkReason extends FilteringReason { }
|
||||
|
||||
class IsArgumentToBuiltinFunctionReason extends NotASinkReason, TIsArgumentToBuiltinFunctionReason {
|
||||
override string getDescription() { result = "IsArgumentToBuiltinFunction" }
|
||||
|
||||
override int getEncoding() { result = 5 }
|
||||
}
|
||||
|
||||
class LodashUnderscoreArgumentReason extends NotASinkReason, TLodashUnderscoreArgumentReason {
|
||||
override string getDescription() { result = "LodashUnderscoreArgument" }
|
||||
|
||||
override int getEncoding() { result = 6 }
|
||||
}
|
||||
|
||||
class ClientRequestReason extends NotASinkReason, TClientRequestReason {
|
||||
override string getDescription() { result = "ClientRequest" }
|
||||
|
||||
override int getEncoding() { result = 7 }
|
||||
}
|
||||
|
||||
class PromiseDefinitionReason extends NotASinkReason, TPromiseDefinitionReason {
|
||||
override string getDescription() { result = "PromiseDefinition" }
|
||||
|
||||
override int getEncoding() { result = 8 }
|
||||
}
|
||||
|
||||
class CryptographicKeyReason extends NotASinkReason, TCryptographicKeyReason {
|
||||
override string getDescription() { result = "CryptographicKey" }
|
||||
|
||||
override int getEncoding() { result = 9 }
|
||||
}
|
||||
|
||||
class CryptographicOperationFlowReason extends NotASinkReason, TCryptographicOperationFlowReason {
|
||||
override string getDescription() { result = "CryptographicOperationFlow" }
|
||||
|
||||
override int getEncoding() { result = 10 }
|
||||
}
|
||||
|
||||
class LoggerMethodReason extends NotASinkReason, TLoggerMethodReason {
|
||||
override string getDescription() { result = "LoggerMethod" }
|
||||
|
||||
override int getEncoding() { result = 11 }
|
||||
}
|
||||
|
||||
class TimeoutReason extends NotASinkReason, TTimeoutReason {
|
||||
override string getDescription() { result = "Timeout" }
|
||||
|
||||
override int getEncoding() { result = 12 }
|
||||
}
|
||||
|
||||
class ReceiverStorageReason extends NotASinkReason, TReceiverStorageReason {
|
||||
override string getDescription() { result = "ReceiverStorage" }
|
||||
|
||||
override int getEncoding() { result = 13 }
|
||||
}
|
||||
|
||||
class StringStartsWithReason extends NotASinkReason, TStringStartsWithReason {
|
||||
override string getDescription() { result = "StringStartsWith" }
|
||||
|
||||
override int getEncoding() { result = 14 }
|
||||
}
|
||||
|
||||
class StringEndsWithReason extends NotASinkReason, TStringEndsWithReason {
|
||||
override string getDescription() { result = "StringEndsWith" }
|
||||
|
||||
override int getEncoding() { result = 15 }
|
||||
}
|
||||
|
||||
class StringRegExpTestReason extends NotASinkReason, TStringRegExpTestReason {
|
||||
override string getDescription() { result = "StringRegExpTest" }
|
||||
|
||||
override int getEncoding() { result = 16 }
|
||||
}
|
||||
|
||||
class EventRegistrationReason extends NotASinkReason, TEventRegistrationReason {
|
||||
override string getDescription() { result = "EventRegistration" }
|
||||
|
||||
override int getEncoding() { result = 17 }
|
||||
}
|
||||
|
||||
class EventDispatchReason extends NotASinkReason, TEventDispatchReason {
|
||||
override string getDescription() { result = "EventDispatch" }
|
||||
|
||||
override int getEncoding() { result = 18 }
|
||||
}
|
||||
|
||||
class MembershipCandidateTestReason extends NotASinkReason, TMembershipCandidateTestReason {
|
||||
override string getDescription() { result = "MembershipCandidateTest" }
|
||||
|
||||
override int getEncoding() { result = 19 }
|
||||
}
|
||||
|
||||
class FileSystemAccessReason extends NotASinkReason, TFileSystemAccessReason {
|
||||
override string getDescription() { result = "FileSystemAccess" }
|
||||
|
||||
override int getEncoding() { result = 20 }
|
||||
}
|
||||
|
||||
class DatabaseAccessReason extends NotASinkReason, TDatabaseAccessReason {
|
||||
override string getDescription() { result = "DatabaseAccess" }
|
||||
|
||||
override int getEncoding() { result = 21 }
|
||||
}
|
||||
|
||||
class DOMReason extends NotASinkReason, TDOMReason {
|
||||
override string getDescription() { result = "DOM" }
|
||||
|
||||
override int getEncoding() { result = 22 }
|
||||
}
|
||||
|
||||
class NextFunctionCallReason extends NotASinkReason, TNextFunctionCallReason {
|
||||
override string getDescription() { result = "NextFunctionCall" }
|
||||
|
||||
override int getEncoding() { result = 23 }
|
||||
}
|
||||
|
||||
class ArgumentToArrayReason extends LikelyNotASinkReason, TArgumentToArrayReason {
|
||||
override string getDescription() { result = "ArgumentToArray" }
|
||||
|
||||
override int getEncoding() { result = 24 }
|
||||
}
|
||||
|
||||
class ArgumentToBuiltinGlobalVarRefReason extends LikelyNotASinkReason,
|
||||
TArgumentToBuiltinGlobalVarRefReason {
|
||||
override string getDescription() { result = "ArgumentToBuiltinGlobalVarRef" }
|
||||
|
||||
override int getEncoding() { result = 25 }
|
||||
}
|
||||
|
||||
class ConstantReceiverReason extends NotASinkReason, TConstantReceiverReason {
|
||||
override string getDescription() { result = "ConstantReceiver" }
|
||||
|
||||
override int getEncoding() { result = 26 }
|
||||
}
|
||||
|
||||
class BuiltinCallNameReason extends NotASinkReason, TBuiltinCallNameReason {
|
||||
override string getDescription() { result = "BuiltinCallName" }
|
||||
|
||||
override int getEncoding() { result = 27 }
|
||||
}
|
||||
@@ -0,0 +1,178 @@
|
||||
/**
|
||||
* For internal use only.
|
||||
*
|
||||
* Defines shared code used by the NoSQL injection boosted query.
|
||||
*/
|
||||
|
||||
import javascript
|
||||
private import semmle.javascript.heuristics.SyntacticHeuristics
|
||||
private import semmle.javascript.security.dataflow.NosqlInjectionCustomizations
|
||||
private import semmle.javascript.security.TaintedObject
|
||||
import AdaptiveThreatModeling
|
||||
private import CoreKnowledge as CoreKnowledge
|
||||
private import StandardEndpointFilters as StandardEndpointFilters
|
||||
|
||||
module SinkEndpointFilter {
|
||||
/**
|
||||
* Provides a set of reasons why a given data flow node should be excluded as a sink candidate.
|
||||
*
|
||||
* If this predicate has no results for a sink candidate `n`, then we should treat `n` as an
|
||||
* effective sink.
|
||||
*/
|
||||
string getAReasonSinkExcluded(DataFlow::Node sinkCandidate) {
|
||||
(
|
||||
result = StandardEndpointFilters::getAReasonSinkExcluded(sinkCandidate)
|
||||
or
|
||||
// Require NoSQL injection sink candidates to be direct arguments to external library calls.
|
||||
//
|
||||
// The standard endpoint filters allow sink candidates which are within object literals or
|
||||
// array literals, for example `req.sendFile(_, { path: ENDPOINT })`.
|
||||
//
|
||||
// However, the NoSQL injection query deals differently with these types of sinks compared to
|
||||
// other security queries. Other security queries such as SQL injection tend to treat
|
||||
// `ENDPOINT` as the ground truth sink, but the NoSQL injection query instead treats
|
||||
// `{ path: ENDPOINT }` as the ground truth sink and defines an additional flow step to ensure
|
||||
// data flows from `ENDPOINT` to the ground truth sink `{ path: ENDPOINT }`.
|
||||
//
|
||||
// Therefore for the NoSQL injection boosted query, we must explicitly ignore sink candidates
|
||||
// within object literals or array literals, to avoid having multiple alerts for the same
|
||||
// security vulnerability (one FP where the sink is `ENDPOINT` and one TP where the sink is
|
||||
// `{ path: ENDPOINT }`).
|
||||
//
|
||||
// We use the same reason as in the standard endpoint filters to avoid duplicate reasons for
|
||||
// endpoints that are neither direct nor indirect arguments to a likely external library call.
|
||||
not sinkCandidate = StandardEndpointFilters::getALikelyExternalLibraryCall().getAnArgument() and
|
||||
result = "not an argument to a likely external library call"
|
||||
or
|
||||
exists(DataFlow::CallNode call | sinkCandidate = call.getAnArgument() |
|
||||
// additional databases accesses that aren't modeled yet
|
||||
call.(DataFlow::MethodCallNode).getMethodName() =
|
||||
["create", "createCollection", "createIndexes"] and
|
||||
result = "matches database access call heuristic"
|
||||
or
|
||||
// Remove modeled sinks
|
||||
CoreKnowledge::isArgumentToKnownLibrarySinkFunction(sinkCandidate) and
|
||||
result = "modeled sink"
|
||||
or
|
||||
// Remove common kinds of unlikely sinks
|
||||
CoreKnowledge::isKnownStepSrc(sinkCandidate) and
|
||||
result = "predecessor in a modeled flow step"
|
||||
or
|
||||
// Remove modeled database calls. Arguments to modeled calls are very likely to be modeled
|
||||
// as sinks if they are true positives. Therefore arguments that are not modeled as sinks
|
||||
// are unlikely to be true positives.
|
||||
call instanceof DatabaseAccess and
|
||||
result = "modeled database access"
|
||||
or
|
||||
// Remove calls to APIs that aren't relevant to NoSQL injection
|
||||
call.getReceiver().asExpr() instanceof HTTP::RequestExpr and
|
||||
result = "receiver is a HTTP request expression"
|
||||
or
|
||||
call.getReceiver().asExpr() instanceof HTTP::ResponseExpr and
|
||||
result = "receiver is a HTTP response expression"
|
||||
)
|
||||
) and
|
||||
not (
|
||||
// Explicitly allow the following heuristic sinks.
|
||||
//
|
||||
// These are copied from the `HeuristicNosqlInjectionSink` class defined within
|
||||
// `codeql/javascript/ql/src/semmle/javascript/heuristics/AdditionalSinks.qll`.
|
||||
// We can't reuse the class because importing that file would cause us to treat these
|
||||
// heuristic sinks as known sinks.
|
||||
isAssignedToOrConcatenatedWith(sinkCandidate, "(?i)(nosql|query)") or
|
||||
isArgTo(sinkCandidate, "(?i)(query)")
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
class NosqlInjectionATMConfig extends ATMConfig {
|
||||
NosqlInjectionATMConfig() { this = "NosqlInjectionATMConfig" }
|
||||
|
||||
override predicate isKnownSource(DataFlow::Node source) {
|
||||
source instanceof NosqlInjection::Source or TaintedObject::isSource(source, _)
|
||||
}
|
||||
|
||||
override predicate isKnownSink(DataFlow::Node sink) { sink instanceof NosqlInjection::Sink }
|
||||
|
||||
override predicate isEffectiveSink(DataFlow::Node sinkCandidate) {
|
||||
not exists(SinkEndpointFilter::getAReasonSinkExcluded(sinkCandidate))
|
||||
}
|
||||
|
||||
override EndpointType getASinkEndpointType() { result instanceof NosqlInjectionSinkType }
|
||||
}
|
||||
|
||||
/** Holds if `src` -> `trg` is an additional flow step in the non-boosted NoSQL injection security query. */
|
||||
predicate isBaseAdditionalFlowStep(
|
||||
DataFlow::Node src, DataFlow::Node trg, DataFlow::FlowLabel inlbl, DataFlow::FlowLabel outlbl
|
||||
) {
|
||||
TaintedObject::step(src, trg, inlbl, outlbl)
|
||||
or
|
||||
// additional flow step to track taint through NoSQL query objects
|
||||
inlbl = TaintedObject::label() and
|
||||
outlbl = TaintedObject::label() and
|
||||
exists(NoSQL::Query query, DataFlow::SourceNode queryObj |
|
||||
queryObj.flowsToExpr(query) and
|
||||
queryObj.flowsTo(trg) and
|
||||
src = queryObj.getAPropertyWrite().getRhs()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* This predicate allows us to propagate data flow through property writes and array constructors
|
||||
* within a query object, enabling the security query to pick up NoSQL injection vulnerabilities
|
||||
* involving more complex queries.
|
||||
*/
|
||||
DataFlow::Node getASubexpressionWithinQuery(DataFlow::Node query) {
|
||||
exists(DataFlow::SourceNode receiver |
|
||||
receiver.flowsTo(getASubexpressionWithinQuery*(query.getALocalSource())) and
|
||||
result =
|
||||
[
|
||||
receiver.(DataFlow::SourceNode).getAPropertyWrite().getRhs(),
|
||||
receiver.(DataFlow::ArrayCreationNode).getAnElement()
|
||||
]
|
||||
)
|
||||
}
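// For illustration (hypothetical code): `userName`, `minAge`, and the array elements below are
// all subexpressions within the query object passed to `find`, so taint reaching any of them
// can be propagated to the query itself by the generalised additional flow step in the
// configuration below.
//
// ```js
// const query = { name: userName, age: { $gt: minAge }, tags: [tagA, tagB] };
// User.find(query);
// ```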
|
||||
/**
|
||||
* A taint-tracking configuration for reasoning about NoSQL injection vulnerabilities.
|
||||
*
|
||||
* This is largely a copy of the taint tracking configuration for the standard NoSQL injection
|
||||
* query, except additional ATM sinks have been added and the additional flow step has been
|
||||
* generalised to cover the sinks predicted by ATM.
|
||||
*/
|
||||
class Configuration extends TaintTracking::Configuration {
|
||||
Configuration() { this = "NosqlInjectionATM" }
|
||||
|
||||
override predicate isSource(DataFlow::Node source) { source instanceof NosqlInjection::Source }
|
||||
|
||||
override predicate isSource(DataFlow::Node source, DataFlow::FlowLabel label) {
|
||||
TaintedObject::isSource(source, label)
|
||||
}
|
||||
|
||||
override predicate isSink(DataFlow::Node sink, DataFlow::FlowLabel label) {
|
||||
sink.(NosqlInjection::Sink).getAFlowLabel() = label
|
||||
or
|
||||
// Allow effective sinks to have any taint label
|
||||
any(NosqlInjectionATMConfig cfg).isEffectiveSink(sink)
|
||||
}
|
||||
|
||||
override predicate isSanitizer(DataFlow::Node node) {
|
||||
super.isSanitizer(node) or
|
||||
node instanceof NosqlInjection::Sanitizer
|
||||
}
|
||||
|
||||
override predicate isSanitizerGuard(TaintTracking::SanitizerGuardNode guard) {
|
||||
guard instanceof TaintedObject::SanitizerGuard
|
||||
}
|
||||
|
||||
override predicate isAdditionalFlowStep(
|
||||
DataFlow::Node src, DataFlow::Node trg, DataFlow::FlowLabel inlbl, DataFlow::FlowLabel outlbl
|
||||
) {
|
||||
// additional flow steps from the base (non-boosted) security query
|
||||
isBaseAdditionalFlowStep(src, trg, inlbl, outlbl)
|
||||
or
|
||||
// relaxed version of previous step to track taint through unmodeled NoSQL query objects
|
||||
any(NosqlInjectionATMConfig cfg).isEffectiveSink(trg) and
|
||||
src = getASubexpressionWithinQuery(trg)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,94 @@
|
||||
/**
|
||||
* For internal use only.
|
||||
*
|
||||
* Defines shared code used by the SQL injection boosted query.
|
||||
*/
|
||||
|
||||
import semmle.javascript.heuristics.SyntacticHeuristics
|
||||
import semmle.javascript.security.dataflow.SqlInjectionCustomizations
|
||||
import AdaptiveThreatModeling
|
||||
import CoreKnowledge as CoreKnowledge
|
||||
import StandardEndpointFilters as StandardEndpointFilters
|
||||
|
||||
/**
|
||||
* This module provides logic to filter candidate sinks to those which are likely SQL injection
|
||||
* sinks.
|
||||
*/
|
||||
module SinkEndpointFilter {
|
||||
private import javascript
|
||||
private import SQL
|
||||
|
||||
/**
|
||||
* Provides a set of reasons why a given data flow node should be excluded as a sink candidate.
|
||||
*
|
||||
* If this predicate has no results for a sink candidate `n`, then we should treat `n` as an
|
||||
* effective sink.
|
||||
*/
|
||||
string getAReasonSinkExcluded(DataFlow::Node sinkCandidate) {
|
||||
(
|
||||
result = StandardEndpointFilters::getAReasonSinkExcluded(sinkCandidate)
|
||||
or
|
||||
exists(DataFlow::CallNode call | sinkCandidate = call.getAnArgument() |
|
||||
// prepared statements for SQL
|
||||
any(DataFlow::CallNode cn | cn.getCalleeName() = "prepare")
|
||||
.getAMethodCall("run")
|
||||
.getAnArgument() = sinkCandidate and
|
||||
result = "prepared SQL statement"
|
||||
or
|
||||
sinkCandidate instanceof DataFlow::ArrayCreationNode and
|
||||
result = "array creation"
|
||||
or
|
||||
// UI is unrelated to SQL
|
||||
call.getCalleeName().regexpMatch("(?i).*(render|html).*") and
|
||||
result = "HTML / rendering"
|
||||
)
|
||||
) and
|
||||
not (
|
||||
// Explicitly allow the following heuristic sinks.
|
||||
//
|
||||
// These are copied from the `HeuristicSqlInjectionSink` class defined within
|
||||
// `codeql/javascript/ql/src/semmle/javascript/heuristics/AdditionalSinks.qll`.
|
||||
// We can't reuse the class because importing that file would cause us to treat these
|
||||
// heuristic sinks as known sinks.
|
||||
isAssignedToOrConcatenatedWith(sinkCandidate, "(?i)(sql|query)") or
|
||||
isArgTo(sinkCandidate, "(?i)(query)") or
|
||||
isConcatenatedWithString(sinkCandidate,
|
||||
"(?s).*(ALTER|COUNT|CREATE|DATABASE|DELETE|DISTINCT|DROP|FROM|GROUP|INSERT|INTO|LIMIT|ORDER|SELECT|TABLE|UPDATE|WHERE).*")
|
||||
)
|
||||
}
|
||||
}
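// For illustration, hypothetical sink candidates that this filter excludes, with their reasons:
//
// ```js
// db.prepare("SELECT * FROM users WHERE id = ?").run(userId);  // prepared SQL statement
// res.render("profile", data);                                 // HTML / rendering
// ```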
|
||||
class SqlInjectionATMConfig extends ATMConfig {
|
||||
SqlInjectionATMConfig() { this = "SqlInjectionATMConfig" }
|
||||
|
||||
override predicate isKnownSource(DataFlow::Node source) { source instanceof SqlInjection::Source }
|
||||
|
||||
override predicate isKnownSink(DataFlow::Node sink) { sink instanceof SqlInjection::Sink }
|
||||
|
||||
override predicate isEffectiveSink(DataFlow::Node sinkCandidate) {
|
||||
not exists(SinkEndpointFilter::getAReasonSinkExcluded(sinkCandidate))
|
||||
}
|
||||
|
||||
override EndpointType getASinkEndpointType() { result instanceof SqlInjectionSinkType }
|
||||
}
|
||||
|
||||
/**
|
||||
* A taint-tracking configuration for reasoning about SQL injection vulnerabilities.
|
||||
*
|
||||
* This is largely a copy of the taint tracking configuration for the standard SQL injection
|
||||
* query, except additional sinks have been added using the sink endpoint filter.
|
||||
*/
|
||||
class Configuration extends TaintTracking::Configuration {
|
||||
Configuration() { this = "SqlInjectionATM" }
|
||||
|
||||
override predicate isSource(DataFlow::Node source) { source instanceof SqlInjection::Source }
|
||||
|
||||
override predicate isSink(DataFlow::Node sink) {
|
||||
sink instanceof SqlInjection::Sink or any(SqlInjectionATMConfig cfg).isEffectiveSink(sink)
|
||||
}
|
||||
|
||||
override predicate isSanitizer(DataFlow::Node node) {
|
||||
super.isSanitizer(node) or
|
||||
node instanceof SqlInjection::Sanitizer
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,137 @@
/**
* For internal use only.
*
* Provides classes and predicates that are useful for endpoint filters.
*
* The standard use of this library is to make use of `isPotentialEffectiveSink/1`
*/

private import javascript
private import semmle.javascript.filters.ClassifyFiles as ClassifyFiles
private import semmle.javascript.heuristics.SyntacticHeuristics
private import CoreKnowledge as CoreKnowledge

/** Provides a set of reasons why a given data flow node should be excluded as a sink candidate. */
string getAReasonSinkExcluded(DataFlow::Node n) {
not flowsToArgumentOfLikelyExternalLibraryCall(n) and
result = "not an argument to a likely external library call"
or
isArgumentToModeledFunction(n) and result = "argument to modeled function"
or
isArgumentToSinklessLibrary(n) and result = "argument to sinkless library"
or
isSanitizer(n) and result = "sanitizer"
or
isPredicate(n) and result = "predicate"
or
isHash(n) and result = "hash"
or
isNumeric(n) and result = "numeric"
or
// Ignore candidate sinks within externs, generated, library, and test code
exists(string category | category = ["externs", "generated", "library", "test"] |
ClassifyFiles::classify(n.getFile(), category) and
result = "in " + category + " file"
)
}

/**
* Holds if the node `n` is an argument to a function that has a manual model.
*/
predicate isArgumentToModeledFunction(DataFlow::Node n) {
exists(DataFlow::InvokeNode invk, DataFlow::Node known |
invk.getAnArgument() = n and invk.getAnArgument() = known and isSomeModeledArgument(known)
)
}

/**
* Holds if the node `n` is an argument that has a manual model.
*/
predicate isSomeModeledArgument(DataFlow::Node n) {
CoreKnowledge::isKnownLibrarySink(n) or
CoreKnowledge::isKnownStepSrc(n) or
CoreKnowledge::isOtherModeledArgument(n, _)
}

/**
* Holds if `n` appears to be a numeric value.
*/
predicate isNumeric(DataFlow::Node n) { isReadFrom(n, ".*index.*") }

/**
* Holds if `n` is an argument to a library without sinks.
*/
predicate isArgumentToSinklessLibrary(DataFlow::Node n) {
exists(DataFlow::InvokeNode invk, DataFlow::SourceNode commonSafeLibrary, string libraryName |
libraryName = ["slugify", "striptags", "marked"]
|
commonSafeLibrary = DataFlow::moduleImport(libraryName) and
invk = [commonSafeLibrary, commonSafeLibrary.getAPropertyRead()].getAnInvocation() and
n = invk.getAnArgument()
)
}

predicate isSanitizer(DataFlow::Node n) {
exists(DataFlow::CallNode call | n = call.getAnArgument() |
call.getCalleeName().regexpMatch("(?i).*(escape|valid(ate)?|sanitize|purify).*")
)
}

predicate isPredicate(DataFlow::Node n) {
exists(DataFlow::CallNode call | n = call.getAnArgument() |
call.getCalleeName().regexpMatch("(equals|(|is|has|can)(_|[A-Z])).*")
)
}

predicate isHash(DataFlow::Node n) {
exists(DataFlow::CallNode call | n = call.getAnArgument() |
call.getCalleeName().regexpMatch("(?i)^(sha\\d*|md5|hash)$")
)
}
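As a rough JavaScript illustration (hypothetical identifiers; not part of this commit), arguments in calls like the following are dropped as sink candidates by the predicates above.

const striptags = require("striptags");
striptags(userInput);    // argument to a known sinkless library
escapeHtml(userInput);   // callee name matches the sanitizer name pattern
isValidName(userInput);  // callee name matches the predicate name pattern
sha256(userInput);       // callee name matches the hash name pattern
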
/**
* Holds if the data flow node is a (possibly indirect) argument of a likely external library call.
*
* This includes direct arguments of likely external library calls as well as nested object
* literals within those calls.
*/
predicate flowsToArgumentOfLikelyExternalLibraryCall(DataFlow::Node n) {
n = getACallWithoutCallee().getAnArgument()
or
exists(DataFlow::SourceNode src | flowsToArgumentOfLikelyExternalLibraryCall(src) |
n = src.getAPropertyWrite().getRhs()
)
or
exists(DataFlow::ArrayCreationNode arr | flowsToArgumentOfLikelyExternalLibraryCall(arr) |
n = arr.getAnElement()
)
}

/**
* Get calls which are likely to be to external non-built-in libraries.
*/
DataFlow::CallNode getALikelyExternalLibraryCall() { result = getACallWithoutCallee() }

/**
* Gets a node that flows to callback-parameter `p`.
*/
private DataFlow::SourceNode getACallback(DataFlow::ParameterNode p, DataFlow::TypeBackTracker t) {
t.start() and
result = p and
any(DataFlow::FunctionNode f).getLastParameter() = p and
exists(p.getACall())
or
exists(DataFlow::TypeBackTracker t2 | result = getACallback(p, t2).backtrack(t2, t))
}

/**
* Get calls for which we do not have the callee (i.e. the definition of the called function). This
* acts as a heuristic for identifying calls to external library functions.
*/
private DataFlow::CallNode getACallWithoutCallee() {
forall(Function callee | callee = result.getACallee() | callee.getTopLevel().isExterns()) and
not exists(DataFlow::ParameterNode param, DataFlow::FunctionNode callback |
param.flowsTo(result.getCalleeNode()) and
callback = getACallback(param, DataFlow::TypeBackTracker::end())
)
}
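As a rough JavaScript illustration (the module name and identifiers are hypothetical, and whether a callee resolves depends on what is in the database), calls whose callee is not found in the analyzed code are treated as likely external library calls, and their arguments become sink candidates, including values nested in object literals and arrays.

const client = require("some-unresolved-lib");
client.send(userInput);            // direct argument of a likely external library call
client.send({ body: userInput });  // nested in an object literal argument
client.send([userInput, extra]);   // element of an array argument
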
@@ -0,0 +1,123 @@
/**
* For internal use only.
*
* Defines shared code used by the path injection boosted query.
*/

import semmle.javascript.heuristics.SyntacticHeuristics
import semmle.javascript.security.dataflow.TaintedPathCustomizations
import AdaptiveThreatModeling
import CoreKnowledge as CoreKnowledge
import StandardEndpointFilters as StandardEndpointFilters

/**
* This module provides logic to filter candidate sinks to those which are likely path injection
* sinks.
*/
module SinkEndpointFilter {
private import javascript
private import TaintedPath

/**
* Provides a set of reasons why a given data flow node should be excluded as a sink candidate.
*
* If this predicate has no results for a sink candidate `n`, then we should treat `n` as an
* effective sink.
*/
string getAReasonSinkExcluded(DataFlow::Node sinkCandidate) {
result = StandardEndpointFilters::getAReasonSinkExcluded(sinkCandidate) and
not (
// Explicitly allow the following heuristic sinks.
//
// These are mostly copied from the `HeuristicTaintedPathSink` class defined within
// `codeql/javascript/ql/src/semmle/javascript/heuristics/AdditionalSinks.qll`.
// We can't reuse the class because importing that file would cause us to treat these
// heuristic sinks as known sinks.
isAssignedToOrConcatenatedWith(sinkCandidate, "(?i)(file|folder|dir|absolute)")
or
isArgTo(sinkCandidate, "(?i)(get|read)file")
or
exists(string pathPattern |
// paths with at least two parts, and either a trailing or leading slash
pathPattern = "(?i)([a-z0-9_.-]+/){2,}" or
pathPattern = "(?i)(/[a-z0-9_.-]+){2,}"
|
isConcatenatedWithString(sinkCandidate, pathPattern)
)
or
isConcatenatedWithStrings(".*/", sinkCandidate, "/.*")
or
// In addition to the names from `HeuristicTaintedPathSink` in the
// `isAssignedToOrConcatenatedWith` predicate call above, we also allow the noisier "path"
// name.
isAssignedToOrConcatenatedWith(sinkCandidate, "(?i)path")
)
}
}
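For intuition only (hypothetical JavaScript, not part of this commit), the allow-listed heuristics above roughly keep candidates like these as effective path injection sinks.

fs.readFile("data/uploads/" + userInput, callback);  // concatenated with a path-like string constant
config.path = userInput;                             // assigned to a property named "path"
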
class TaintedPathATMConfig extends ATMConfig {
TaintedPathATMConfig() { this = "TaintedPathATMConfig" }

override predicate isKnownSource(DataFlow::Node source) { source instanceof TaintedPath::Source }

override predicate isKnownSink(DataFlow::Node sink) { sink instanceof TaintedPath::Sink }

override predicate isEffectiveSink(DataFlow::Node sinkCandidate) {
not exists(SinkEndpointFilter::getAReasonSinkExcluded(sinkCandidate))
}

override EndpointType getASinkEndpointType() { result instanceof TaintedPathSinkType }
}

/**
* A taint-tracking configuration for reasoning about path injection vulnerabilities.
*
* This is largely a copy of the taint tracking configuration for the standard path injection
* query, except additional ATM sinks have been added to the `isSink` predicate.
*/
class Configuration extends TaintTracking::Configuration {
Configuration() { this = "TaintedPathATM" }

override predicate isSource(DataFlow::Node source) { source instanceof TaintedPath::Source }

override predicate isSink(DataFlow::Node sink, DataFlow::FlowLabel label) {
label = sink.(TaintedPath::Sink).getAFlowLabel()
or
// Allow effective sinks to have any taint label
any(TaintedPathATMConfig cfg).isEffectiveSink(sink)
}

override predicate isSanitizer(DataFlow::Node node) { node instanceof TaintedPath::Sanitizer }

override predicate isSanitizerGuard(TaintTracking::SanitizerGuardNode node) {
node instanceof BarrierGuardNodeAsSanitizerGuardNode
}

override predicate isAdditionalFlowStep(
DataFlow::Node src, DataFlow::Node dst, DataFlow::FlowLabel srclabel,
DataFlow::FlowLabel dstlabel
) {
TaintedPath::isAdditionalTaintedPathFlowStep(src, dst, srclabel, dstlabel)
}
}

/**
* This class provides sanitizer guards for path injection.
*
* The standard library path injection query uses a data flow configuration, and therefore defines
* barrier nodes. However we're using a taint tracking configuration for path injection to find new
* kinds of less certain results. Since taint tracking configurations use sanitizer guards instead
* of barrier guards, we port the barrier guards for the boosted query from the standard library to
* sanitizer guards here.
*/
class BarrierGuardNodeAsSanitizerGuardNode extends TaintTracking::LabeledSanitizerGuardNode {
BarrierGuardNodeAsSanitizerGuardNode() { this instanceof TaintedPath::BarrierGuardNode }

override predicate sanitizes(boolean outcome, Expr e) {
blocks(outcome, e) or blocks(outcome, e, _)
}

override predicate sanitizes(boolean outcome, Expr e, DataFlow::FlowLabel label) {
sanitizes(outcome, e)
}
}
@@ -0,0 +1,103 @@
/**
* For internal use only.
*
* Defines shared code used by the XSS boosted query.
*/

private import semmle.javascript.heuristics.SyntacticHeuristics
private import semmle.javascript.security.dataflow.DomBasedXssCustomizations
import AdaptiveThreatModeling
import CoreKnowledge as CoreKnowledge
import StandardEndpointFilters as StandardEndpointFilters

/**
* This module provides logic to filter candidate sinks to those which are likely XSS sinks.
*/
module SinkEndpointFilter {
private import javascript
private import DomBasedXss

/**
* Provides a set of reasons why a given data flow node should be excluded as a sink candidate.
*
* If this predicate has no results for a sink candidate `n`, then we should treat `n` as an
* effective sink.
*/
string getAReasonSinkExcluded(DataFlow::Node sinkCandidate) {
(
result = StandardEndpointFilters::getAReasonSinkExcluded(sinkCandidate)
or
exists(DataFlow::CallNode call | sinkCandidate = call.getAnArgument() |
call.getCalleeName() = "setState"
) and
result = "setState calls ought to be safe in react applications"
) and
not (
// Explicitly allow the following heuristic sinks.
//
// These are copied from the `HeuristicDomBasedXssSink` class defined within
// `codeql/javascript/ql/src/semmle/javascript/heuristics/AdditionalSinks.qll`.
// We can't reuse the class because importing that file would cause us to treat these
// heuristic sinks as known sinks.
isAssignedToOrConcatenatedWith(sinkCandidate, "(?i)(html|innerhtml)")
or
isArgTo(sinkCandidate, "(?i)(html|render)")
or
sinkCandidate instanceof StringOps::HtmlConcatenationLeaf
or
isConcatenatedWithStrings("(?is).*<[a-z ]+.*", sinkCandidate, "(?s).*>.*")
or
// In addition to the heuristic sinks from `HeuristicDomBasedXssSink`, explicitly allow
// property writes like `elem.innerHTML = <TAINT>` that may not be picked up as HTML
// concatenation leaves.
exists(DataFlow::PropWrite pw |
pw.getPropertyName().regexpMatch("(?i).*html*") and
pw.getRhs() = sinkCandidate
)
)
}
}
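As a rough JavaScript illustration (hypothetical identifiers, not part of this commit), the filter above excludes direct arguments of React setState calls while keeping writes to HTML-like properties as effective sinks.

this.setState(nextState);       // excluded: direct argument of a setState call
element.innerHTML = userInput;  // kept: written to a property whose name contains "html"
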
class DomBasedXssATMConfig extends ATMConfig {
DomBasedXssATMConfig() { this = "DomBasedXssATMConfig" }

override predicate isKnownSource(DataFlow::Node source) { source instanceof DomBasedXss::Source }

override predicate isKnownSink(DataFlow::Node sink) { sink instanceof DomBasedXss::Sink }

override predicate isEffectiveSink(DataFlow::Node sinkCandidate) {
not exists(SinkEndpointFilter::getAReasonSinkExcluded(sinkCandidate))
}

override EndpointType getASinkEndpointType() { result instanceof XssSinkType }
}

/**
* A taint-tracking configuration for reasoning about XSS vulnerabilities.
*
* This is largely a copy of the taint tracking configuration for the standard XSSThroughDom query,
* except additional ATM sinks have been added to the `isSink` predicate.
*/
class Configuration extends TaintTracking::Configuration {
Configuration() { this = "DomBasedXssATMConfiguration" }

override predicate isSource(DataFlow::Node source) { source instanceof DomBasedXss::Source }

override predicate isSink(DataFlow::Node sink) {
sink instanceof DomBasedXss::Sink or
any(DomBasedXssATMConfig cfg).isEffectiveSink(sink)
}

override predicate isSanitizer(DataFlow::Node node) {
super.isSanitizer(node) or
node instanceof DomBasedXss::Sanitizer
}

override predicate isSanitizerGuard(TaintTracking::SanitizerGuardNode guard) {
guard instanceof DomBasedXss::SanitizerGuard
}

override predicate isSanitizerEdge(DataFlow::Node pred, DataFlow::Node succ) {
DomBasedXss::isOptionallySanitizedEdge(pred, succ)
}
}
@@ -0,0 +1,6 @@
name: codeql/javascript-experimental-atm-lib
version: 0.0.0
extractor: javascript
library: true
dependencies:
codeql/javascript-all: "*"
@@ -0,0 +1,30 @@
/**
* For internal use only.
*
* @name NoSQL database query built from user-controlled sources (boosted)
* @description Building a database query from user-controlled sources is vulnerable to insertion of
* malicious code by the user.
* @kind path-problem
* @scored
* @problem.severity error
* @security-severity 8.8
* @id adaptive-threat-modeling/js/nosql-injection
* @tags experimental experimental/atm security
*/

import ATM::ResultsInfo
import DataFlow::PathGraph
import experimental.adaptivethreatmodeling.NosqlInjectionATM

from
DataFlow::Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink, float score,
string scoreString
where
cfg.hasFlowPath(source, sink) and
not isFlowLikelyInBaseQuery(source.getNode(), sink.getNode()) and
score = getScoreForFlow(source.getNode(), sink.getNode()) and
scoreString = getScoreStringForFlow(source.getNode(), sink.getNode())
select sink.getNode(), source, sink,
"[Score = " + scoreString + "] This may be a NoSQL query depending on $@ " +
getAdditionalAlertInfo(source.getNode(), sink.getNode()), source.getNode(),
"a user-provided value", score
@@ -0,0 +1,30 @@
/**
* For internal use only.
*
* @name SQL database query built from user-controlled sources (boosted)
* @description Building a database query from user-controlled sources is vulnerable to insertion of
* malicious code by the user.
* @kind path-problem
* @scored
* @problem.severity error
* @security-severity 8.8
* @id adaptive-threat-modeling/js/sql-injection
* @tags experimental experimental/atm security
*/

import experimental.adaptivethreatmodeling.SqlInjectionATM
import ATM::ResultsInfo
import DataFlow::PathGraph

from
DataFlow::Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink, float score,
string scoreString
where
cfg.hasFlowPath(source, sink) and
not isFlowLikelyInBaseQuery(source.getNode(), sink.getNode()) and
score = getScoreForFlow(source.getNode(), sink.getNode()) and
scoreString = getScoreStringForFlow(source.getNode(), sink.getNode())
select sink.getNode(), source, sink,
"[Score = " + scoreString + "] This may be a js/sql result depending on $@ " +
getAdditionalAlertInfo(source.getNode(), sink.getNode()), source.getNode(),
"a user-provided value", score
@@ -0,0 +1,30 @@
/**
* For internal use only.
*
* @name Uncontrolled data used in path expression (boosted)
* @description Accessing paths influenced by users can allow an attacker to access
* unexpected resources.
* @kind path-problem
* @scored
* @problem.severity error
* @security-severity 7.5
* @id adaptive-threat-modeling/js/path-injection
* @tags experimental experimental/atm security
*/

import ATM::ResultsInfo
import DataFlow::PathGraph
import experimental.adaptivethreatmodeling.TaintedPathATM

from
DataFlow::Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink, float score,
string scoreString
where
cfg.hasFlowPath(source, sink) and
not isFlowLikelyInBaseQuery(source.getNode(), sink.getNode()) and
score = getScoreForFlow(source.getNode(), sink.getNode()) and
scoreString = getScoreStringForFlow(source.getNode(), sink.getNode())
select sink.getNode(), source, sink,
"[Score = " + scoreString + "] This may be a js/path-injection result depending on $@ " +
getAdditionalAlertInfo(source.getNode(), sink.getNode()), source.getNode(),
"a user-provided value", score
@@ -0,0 +1,31 @@
/**
* For internal use only.
*
* @name Client-side cross-site scripting (boosted)
* @description Writing user input directly to the DOM allows for
* a cross-site scripting vulnerability.
* @kind path-problem
* @scored
* @problem.severity error
* @security-severity 6.1
* @id adaptive-threat-modeling/js/xss
* @tags experimental experimental/atm security
*/

import javascript
import ATM::ResultsInfo
import DataFlow::PathGraph
import experimental.adaptivethreatmodeling.XssATM

from
DataFlow::Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink, float score,
string scoreString
where
cfg.hasFlowPath(source, sink) and
not isFlowLikelyInBaseQuery(source.getNode(), sink.getNode()) and
score = getScoreForFlow(source.getNode(), sink.getNode()) and
scoreString = getScoreStringForFlow(source.getNode(), sink.getNode())
select sink.getNode(), source, sink,
"[Score = " + scoreString + "] This may be a js/xss result depending on $@ " +
getAdditionalAlertInfo(source.getNode(), sink.getNode()), source.getNode(),
"a user-provided value", score
@@ -0,0 +1,8 @@
- description: ATM boosted Code Scanning queries for JavaScript
- queries: .
- include:
id:
- adaptive-threat-modeling/js/nosql-injection
- adaptive-threat-modeling/js/sql-injection
- adaptive-threat-modeling/js/path-injection
- adaptive-threat-modeling/js/xss
@@ -0,0 +1,4 @@
---
dependencies: {}
compiled: false
lockVersion: 1.0.0
@@ -0,0 +1,7 @@
name: codeql/javascript-experimental-atm-src
language: javascript
version: 0.0.0
suites: codeql-suites
defaultSuiteFile: codeql-suites/javascript-atm-code-scanning.qls
dependencies:
codeql/javascript-experimental-atm-lib: "*"
@@ -0,0 +1 @@
<queries language="javascript"/>
@@ -0,0 +1,2 @@
codescanning
* Problems with extraction that in most cases won't completely break the analysis are now reported as warnings rather than errors.
@@ -1,23 +0,0 @@
/**
* @name Python extraction errors
* @description List all extraction errors for Python files in the source code directory.
* @kind diagnostic
* @id py/diagnostics/extraction-errors
*/

import python

/**
* Gets the SARIF severity for errors.
*
* See point 3.27.10 in https://docs.oasis-open.org/sarif/sarif/v2.0/sarif-v2.0.html for
* what error means.
*/
int getErrorSeverity() { result = 2 }

from SyntaxError error, File file
where
file = error.getFile() and
exists(file.getRelativePath())
select error, "Extraction failed in " + file + " with error " + error.getMessage(),
getErrorSeverity()
python/ql/src/Diagnostics/ExtractionWarnings.ql
@@ -0,0 +1,36 @@
/**
* @name Python extraction warnings
* @description List all extraction warnings for Python files in the source code directory.
* @kind diagnostic
* @id py/diagnostics/extraction-warnings
*/

import python

/**
* Gets the SARIF severity for warnings.
*
* See https://docs.oasis-open.org/sarif/sarif/v2.1.0/csprd01/sarif-v2.1.0-csprd01.html#_Toc10541338
*/
int getWarningSeverity() { result = 1 }

// The spec
// https://docs.oasis-open.org/sarif/sarif/v2.1.0/csprd01/sarif-v2.1.0-csprd01.html#_Toc10541338
// defines error and warning as:
//
// "error": A serious problem was found. The condition encountered by the tool resulted
// in the analysis being halted or caused the results to be incorrect or incomplete.
//
// "warning": A problem that is not considered serious was found. The condition
// encountered by the tool is such that it is uncertain whether a problem occurred, or
// is such that the analysis might be incomplete but the results that were generated are
// probably valid.
//
// So SyntaxErrors are reported at the warning level, since analysis might be incomplete
// but the results that were generated are probably valid.
from SyntaxError error, File file
where
file = error.getFile() and
exists(file.getRelativePath())
select error, "Extraction failed in " + file + " with error " + error.getMessage(),
getWarningSeverity()
@@ -1 +0,0 @@
Diagnostics/ExtractionErrors.ql
@@ -1,2 +1,2 @@
| bad_encoding.py:2:11:2:11 | Encoding Error | Extraction failed in bad_encoding.py with error 'utf-8' codec can't decode byte 0x9d in position 87: invalid start byte | 2 |
| syntax_error.py:1:31:1:31 | Syntax Error | Extraction failed in syntax_error.py with error Syntax Error | 2 |
| bad_encoding.py:2:11:2:11 | Encoding Error | Extraction failed in bad_encoding.py with error 'utf-8' codec can't decode byte 0x9d in position 87: invalid start byte | 1 |
| syntax_error.py:1:31:1:31 | Syntax Error | Extraction failed in syntax_error.py with error Syntax Error | 1 |
@@ -0,0 +1 @@
Diagnostics/ExtractionWarnings.ql